Merge tag 'amd-drm-next-5.9-2020-07-01' of git://people.freedesktop.org/~agd5f/linux into drm-next

amd-drm-next-5.9-2020-07-01:

amdgpu:
- DC DMUB updates
- HDCP fixes
- Thermal interrupt fixes
- Add initial support for Sienna Cichlid GPU
- Add support for unique id on Arcturus
- Major swSMU code cleanup
- Skip BAR resizing if the BIOS already did it
- Fixes for DCN bandwidth calculations
- Runtime PM reference count fixes
- Add initial UVD support for SI
- Add support for ASSR on eDP links
- Lots of misc fixes and cleanups
- Enable runtime PM on vega10 boards that support BACO
- RAS fixes
- SR-IOV fixes
- Use IP discovery table on renoir
- DC stream synchronization fixes

amdkfd:
- Track SDMA usage per process
- Fix GCC10 compiler warnings
- Locking fix

radeon:
- Default to on-chip GART for AGP boards on all arches
- Runtime PM reference count fixes

UAPI:
- Update comments to clarify MTYPE

From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200701155041.1102829-1-alexander.deucher@amd.com
Signed-off-by: Dave Airlie <airlied@redhat.com>
Dave Airlie 2020-07-02 15:17:31 +10:00
commit 9555152beb
358 changed files with 222354 additions and 8606 deletions


@ -63,12 +63,13 @@ amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o
amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o \
uvd_v3_1.o
amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \
arct_reg_init.o navi12_reg_init.o mxgpu_nv.o
arct_reg_init.o navi12_reg_init.o mxgpu_nv.o sienna_cichlid_reg_init.o
# add DF block
amdgpu-y += \
@ -80,7 +81,7 @@ amdgpu-y += \
gmc_v7_0.o \
gmc_v8_0.o \
gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \
gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o
gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o
# add UMC block
amdgpu-y += \
@ -129,7 +130,8 @@ amdgpu-y += \
sdma_v2_4.o \
sdma_v3_0.o \
sdma_v4_0.o \
sdma_v5_0.o
sdma_v5_0.o \
sdma_v5_2.o
# add MES block
amdgpu-y += \
@ -154,15 +156,18 @@ amdgpu-y += \
vcn_v1_0.o \
vcn_v2_0.o \
vcn_v2_5.o \
vcn_v3_0.o \
amdgpu_jpeg.o \
jpeg_v1_0.o \
jpeg_v2_0.o \
jpeg_v2_5.o
jpeg_v2_5.o \
jpeg_v3_0.o
# add ATHUB block
amdgpu-y += \
athub_v1_0.o \
athub_v2_0.o
athub_v2_0.o \
athub_v2_1.o
# add amdkfd interfaces
amdgpu-y += amdgpu_amdkfd.o
@ -172,12 +177,13 @@ AMDKFD_PATH := ../amdkfd
include $(FULL_AMD_PATH)/amdkfd/Makefile
amdgpu-y += $(AMDKFD_FILES)
amdgpu-y += \
amdgpu_amdkfd_fence.o \
amdgpu_amdkfd_gpuvm.o \
amdgpu_amdkfd_gfx_v8.o \
amdgpu_amdkfd_gfx_v9.o \
amdgpu_amdkfd_arcturus.o \
amdgpu_amdkfd_gfx_v10.o
amdgpu_amdkfd_fence.o \
amdgpu_amdkfd_gpuvm.o \
amdgpu_amdkfd_gfx_v8.o \
amdgpu_amdkfd_gfx_v9.o \
amdgpu_amdkfd_arcturus.o \
amdgpu_amdkfd_gfx_v10.o \
amdgpu_amdkfd_gfx_v10_3.o
ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
amdgpu-y += amdgpu_amdkfd_gfx_v7.o


@ -186,8 +186,10 @@ extern int amdgpu_noretry;
extern int amdgpu_force_asic_type;
#ifdef CONFIG_HSA_AMD
extern int sched_policy;
extern bool debug_evictions;
#else
static const int sched_policy = KFD_SCHED_POLICY_HWS;
static const bool debug_evictions; /* = false */
#endif
extern int amdgpu_tmz;
@ -652,10 +654,6 @@ struct amdgpu_fw_vram_usage {
u64 size;
struct amdgpu_bo *reserved_bo;
void *va;
/* GDDR6 training support flag.
*/
bool mem_train_support;
};
/*
@ -990,9 +988,12 @@ struct amdgpu_device {
/* Chip product information */
char product_number[16];
char product_name[32];
char serial[16];
char serial[20];
struct amdgpu_autodump autodump;
atomic_t throttling_logging_enabled;
struct ratelimit_state throttling_logging_rs;
};
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)


@ -64,7 +64,9 @@ struct amdgpu_atif {
struct amdgpu_atif_notifications notifications;
struct amdgpu_atif_functions functions;
struct amdgpu_atif_notification_cfg notification_cfg;
struct amdgpu_encoder *encoder_for_bl;
#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
struct backlight_device *bd;
#endif
struct amdgpu_dm_backlight_caps backlight_caps;
};
@ -444,45 +446,21 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count);
if ((req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) &&
!amdgpu_device_has_dc_support(adev)) {
struct amdgpu_encoder *enc = atif->encoder_for_bl;
if (enc) {
struct amdgpu_encoder_atom_dig *dig = enc->enc_priv;
if (req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) {
#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
if (atif->bd) {
DRM_DEBUG_DRIVER("Changing brightness to %d\n",
req.backlight_level);
amdgpu_display_backlight_set_level(adev, enc, req.backlight_level);
#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
backlight_force_update(dig->bl_dev,
BACKLIGHT_UPDATE_HOTKEY);
#endif
}
}
#if defined(CONFIG_DRM_AMD_DC)
#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
if ((req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) &&
amdgpu_device_has_dc_support(adev)) {
struct amdgpu_display_manager *dm = &adev->dm;
struct backlight_device *bd = dm->backlight_dev;
if (bd) {
DRM_DEBUG_DRIVER("Changing brightness to %d\n",
req.backlight_level);
/*
* XXX backlight_device_set_brightness() is
* hardwired to post BACKLIGHT_UPDATE_SYSFS.
* It probably should accept 'reason' parameter.
*/
backlight_device_set_brightness(bd, req.backlight_level);
backlight_device_set_brightness(atif->bd, req.backlight_level);
}
#endif
}
#endif
#endif
if (req.pending & ATIF_DGPU_DISPLAY_EVENT) {
if (adev->flags & AMD_IS_PX) {
pm_runtime_get_sync(adev->ddev->dev);
@ -829,23 +807,32 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
adev->atif = atif;
if (atif->notifications.brightness_change) {
struct drm_encoder *tmp;
#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
if (amdgpu_device_has_dc_support(adev)) {
#if defined(CONFIG_DRM_AMD_DC)
struct amdgpu_display_manager *dm = &adev->dm;
atif->bd = dm->backlight_dev;
#endif
} else {
struct drm_encoder *tmp;
/* Find the encoder controlling the brightness */
list_for_each_entry(tmp, &adev->ddev->mode_config.encoder_list,
head) {
struct amdgpu_encoder *enc = to_amdgpu_encoder(tmp);
/* Find the encoder controlling the brightness */
list_for_each_entry(tmp, &adev->ddev->mode_config.encoder_list,
head) {
struct amdgpu_encoder *enc = to_amdgpu_encoder(tmp);
if ((enc->devices & (ATOM_DEVICE_LCD_SUPPORT)) &&
enc->enc_priv) {
struct amdgpu_encoder_atom_dig *dig = enc->enc_priv;
if (dig->bl_dev) {
atif->encoder_for_bl = enc;
break;
if ((enc->devices & (ATOM_DEVICE_LCD_SUPPORT)) &&
enc->enc_priv) {
struct amdgpu_encoder_atom_dig *dig = enc->enc_priv;
if (dig->bl_dev) {
atif->bd = dig->bl_dev;
break;
}
}
}
}
}
#endif
if (atif->functions.sbios_requests && !atif->functions.system_params) {
/* XXX check this workaround, if sbios request function is


@ -0,0 +1,834 @@
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/mmu_context.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gc/gc_10_3_0_offset.h"
#include "gc/gc_10_3_0_sh_mask.h"
#include "navi10_enum.h"
#include "oss/osssys_5_0_0_offset.h"
#include "oss/osssys_5_0_0_sh_mask.h"
#include "soc15_common.h"
#include "v10_structs.h"
#include "nv.h"
#include "nvd.h"
#include "gfxhub_v2_1.h"
enum hqd_dequeue_request_type {
NO_ACTION = 0,
DRAIN_PIPE,
RESET_WAVES,
SAVE_WAVES
};
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
return (struct amdgpu_device *)kgd;
}
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
uint32_t queue, uint32_t vmid)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, mec, pipe, queue, vmid);
}
static void unlock_srbm(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t queue_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
lock_srbm(kgd, mec, pipe, queue_id, 0);
}
static uint64_t get_queue_mask(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id)
{
unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
queue_id;
return 1ull << bit;
}
static void release_queue(struct kgd_dev *kgd)
{
unlock_srbm(kgd);
}
static void program_sh_mem_settings_v10_3(struct kgd_dev *kgd, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
lock_srbm(kgd, 0, 0, 0, vmid);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
/* APE1 no longer exists on GFX9 */
unlock_srbm(kgd);
}
/* ATC is defeatured on Sienna_Cichlid */
static int set_pasid_vmid_mapping_v10_3(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;
/* Mapping vmid to pasid also for IH block */
pr_debug("mapping vmid %d -> pasid %d in IH block for GFX client\n",
vmid, pasid);
WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, value);
return 0;
}
static int init_interrupts_v10_3(struct kgd_dev *kgd, uint32_t pipe_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
uint32_t pipe;
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
lock_srbm(kgd, mec, pipe, 0, 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
unlock_srbm(kgd);
return 0;
}
static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
unsigned int engine_id,
unsigned int queue_id)
{
uint32_t sdma_engine_reg_base = 0;
uint32_t sdma_rlc_reg_offset;
switch (engine_id) {
default:
dev_warn(adev->dev,
"Invalid sdma engine id (%d), using engine id 0\n",
engine_id);
/* fall through */
case 0:
sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
break;
case 1:
sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
break;
case 2:
sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
break;
case 3:
sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
mmSDMA3_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
break;
}
sdma_rlc_reg_offset = sdma_engine_reg_base
+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
queue_id, sdma_rlc_reg_offset);
return sdma_rlc_reg_offset;
}
static inline struct v10_compute_mqd *get_mqd(void *mqd)
{
return (struct v10_compute_mqd *)mqd;
}
static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
{
return (struct v10_sdma_mqd *)mqd;
}
static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
struct mm_struct *mm)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_compute_mqd *m;
uint32_t *mqd_hqd;
uint32_t reg, hqd_base, data;
m = get_mqd(mqd);
pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
acquire_queue(kgd, pipe_id, queue_id);
/* HIQ is set during driver init period with vmid set to 0*/
if (m->cp_hqd_vmid == 0) {
uint32_t value, mec, pipe;
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
((mec << 5) | (pipe << 3) | queue_id | 0x80));
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
}
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
mqd_hqd = &m->cp_mqd_base_addr_lo;
hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
for (reg = hqd_base;
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
WREG32(reg, mqd_hqd[reg - hqd_base]);
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
if (wptr) {
/* Don't read wptr with get_user because the user
* context may not be accessible (if this function
* runs in a work queue). Instead trigger a one-shot
* polling read from memory in the CP. This assumes
* that wptr is GPU-accessible in the queue's VMID via
* ATC or SVM. WPTR==RPTR before starting the poll so
* the CP starts fetching new commands from the right
* place.
*
* Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
* tricky. Assume that the queue didn't overflow. The
* number of valid bits in the 32-bit RPTR depends on
* the queue size. The remaining bits are taken from
* the saved 64-bit WPTR. If the WPTR wrapped, add the
* queue size.
*/
uint32_t queue_size =
2 << REG_GET_FIELD(m->cp_hqd_pq_control,
CP_HQD_PQ_CONTROL, QUEUE_SIZE);
uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
guessed_wptr += queue_size;
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
lower_32_bits(guessed_wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
upper_32_bits(guessed_wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
lower_32_bits((uint64_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uint64_t)wptr));
pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
(uint32_t)get_queue_mask(adev, pipe_id, queue_id));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
(uint32_t)get_queue_mask(adev, pipe_id, queue_id));
}
/* Start the EOP fetcher */
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
REG_SET_FIELD(m->cp_hqd_eop_rptr,
CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
release_queue(kgd);
return 0;
}
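
The WPTR reconstruction in hqd_load_v10_3() above is subtle, so here is a minimal, self-contained sketch of the same arithmetic outside the driver. The function name, the sample values, and the plain userspace types are illustrative assumptions for this note only; the reconstruction steps mirror the driver logic above.

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: rebuild a 64-bit write pointer from a 32-bit read
 * pointer plus the last saved 64-bit WPTR, as the comment in
 * hqd_load_v10_3() describes. queue_size is the ring size (a power of
 * two), and the queue is assumed not to have overflowed.
 */
static uint64_t guess_wptr(uint32_t rptr, uint32_t saved_wptr_lo,
			   uint32_t saved_wptr_hi, uint32_t queue_size)
{
	uint64_t wptr = rptr & (queue_size - 1);	/* valid low bits come from RPTR */

	if ((saved_wptr_lo & (queue_size - 1)) < wptr)	/* WPTR wrapped past RPTR */
		wptr += queue_size;
	wptr += saved_wptr_lo & ~(queue_size - 1);	/* upper bits of the low word */
	wptr += (uint64_t)saved_wptr_hi << 32;		/* high word is kept as-is */
	return wptr;
}

int main(void)
{
	/* hypothetical numbers: 256-entry queue, saved WPTR 0x10005, RPTR low bits 0x80 */
	printf("guessed wptr = 0x%llx\n",
	       (unsigned long long)guess_wptr(0x80, 0x10005, 0, 256));
	/* prints 0x10180: low bits 0x80, one wrap added, upper bits 0x10000 kept */
	return 0;
}
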
static int hiq_mqd_load_v10_3(struct kgd_dev *kgd, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t doorbell_off)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
struct v10_compute_mqd *m;
uint32_t mec, pipe;
int r;
m = get_mqd(mqd);
acquire_queue(kgd, pipe_id, queue_id);
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
spin_lock(&adev->gfx.kiq.ring_lock);
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
goto out_unlock;
}
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
amdgpu_ring_write(kiq_ring,
PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
PACKET3_MAP_QUEUES_QUEUE(queue_id) |
PACKET3_MAP_QUEUES_PIPE(pipe) |
PACKET3_MAP_QUEUES_ME((mec - 1)) |
PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
amdgpu_ring_write(kiq_ring,
PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
amdgpu_ring_commit(kiq_ring);
out_unlock:
spin_unlock(&adev->gfx.kiq.ring_lock);
release_queue(kgd);
return r;
}
static int hqd_dump_v10_3(struct kgd_dev *kgd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do { \
if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
break; \
(*dump)[i][0] = (addr) << 2; \
(*dump)[i++][1] = RREG32(addr); \
} while (0)
*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
acquire_queue(kgd, pipe_id, queue_id);
for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
DUMP_REG(reg);
release_queue(kgd);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
return 0;
}
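
For reference, the dump buffer filled by DUMP_REG() above stores (byte offset, value) pairs: the dword register offset is shifted left by 2 to give a byte address. Below is a rough sketch of how a consumer could walk such a buffer; the helper name and the sample data are made up for illustration and are not part of the KFD interface.

#include <stdint.h>
#include <stdio.h>

/* Illustrative reader for the (*dump)[i][0]/[1] layout used above:
 * column 0 is the register offset in bytes (dword offset << 2),
 * column 1 is the value that was read.
 */
static void print_hqd_dump(uint32_t (*dump)[2], uint32_t n_regs)
{
	for (uint32_t i = 0; i < n_regs; i++)
		printf("reg byte offset 0x%06x = 0x%08x\n", dump[i][0], dump[i][1]);
}

int main(void)
{
	uint32_t dump[2][2] = {	/* two fake entries for dword offsets 0x1000 and 0x1001 */
		{ 0x1000 << 2, 0xdeadbeef },
		{ 0x1001 << 2, 0x00000001 },
	};
	print_hqd_dump(dump, 2);
	return 0;
}
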
static int hqd_sdma_load_v10_3(struct kgd_dev *kgd, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
unsigned long end_jiffies;
uint32_t data;
uint64_t data64;
uint64_t __user *wptr64 = (uint64_t __user *)wptr;
m = get_sdma_mqd(mqd);
sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
end_jiffies = msecs_to_jiffies(2000) + jiffies;
while (true) {
data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
}
usleep_range(500, 1000);
}
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
m->sdmax_rlcx_doorbell_offset);
data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
ENABLE, 1);
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
m->sdmax_rlcx_rb_rptr);
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
m->sdmax_rlcx_rb_rptr_hi);
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
if (read_user_wptr(mm, wptr64, data64)) {
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
lower_32_bits(data64));
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
upper_32_bits(data64));
} else {
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
m->sdmax_rlcx_rb_rptr);
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
m->sdmax_rlcx_rb_rptr_hi);
}
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
m->sdmax_rlcx_rb_base_hi);
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
m->sdmax_rlcx_rb_rptr_addr_lo);
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
m->sdmax_rlcx_rb_rptr_addr_hi);
data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
RB_ENABLE, 1);
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
return 0;
}
static int hqd_sdma_dump_v10_3(struct kgd_dev *kgd,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
engine_id, queue_id);
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)
*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
DUMP_REG(sdma_rlc_reg_offset + reg);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
return 0;
}
static bool hqd_is_occupied_v10_3(struct kgd_dev *kgd, uint64_t queue_address,
uint32_t pipe_id, uint32_t queue_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t act;
bool retval = false;
uint32_t low, high;
acquire_queue(kgd, pipe_id, queue_id);
act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
if (act) {
low = lower_32_bits(queue_address >> 8);
high = upper_32_bits(queue_address >> 8);
if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
retval = true;
}
release_queue(kgd);
return retval;
}
static bool hqd_sdma_is_occupied_v10_3(struct kgd_dev *kgd, void *mqd)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
m = get_sdma_mqd(mqd);
sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
return true;
return false;
}
static int hqd_destroy_v10_3(struct kgd_dev *kgd, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
uint32_t temp;
struct v10_compute_mqd *m = get_mqd(mqd);
acquire_queue(kgd, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
switch (reset_type) {
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
type = DRAIN_PIPE;
break;
case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
type = RESET_WAVES;
break;
default:
type = DRAIN_PIPE;
break;
}
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("cp queue pipe %d queue %d preemption failed\n",
pipe_id, queue_id);
release_queue(kgd);
return -ETIME;
}
usleep_range(500, 1000);
}
release_queue(kgd);
return 0;
}
static int hqd_sdma_destroy_v10_3(struct kgd_dev *kgd, void *mqd,
unsigned int utimeout)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_rlc_reg_offset;
uint32_t temp;
unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
m = get_sdma_mqd(mqd);
sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
while (true) {
temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
}
usleep_range(500, 1000);
}
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
m->sdmax_rlcx_rb_rptr_hi =
RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
return 0;
}
static int address_watch_disable_v10_3(struct kgd_dev *kgd)
{
return 0;
}
static int address_watch_execute_v10_3(struct kgd_dev *kgd,
unsigned int watch_point_id,
uint32_t cntl_val,
uint32_t addr_hi,
uint32_t addr_lo)
{
return 0;
}
static int wave_control_execute_v10_3(struct kgd_dev *kgd,
uint32_t gfx_index_val,
uint32_t sq_cmd)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t data = 0;
mutex_lock(&adev->grbm_idx_mutex);
WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
INSTANCE_BROADCAST_WRITES, 1);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
SA_BROADCAST_WRITES, 1);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
SE_BROADCAST_WRITES, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
static uint32_t address_watch_get_offset_v10_3(struct kgd_dev *kgd,
unsigned int watch_point_id,
unsigned int reg_offset)
{
return 0;
}
static void set_vm_context_page_table_base_v10_3(struct kgd_dev *kgd, uint32_t vmid,
uint64_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
/* SDMA is on gfxhub as well for Navi1* series */
gfxhub_v2_1_setup_vm_pt_regs(adev, vmid, page_table_base);
}
#if 0
uint32_t enable_debug_trap_v10_3(struct kgd_dev *kgd,
uint32_t trap_debug_wave_launch_mode,
uint32_t vmid)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t data = 0;
uint32_t orig_wave_cntl_value;
uint32_t orig_stall_vmid;
mutex_lock(&adev->grbm_idx_mutex);
orig_wave_cntl_value = RREG32(SOC15_REG_OFFSET(GC,
0,
mmSPI_GDBG_WAVE_CNTL));
orig_stall_vmid = REG_GET_FIELD(orig_wave_cntl_value,
SPI_GDBG_WAVE_CNTL,
STALL_VMID);
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
data = 0;
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), orig_stall_vmid);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
uint32_t disable_debug_trap_v10_3(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
mutex_lock(&adev->grbm_idx_mutex);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
uint32_t set_wave_launch_trap_override_v10_3(struct kgd_dev *kgd,
uint32_t trap_override,
uint32_t trap_mask)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t data = 0;
mutex_lock(&adev->grbm_idx_mutex);
data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
data = 0;
data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK,
EXCP_EN, trap_mask);
data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK,
REPLACE, trap_override);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
uint32_t set_wave_launch_mode_v10_3(struct kgd_dev *kgd,
uint8_t wave_launch_mode,
uint32_t vmid)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t data = 0;
bool is_stall_mode;
bool is_mode_set;
is_stall_mode = (wave_launch_mode == 4);
is_mode_set = (wave_launch_mode != 0 && wave_launch_mode != 4);
mutex_lock(&adev->grbm_idx_mutex);
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
VMID_MASK, is_mode_set ? 1 << vmid : 0);
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
MODE, is_mode_set ? wave_launch_mode : 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
STALL_VMID, is_stall_mode ? 1 << vmid : 0);
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
STALL_RA, is_stall_mode ? 1 : 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
/* kgd_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
* The values read are:
* ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
* atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
* wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
* gws_wait_time -- Wait Count for Global Wave Syncs.
* que_sleep_wait_time -- Wait Count for Dequeue Retry.
* sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
* sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
* deq_retry_wait_time -- Wait Count for Global Wave Syncs.
*/
void get_iq_wait_times_v10_3(struct kgd_dev *kgd,
uint32_t *wait_times)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
}
void build_grace_period_packet_info_v10_3(struct kgd_dev *kgd,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data)
{
*reg_data = wait_times;
*reg_data = REG_SET_FIELD(*reg_data,
CP_IQ_WAIT_TIME2,
SCH_WAVE,
grace_period);
*reg_offset = mmCP_IQ_WAIT_TIME2;
}
#endif
const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
.program_sh_mem_settings = program_sh_mem_settings_v10_3,
.set_pasid_vmid_mapping = set_pasid_vmid_mapping_v10_3,
.init_interrupts = init_interrupts_v10_3,
.hqd_load = hqd_load_v10_3,
.hiq_mqd_load = hiq_mqd_load_v10_3,
.hqd_sdma_load = hqd_sdma_load_v10_3,
.hqd_dump = hqd_dump_v10_3,
.hqd_sdma_dump = hqd_sdma_dump_v10_3,
.hqd_is_occupied = hqd_is_occupied_v10_3,
.hqd_sdma_is_occupied = hqd_sdma_is_occupied_v10_3,
.hqd_destroy = hqd_destroy_v10_3,
.hqd_sdma_destroy = hqd_sdma_destroy_v10_3,
.address_watch_disable = address_watch_disable_v10_3,
.address_watch_execute = address_watch_execute_v10_3,
.wave_control_execute = wave_control_execute_v10_3,
.address_watch_get_offset = address_watch_get_offset_v10_3,
.get_atc_vmid_pasid_mapping_info = NULL,
.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
.get_hive_id = amdgpu_amdkfd_get_hive_id,
#if 0
.enable_debug_trap = enable_debug_trap_v10_3,
.disable_debug_trap = disable_debug_trap_v10_3,
.set_wave_launch_trap_override = set_wave_launch_trap_override_v10_3,
.set_wave_launch_mode = set_wave_launch_mode_v10_3,
.get_iq_wait_times = get_iq_wait_times_v10_3,
.build_grace_period_packet_info = build_grace_period_packet_info_v10_3,
#endif
};


@ -395,7 +395,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
if (ret)
return ret;
return amdgpu_sync_fence(sync, vm->last_update, false);
return amdgpu_sync_fence(sync, vm->last_update);
}
static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
@ -785,7 +785,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
amdgpu_sync_fence(sync, bo_va->last_pt_update);
return 0;
}
@ -804,7 +804,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
return ret;
}
return amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
return amdgpu_sync_fence(sync, bo_va->last_pt_update);
}
static int map_bo_to_gpuvm(struct amdgpu_device *adev,
@ -2102,7 +2102,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
pr_debug("Memory eviction: Validate BOs failed. Try again\n");
goto validate_map_fail;
}
ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false);
ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving);
if (ret) {
pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
goto validate_map_fail;


@ -2022,11 +2022,6 @@ int amdgpu_atombios_init(struct amdgpu_device *adev)
if (adev->is_atom_fw) {
amdgpu_atomfirmware_scratch_regs_init(adev);
amdgpu_atomfirmware_allocate_fb_scratch(adev);
ret = amdgpu_atomfirmware_get_mem_train_info(adev);
if (ret) {
DRM_ERROR("Failed to get mem train fb location.\n");
return ret;
}
} else {
amdgpu_atombios_scratch_regs_init(adev);
amdgpu_atombios_allocate_fb_scratch(adev);


@ -120,11 +120,13 @@ union umc_info {
union vram_info {
struct atom_vram_info_header_v2_3 v23;
struct atom_vram_info_header_v2_4 v24;
struct atom_vram_info_header_v2_5 v25;
};
union vram_module {
struct atom_vram_module_v9 v9;
struct atom_vram_module_v10 v10;
struct atom_vram_module_v11 v11;
};
static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
@ -260,6 +262,26 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
if (vram_vendor)
*vram_vendor = mem_vendor;
break;
case 5:
if (module_id > vram_info->v25.vram_module_num)
module_id = 0;
vram_module = (union vram_module *)vram_info->v25.vram_module;
while (i < module_id) {
vram_module = (union vram_module *)
((u8 *)vram_module + vram_module->v11.vram_module_size);
i++;
}
mem_type = vram_module->v11.memory_type;
if (vram_type)
*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
mem_channel_number = vram_module->v11.channel_num;
mem_channel_width = vram_module->v11.channel_width;
if (vram_width)
*vram_width = mem_channel_number * (1 << mem_channel_width);
mem_vendor = (vram_module->v11.vender_rev_id) & 0xF;
if (vram_vendor)
*vram_vendor = mem_vendor;
break;
default:
return -EINVAL;
}
@ -303,6 +325,9 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
union firmware_info {
struct atom_firmware_info_v3_1 v31;
struct atom_firmware_info_v3_2 v32;
struct atom_firmware_info_v3_3 v33;
struct atom_firmware_info_v3_4 v34;
};
/*
@ -491,7 +516,7 @@ static bool gddr6_mem_train_vbios_support(struct amdgpu_device *adev)
return false;
}
static int gddr6_mem_train_support(struct amdgpu_device *adev)
int amdgpu_mem_train_support(struct amdgpu_device *adev)
{
int ret;
uint32_t major, minor, revision, hw_v;
@ -507,6 +532,7 @@ static int gddr6_mem_train_support(struct amdgpu_device *adev)
switch (hw_v) {
case HW_REV(11, 0, 0):
case HW_REV(11, 0, 5):
case HW_REV(11, 0, 7):
ret = 1;
break;
default:
@ -525,46 +551,37 @@ static int gddr6_mem_train_support(struct amdgpu_device *adev)
return ret;
}
int amdgpu_atomfirmware_get_mem_train_info(struct amdgpu_device *adev)
int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev)
{
struct atom_context *ctx = adev->mode_info.atom_context;
union firmware_info *firmware_info;
int index;
uint8_t frev, crev;
uint16_t data_offset, size;
int ret;
adev->fw_vram_usage.mem_train_support = false;
if (adev->asic_type != CHIP_NAVI10 &&
adev->asic_type != CHIP_NAVI14)
return 0;
if (amdgpu_sriov_vf(adev))
return 0;
ret = gddr6_mem_train_support(adev);
if (ret == -1)
return -EINVAL;
else if (ret == 0)
return 0;
u16 data_offset, size;
u8 frev, crev;
int fw_reserved_fb_size;
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
vram_usagebyfirmware);
ret = amdgpu_atom_parse_data_header(ctx, index, &size, &frev, &crev,
&data_offset);
if (ret == 0) {
DRM_ERROR("parse data header failed.\n");
firmwareinfo);
if (!amdgpu_atom_parse_data_header(ctx, index, &size,
&frev, &crev, &data_offset))
/* fail to parse data_header */
return 0;
firmware_info = (union firmware_info *)(ctx->bios + data_offset);
if (frev !=3)
return -EINVAL;
switch (crev) {
case 4:
fw_reserved_fb_size =
(firmware_info->v34.fw_reserved_size_in_kb << 10);
break;
default:
fw_reserved_fb_size = 0;
break;
}
DRM_DEBUG("atom firmware common table header size:0x%04x, frev:0x%02x,"
" crev:0x%02x, data_offset:0x%04x.\n", size, frev, crev, data_offset);
/* only support 2.1+ */
if (((uint16_t)frev << 8 | crev) < 0x0201) {
DRM_ERROR("frev:0x%02x, crev:0x%02x < 2.1 !\n", frev, crev);
return -EINVAL;
}
adev->fw_vram_usage.mem_train_support = true;
return 0;
return fw_reserved_fb_size;
}


@ -31,10 +31,11 @@ void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
int *vram_width, int *vram_type, int *vram_vendor);
int amdgpu_atomfirmware_get_mem_train_info(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev);
int amdgpu_mem_train_support(struct amdgpu_device *adev);
#endif


@ -716,8 +716,10 @@ amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force)
if (!drm_kms_helper_is_poll_worker()) {
r = pm_runtime_get_sync(connector->dev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(connector->dev->dev);
return connector_status_disconnected;
}
}
if (encoder) {
@ -854,8 +856,10 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
if (!drm_kms_helper_is_poll_worker()) {
r = pm_runtime_get_sync(connector->dev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(connector->dev->dev);
return connector_status_disconnected;
}
}
encoder = amdgpu_connector_best_single_encoder(connector);
@ -977,8 +981,10 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
if (!drm_kms_helper_is_poll_worker()) {
r = pm_runtime_get_sync(connector->dev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(connector->dev->dev);
return connector_status_disconnected;
}
}
if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
@ -1328,8 +1334,10 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
if (!drm_kms_helper_is_poll_worker()) {
r = pm_runtime_get_sync(connector->dev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(connector->dev->dev);
return connector_status_disconnected;
}
}
if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {


@ -992,7 +992,7 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
dma_fence_put(old);
}
r = amdgpu_sync_fence(&p->job->sync, fence, true);
r = amdgpu_sync_fence(&p->job->sync, fence);
dma_fence_put(fence);
if (r)
return r;
@ -1014,7 +1014,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
return r;
}
r = amdgpu_sync_fence(&p->job->sync, fence, true);
r = amdgpu_sync_fence(&p->job->sync, fence);
dma_fence_put(fence);
return r;


@ -223,12 +223,16 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
*pos &= (1UL << 22) - 1;
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
r = amdgpu_virt_enable_access_debugfs(adev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
if (use_bank) {
if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
@ -332,12 +336,16 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
return -EINVAL;
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
r = amdgpu_virt_enable_access_debugfs(adev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
while (size) {
uint32_t value;
@ -387,12 +395,16 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
return -EINVAL;
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
r = amdgpu_virt_enable_access_debugfs(adev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
while (size) {
uint32_t value;
@ -443,12 +455,16 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
return -EINVAL;
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
r = amdgpu_virt_enable_access_debugfs(adev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
while (size) {
uint32_t value;
@ -498,12 +514,16 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
return -EINVAL;
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
r = amdgpu_virt_enable_access_debugfs(adev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
while (size) {
uint32_t value;
@ -554,12 +574,16 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
return -EINVAL;
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
r = amdgpu_virt_enable_access_debugfs(adev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
while (size) {
uint32_t value;
@ -609,12 +633,16 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
return -EINVAL;
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
r = amdgpu_virt_enable_access_debugfs(adev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
while (size) {
uint32_t value;
@ -764,12 +792,16 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
valuesize = sizeof(values);
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
r = amdgpu_virt_enable_access_debugfs(adev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
@ -842,12 +874,16 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
simd = (*pos & GENMASK_ULL(44, 37)) >> 37;
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
r = amdgpu_virt_enable_access_debugfs(adev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
/* switch to the specific se/sh/cu */
mutex_lock(&adev->grbm_idx_mutex);
@ -937,11 +973,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
return r;
goto err;
r = amdgpu_virt_enable_access_debugfs(adev);
if (r < 0)
return r;
goto err;
/* switch to the specific se/sh/cu */
mutex_lock(&adev->grbm_idx_mutex);
@ -967,7 +1003,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
value = data[result >> 2];
r = put_user(value, (uint32_t *)buf);
if (r) {
result = r;
amdgpu_virt_disable_access_debugfs(adev);
goto err;
}
@ -976,10 +1012,14 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
size -= 4;
}
err:
kfree(data);
amdgpu_virt_disable_access_debugfs(adev);
return result;
err:
pm_runtime_put_autosuspend(adev->ddev->dev);
kfree(data);
return r;
}
/**
@ -1003,8 +1043,10 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu
return -EINVAL;
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
while (size) {
uint32_t value;
@ -1140,8 +1182,10 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
int r = 0, i;
r = pm_runtime_get_sync(dev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
/* Avoid accidentally unparking the sched thread during GPU reset */
mutex_lock(&adev->lock_reset);
@ -1197,8 +1241,10 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data)
int r;
r = pm_runtime_get_sync(dev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev));
@ -1216,8 +1262,10 @@ static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)
int r;
r = pm_runtime_get_sync(dev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT));
@ -1407,16 +1455,16 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val)
return -EINVAL;
ret = pm_runtime_get_sync(adev->ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return ret;
}
if (is_support_sw_smu(adev)) {
ret = smu_get_dpm_freq_range(&adev->smu, SMU_SCLK, &min_freq, &max_freq, true);
if (ret || val > max_freq || val < min_freq)
return -EINVAL;
ret = smu_set_soft_freq_range(&adev->smu, SMU_SCLK, (uint32_t)val, (uint32_t)val, true);
} else {
return 0;
}
pm_runtime_mark_last_busy(adev->ddev->dev);


@ -80,6 +80,7 @@ MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_gpu_info.bin");
#define AMDGPU_RESUME_MS 2000
@ -112,6 +113,7 @@ const char *amdgpu_asic_name[] = {
"NAVI10",
"NAVI14",
"NAVI12",
"SIENNA_CICHLID",
"LAST",
};
@ -907,6 +909,11 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
return 0;
/* skip if the bios has already enabled large BAR */
if (adev->gmc.real_vram_size &&
(pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
return 0;
/* Check if the root BUS has 64bit memory resources */
root = adev->pdev->bus;
while (root->parent)
@ -1159,6 +1166,16 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
amdgpu_vm_fragment_size = -1;
}
if (amdgpu_sched_hw_submission < 2) {
dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
amdgpu_sched_hw_submission);
amdgpu_sched_hw_submission = 2;
} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
amdgpu_sched_hw_submission);
amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
}
amdgpu_device_check_smu_prv_buffer_size(adev);
amdgpu_device_check_vm_size(adev);
@ -1527,22 +1544,25 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
const char *chip_name;
char fw_name[30];
char fw_name[40];
int err;
const struct gpu_info_firmware_header_v1_0 *hdr;
adev->firmware.gpu_info_fw = NULL;
if (adev->discovery_bin) {
amdgpu_discovery_get_gfx_info(adev);
/*
* FIXME: The bounding box is still needed by Navi12, so
* temporarily read it from gpu_info firmware. Should be dropped
* when DAL no longer needs it.
*/
if (adev->asic_type != CHIP_NAVI12)
return 0;
}
switch (adev->asic_type) {
case CHIP_TOPAZ:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
case CHIP_CARRIZO:
case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
case CHIP_VERDE:
case CHIP_TAHITI:
@ -1557,6 +1577,15 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
case CHIP_KABINI:
case CHIP_MULLINS:
#endif
case CHIP_TOPAZ:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
case CHIP_CARRIZO:
case CHIP_STONEY:
case CHIP_VEGA20:
default:
return 0;
@ -1589,6 +1618,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
case CHIP_NAVI12:
chip_name = "navi12";
break;
case CHIP_SIENNA_CICHLID:
chip_name = "sienna_cichlid";
break;
}
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
@ -1617,10 +1649,11 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
amdgpu_discovery_get_gfx_info(adev);
/*
* Should be dropped when DAL no longer needs it.
*/
if (adev->asic_type == CHIP_NAVI12)
goto parse_soc_bounding_box;
}
adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
@ -1653,7 +1686,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
parse_soc_bounding_box:
/*
* soc bounding box info is not integrated in discovery table,
* we always need to parse it from gpu info firmware.
* we always need to parse it from gpu info firmware if needed.
*/
if (hdr->version_minor == 2) {
const struct gpu_info_firmware_v1_2 *gpu_info_fw =
@ -1690,24 +1723,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
amdgpu_device_enable_virtual_display(adev);
switch (adev->asic_type) {
case CHIP_TOPAZ:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
case CHIP_CARRIZO:
case CHIP_STONEY:
if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
adev->family = AMDGPU_FAMILY_CZ;
else
adev->family = AMDGPU_FAMILY_VI;
r = vi_set_ip_blocks(adev);
if (r)
return r;
break;
#ifdef CONFIG_DRM_AMDGPU_SI
case CHIP_VERDE:
case CHIP_TAHITI:
@ -1726,24 +1741,41 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
case CHIP_KAVERI:
case CHIP_KABINI:
case CHIP_MULLINS:
if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
adev->family = AMDGPU_FAMILY_CI;
else
if (adev->flags & AMD_IS_APU)
adev->family = AMDGPU_FAMILY_KV;
else
adev->family = AMDGPU_FAMILY_CI;
r = cik_set_ip_blocks(adev);
if (r)
return r;
break;
#endif
case CHIP_TOPAZ:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
case CHIP_CARRIZO:
case CHIP_STONEY:
if (adev->flags & AMD_IS_APU)
adev->family = AMDGPU_FAMILY_CZ;
else
adev->family = AMDGPU_FAMILY_VI;
r = vi_set_ip_blocks(adev);
if (r)
return r;
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
case CHIP_ARCTURUS:
case CHIP_RENOIR:
if (adev->asic_type == CHIP_RAVEN ||
adev->asic_type == CHIP_RENOIR)
if (adev->flags & AMD_IS_APU)
adev->family = AMDGPU_FAMILY_RV;
else
adev->family = AMDGPU_FAMILY_AI;
@ -1755,6 +1787,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
adev->family = AMDGPU_FAMILY_NV;
r = nv_set_ip_blocks(adev);
@ -2308,6 +2341,9 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
{
int i, r;
if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
amdgpu_virt_release_ras_err_handler_data(adev);
amdgpu_ras_pre_fini(adev);
if (adev->gmc.xgmi.num_physical_nodes > 1)
@ -2778,6 +2814,9 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_RENOIR:
#endif
#if defined(CONFIG_DRM_AMD_DC_DCN3_0)
case CHIP_SIENNA_CICHLID:
#endif
return amdgpu_dc != 0;
#endif
@ -3036,6 +3075,17 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->gfx.gfx_off_req_count = 1;
adev->pm.ac_power = power_supply_is_system_supplied() > 0;
atomic_set(&adev->throttling_logging_enabled, 1);
/*
* If throttling continues, logging will be performed every minute
* to avoid log flooding. "-1" is subtracted since the thermal
* throttling interrupt comes every second. Thus, the total logging
* interval is 59 seconds (ratelimited printk interval) + 1 (waiting
* for throttling interrupt) = 60 seconds.
*/
ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
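
The two calls above only initialize the rate-limit state; the consumer side lives elsewhere in the driver. As a rough illustration (not the actual SMU interrupt handler), a ratelimit_state configured this way is typically consumed like the sketch below, so at most one throttling warning is printed per 60-second window and the suppressed count is reported when the window is released.

/* Hedged sketch of a consumer, assuming a struct amdgpu_device *adev
 * with the fields added above; not the driver's real handler code.
 */
if (atomic_read(&adev->throttling_logging_enabled) &&
    __ratelimit(&adev->throttling_logging_rs))
	dev_warn(adev->dev, "GPU is throttled by thermal limits\n");
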
/* Registers mapping */
/* TODO: block userspace mapping of io register */
if (adev->asic_type >= CHIP_BONAIRE) {
@ -3330,10 +3380,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
amdgpu_pm_sysfs_fini(adev);
amdgpu_fbdev_fini(adev);
r = amdgpu_device_ip_fini(adev);
if (adev->firmware.gpu_info_fw) {
release_firmware(adev->firmware.gpu_info_fw);
adev->firmware.gpu_info_fw = NULL;
}
release_firmware(adev->firmware.gpu_info_fw);
adev->firmware.gpu_info_fw = NULL;
adev->accel_working = false;
/* free i2c buses */
if (!amdgpu_device_has_dc_support(adev))
@ -3365,7 +3413,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
if (IS_ENABLED(CONFIG_PERF_EVENTS))
amdgpu_pmu_fini(adev);
if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
if (adev->discovery_bin)
amdgpu_discovery_fini(adev);
}
@ -3377,7 +3425,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
* amdgpu_device_suspend - initiate device suspend
*
* @dev: drm dev pointer
* @suspend: suspend state
* @fbcon : notify the fbdev of suspend
*
* Puts the hw in the suspend state (all asics).
@ -3474,7 +3521,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
* amdgpu_device_resume - initiate device resume
*
* @dev: drm dev pointer
* @resume: resume state
* @fbcon : notify the fbdev of resume
*
* Bring the hw back to operating state (all asics).


@ -32,7 +32,7 @@
#define mmMM_DATA 0x1
#define HW_ID_MAX 300
const char *hw_id_names[HW_ID_MAX] = {
static const char *hw_id_names[HW_ID_MAX] = {
[MP1_HWID] = "MP1",
[MP2_HWID] = "MP2",
[THM_HWID] = "THM",


@ -282,7 +282,7 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
ret = pm_runtime_get_sync(dev->dev);
if (ret < 0)
return ret;
goto out;
ret = drm_crtc_helper_set_config(set, ctx);
@ -297,7 +297,7 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
take the current one */
if (active && !adev->have_disp_power_ref) {
adev->have_disp_power_ref = true;
return ret;
goto out;
}
/* if we have no active crtcs, then drop the power ref
we got before */
@ -306,6 +306,7 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
adev->have_disp_power_ref = false;
}
out:
/* drop the power reference we got coming in here */
pm_runtime_put_autosuspend(dev->dev);
return ret;


@ -53,6 +53,7 @@ struct amdgpu_doorbell_index {
uint32_t gfx_ring0;
uint32_t gfx_ring1;
uint32_t sdma_engine[8];
uint32_t mes_ring;
uint32_t ih;
union {
struct {
@ -177,9 +178,12 @@ typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT
AMDGPU_NAVI10_DOORBELL_USERQUEUE_END = 0x08A,
AMDGPU_NAVI10_DOORBELL_GFX_RING0 = 0x08B,
AMDGPU_NAVI10_DOORBELL_GFX_RING1 = 0x08C,
AMDGPU_NAVI10_DOORBELL_MES_RING = 0x090,
/* SDMA:256~335*/
AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 = 0x100,
AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1 = 0x10A,
AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE2 = 0x114,
AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE3 = 0x11E,
/* IH: 376~391 */
AMDGPU_NAVI10_DOORBELL_IH = 0x178,
/* MMSCH: 392~407
@ -191,8 +195,13 @@ typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT
AMDGPU_NAVI10_DOORBELL64_VCN4_5 = 0x18A,
AMDGPU_NAVI10_DOORBELL64_VCN6_7 = 0x18B,
AMDGPU_NAVI10_DOORBELL64_VCN8_9 = 0x18C,
AMDGPU_NAVI10_DOORBELL64_VCNa_b = 0x18D,
AMDGPU_NAVI10_DOORBELL64_VCNc_d = 0x18E,
AMDGPU_NAVI10_DOORBELL64_VCNe_f = 0x18F,
AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0,
AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VCN6_7,
AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VCNe_f,
AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT = 0x18F,
AMDGPU_NAVI10_DOORBELL_INVALID = 0xFFFF


@ -1162,7 +1162,7 @@ int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev,
{
int ret = 0;
if (is_support_sw_smu_xgmi(adev))
if (is_support_sw_smu(adev))
ret = smu_set_xgmi_pstate(&adev->smu, pstate);
else if (adev->powerplay.pp_funcs &&
adev->powerplay.pp_funcs->set_xgmi_pstate)
@ -1197,4 +1197,4 @@ int amdgpu_dpm_allow_xgmi_power_down(struct amdgpu_device *adev, bool en)
return smu_allow_xgmi_power_down(smu, en);
return 0;
}
}


@ -87,9 +87,10 @@
* - 3.36.0 - Allow reading more status registers on si/cik
* - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness
* - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
* - 3.39.0 - DMABUF implicit sync does a full pipeline sync
*/
#define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 38
#define KMS_DRIVER_MINOR 39
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
@ -705,6 +706,14 @@ MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false =
int queue_preemption_timeout_ms = 9000;
module_param(queue_preemption_timeout_ms, int, 0644);
MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1 = Minimum, 9000 = default)");
/**
* DOC: debug_evictions(bool)
* Enable extra debug messages to help determine the cause of evictions
*/
bool debug_evictions;
module_param(debug_evictions, bool, 0644);
MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = default)");
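/*
 * Editorial aside, not part of this patch: because the parameter is declared
 * with mode 0644 it is also exposed at
 * /sys/module/amdgpu/parameters/debug_evictions, so it can be flipped at
 * runtime as well as set at load time (e.g. modprobe amdgpu debug_evictions=1).
 */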
#endif
/**
@ -1111,7 +1120,9 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
pci_set_drvdata(pdev, dev);
amdgpu_driver_load_kms(dev, ent->driver_data);
ret = amdgpu_driver_load_kms(dev, ent->driver_data);
if (ret)
goto err_pci;
retry_init:
ret = drm_dev_register(dev, ent->driver_data);
@ -1373,11 +1384,12 @@ long amdgpu_drm_ioctl(struct file *filp,
dev = file_priv->minor->dev;
ret = pm_runtime_get_sync(dev->dev);
if (ret < 0)
return ret;
goto out;
ret = drm_ioctl(filp, cmd, arg);
pm_runtime_mark_last_busy(dev->dev);
out:
pm_runtime_put_autosuspend(dev->dev);
return ret;
}

View file

@ -416,7 +416,9 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index;
}
amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
amdgpu_irq_get(adev, irq_src, irq_type);
if (irq_src)
amdgpu_irq_get(adev, irq_src, irq_type);
ring->fence_drv.irq_src = irq_src;
ring->fence_drv.irq_type = irq_type;
@ -448,8 +450,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
if (!adev)
return -EINVAL;
/* Check that num_hw_submission is a power of two */
if ((num_hw_submission & (num_hw_submission - 1)) != 0)
if (!is_power_of_2(num_hw_submission))
return -EINVAL;
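/*
 * Editorial aside, not part of this patch: is_power_of_2(n) from
 * <linux/log2.h> expands to (n != 0 && (n & (n - 1)) == 0), so besides being
 * more readable than the open-coded test it also rejects n == 0. For example,
 * 256 & 255 == 0 (accepted) while 384 & 383 == 256 (rejected).
 */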
ring->fence_drv.cpu_addr = NULL;
@ -467,8 +468,8 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
if (!ring->fence_drv.fences)
return -ENOMEM;
/* No need to setup the GPU scheduler for KIQ ring */
if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
/* No need to setup the GPU scheduler for rings that don't need it */
if (!ring->no_scheduler) {
switch (ring->funcs->type) {
case AMDGPU_RING_TYPE_GFX:
timeout = adev->gfx_timeout;
@ -537,9 +538,11 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
/* no need to trigger GPU reset as we are unloading */
amdgpu_fence_driver_force_completion(ring);
}
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
drm_sched_fini(&ring->sched);
if (ring->fence_drv.irq_src)
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
if (!ring->no_scheduler)
drm_sched_fini(&ring->sched);
del_timer_sync(&ring->fence_drv.fallback_timer);
for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
dma_fence_put(ring->fence_drv.fences[j]);
@ -574,8 +577,9 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev)
}
/* disable the interrupt */
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
if (ring->fence_drv.irq_src)
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
}
}
@ -601,8 +605,9 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
continue;
/* enable the interrupt */
amdgpu_irq_get(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
if (ring->fence_drv.irq_src)
amdgpu_irq_get(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
}
}
@ -749,8 +754,10 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data)
int r;
r = pm_runtime_get_sync(dev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(dev->dev);
return 0;
}
seq_printf(m, "gpu recover\n");
amdgpu_device_gpu_recover(adev, NULL);

View file

@ -26,12 +26,13 @@
#include "amdgpu_i2c.h"
#include "smu_v11_0_i2c.h"
#include "atom.h"
#include "amdgpu_fru_eeprom.h"
#define I2C_PRODUCT_INFO_ADDR 0xAC
#define I2C_PRODUCT_INFO_ADDR_SIZE 0x2
#define I2C_PRODUCT_INFO_OFFSET 0xC0
bool is_fru_eeprom_supported(struct amdgpu_device *adev)
static bool is_fru_eeprom_supported(struct amdgpu_device *adev)
{
/* TODO: Gaming SKUs don't have the FRU EEPROM.
* Use this hack to address hangs on modprobe on gaming SKUs
@ -47,7 +48,7 @@ bool is_fru_eeprom_supported(struct amdgpu_device *adev)
return false;
}
int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr,
static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr,
unsigned char *buff)
{
int ret, size;

View file

@ -21,8 +21,8 @@
*
*/
#ifndef __AMDGPU_PRODINFO_H__
#define __AMDGPU_PRODINFO_H__
#ifndef __AMDGPU_FRU_EEPROM_H__
#define __AMDGPU_FRU_EEPROM_H__
int amdgpu_fru_get_product_info(struct amdgpu_device *adev);

View file

@ -134,6 +134,7 @@ struct gb_addr_config {
uint8_t num_banks;
uint8_t num_se;
uint8_t num_rb_per_se;
uint8_t num_pkrs;
};
struct amdgpu_gfx_config {

View file

@ -357,6 +357,9 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
ring = adev->rings[i];
vmhub = ring->funcs->vmhub;
if (ring == &adev->mes.ring)
continue;
inv_eng = ffs(vm_inv_engs[vmhub]);
if (!inv_eng) {
dev_err(adev->dev, "no VM inv eng for ring %s\n",

View file

@ -268,8 +268,6 @@ err_out:
* amdgpu_gtt_mgr_del - free ranges
*
* @man: TTM memory type manager
* @tbo: TTM BO we need this range for
* @place: placement flags and restrictions
* @mem: TTM memory object
*
* Free the allocated GTT again.

View file

@ -178,7 +178,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
need_ctx_switch = ring->current_ctx != fence_ctx;
if (ring->funcs->emit_pipeline_sync && job &&
((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) ||
((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
(amdgpu_sriov_vf(adev) && need_ctx_switch) ||
amdgpu_vm_need_pipeline_sync(ring, job))) {
need_pipe_sync = true;

View file

@ -206,7 +206,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
int r;
if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
return amdgpu_sync_fence(sync, ring->vmid_wait, false);
return amdgpu_sync_fence(sync, ring->vmid_wait);
fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
if (!fences)
@ -241,7 +241,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
return -ENOMEM;
}
r = amdgpu_sync_fence(sync, &array->base, false);
r = amdgpu_sync_fence(sync, &array->base);
dma_fence_put(ring->vmid_wait);
ring->vmid_wait = &array->base;
return r;
@ -294,7 +294,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
if (tmp) {
*id = NULL;
r = amdgpu_sync_fence(sync, tmp, false);
r = amdgpu_sync_fence(sync, tmp);
return r;
}
needs_flush = true;
@ -303,7 +303,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
/* Good we can use this VMID. Remember this submission as
* user of the VMID.
*/
r = amdgpu_sync_fence(&(*id)->active, fence, false);
r = amdgpu_sync_fence(&(*id)->active, fence);
if (r)
return r;
@ -375,7 +375,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
/* Good, we can use this VMID. Remember this submission as
* user of the VMID.
*/
r = amdgpu_sync_fence(&(*id)->active, fence, false);
r = amdgpu_sync_fence(&(*id)->active, fence);
if (r)
return r;
@ -435,7 +435,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
id = idle;
/* Remember this submission as user of the VMID */
r = amdgpu_sync_fence(&id->active, fence, false);
r = amdgpu_sync_fence(&id->active, fence);
if (r)
goto error;

View file

@ -183,16 +183,13 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
struct amdgpu_job *job = to_amdgpu_job(sched_job);
struct amdgpu_vm *vm = job->vm;
struct dma_fence *fence;
bool explicit = false;
int r;
fence = amdgpu_sync_get_fence(&job->sync, &explicit);
if (fence && explicit) {
if (drm_sched_dependency_optimized(fence, s_entity)) {
r = amdgpu_sync_fence(&job->sched_sync, fence, false);
if (r)
DRM_ERROR("Error adding fence (%d)\n", r);
}
fence = amdgpu_sync_get_fence(&job->sync);
if (fence && drm_sched_dependency_optimized(fence, s_entity)) {
r = amdgpu_sync_fence(&job->sched_sync, fence);
if (r)
DRM_ERROR("Error adding fence (%d)\n", r);
}
while (fence == NULL && vm && !job->vmid) {
@ -202,7 +199,7 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
if (r)
DRM_ERROR("Error getting VM ID (%d)\n", r);
fence = amdgpu_sync_get_fence(&job->sync, NULL);
fence = amdgpu_sync_get_fence(&job->sync);
}
return fence;

View file

@ -37,6 +37,8 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work);
int amdgpu_jpeg_sw_init(struct amdgpu_device *adev)
{
INIT_DELAYED_WORK(&adev->jpeg.idle_work, amdgpu_jpeg_idle_work_handler);
mutex_init(&adev->jpeg.jpeg_pg_lock);
atomic_set(&adev->jpeg.total_submission_cnt, 0);
return 0;
}
@ -54,6 +56,8 @@ int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev)
amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec);
}
mutex_destroy(&adev->jpeg.jpeg_pg_lock);
return 0;
}
@ -83,7 +87,7 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work)
fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec);
}
if (fences == 0)
if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt))
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
AMD_PG_STATE_GATE);
else
@ -93,15 +97,19 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work)
void amdgpu_jpeg_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
bool set_clocks = !cancel_delayed_work_sync(&adev->jpeg.idle_work);
if (set_clocks)
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
atomic_inc(&adev->jpeg.total_submission_cnt);
cancel_delayed_work_sync(&adev->jpeg.idle_work);
mutex_lock(&adev->jpeg.jpeg_pg_lock);
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
AMD_PG_STATE_UNGATE);
mutex_unlock(&adev->jpeg.jpeg_pg_lock);
}
void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring)
{
atomic_dec(&ring->adev->jpeg.total_submission_cnt);
schedule_delayed_work(&ring->adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT);
}

View file

@ -46,6 +46,8 @@ struct amdgpu_jpeg {
unsigned harvest_config;
struct delayed_work idle_work;
enum amd_powergating_state cur_state;
struct mutex jpeg_pg_lock;
atomic_t total_submission_cnt;
};
int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);

View file

@ -167,18 +167,33 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
}
if (amdgpu_device_supports_boco(dev) &&
(amdgpu_runtime_pm != 0)) /* enable runpm by default for boco */
adev->runpm = true;
else if (amdgpu_device_supports_baco(dev) &&
(amdgpu_runtime_pm != 0) &&
(adev->asic_type >= CHIP_TOPAZ) &&
(adev->asic_type != CHIP_VEGA10) &&
(adev->asic_type != CHIP_VEGA20) &&
(adev->asic_type != CHIP_ARCTURUS)) /* enable runpm on VI+ */
adev->runpm = true;
else if (amdgpu_device_supports_baco(dev) &&
(amdgpu_runtime_pm > 0)) /* enable runpm if runpm=1 on CI */
(amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */
adev->runpm = true;
} else if (amdgpu_device_supports_baco(dev) &&
(amdgpu_runtime_pm != 0)) {
switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_BONAIRE:
case CHIP_HAWAII:
#endif
case CHIP_VEGA20:
case CHIP_ARCTURUS:
case CHIP_SIENNA_CICHLID:
/* enable runpm if runpm=1 */
if (amdgpu_runtime_pm > 0)
adev->runpm = true;
break;
case CHIP_VEGA10:
/* turn runpm on if noretry=0 */
if (!amdgpu_noretry)
adev->runpm = true;
break;
default:
/* enable runpm on VI+ */
adev->runpm = true;
break;
}
}
/* Call ACPI methods: require modeset init
* but failure is not fatal
@ -991,7 +1006,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
r = pm_runtime_get_sync(dev->dev);
if (r < 0)
return r;
goto pm_put;
fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
if (unlikely(!fpriv)) {
@ -1042,6 +1057,7 @@ error_pasid:
out_suspend:
pm_runtime_mark_last_busy(dev->dev);
pm_put:
pm_runtime_put_autosuspend(dev->dev);
return r;
@ -1340,8 +1356,7 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
fw_info.feature, fw_info.ver);
/* MEC2 */
if (adev->asic_type == CHIP_KAVERI ||
(adev->asic_type > CHIP_TOPAZ && adev->asic_type != CHIP_STONEY)) {
if (adev->gfx.mec2_fw) {
query_fw.index = 1;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)

View file

@ -24,10 +24,32 @@
#ifndef __AMDGPU_MES_H__
#define __AMDGPU_MES_H__
#define AMDGPU_MES_MAX_COMPUTE_PIPES 8
#define AMDGPU_MES_MAX_GFX_PIPES 2
#define AMDGPU_MES_MAX_SDMA_PIPES 2
enum amdgpu_mes_priority_level {
AMDGPU_MES_PRIORITY_LEVEL_LOW = 0,
AMDGPU_MES_PRIORITY_LEVEL_NORMAL = 1,
AMDGPU_MES_PRIORITY_LEVEL_MEDIUM = 2,
AMDGPU_MES_PRIORITY_LEVEL_HIGH = 3,
AMDGPU_MES_PRIORITY_LEVEL_REALTIME = 4,
AMDGPU_MES_PRIORITY_NUM_LEVELS
};
struct amdgpu_mes_funcs;
struct amdgpu_mes {
struct amdgpu_adev *adev;
struct amdgpu_device *adev;
uint32_t total_max_queue;
uint32_t doorbell_id_offset;
uint32_t max_doorbell_slices;
uint64_t default_process_quantum;
uint64_t default_gang_quantum;
struct amdgpu_ring ring;
const struct firmware *fw;
@ -45,8 +67,27 @@ struct amdgpu_mes {
uint32_t data_fw_version;
uint64_t data_start_addr;
/* eop gpu obj */
struct amdgpu_bo *eop_gpu_obj;
uint64_t eop_gpu_addr;
void *mqd_backup;
uint32_t vmid_mask_gfxhub;
uint32_t vmid_mask_mmhub;
uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
uint32_t agreegated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
uint32_t sch_ctx_offs;
uint64_t sch_ctx_gpu_addr;
uint64_t *sch_ctx_ptr;
uint32_t query_status_fence_offs;
uint64_t query_status_fence_gpu_addr;
uint64_t *query_status_fence_ptr;
/* ip specific functions */
struct amdgpu_mes_funcs *funcs;
const struct amdgpu_mes_funcs *funcs;
};
struct mes_add_queue_input {

View file

@ -167,8 +167,10 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev)) {
if (adev->smu.ppt_funcs->get_current_power_state)
@ -212,8 +214,10 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
return -EINVAL;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev)) {
mutex_lock(&adev->pm.mutex);
@ -307,8 +311,10 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev))
level = smu_get_performance_level(&adev->smu);
@ -369,8 +375,10 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
}
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev))
current_level = smu_get_performance_level(&adev->smu);
@ -449,8 +457,10 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev)) {
ret = smu_get_power_num_states(&adev->smu, &data);
@ -491,8 +501,10 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev)) {
pm = smu_get_current_power_state(smu);
@ -567,8 +579,10 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
state = data.states[idx];
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
/* only set user selected power states */
if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
@ -608,8 +622,10 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev)) {
size = smu_sys_get_pp_table(&adev->smu, (void **)&table);
@ -650,8 +666,10 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev)) {
ret = smu_sys_set_pp_table(&adev->smu, (void *)buf, count);
@ -790,8 +808,10 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
}
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev)) {
ret = smu_od_edit_dpm_table(&adev->smu, type,
@ -847,8 +867,10 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev)) {
size = smu_print_clk_levels(&adev->smu, SMU_OD_SCLK, buf);
@ -905,8 +927,10 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
pr_debug("featuremask = 0x%llx\n", featuremask);
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev)) {
ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask);
@ -942,8 +966,10 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev))
size = smu_sys_get_pp_feature_mask(&adev->smu, buf);
@ -1001,8 +1027,10 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev))
size = smu_print_clk_levels(&adev->smu, SMU_SCLK, buf);
@ -1071,8 +1099,10 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
return ret;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask, true);
@ -1101,8 +1131,10 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev))
size = smu_print_clk_levels(&adev->smu, SMU_MCLK, buf);
@ -1135,8 +1167,10 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
return ret;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask, true);
@ -1165,8 +1199,10 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev))
size = smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf);
@ -1199,8 +1235,10 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
return ret;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask, true);
@ -1231,8 +1269,10 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev))
size = smu_print_clk_levels(&adev->smu, SMU_FCLK, buf);
@ -1265,8 +1305,10 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
return ret;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask, true);
@ -1297,8 +1339,10 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev))
size = smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf);
@ -1331,8 +1375,10 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
return ret;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask, true);
@ -1363,8 +1409,10 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev))
size = smu_print_clk_levels(&adev->smu, SMU_PCIE, buf);
@ -1397,8 +1445,10 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
return ret;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask, true);
@ -1429,8 +1479,10 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev))
value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK);
@ -1462,8 +1514,10 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
return -EINVAL;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev)) {
value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value);
@ -1498,8 +1552,10 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev))
value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK);
@ -1531,8 +1587,10 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
return -EINVAL;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev)) {
value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value);
@ -1587,8 +1645,10 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev))
size = smu_get_power_profile_mode(&adev->smu, buf);
@ -1609,7 +1669,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
const char *buf,
size_t count)
{
int ret = 0xff;
int ret;
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
uint32_t parameter_size = 0;
@ -1650,8 +1710,10 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
parameter[parameter_size] = profile_mode;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
if (is_support_sw_smu(adev))
ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true);
@ -1687,8 +1749,10 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return r;
}
/* read the IP busy sensor */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD,
@ -1723,8 +1787,10 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return r;
}
/* read the IP busy sensor */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD,
@ -1770,8 +1836,10 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
return -ENODATA;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
amdgpu_asic_get_pcie_usage(adev, &count0, &count1);
@ -1808,9 +1876,76 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
return 0;
}
/**
* DOC: thermal_throttling_logging
*
* Thermal throttling pulls down the clock frequency and thus the performance.
* It's a useful mechanism to protect the chip from overheating. Since it
* impacts performance, the user controls whether it is enabled and if so,
* the log frequency.
*
* Reading back the file shows you the status (enabled or disabled) and
* the interval (in seconds) between each thermal logging.
*
* Writing an integer to the file sets a new logging interval, in seconds.
* The value should be between 1 and 3600. If the value is less than 1,
* thermal logging is disabled. Values greater than 3600 are rejected.
*/
static ssize_t amdgpu_get_thermal_throttling_logging(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
return snprintf(buf, PAGE_SIZE, "%s: thermal throttling logging %s, with interval %d seconds\n",
adev->ddev->unique,
atomic_read(&adev->throttling_logging_enabled) ? "enabled" : "disabled",
adev->throttling_logging_rs.interval / HZ + 1);
}
static ssize_t amdgpu_set_thermal_throttling_logging(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
long throttling_logging_interval;
unsigned long flags;
int ret = 0;
ret = kstrtol(buf, 0, &throttling_logging_interval);
if (ret)
return ret;
if (throttling_logging_interval > 3600)
return -EINVAL;
if (throttling_logging_interval > 0) {
raw_spin_lock_irqsave(&adev->throttling_logging_rs.lock, flags);
/*
* Reset the ratelimit timer internals.
* This can effectively restart the timer.
*/
adev->throttling_logging_rs.interval =
(throttling_logging_interval - 1) * HZ;
adev->throttling_logging_rs.begin = 0;
adev->throttling_logging_rs.printed = 0;
adev->throttling_logging_rs.missed = 0;
raw_spin_unlock_irqrestore(&adev->throttling_logging_rs.lock, flags);
atomic_set(&adev->throttling_logging_enabled, 1);
} else {
atomic_set(&adev->throttling_logging_enabled, 0);
}
return count;
}
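/*
 * Editorial aside, not part of this patch: a minimal user-space sketch of how
 * the thermal_throttling_logging attribute documented above could be
 * exercised. The sysfs path below is an assumption (it depends on the card
 * index); writing 60 asks for roughly one throttling log per 60 seconds,
 * and writing 0 would disable the logging.
 */
#if 0	/* illustrative user-space code, never built as part of the driver */
#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/class/drm/card0/device/thermal_throttling_logging";
	char line[128];
	FILE *f;

	f = fopen(path, "r");	/* report current state and interval */
	if (f) {
		if (fgets(line, sizeof(line), f))
			printf("%s", line);
		fclose(f);
	}

	f = fopen(path, "w");	/* log at most once every 60 seconds */
	if (f) {
		fputs("60\n", f);
		fclose(f);
	}
	return 0;
}
#endif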
static struct amdgpu_device_attr amdgpu_device_attrs[] = {
AMDGPU_DEVICE_ATTR_RW(power_dpm_state, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RO(pp_num_states, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RO(pp_cur_state, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RW(pp_force_state, ATTR_FLAG_BASIC),
@ -1830,6 +1965,7 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = {
AMDGPU_DEVICE_ATTR_RO(pcie_bw, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RW(pp_features, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RO(unique_id, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RW(thermal_throttling_logging, ATTR_FLAG_BASIC),
};
static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,
@ -1872,7 +2008,9 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
if (adev->flags & AMD_IS_APU)
*states = ATTR_STATE_UNSUPPORTED;
} else if (DEVICE_ATTR_IS(unique_id)) {
if (!adev->unique_id)
if (asic_type != CHIP_VEGA10 &&
asic_type != CHIP_VEGA20 &&
asic_type != CHIP_ARCTURUS)
*states = ATTR_STATE_UNSUPPORTED;
} else if (DEVICE_ATTR_IS(pp_features)) {
if (adev->flags & AMD_IS_APU || asic_type < CHIP_VEGA10)
@ -2003,8 +2141,10 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
return -EINVAL;
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
switch (channel) {
case PP_TEMP_JUNCTION:
@ -2134,8 +2274,10 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(adev->ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return ret;
}
if (is_support_sw_smu(adev)) {
pwm_mode = smu_get_fan_control_mode(&adev->smu);
@ -2172,8 +2314,10 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
return err;
ret = pm_runtime_get_sync(adev->ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return ret;
}
if (is_support_sw_smu(adev)) {
smu_set_fan_control_mode(&adev->smu, value);
@ -2220,8 +2364,10 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
return -EPERM;
err = pm_runtime_get_sync(adev->ddev->dev);
if (err < 0)
if (err < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return err;
}
if (is_support_sw_smu(adev))
pwm_mode = smu_get_fan_control_mode(&adev->smu);
@ -2272,8 +2418,10 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
return -EPERM;
err = pm_runtime_get_sync(adev->ddev->dev);
if (err < 0)
if (err < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return err;
}
if (is_support_sw_smu(adev))
err = smu_get_fan_speed_percent(&adev->smu, &speed);
@ -2305,8 +2453,10 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
return -EPERM;
err = pm_runtime_get_sync(adev->ddev->dev);
if (err < 0)
if (err < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return err;
}
if (is_support_sw_smu(adev))
err = smu_get_fan_speed_rpm(&adev->smu, &speed);
@ -2337,8 +2487,10 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM,
(void *)&min_rpm, &size);
@ -2365,8 +2517,10 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM,
(void *)&max_rpm, &size);
@ -2392,8 +2546,10 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
return -EPERM;
err = pm_runtime_get_sync(adev->ddev->dev);
if (err < 0)
if (err < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return err;
}
if (is_support_sw_smu(adev))
err = smu_get_fan_speed_rpm(&adev->smu, &rpm);
@ -2424,8 +2580,10 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
return -EPERM;
err = pm_runtime_get_sync(adev->ddev->dev);
if (err < 0)
if (err < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return err;
}
if (is_support_sw_smu(adev))
pwm_mode = smu_get_fan_control_mode(&adev->smu);
@ -2473,8 +2631,10 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(adev->ddev->dev);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return ret;
}
if (is_support_sw_smu(adev)) {
pwm_mode = smu_get_fan_control_mode(&adev->smu);
@ -2519,8 +2679,10 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
return -EINVAL;
err = pm_runtime_get_sync(adev->ddev->dev);
if (err < 0)
if (err < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return err;
}
if (is_support_sw_smu(adev)) {
smu_set_fan_control_mode(&adev->smu, pwm_mode);
@ -2551,8 +2713,10 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
/* get the voltage */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX,
@ -2590,8 +2754,10 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
return -EINVAL;
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
/* get the voltage */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB,
@ -2626,8 +2792,10 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
/* get the voltage */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER,
@ -2665,11 +2833,13 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
if (is_support_sw_smu(adev)) {
smu_get_power_limit(&adev->smu, &limit, true, true);
smu_get_power_limit(&adev->smu, &limit, true);
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
} else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true);
@ -2697,11 +2867,13 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
if (is_support_sw_smu(adev)) {
smu_get_power_limit(&adev->smu, &limit, false, true);
smu_get_power_limit(&adev->smu, &limit, false);
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
} else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false);
@ -2740,8 +2912,10 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
err = pm_runtime_get_sync(adev->ddev->dev);
if (err < 0)
if (err < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return err;
}
if (is_support_sw_smu(adev))
err = smu_set_power_limit(&adev->smu, value);
@ -2771,8 +2945,10 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
/* get the sclk */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK,
@ -2806,8 +2982,10 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
}
/* get the sclk */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK,
@ -3669,8 +3847,10 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
return -EPERM;
r = pm_runtime_get_sync(dev->dev);
if (r < 0)
if (r < 0) {
pm_runtime_put_autosuspend(dev->dev);
return r;
}
amdgpu_device_ip_get_clockgating_state(adev, &flags);
seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags);

View file

@ -98,6 +98,7 @@ static int psp_early_init(void *handle)
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
psp_v11_0_set_psp_funcs(psp);
psp->autoload_supported = true;
break;
@ -115,6 +116,44 @@ static int psp_early_init(void *handle)
return 0;
}
static void psp_memory_training_fini(struct psp_context *psp)
{
struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
kfree(ctx->sys_cache);
ctx->sys_cache = NULL;
}
static int psp_memory_training_init(struct psp_context *psp)
{
int ret;
struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) {
DRM_DEBUG("memory training is not supported!\n");
return 0;
}
ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL);
if (ctx->sys_cache == NULL) {
DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n");
ret = -ENOMEM;
goto Err_out;
}
DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
ctx->train_data_size,
ctx->p2c_train_data_offset,
ctx->c2p_train_data_offset);
ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS;
return 0;
Err_out:
psp_memory_training_fini(psp);
return ret;
}
static int psp_sw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@ -127,7 +166,7 @@ static int psp_sw_init(void *handle)
return ret;
}
ret = psp_mem_training_init(psp);
ret = psp_memory_training_init(psp);
if (ret) {
DRM_ERROR("Failed to initialize memory training!\n");
return ret;
@ -152,15 +191,13 @@ static int psp_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
psp_mem_training_fini(&adev->psp);
psp_memory_training_fini(&adev->psp);
release_firmware(adev->psp.sos_fw);
adev->psp.sos_fw = NULL;
release_firmware(adev->psp.asd_fw);
adev->psp.asd_fw = NULL;
if (adev->psp.ta_fw) {
release_firmware(adev->psp.ta_fw);
adev->psp.ta_fw = NULL;
}
release_firmware(adev->psp.ta_fw);
adev->psp.ta_fw = NULL;
if (adev->asic_type == CHIP_NAVI10)
psp_sysfs_fini(adev);
@ -231,8 +268,9 @@ psp_cmd_submit_buf(struct psp_context *psp,
amdgpu_asic_invalidate_hdp(psp->adev, NULL);
}
/* We allow TEE_ERROR_NOT_SUPPORTED for VMR command in SRIOV */
skip_unsupport = (psp->cmd_buf_mem->resp.status == 0xffff000a) && amdgpu_sriov_vf(psp->adev);
/* We allow TEE_ERROR_NOT_SUPPORTED for VMR command and PSP_ERR_UNKNOWN_COMMAND in SRIOV */
skip_unsupport = (psp->cmd_buf_mem->resp.status == TEE_ERROR_NOT_SUPPORTED ||
psp->cmd_buf_mem->resp.status == PSP_ERR_UNKNOWN_COMMAND) && amdgpu_sriov_vf(psp->adev);
/* In some cases, psp response status is not 0 even there is no
* problem while the command is submitted. Some version of PSP FW
@ -350,6 +388,26 @@ static int psp_tmr_init(struct psp_context *psp)
return ret;
}
static int psp_clear_vf_fw(struct psp_context *psp)
{
int ret;
struct psp_gfx_cmd_resp *cmd;
if (!amdgpu_sriov_vf(psp->adev) || psp->adev->asic_type != CHIP_NAVI12)
return 0;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
cmd->cmd_id = GFX_CMD_ID_CLEAR_VF_FW;
ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
kfree(cmd);
return ret;
}
static int psp_tmr_load(struct psp_context *psp)
{
int ret;
@ -394,7 +452,7 @@ static int psp_asd_load(struct psp_context *psp)
* add workaround to bypass it for sriov now.
* TODO: add version check to make it common
*/
if (amdgpu_sriov_vf(psp->adev))
if (amdgpu_sriov_vf(psp->adev) || (psp->adev->asic_type == CHIP_SIENNA_CICHLID))
return 0;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
@ -523,7 +581,7 @@ static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
}
int psp_ta_invoke(struct psp_context *psp,
static int psp_ta_invoke(struct psp_context *psp,
uint32_t ta_cmd_id,
uint32_t session_id)
{
@ -1316,6 +1374,14 @@ static int psp_hw_start(struct psp_context *psp)
}
}
if (psp->spl_bin_size) {
ret = psp_bootloader_load_spl(psp);
if (ret) {
DRM_ERROR("PSP load spl failed!\n");
return ret;
}
}
ret = psp_bootloader_load_sysdrv(psp);
if (ret) {
DRM_ERROR("PSP load sysdrv failed!\n");
@ -1335,6 +1401,12 @@ static int psp_hw_start(struct psp_context *psp)
return ret;
}
ret = psp_clear_vf_fw(psp);
if (ret) {
DRM_ERROR("PSP clear vf fw!\n");
return ret;
}
ret = psp_tmr_init(psp);
if (ret) {
DRM_ERROR("PSP tmr init failed!\n");
@ -1389,6 +1461,12 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_SDMA7:
*type = GFX_FW_TYPE_SDMA7;
break;
case AMDGPU_UCODE_ID_CP_MES:
*type = GFX_FW_TYPE_CP_MES;
break;
case AMDGPU_UCODE_ID_CP_MES_DATA:
*type = GFX_FW_TYPE_MES_STACK;
break;
case AMDGPU_UCODE_ID_CP_CE:
*type = GFX_FW_TYPE_CP_CE;
break;
@ -1638,6 +1716,15 @@ static int psp_np_fw_load(struct psp_context *psp)
if (fw_load_skip_check(psp, ucode))
continue;
if (psp->autoload_supported &&
adev->asic_type == CHIP_SIENNA_CICHLID &&
(ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 ||
ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 ||
ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3))
/* PSP only receives one SDMA fw for sienna_cichlid,
* as all four sdma fw are the same */
continue;
psp_print_fw_hdr(psp, ucode);
ret = psp_execute_np_fw_load(psp, ucode);
@ -1781,6 +1868,7 @@ static int psp_hw_fini(void *handle)
struct psp_context *psp = &adev->psp;
void *tmr_buf;
void **pptr;
int ret;
if (psp->adev->psp.ta_fw) {
psp_ras_terminate(psp);
@ -1789,6 +1877,11 @@ static int psp_hw_fini(void *handle)
}
psp_asd_unload(psp);
ret = psp_clear_vf_fw(psp);
if (ret) {
DRM_ERROR("PSP clear vf fw!\n");
return ret;
}
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
@ -2054,6 +2147,7 @@ int psp_init_sos_microcode(struct psp_context *psp,
const struct psp_firmware_header_v1_0 *sos_hdr;
const struct psp_firmware_header_v1_1 *sos_hdr_v1_1;
const struct psp_firmware_header_v1_2 *sos_hdr_v1_2;
const struct psp_firmware_header_v1_3 *sos_hdr_v1_3;
int err = 0;
if (!chip_name) {
@ -2098,6 +2192,18 @@ int psp_init_sos_microcode(struct psp_context *psp,
adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr +
le32_to_cpu(sos_hdr_v1_2->kdb_offset_bytes);
}
if (sos_hdr->header.header_version_minor == 3) {
sos_hdr_v1_3 = (const struct psp_firmware_header_v1_3 *)adev->psp.sos_fw->data;
adev->psp.toc_bin_size = le32_to_cpu(sos_hdr_v1_3->v1_1.toc_size_bytes);
adev->psp.toc_start_addr = (uint8_t *)adev->psp.sys_start_addr +
le32_to_cpu(sos_hdr_v1_3->v1_1.toc_offset_bytes);
adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_3->v1_1.kdb_size_bytes);
adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr +
le32_to_cpu(sos_hdr_v1_3->v1_1.kdb_offset_bytes);
adev->psp.spl_bin_size = le32_to_cpu(sos_hdr_v1_3->spl_size_bytes);
adev->psp.spl_start_addr = (uint8_t *)adev->psp.sys_start_addr +
le32_to_cpu(sos_hdr_v1_3->spl_offset_bytes);
}
break;
default:
dev_err(adev->dev,

View file

@ -50,6 +50,7 @@ enum psp_bootloader_cmd {
PSP_BL__LOAD_KEY_DATABASE = 0x80000,
PSP_BL__DRAM_LONG_TRAIN = 0x100000,
PSP_BL__DRAM_SHORT_TRAIN = 0x200000,
PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000,
};
enum psp_ring_type
@ -84,6 +85,7 @@ struct psp_funcs
{
int (*init_microcode)(struct psp_context *psp);
int (*bootloader_load_kdb)(struct psp_context *psp);
int (*bootloader_load_spl)(struct psp_context *psp);
int (*bootloader_load_sysdrv)(struct psp_context *psp);
int (*bootloader_load_sos)(struct psp_context *psp);
int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type);
@ -95,8 +97,6 @@ struct psp_funcs
enum psp_ring_type ring_type);
bool (*smu_reload_quirk)(struct psp_context *psp);
int (*mode1_reset)(struct psp_context *psp);
int (*mem_training_init)(struct psp_context *psp);
void (*mem_training_fini)(struct psp_context *psp);
int (*mem_training)(struct psp_context *psp, uint32_t ops);
uint32_t (*ring_get_wptr)(struct psp_context *psp);
void (*ring_set_wptr)(struct psp_context *psp, uint32_t value);
@ -224,10 +224,12 @@ struct psp_context
uint32_t sos_bin_size;
uint32_t toc_bin_size;
uint32_t kdb_bin_size;
uint32_t spl_bin_size;
uint8_t *sys_start_addr;
uint8_t *sos_start_addr;
uint8_t *toc_start_addr;
uint8_t *kdb_start_addr;
uint8_t *spl_start_addr;
/* tmr buffer */
struct amdgpu_bo *tmr_bo;
@ -298,6 +300,8 @@ struct amdgpu_psp_funcs {
((psp)->funcs->init_microcode ? (psp)->funcs->init_microcode((psp)) : 0)
#define psp_bootloader_load_kdb(psp) \
((psp)->funcs->bootloader_load_kdb ? (psp)->funcs->bootloader_load_kdb((psp)) : 0)
#define psp_bootloader_load_spl(psp) \
((psp)->funcs->bootloader_load_spl ? (psp)->funcs->bootloader_load_spl((psp)) : 0)
#define psp_bootloader_load_sysdrv(psp) \
((psp)->funcs->bootloader_load_sysdrv ? (psp)->funcs->bootloader_load_sysdrv((psp)) : 0)
#define psp_bootloader_load_sos(psp) \
@ -306,10 +310,6 @@ struct amdgpu_psp_funcs {
((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false)
#define psp_mode1_reset(psp) \
((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false)
#define psp_mem_training_init(psp) \
((psp)->funcs->mem_training_init ? (psp)->funcs->mem_training_init((psp)) : 0)
#define psp_mem_training_fini(psp) \
((psp)->funcs->mem_training_fini ? (psp)->funcs->mem_training_fini((psp)) : 0)
#define psp_mem_training(psp, ops) \
((psp)->funcs->mem_training ? (psp)->funcs->mem_training((psp), (ops)) : 0)

View file

@ -86,7 +86,7 @@ void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready)
amdgpu_ras_get_context(adev)->error_query_ready = ready;
}
bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev)
static bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev)
{
if (adev && amdgpu_ras_get_context(adev))
return amdgpu_ras_get_context(adev)->error_query_ready;
@ -318,6 +318,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
case 2:
if ((data.inject.address >= adev->gmc.mc_vram_size) ||
(data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
dev_warn(adev->dev, "RAS WARN: input address "
"0x%llx is invalid.",
data.inject.address);
ret = -EINVAL;
break;
}
@ -502,7 +505,7 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
}
/* obj end */
void amdgpu_ras_parse_status_code(struct amdgpu_device* adev,
static void amdgpu_ras_parse_status_code(struct amdgpu_device *adev,
const char* invoke_type,
const char* block_name,
enum ta_ras_status ret)
@ -812,7 +815,7 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
}
/* Trigger XGMI/WAFL error */
int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
struct ta_ras_trigger_error_input *block_info)
{
int ret;
@ -1914,9 +1917,8 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
amdgpu_ras_check_supported(adev, &con->hw_supported,
&con->supported);
if (!con->hw_supported) {
amdgpu_ras_set_context(adev, NULL);
kfree(con);
return 0;
r = 0;
goto err_out;
}
con->features = 0;
@ -1927,29 +1929,29 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
if (adev->nbio.funcs->init_ras_controller_interrupt) {
r = adev->nbio.funcs->init_ras_controller_interrupt(adev);
if (r)
return r;
goto err_out;
}
if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) {
r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev);
if (r)
return r;
goto err_out;
}
amdgpu_ras_mask &= AMDGPU_RAS_BLOCK_MASK;
if (amdgpu_ras_fs_init(adev))
goto fs_out;
if (amdgpu_ras_fs_init(adev)) {
r = -EINVAL;
goto err_out;
}
dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
"hardware ability[%x] ras_mask[%x]\n",
con->hw_supported, con->supported);
return 0;
fs_out:
err_out:
amdgpu_ras_set_context(adev, NULL);
kfree(con);
return -EINVAL;
return r;
}
/* helper function to handle common stuff in ip late init phase */

View file

@ -62,7 +62,8 @@ enum amdgpu_ring_type {
AMDGPU_RING_TYPE_VCN_DEC = AMDGPU_HW_IP_VCN_DEC,
AMDGPU_RING_TYPE_VCN_ENC = AMDGPU_HW_IP_VCN_ENC,
AMDGPU_RING_TYPE_VCN_JPEG = AMDGPU_HW_IP_VCN_JPEG,
AMDGPU_RING_TYPE_KIQ
AMDGPU_RING_TYPE_KIQ,
AMDGPU_RING_TYPE_MES
};
enum amdgpu_ib_pool_type {

View file

@ -35,7 +35,6 @@
struct amdgpu_sync_entry {
struct hlist_node node;
struct dma_fence *fence;
bool explicit;
};
static struct kmem_cache *amdgpu_sync_slab;
@ -129,8 +128,7 @@ static void amdgpu_sync_keep_later(struct dma_fence **keep,
* Tries to add the fence to an existing hash entry. Returns true when an entry
* was found, false otherwise.
*/
static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
bool explicit)
static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
{
struct amdgpu_sync_entry *e;
@ -139,10 +137,6 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
continue;
amdgpu_sync_keep_later(&e->fence, f);
/* Preserve explicit flag to not lose pipeline sync */
e->explicit |= explicit;
return true;
}
return false;
@ -153,27 +147,23 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
*
* @sync: sync object to add fence to
* @f: fence to sync to
* @explicit: if this is an explicit dependency
*
* Add the fence to the sync object.
*/
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
bool explicit)
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
{
struct amdgpu_sync_entry *e;
if (!f)
return 0;
if (amdgpu_sync_add_later(sync, f, explicit))
if (amdgpu_sync_add_later(sync, f))
return 0;
e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
if (!e)
return -ENOMEM;
e->explicit = explicit;
hash_add(sync->fences, &e->node, f->context);
e->fence = dma_fence_get(f);
return 0;
@ -194,7 +184,7 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
return 0;
amdgpu_sync_keep_later(&sync->last_vm_update, fence);
return amdgpu_sync_fence(sync, fence, false);
return amdgpu_sync_fence(sync, fence);
}
/**
@ -221,7 +211,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
/* always sync to the exclusive fence */
f = dma_resv_get_excl(resv);
r = amdgpu_sync_fence(sync, f, false);
r = amdgpu_sync_fence(sync, f);
flist = dma_resv_get_list(resv);
if (!flist || r)
@ -237,7 +227,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
/* Always sync to moves, no matter what */
if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) {
r = amdgpu_sync_fence(sync, f, false);
r = amdgpu_sync_fence(sync, f);
if (r)
break;
}
@ -275,7 +265,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
continue;
}
r = amdgpu_sync_fence(sync, f, false);
WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
"Adding eviction fence to sync obj");
r = amdgpu_sync_fence(sync, f);
if (r)
break;
}
@ -330,11 +322,10 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
* amdgpu_sync_get_fence - get the next fence from the sync object
*
* @sync: sync object to use
* @explicit: true if the next fence is explicit
*
* Get and removes the next fence from the sync object not signaled yet.
*/
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit)
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
{
struct amdgpu_sync_entry *e;
struct hlist_node *tmp;
@ -343,8 +334,6 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit
hash_for_each_safe(sync->fences, i, tmp, e, node) {
f = e->fence;
if (explicit)
*explicit = e->explicit;
hash_del(&e->node);
kmem_cache_free(amdgpu_sync_slab, e);
@ -376,7 +365,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
hash_for_each_safe(source->fences, i, tmp, e, node) {
f = e->fence;
if (!dma_fence_is_signaled(f)) {
r = amdgpu_sync_fence(clone, f, e->explicit);
r = amdgpu_sync_fence(clone, f);
if (r)
return r;
} else {

View file

@ -47,16 +47,14 @@ struct amdgpu_sync {
};
void amdgpu_sync_create(struct amdgpu_sync *sync);
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
bool explicit);
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence);
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_resv *resv, enum amdgpu_sync_mode mode,
void *owner);
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync,
bool *explicit);
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
void amdgpu_sync_free(struct amdgpu_sync *sync);

View file

@ -58,6 +58,7 @@
#include "amdgpu_amdkfd.h"
#include "amdgpu_sdma.h"
#include "amdgpu_ras.h"
#include "amdgpu_atomfirmware.h"
#include "bif/bif_4_1_d.h"
#define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128
@ -1110,7 +1111,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
#endif
}
int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
struct ttm_buffer_object *tbo,
uint64_t flags)
{
@ -1817,54 +1818,86 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
return 0;
}
static u64 amdgpu_ttm_training_get_c2p_offset(u64 vram_size)
static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
{
if ((vram_size & (SZ_1M - 1)) < (SZ_4K + 1) )
vram_size -= SZ_1M;
return ALIGN(vram_size, SZ_1M);
}
/**
* amdgpu_ttm_training_reserve_vram_init - create bo vram reservation from memory training
*
* @adev: amdgpu_device pointer
*
* create bo vram reservation from memory training.
*/
static int amdgpu_ttm_training_reserve_vram_init(struct amdgpu_device *adev)
{
int ret;
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
memset(ctx, 0, sizeof(*ctx));
if (!adev->fw_vram_usage.mem_train_support) {
DRM_DEBUG("memory training does not support!\n");
return 0;
ctx->c2p_train_data_offset =
ALIGN((adev->gmc.mc_vram_size - adev->discovery_tmr_size - SZ_1M), SZ_1M);
ctx->p2c_train_data_offset =
(adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
ctx->train_data_size =
GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
ctx->train_data_size,
ctx->p2c_train_data_offset,
ctx->c2p_train_data_offset);
}
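/*
 * Editorial aside, not part of this patch: a worked example of the offsets
 * computed above, with purely illustrative numbers. Assuming 8 GiB of VRAM
 * (0x2_0000_0000) and a 1 MiB discovery TMR, the c2p training block starts at
 * ALIGN(0x2_0000_0000 - 0x10_0000 - 0x10_0000, 1 MiB) = 0x1_FFE0_0000, i.e.
 * 1 MiB aligned and starting 1 MiB below the TMR reserved at the very top of
 * VRAM, while the p2c offset sits a fixed GDDR6_MEM_TRAINING_OFFSET below the
 * top of VRAM.
 */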
/*
* reserve TMR memory at the top of VRAM which holds
* IP Discovery data and is protected by PSP.
*/
static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
{
int ret;
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
bool mem_train_support = false;
if (!amdgpu_sriov_vf(adev)) {
ret = amdgpu_mem_train_support(adev);
if (ret == 1)
mem_train_support = true;
else if (ret == -1)
return -EINVAL;
else
DRM_DEBUG("memory training does not support!\n");
}
ctx->c2p_train_data_offset = amdgpu_ttm_training_get_c2p_offset(adev->gmc.mc_vram_size);
ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
ctx->train_data_size = GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
/*
* Query reserved tmr size through atom firmwareinfo for Sienna_Cichlid and onwards for all
* the use cases (IP discovery/G6 memory training/profiling/diagnostic data, etc.)
*
* Otherwise, fall back to the legacy approach to check and reserve a tmr block for IP
* discovery data and G6 memory training data respectively
*/
adev->discovery_tmr_size =
amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
if (!adev->discovery_tmr_size)
adev->discovery_tmr_size = DISCOVERY_TMR_SIZE;
DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
ctx->train_data_size,
ctx->p2c_train_data_offset,
ctx->c2p_train_data_offset);
ret = amdgpu_bo_create_kernel_at(adev,
if (mem_train_support) {
/* reserve vram for mem train according to TMR location */
amdgpu_ttm_training_data_block_init(adev);
ret = amdgpu_bo_create_kernel_at(adev,
ctx->c2p_train_data_offset,
ctx->train_data_size,
AMDGPU_GEM_DOMAIN_VRAM,
&ctx->c2p_bo,
NULL);
if (ret) {
DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
amdgpu_ttm_training_reserve_vram_fini(adev);
return ret;
}
ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
}
ret = amdgpu_bo_create_kernel_at(adev,
adev->gmc.real_vram_size - adev->discovery_tmr_size,
adev->discovery_tmr_size,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->discovery_memory,
NULL);
if (ret) {
DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
amdgpu_ttm_training_reserve_vram_fini(adev);
DRM_ERROR("alloc tmr failed(%d)!\n", ret);
amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL);
return ret;
}
ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
return 0;
}
@ -1932,11 +1965,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
}
/*
*The reserved vram for memory training must be pinned to the specified
*place on the VRAM, so reserve it early.
* Only NAVI10 and onwards ASICs support IP discovery.
* If IP discovery is enabled, a block of memory should be
* reserved for IP discovery.
*/
if (!amdgpu_sriov_vf(adev)) {
r = amdgpu_ttm_training_reserve_vram_init(adev);
if (adev->discovery_bin) {
r = amdgpu_ttm_reserve_tmr(adev);
if (r)
return r;
}
@ -1952,21 +1986,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
if (r)
return r;
/*
* reserve TMR memory at the top of VRAM which holds
* IP Discovery data and is protected by PSP.
*/
if (adev->discovery_tmr_size > 0) {
r = amdgpu_bo_create_kernel_at(adev,
adev->gmc.real_vram_size - adev->discovery_tmr_size,
adev->discovery_tmr_size,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->discovery_memory,
NULL);
if (r)
return r;
}
DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
(unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));

View file

@ -279,6 +279,30 @@ void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr)
DRM_DEBUG("kdb_size_bytes: %u\n",
le32_to_cpu(psp_hdr_v1_2->kdb_size_bytes));
}
if (version_minor == 3) {
const struct psp_firmware_header_v1_1 *psp_hdr_v1_1 =
container_of(psp_hdr, struct psp_firmware_header_v1_1, v1_0);
const struct psp_firmware_header_v1_3 *psp_hdr_v1_3 =
container_of(psp_hdr_v1_1, struct psp_firmware_header_v1_3, v1_1);
DRM_DEBUG("toc_header_version: %u\n",
le32_to_cpu(psp_hdr_v1_3->v1_1.toc_header_version));
DRM_DEBUG("toc_offset_bytes: %u\n",
le32_to_cpu(psp_hdr_v1_3->v1_1.toc_offset_bytes));
DRM_DEBUG("toc_size_bytes: %u\n",
le32_to_cpu(psp_hdr_v1_3->v1_1.toc_size_bytes));
DRM_DEBUG("kdb_header_version: %u\n",
le32_to_cpu(psp_hdr_v1_3->v1_1.kdb_header_version));
DRM_DEBUG("kdb_offset_bytes: %u\n",
le32_to_cpu(psp_hdr_v1_3->v1_1.kdb_offset_bytes));
DRM_DEBUG("kdb_size_bytes: %u\n",
le32_to_cpu(psp_hdr_v1_3->v1_1.kdb_size_bytes));
DRM_DEBUG("spl_header_version: %u\n",
le32_to_cpu(psp_hdr_v1_3->spl_header_version));
DRM_DEBUG("spl_offset_bytes: %u\n",
le32_to_cpu(psp_hdr_v1_3->spl_offset_bytes));
DRM_DEBUG("spl_size_bytes: %u\n",
le32_to_cpu(psp_hdr_v1_3->spl_size_bytes));
}
} else {
DRM_ERROR("Unknown PSP ucode version: %u.%u\n",
version_major, version_minor);
@ -365,6 +389,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
if (!load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
@ -448,6 +473,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
const struct gfx_firmware_header_v1_0 *cp_hdr = NULL;
const struct dmcu_firmware_header_v1_0 *dmcu_hdr = NULL;
const struct dmcub_firmware_header_v1_0 *dmcub_hdr = NULL;
const struct mes_firmware_header_v1_0 *mes_hdr = NULL;
if (NULL == ucode->fw)
return 0;
@ -462,12 +488,15 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
dmcu_hdr = (const struct dmcu_firmware_header_v1_0 *)ucode->fw->data;
dmcub_hdr = (const struct dmcub_firmware_header_v1_0 *)ucode->fw->data;
mes_hdr = (const struct mes_firmware_header_v1_0 *)ucode->fw->data;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP ||
(ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 &&
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT &&
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT &&
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MES &&
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MES_DATA &&
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL &&
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM &&
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM &&
@ -527,6 +556,16 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes;
memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_srm,
ucode->ucode_size);
} else if (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MES) {
ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
memcpy(ucode->kaddr, (void *)((uint8_t *)adev->mes.fw->data +
le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)),
ucode->ucode_size);
} else if (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MES_DATA) {
ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
memcpy(ucode->kaddr, (void *)((uint8_t *)adev->mes.fw->data +
le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)),
ucode->ucode_size);
}
return 0;

View file

@ -99,6 +99,14 @@ struct psp_firmware_header_v1_2 {
uint32_t kdb_size_bytes;
};
/* version_major=1, version_minor=3 */
struct psp_firmware_header_v1_3 {
struct psp_firmware_header_v1_1 v1_1;
uint32_t spl_header_version;
uint32_t spl_offset_bytes;
uint32_t spl_size_bytes;
};
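Each minor revision of the PSP header embeds the previous one as its first member, so code holding only a pointer to the inner v1_0 header can walk back out to the newer layouts with container_of(), exactly as the version_minor == 3 branch of amdgpu_ucode_print_psp_hdr() does above. A minimal sketch of that walk, not part of the patch (the helper name is made up and it assumes the kernel headers already pulled in by amdgpu_ucode.c):

static void psp_hdr_show_spl(const struct psp_firmware_header_v1_0 *psp_hdr)
{
	const struct psp_firmware_header_v1_1 *v1_1 =
		container_of(psp_hdr, struct psp_firmware_header_v1_1, v1_0);
	const struct psp_firmware_header_v1_3 *v1_3 =
		container_of(v1_1, struct psp_firmware_header_v1_3, v1_1);

	/* the SPL fields are only meaningful when the header minor version is 3 */
	DRM_DEBUG("spl_offset_bytes: %u\n", le32_to_cpu(v1_3->spl_offset_bytes));
	DRM_DEBUG("spl_size_bytes: %u\n", le32_to_cpu(v1_3->spl_size_bytes));
}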
/* version_major=1, version_minor=0 */
struct ta_firmware_header_v1_0 {
struct common_firmware_header header;
@ -266,6 +274,7 @@ union amdgpu_firmware_header {
struct smc_firmware_header_v2_0 smc_v2_0;
struct psp_firmware_header_v1_0 psp;
struct psp_firmware_header_v1_1 psp_v1_1;
struct psp_firmware_header_v1_3 psp_v1_3;
struct ta_firmware_header_v1_0 ta;
struct gfx_firmware_header_v1_0 gfx;
struct rlc_firmware_header_v1_0 rlc;

View file

@ -54,6 +54,12 @@
#define FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
/* Firmware Names */
#ifdef CONFIG_DRM_AMDGPU_SI
#define FIRMWARE_TAHITI "amdgpu/tahiti_uvd.bin"
#define FIRMWARE_VERDE "amdgpu/verde_uvd.bin"
#define FIRMWARE_PITCAIRN "amdgpu/pitcairn_uvd.bin"
#define FIRMWARE_OLAND "amdgpu/oland_uvd.bin"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#define FIRMWARE_BONAIRE "amdgpu/bonaire_uvd.bin"
#define FIRMWARE_KABINI "amdgpu/kabini_uvd.bin"
@ -100,6 +106,12 @@ struct amdgpu_uvd_cs_ctx {
unsigned *buf_sizes;
};
#ifdef CONFIG_DRM_AMDGPU_SI
MODULE_FIRMWARE(FIRMWARE_TAHITI);
MODULE_FIRMWARE(FIRMWARE_VERDE);
MODULE_FIRMWARE(FIRMWARE_PITCAIRN);
MODULE_FIRMWARE(FIRMWARE_OLAND);
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
MODULE_FIRMWARE(FIRMWARE_KABINI);
@ -133,6 +145,20 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
case CHIP_TAHITI:
fw_name = FIRMWARE_TAHITI;
break;
case CHIP_VERDE:
fw_name = FIRMWARE_VERDE;
break;
case CHIP_PITCAIRN:
fw_name = FIRMWARE_PITCAIRN;
break;
case CHIP_OLAND:
fw_name = FIRMWARE_OLAND;
break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_BONAIRE:
fw_name = FIRMWARE_BONAIRE;

View file

@ -42,6 +42,7 @@
#define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin"
#define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin"
#define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin"
#define FIRMWARE_SIENNA_CICHLID "amdgpu/sienna_cichlid_vcn.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
@ -51,6 +52,7 @@ MODULE_FIRMWARE(FIRMWARE_RENOIR);
MODULE_FIRMWARE(FIRMWARE_NAVI10);
MODULE_FIRMWARE(FIRMWARE_NAVI14);
MODULE_FIRMWARE(FIRMWARE_NAVI12);
MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
@ -107,6 +109,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
adev->vcn.indirect_sram = true;
break;
case CHIP_SIENNA_CICHLID:
fw_name = FIRMWARE_SIENNA_CICHLID;
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
adev->vcn.indirect_sram = true;
break;
default:
return -EINVAL;
}

View file

@ -53,7 +53,9 @@
#define VCN_ENC_CMD_REG_WAIT 0x0000000c
#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48200
#define VCN_AON_SOC_ADDRESS_2_0 0x1f800
#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
#define VCN_VID_IP_ADDRESS_2_0 0x0
#define VCN_AON_IP_ADDRESS_2_0 0x30000
@ -65,7 +67,7 @@
/* 1 second timeout */
#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000)
#define RREG32_SOC15_DPG_MODE(ip, inst_idx, reg, mask, sram_sel) \
#define RREG32_SOC15_DPG_MODE_1_0(ip, inst_idx, reg, mask, sram_sel) \
({ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \
WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \
UVD_DPG_LMA_CTL__MASK_EN_MASK | \
@ -75,7 +77,7 @@
RREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA); \
})
#define WREG32_SOC15_DPG_MODE(ip, inst_idx, reg, value, mask, sram_sel) \
#define WREG32_SOC15_DPG_MODE_1_0(ip, inst_idx, reg, value, mask, sram_sel) \
do { \
WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA, value); \
WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \
@ -86,30 +88,40 @@
(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
} while (0)
#define SOC15_DPG_MODE_OFFSET_2_0(ip, inst_idx, reg) \
#define SOC15_DPG_MODE_OFFSET(ip, inst_idx, reg) \
({ \
uint32_t internal_reg_offset, addr; \
bool video_range, aon_range; \
bool video_range, video1_range, aon_range, aon1_range; \
\
addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \
addr <<= 2; \
video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS_2_0)) && \
((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS_2_0 + 0x2600))))); \
video1_range = ((((0xFFFFF & addr) >= (VCN1_VID_SOC_ADDRESS_3_0)) && \
((0xFFFFF & addr) < ((VCN1_VID_SOC_ADDRESS_3_0 + 0x2600))))); \
aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS_2_0)) && \
((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS_2_0 + 0x600))))); \
aon1_range = ((((0xFFFFF & addr) >= (VCN1_AON_SOC_ADDRESS_3_0)) && \
((0xFFFFF & addr) < ((VCN1_AON_SOC_ADDRESS_3_0 + 0x600))))); \
if (video_range) \
internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS_2_0) + \
(VCN_VID_IP_ADDRESS_2_0)); \
else if (aon_range) \
internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS_2_0) + \
(VCN_AON_IP_ADDRESS_2_0)); \
else if (video1_range) \
internal_reg_offset = ((0xFFFFF & addr) - (VCN1_VID_SOC_ADDRESS_3_0) + \
(VCN_VID_IP_ADDRESS_2_0)); \
else if (aon1_range) \
internal_reg_offset = ((0xFFFFF & addr) - (VCN1_AON_SOC_ADDRESS_3_0) + \
(VCN_AON_IP_ADDRESS_2_0)); \
else \
internal_reg_offset = (0xFFFFF & addr); \
\
internal_reg_offset >>= 2; \
})
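The renamed SOC15_DPG_MODE_OFFSET() macro above now recognizes two extra windows, so registers living in the VCN1 apertures of VCN 3.0 parts fold onto the same internal video/AON IP addresses as the 2.0 windows; for example a byte address of 0x48210 falls in the VCN1 video window and becomes internal dword offset (0x48210 - 0x48200 + 0x0) >> 2 = 0x4. The same translation written as a plain helper, purely illustrative and not part of the patch (the function name is made up; the constants are the ones defined above):

static inline uint32_t vcn_dpg_internal_offset(uint32_t byte_addr)
{
	uint32_t a = byte_addr & 0xFFFFF;
	uint32_t internal;

	if (a >= VCN_VID_SOC_ADDRESS_2_0 && a < VCN_VID_SOC_ADDRESS_2_0 + 0x2600)
		internal = a - VCN_VID_SOC_ADDRESS_2_0 + VCN_VID_IP_ADDRESS_2_0;
	else if (a >= VCN_AON_SOC_ADDRESS_2_0 && a < VCN_AON_SOC_ADDRESS_2_0 + 0x600)
		internal = a - VCN_AON_SOC_ADDRESS_2_0 + VCN_AON_IP_ADDRESS_2_0;
	else if (a >= VCN1_VID_SOC_ADDRESS_3_0 && a < VCN1_VID_SOC_ADDRESS_3_0 + 0x2600)
		internal = a - VCN1_VID_SOC_ADDRESS_3_0 + VCN_VID_IP_ADDRESS_2_0;
	else if (a >= VCN1_AON_SOC_ADDRESS_3_0 && a < VCN1_AON_SOC_ADDRESS_3_0 + 0x600)
		internal = a - VCN1_AON_SOC_ADDRESS_3_0 + VCN_AON_IP_ADDRESS_2_0;
	else
		internal = a;

	return internal >> 2;	/* back to a dword register offset */
}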
#define RREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, mask_en) \
#define RREG32_SOC15_DPG_MODE(inst_idx, offset, mask_en) \
({ \
WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \
(0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
@ -118,7 +130,7 @@
RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \
})
#define WREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, value, mask_en, indirect) \
#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \
do { \
if (!indirect) { \
WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); \
@ -142,6 +154,7 @@ enum fw_queue_mode {
enum engine_status_constants {
UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0 = 0xAAAA0,
UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0 = 0x2A2A8AA0,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
UVD_STATUS__UVD_BUSY = 0x00000004,
GB_ADDR_CONFIG_DEFAULT = 0x26010011,

View file

@ -26,6 +26,7 @@
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_ras.h"
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
{
@ -255,12 +256,171 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj,
return ret;
}
static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
{
struct amdgpu_virt *virt = &adev->virt;
struct amdgpu_virt_ras_err_handler_data **data = &virt->virt_eh_data;
/* GPU will be marked bad on host if bp count is more than 10,
 * so allocating 512 entries is enough.
*/
unsigned int align_space = 512;
void *bps = NULL;
struct amdgpu_bo **bps_bo = NULL;
*data = kmalloc(sizeof(struct amdgpu_virt_ras_err_handler_data), GFP_KERNEL);
if (!*data)
return -ENOMEM;
bps = kmalloc(align_space * sizeof((*data)->bps), GFP_KERNEL);
bps_bo = kmalloc(align_space * sizeof((*data)->bps_bo), GFP_KERNEL);
if (!bps || !bps_bo) {
kfree(bps);
kfree(bps_bo);
kfree(*data);
return -ENOMEM;
}
(*data)->bps = bps;
(*data)->bps_bo = bps_bo;
(*data)->count = 0;
(*data)->last_reserved = 0;
virt->ras_init_done = true;
return 0;
}
static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev)
{
struct amdgpu_virt *virt = &adev->virt;
struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
struct amdgpu_bo *bo;
int i;
if (!data)
return;
for (i = data->last_reserved - 1; i >= 0; i--) {
bo = data->bps_bo[i];
amdgpu_bo_free_kernel(&bo, NULL, NULL);
data->bps_bo[i] = bo;
data->last_reserved = i;
}
}
void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev)
{
struct amdgpu_virt *virt = &adev->virt;
struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
virt->ras_init_done = false;
if (!data)
return;
amdgpu_virt_ras_release_bp(adev);
kfree(data->bps);
kfree(data->bps_bo);
kfree(data);
virt->virt_eh_data = NULL;
}
static void amdgpu_virt_ras_add_bps(struct amdgpu_device *adev,
struct eeprom_table_record *bps, int pages)
{
struct amdgpu_virt *virt = &adev->virt;
struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
if (!data)
return;
memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
data->count += pages;
}
static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
{
struct amdgpu_virt *virt = &adev->virt;
struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
struct amdgpu_bo *bo = NULL;
uint64_t bp;
int i;
if (!data)
return;
for (i = data->last_reserved; i < data->count; i++) {
bp = data->bps[i].retired_page;
/* There are two cases where a reservation error should be ignored:
* 1) a ras bad page has been allocated (used by someone);
* 2) a ras bad page has been reserved (duplicate error injection
* for one page);
*/
if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
AMDGPU_GPU_PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&bo, NULL))
DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
data->bps_bo[i] = bo;
data->last_reserved = i + 1;
bo = NULL;
}
}
static bool amdgpu_virt_ras_check_bad_page(struct amdgpu_device *adev,
uint64_t retired_page)
{
struct amdgpu_virt *virt = &adev->virt;
struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
int i;
if (!data)
return true;
for (i = 0; i < data->count; i++)
if (retired_page == data->bps[i].retired_page)
return true;
return false;
}
static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev,
uint64_t bp_block_offset, uint32_t bp_block_size)
{
struct eeprom_table_record bp;
uint64_t retired_page;
uint32_t bp_idx, bp_cnt;
if (bp_block_size) {
bp_cnt = bp_block_size / sizeof(uint64_t);
for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
retired_page = *(uint64_t *)(adev->fw_vram_usage.va +
bp_block_offset + bp_idx * sizeof(uint64_t));
bp.retired_page = retired_page;
if (amdgpu_virt_ras_check_bad_page(adev, retired_page))
continue;
amdgpu_virt_ras_add_bps(adev, &bp, 1);
amdgpu_virt_ras_reserve_bps(adev);
}
}
}
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
{
uint32_t pf2vf_size = 0;
uint32_t checksum = 0;
uint32_t checkval;
char *str;
uint64_t bp_block_offset = 0;
uint32_t bp_block_size = 0;
struct amdgim_pf2vf_info_v2 *pf2vf_v2 = NULL;
adev->virt.fw_reserve.p_pf2vf = NULL;
adev->virt.fw_reserve.p_vf2pf = NULL;
@ -275,6 +435,19 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
/* the pf2vf message must fit within 4K */
if (pf2vf_size > 0 && pf2vf_size < 4096) {
if (adev->virt.fw_reserve.p_pf2vf->version == 2) {
pf2vf_v2 = (struct amdgim_pf2vf_info_v2 *)adev->virt.fw_reserve.p_pf2vf;
bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_L & 0xFFFFFFFF) |
((((uint64_t)pf2vf_v2->bp_block_offset_H) << 32) & 0xFFFFFFFF00000000);
bp_block_size = pf2vf_v2->bp_block_size;
if (bp_block_size && !adev->virt.ras_init_done)
amdgpu_virt_init_ras_err_handler_data(adev);
if (adev->virt.ras_init_done)
amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
}
checkval = amdgpu_virt_fw_reserve_get_checksum(
adev->virt.fw_reserve.p_pf2vf, pf2vf_size,
adev->virt.fw_reserve.checksum_key, checksum);
@ -321,6 +494,7 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
case CHIP_VEGA20:
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
case CHIP_ARCTURUS:
reg = RREG32(mmRCC_IOV_FUNC_IDENTIFIER);
break;
@ -341,12 +515,12 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
}
}
bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev)
static bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev)
{
return amdgpu_sriov_is_debug(adev) ? true : false;
}
bool amdgpu_virt_access_debugfs_is_kiq(struct amdgpu_device *adev)
static bool amdgpu_virt_access_debugfs_is_kiq(struct amdgpu_device *adev)
{
return amdgpu_sriov_is_normal(adev) ? true : false;
}

View file

@ -143,19 +143,27 @@ struct amdgim_pf2vf_info_v2 {
uint32_t vce_enc_max_pixels_count;
/* 16x16 pixels/sec, codec independent */
uint32_t vce_enc_max_bandwidth;
/* Bad pages block position in bytes */
uint32_t bp_block_offset_L;
uint32_t bp_block_offset_H;
/* Bad pages block size in bytes */
uint32_t bp_block_size;
/* MEC FW position in kb from the start of VF visible frame buffer */
uint64_t mecfw_kboffset;
uint32_t mecfw_kboffset_L;
uint32_t mecfw_kboffset_H;
/* MEC FW size in KB */
uint32_t mecfw_ksize;
/* UVD FW position in kb from the start of VF visible frame buffer */
uint64_t uvdfw_kboffset;
uint32_t uvdfw_kboffset_L;
uint32_t uvdfw_kboffset_H;
/* UVD FW size in KB */
uint32_t uvdfw_ksize;
/* VCE FW position in kb from the start of VF visible frame buffer */
uint64_t vcefw_kboffset;
uint32_t vcefw_kboffset_L;
uint32_t vcefw_kboffset_H;
/* VCE FW size in KB */
uint32_t vcefw_ksize;
uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (9 + sizeof(struct amd_sriov_msg_pf2vf_info_header)/sizeof(uint32_t)), 3)];
uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (18 + sizeof(struct amd_sriov_msg_pf2vf_info_header)/sizeof(uint32_t)), 0)];
} __aligned(4);
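The uint64_t fields of the v2 layout are split into explicit _L/_H pairs so the structure keeps plain 4-byte alignment and an identical layout on host and guest, and the three new bad-page dwords come out of reserved[], which is why its count moves from (9 u32 + 3 u64, i.e. 15 dwords) to 18 u32 dwords, assuming AMDGIM_GET_STRUCTURE_RESERVED_SIZE simply subtracts those field counts from the fixed 256-dword total. A consumer rebuilds the 64-bit values the way amdgpu_virt_init_data_exchange() does for bp_block_offset; an illustrative helper (the name is made up):

static inline uint64_t amdgim_get_u64(uint32_t lo, uint32_t hi)
{
	return (uint64_t)lo | ((uint64_t)hi << 32);
}

/* e.g. bp_block_offset = amdgim_get_u64(pf2vf_v2->bp_block_offset_L,
 *				       pf2vf_v2->bp_block_offset_H); */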
@ -254,6 +262,17 @@ typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info ;
} \
} while (0)
struct amdgpu_virt_ras_err_handler_data {
/* points to the bad page records array */
struct eeprom_table_record *bps;
/* points to the reserved bo array */
struct amdgpu_bo **bps_bo;
/* the count of entries */
int count;
/* last reserved entry's index + 1 */
int last_reserved;
};
/* GPU virtualization */
struct amdgpu_virt {
uint32_t caps;
@ -272,6 +291,8 @@ struct amdgpu_virt {
uint32_t reg_access_mode;
int req_init_data_ver;
bool tdr_debug;
struct amdgpu_virt_ras_err_handler_data *virt_eh_data;
bool ras_init_done;
};
#define amdgpu_sriov_enabled(adev) \
@ -323,6 +344,7 @@ void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
unsigned int key,
unsigned int chksum);
void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev);
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
void amdgpu_detect_virtualization(struct amdgpu_device *adev);

View file

@ -2208,7 +2208,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
/* make sure object fit at this offset */
eaddr = saddr + size - 1;
if (saddr >= eaddr ||
(bo && offset + size > amdgpu_bo_size(bo)))
(bo && offset + size > amdgpu_bo_size(bo)) ||
(eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
return -EINVAL;
saddr /= AMDGPU_GPU_PAGE_SIZE;
@ -2273,7 +2274,8 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
/* make sure object fit at this offset */
eaddr = saddr + size - 1;
if (saddr >= eaddr ||
(bo && offset + size > amdgpu_bo_size(bo)))
(bo && offset + size > amdgpu_bo_size(bo)) ||
(eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
return -EINVAL;
/* Allocate all the needed memory */
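A worked example of the bound added to both mapping paths above, with made-up numbers: assuming max_pfn = 0x10000000 and the usual 4 KiB GPU page (AMDGPU_GPU_PAGE_SHIFT = 12), the manageable range ends at the 1 TiB mark, and a request whose last byte lands at or past it is now rejected up front instead of being handed to the page-table updater with an out-of-range PFN:

	limit = max_pfn << AMDGPU_GPU_PAGE_SHIFT = 0x10000000 << 12 = 0x10000000000
	saddr = 0xFFFFFFF000, size = 0x2000
	eaddr = saddr + size - 1 = 0x10000000FFF  >= limit  ->  -EINVAL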

View file

@ -208,7 +208,7 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
int r;
/* Wait for PD/PT moves to be completed */
r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving, false);
r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving);
if (r)
return r;

View file

@ -407,8 +407,6 @@ error:
* amdgpu_vram_mgr_del - free ranges
*
* @man: TTM memory type manager
* @tbo: TTM BO we need this range for
* @place: placement flags and restrictions
* @mem: TTM memory object
*
* Free the allocated VRAM again.

View file

@ -144,11 +144,6 @@ static const struct amdgpu_pcs_ras_field wafl_pcs_ras_fields[] = {
SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)},
};
void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive)
{
return &hive->device_list;
}
/**
* DOC: AMDGPU XGMI Support
*

View file

@ -0,0 +1,100 @@
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "athub_v2_1.h"
#include "athub/athub_2_1_0_offset.h"
#include "athub/athub_2_1_0_sh_mask.h"
#include "navi10_enum.h"
#include "soc15_common.h"
static void
athub_v2_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
uint32_t def, data;
def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
else
data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
if (def != data)
WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
}
static void
athub_v2_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
bool enable)
{
uint32_t def, data;
def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
(adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
else
data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
if (def != data)
WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
}
int athub_v2_1_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state)
{
if (amdgpu_sriov_vf(adev))
return 0;
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
athub_v2_1_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
athub_v2_1_update_medium_grain_light_sleep(adev,
state == AMD_CG_STATE_GATE ? true : false);
break;
default:
break;
}
return 0;
}
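As with the existing athub_v2_0 helpers, these entry points are meant to be driven from the SoC-level common IP block's clockgating callback; a hypothetical caller (the function name and surrounding switch are illustrative only, not part of this patch) would look roughly like:

static int soc_common_set_clockgating_state(void *handle,
					    enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_SIENNA_CICHLID:
		athub_v2_1_set_clockgating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}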
void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags)
{
int data;
/* AMD_CG_SUPPORT_ATHUB_MGCG */
data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
*flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
/* AMD_CG_SUPPORT_ATHUB_LS */
if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK)
*flags |= AMD_CG_SUPPORT_ATHUB_LS;
}

View file

@ -0,0 +1,30 @@
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __ATHUB_V2_1_H__
#define __ATHUB_V2_1_H__
int athub_v2_1_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state);
void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags);
#endif

View file

@ -2342,7 +2342,7 @@ static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc)
/* XXX need to determine what plls are available on each DCE11 part */
pll_in_use = amdgpu_pll_get_use_mask(crtc);
if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY) {
if (adev->flags & AMD_IS_APU) {
if (!(pll_in_use & (1 << ATOM_PPLL1)))
return ATOM_PPLL1;
if (!(pll_in_use & (1 << ATOM_PPLL0)))

View file

@ -560,7 +560,7 @@ static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev,
uint64_t config)
{
uint32_t lo_base_addr, hi_base_addr;
uint32_t lo_base_addr = 0, hi_base_addr = 0;
df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
&hi_base_addr);

View file

@ -55,6 +55,7 @@
* 2. Async ring
*/
#define GFX10_NUM_GFX_RINGS_NV1X 1
#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 1
#define GFX10_MEC_HPD_SIZE 2048
#define F32_CE_PROGRAM_RAM_SIZE 65536
@ -62,6 +63,54 @@
#define mmCGTT_GS_NGG_CLK_CTRL 0x5087
#define mmCGTT_GS_NGG_CLK_CTRL_BASE_IDX 1
#define mmCGTT_SPI_RA0_CLK_CTRL 0x507a
#define mmCGTT_SPI_RA0_CLK_CTRL_BASE_IDX 1
#define mmCGTT_SPI_RA1_CLK_CTRL 0x507b
#define mmCGTT_SPI_RA1_CLK_CTRL_BASE_IDX 1
#define GB_ADDR_CONFIG__NUM_PKRS__SHIFT 0x8
#define GB_ADDR_CONFIG__NUM_PKRS_MASK 0x00000700L
#define mmCP_MEC_CNTL_Sienna_Cichlid 0x0f55
#define mmCP_MEC_CNTL_Sienna_Cichlid_BASE_IDX 0
#define mmRLC_SAFE_MODE_Sienna_Cichlid 0x4ca0
#define mmRLC_SAFE_MODE_Sienna_Cichlid_BASE_IDX 1
#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid 0x4ca1
#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid_BASE_IDX 1
#define mmSPI_CONFIG_CNTL_Sienna_Cichlid 0x11ec
#define mmSPI_CONFIG_CNTL_Sienna_Cichlid_BASE_IDX 0
#define mmVGT_ESGS_RING_SIZE_Sienna_Cichlid 0x0fc1
#define mmVGT_ESGS_RING_SIZE_Sienna_Cichlid_BASE_IDX 0
#define mmVGT_GSVS_RING_SIZE_Sienna_Cichlid 0x0fc2
#define mmVGT_GSVS_RING_SIZE_Sienna_Cichlid_BASE_IDX 0
#define mmVGT_TF_RING_SIZE_Sienna_Cichlid 0x0fc3
#define mmVGT_TF_RING_SIZE_Sienna_Cichlid_BASE_IDX 0
#define mmVGT_HS_OFFCHIP_PARAM_Sienna_Cichlid 0x0fc4
#define mmVGT_HS_OFFCHIP_PARAM_Sienna_Cichlid_BASE_IDX 0
#define mmVGT_TF_MEMORY_BASE_Sienna_Cichlid 0x0fc5
#define mmVGT_TF_MEMORY_BASE_Sienna_Cichlid_BASE_IDX 0
#define mmVGT_TF_MEMORY_BASE_HI_Sienna_Cichlid 0x0fc6
#define mmVGT_TF_MEMORY_BASE_HI_Sienna_Cichlid_BASE_IDX 0
#define GRBM_STATUS2__RLC_BUSY_Sienna_Cichlid__SHIFT 0x1a
#define GRBM_STATUS2__RLC_BUSY_Sienna_Cichlid_MASK 0x04000000L
#define CP_RB_DOORBELL_RANGE_LOWER__DOORBELL_RANGE_LOWER_Sienna_Cichlid_MASK 0x00000FFCL
#define CP_RB_DOORBELL_RANGE_LOWER__DOORBELL_RANGE_LOWER_Sienna_Cichlid__SHIFT 0x2
#define CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_Sienna_Cichlid_MASK 0x00000FFCL
#define mmGCR_GENERAL_CNTL_Sienna_Cichlid 0x1580
#define mmGCR_GENERAL_CNTL_Sienna_Cichlid_BASE_IDX 0
#define mmCP_HYP_PFP_UCODE_ADDR 0x5814
#define mmCP_HYP_PFP_UCODE_ADDR_BASE_IDX 1
#define mmCP_HYP_PFP_UCODE_DATA 0x5815
#define mmCP_HYP_PFP_UCODE_DATA_BASE_IDX 1
#define mmCP_HYP_CE_UCODE_ADDR 0x5818
#define mmCP_HYP_CE_UCODE_ADDR_BASE_IDX 1
#define mmCP_HYP_CE_UCODE_DATA 0x5819
#define mmCP_HYP_CE_UCODE_DATA_BASE_IDX 1
#define mmCP_HYP_ME_UCODE_ADDR 0x5816
#define mmCP_HYP_ME_UCODE_ADDR_BASE_IDX 1
#define mmCP_HYP_ME_UCODE_DATA 0x5817
#define mmCP_HYP_ME_UCODE_DATA_BASE_IDX 1
MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
@ -89,6 +138,13 @@ MODULE_FIRMWARE("amdgpu/navi12_mec.bin");
MODULE_FIRMWARE("amdgpu/navi12_mec2.bin");
MODULE_FIRMWARE("amdgpu/navi12_rlc.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_ce.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_pfp.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_me.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec2.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_rlc.bin");
static const struct soc15_reg_golden golden_settings_gc_10_1[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
@ -3013,6 +3069,51 @@ static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_2_nv12[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
};
static const struct soc15_reg_golden golden_settings_gc_10_3[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x10f80988),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffffffff, 0x00070104),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xe07df47f, 0x00180070),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000)
};
static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] =
{
/* Pending on emulation bring up */
};
#define DEFAULT_SH_MEM_CONFIG \
((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
(SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
@ -3193,6 +3294,14 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_rlc_spm_10_1_2_nv12,
(const u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_1_2_nv12));
break;
case CHIP_SIENNA_CICHLID:
soc15_program_register_sequence(adev,
golden_settings_gc_10_3,
(const u32)ARRAY_SIZE(golden_settings_gc_10_3));
soc15_program_register_sequence(adev,
golden_settings_gc_10_3_sienna_cichlid,
(const u32)ARRAY_SIZE(golden_settings_gc_10_3_sienna_cichlid));
break;
default:
break;
}
@ -3373,6 +3482,9 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
(adev->gfx.mec_feature_version >= 27))
adev->gfx.cp_fw_write_wait = true;
break;
case CHIP_SIENNA_CICHLID:
adev->gfx.cp_fw_write_wait = true;
break;
default:
break;
}
@ -3463,6 +3575,9 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
case CHIP_NAVI12:
chip_name = "navi12";
break;
case CHIP_SIENNA_CICHLID:
chip_name = "sienna_cichlid";
break;
default:
BUG();
}
@ -3992,6 +4107,16 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
break;
case CHIP_SIENNA_CICHLID:
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
adev->gfx.config.sc_hiz_tile_fifo_size = 0;
adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
adev->gfx.config.gb_addr_config_fields.num_pkrs =
1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
break;
default:
BUG();
break;
@ -4104,6 +4229,14 @@ static int gfx_v10_0_sw_init(void *handle)
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
break;
case CHIP_SIENNA_CICHLID:
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
adev->gfx.me.num_queue_per_pipe = 1;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 4;
break;
default:
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
@ -4273,12 +4406,6 @@ static int gfx_v10_0_sw_fini(void *handle)
return 0;
}
static void gfx_v10_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
/* TODO */
}
static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
u32 sh_num, u32 instance)
{
@ -4355,6 +4482,11 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
uint32_t num_packer_per_sc;
uint32_t pa_sc_tile_steering_override;
/* for ASICs that integrate GFX v10.3
* pa_sc_tile_steering_override should be set to 0 */
if (adev->asic_type == CHIP_SIENNA_CICHLID)
return 0;
/* init num_sc */
num_sc = adev->gfx.config.max_shader_engines * adev->gfx.config.max_sh_per_se *
adev->gfx.config.num_sc_per_sh;
@ -4523,8 +4655,6 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
gfx_v10_0_tiling_mode_table_init(adev);
gfx_v10_0_setup_rb(adev);
gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);
gfx_v10_0_get_tcc_info(adev);
@ -4558,7 +4688,12 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
bool enable)
{
u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
u32 tmp;
if (amdgpu_sriov_vf(adev))
return;
tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
enable ? 1 : 0);
@ -5271,6 +5406,14 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI,
upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
WREG32_SOC15(GC, 0, mmCP_HYP_PFP_UCODE_ADDR, 0);
for (i = 0; i < pfp_hdr->jt_size; i++)
WREG32_SOC15(GC, 0, mmCP_HYP_PFP_UCODE_DATA,
le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
WREG32_SOC15(GC, 0, mmCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
return 0;
}
@ -5340,6 +5483,14 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI,
upper_32_bits(adev->gfx.ce.ce_fw_gpu_addr));
WREG32_SOC15(GC, 0, mmCP_HYP_CE_UCODE_ADDR, 0);
for (i = 0; i < ce_hdr->jt_size; i++)
WREG32_SOC15(GC, 0, mmCP_HYP_CE_UCODE_DATA,
le32_to_cpup(fw_data + ce_hdr->jt_offset + i));
WREG32_SOC15(GC, 0, mmCP_HYP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
return 0;
}
@ -5409,6 +5560,14 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI,
upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
WREG32_SOC15(GC, 0, mmCP_HYP_ME_UCODE_ADDR, 0);
for (i = 0; i < me_hdr->jt_size; i++)
WREG32_SOC15(GC, 0, mmCP_HYP_ME_UCODE_DATA,
le32_to_cpup(fw_data + me_hdr->jt_offset + i));
WREG32_SOC15(GC, 0, mmCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
return 0;
}
@ -5549,12 +5708,24 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
DOORBELL_EN, 0);
}
WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
DOORBELL_RANGE_LOWER, ring->doorbell_index);
WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
DOORBELL_RANGE_LOWER_Sienna_Cichlid, ring->doorbell_index);
WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_Sienna_Cichlid_MASK);
break;
default:
tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
DOORBELL_RANGE_LOWER, ring->doorbell_index);
WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
break;
}
}
static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
@ -5669,11 +5840,27 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
if (enable) {
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0);
break;
default:
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
break;
}
} else {
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
(CP_MEC_CNTL__MEC_ME1_HALT_MASK |
CP_MEC_CNTL__MEC_ME2_HALT_MASK));
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid,
(CP_MEC_CNTL__MEC_ME1_HALT_MASK |
CP_MEC_CNTL__MEC_ME2_HALT_MASK));
break;
default:
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
(CP_MEC_CNTL__MEC_ME1_HALT_MASK |
CP_MEC_CNTL__MEC_ME2_HALT_MASK));
break;
}
adev->gfx.kiq.ring.sched.ready = false;
}
udelay(50);
@ -5755,12 +5942,24 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
/* tell RLC which queue is the KIQ */
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
tmp |= 0x80;
WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
tmp |= 0x80;
WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
break;
default:
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
tmp |= 0x80;
WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
break;
}
}
static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
@ -6446,18 +6645,33 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev)
/* check if mmVGT_ESGS_RING_SIZE_UMD
* has been remapped to mmVGT_ESGS_RING_SIZE */
data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE);
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid);
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, 0);
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern);
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, 0);
if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid) == pattern) {
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data);
return true;
} else {
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, data);
return false;
}
break;
default:
data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE);
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, 0);
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern);
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern);
if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE) == pattern) {
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data);
return true;
} else {
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data);
return false;
if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE) == pattern) {
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data);
return true;
} else {
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data);
return false;
}
break;
}
}
@ -6469,59 +6683,119 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev)
 * index will auto-inc after each data writing */
WREG32_SOC15(GC, 0, mmGRBM_CAM_INDEX, 0);
/* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
/* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_Sienna_Cichlid) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
/* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
/* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_Sienna_Cichlid) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
/* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
/* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_Sienna_Cichlid) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
/* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
/* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_Sienna_Cichlid) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
/* mmVGT_ESGS_RING_SIZE_UMD -> mmVGT_ESGS_RING_SIZE */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
/* mmVGT_ESGS_RING_SIZE_UMD -> mmVGT_ESGS_RING_SIZE */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
/* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
/* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_Sienna_Cichlid) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
/* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */
data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_Sienna_Cichlid) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
break;
default:
/* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
/* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
/* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
/* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
/* mmVGT_ESGS_RING_SIZE_UMD -> mmVGT_ESGS_RING_SIZE */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
/* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
/* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */
data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
break;
}
/* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */
data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
(SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL) <<
GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
}
@ -6602,6 +6876,7 @@ static int gfx_v10_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
uint32_t tmp;
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
@ -6616,6 +6891,11 @@ static int gfx_v10_0_hw_fini(void *handle)
DRM_ERROR("KCQ disable failed\n");
if (amdgpu_sriov_vf(adev)) {
gfx_v10_0_cp_gfx_enable(adev, false);
/* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
return 0;
}
gfx_v10_0_cp_enable(adev, false);
@ -6693,10 +6973,22 @@ static int gfx_v10_0_soft_reset(void *handle)
/* GRBM_STATUS2 */
tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
GRBM_SOFT_RESET, SOFT_RESET_RLC,
1);
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY_Sienna_Cichlid))
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
GRBM_SOFT_RESET,
SOFT_RESET_RLC,
1);
break;
default:
if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
GRBM_SOFT_RESET,
SOFT_RESET_RLC,
1);
break;
}
if (grbm_soft_reset) {
/* stop the rlc */
@ -6774,7 +7066,18 @@ static int gfx_v10_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X;
switch (adev->asic_type) {
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X;
break;
case CHIP_SIENNA_CICHLID:
adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid;
break;
default:
break;
}
adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
@ -6819,13 +7122,30 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev)
data = RLC_SAFE_MODE__CMD_MASK;
data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
/* wait for RLC_SAFE_MODE */
for (i = 0; i < adev->usec_timeout; i++) {
if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
break;
udelay(1);
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);
/* wait for RLC_SAFE_MODE */
for (i = 0; i < adev->usec_timeout; i++) {
if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid),
RLC_SAFE_MODE, CMD))
break;
udelay(1);
}
break;
default:
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
/* wait for RLC_SAFE_MODE */
for (i = 0; i < adev->usec_timeout; i++) {
if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE),
RLC_SAFE_MODE, CMD))
break;
udelay(1);
}
break;
}
}
@ -6834,7 +7154,14 @@ static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev)
uint32_t data;
data = RLC_SAFE_MODE__CMD_MASK;
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);
break;
default:
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
break;
}
}
static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
@ -7118,6 +7445,8 @@ static int gfx_v10_0_set_powergating_state(void *handle,
switch (adev->asic_type) {
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
amdgpu_gfx_off_ctrl(adev, enable);
break;
default:
@ -7138,6 +7467,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
gfx_v10_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
@ -8236,6 +8566,7 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_SIENNA_CICHLID:
adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs;
break;
case CHIP_NAVI12:

View file

@ -3039,7 +3039,7 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
mqd->cp_hqd_active = 1;
}
int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
{
uint32_t tmp;
uint32_t mqd_reg;
@ -5209,7 +5209,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
cu_info->lds_size = 64;
}
const struct amdgpu_ip_block_version gfx_v7_0_ip_block =
static const struct amdgpu_ip_block_version gfx_v7_0_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_GFX,
.major = 7,

View file

@ -4589,7 +4589,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
return 0;
}
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
struct vi_mqd *mqd)
{
uint32_t mqd_reg;

View file

@ -722,7 +722,7 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};
void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
{
static void *scratch_reg0;
static void *scratch_reg1;
@ -1890,7 +1890,7 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
return r;
}
if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
if (adev->flags & AMD_IS_APU) {
/* TODO: double check the cp_table_size for RV */
adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
r = amdgpu_gfx_rlc_init_cpt(adev);
@ -1960,7 +1960,7 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
fw_data = (const __le32 *)
(adev->gfx.mec_fw->data +
le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
@ -2384,7 +2384,7 @@ static int gfx_v9_0_sw_fini(void *handle)
gfx_v9_0_mec_fini(adev);
amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
if (adev->flags & AMD_IS_APU) {
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
@ -2856,8 +2856,8 @@ static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
pwr_10_0_gfxip_control_over_cgpg(adev, true);
if (adev->asic_type != CHIP_RENOIR)
pwr_10_0_gfxip_control_over_cgpg(adev, true);
}
}

View file

@ -181,6 +181,10 @@ static void gfxhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL4, tmp);
tmp = mmGCVM_L2_CNTL5_DEFAULT;
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL5, tmp);
}
static void gfxhub_v2_0_enable_system_domain(struct amdgpu_device *adev)

View file

@ -0,0 +1,411 @@
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "gfxhub_v2_1.h"
#include "gc/gc_10_3_0_offset.h"
#include "gc/gc_10_3_0_sh_mask.h"
#include "gc/gc_10_3_0_default.h"
#include "navi10_enum.h"
#include "soc15_common.h"
u64 gfxhub_v2_1_get_fb_location(struct amdgpu_device *adev)
{
u64 base = RREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE);
base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
base <<= 24;
return base;
}
u64 gfxhub_v2_1_get_mc_fb_offset(struct amdgpu_device *adev)
{
return (u64)RREG32_SOC15(GC, 0, mmGCMC_VM_FB_OFFSET) << 24;
}
void gfxhub_v2_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
/* two registers distance between mmGCVM_CONTEXT0_* to mmGCVM_CONTEXT1_* */
int offset = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
- mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
offset * vmid, lower_32_bits(page_table_base));
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
offset * vmid, upper_32_bits(page_table_base));
}
static void gfxhub_v2_1_init_gart_aperture_regs(struct amdgpu_device *adev)
{
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
gfxhub_v2_1_setup_vm_pt_regs(adev, 0, pt_base);
WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->gmc.gart_start >> 12));
WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
(u32)(adev->gmc.gart_start >> 44));
WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
(u32)(adev->gmc.gart_end >> 12));
WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
(u32)(adev->gmc.gart_end >> 44));
}
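The GART start and end addresses are each split across two registers: shifting right by 12 yields the 4 KiB page frame number, whose low 32 bits land in the *_LO32 register, while shifting right by 44 leaves only address bits 44 and above for the *_HI32 register. With a made-up gart_start as an example:

	gart_start       = 0x0000123456789000
	gart_start >> 12 = 0x123456789   ->  LO32 = 0x23456789 (truncated to 32 bits)
	gart_start >> 44 = 0x1           ->  HI32 = 0x1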
static void gfxhub_v2_1_init_system_aperture_regs(struct amdgpu_device *adev)
{
uint64_t value;
/* Disable AGP. */
WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0);
WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, 0);
WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, 0x00FFFFFF);
/* Program the system aperture low logical page number. */
WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
adev->gmc.vram_start >> 18);
WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
adev->gmc.vram_end >> 18);
/* Set default page address. */
value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
+ adev->vm_manager.vram_base_offset;
WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
(u32)(value >> 44));
/* Program "protection fault". */
WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
(u32)(adev->dummy_page_addr >> 12));
WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
(u32)((u64)adev->dummy_page_addr >> 44));
WREG32_FIELD15(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
}
static void gfxhub_v2_1_init_tlb_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
/* Setup TLB control */
tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL, 1);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
MTYPE, MTYPE_UC); /* UC, uncached */
WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp);
}
static void gfxhub_v2_1_init_cache_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
/* Setup L2 cache */
tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
/* XXX for emulation, Refer to closed source code.*/
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, tmp);
tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, tmp);
tmp = mmGCVM_L2_CNTL3_DEFAULT;
if (adev->gmc.translate_further) {
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
} else {
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
}
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, tmp);
tmp = mmGCVM_L2_CNTL4_DEFAULT;
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL4, tmp);
tmp = mmGCVM_L2_CNTL5_DEFAULT;
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL5, tmp);
}
static void gfxhub_v2_1_enable_system_domain(struct amdgpu_device *adev)
{
uint32_t tmp;
tmp = RREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL, tmp);
}
static void gfxhub_v2_1_disable_identity_aperture(struct amdgpu_device *adev)
{
WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
0xFFFFFFFF);
WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
0x0000000F);
WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
0);
WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
0);
WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
}
static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev)
{
int i;
uint32_t tmp;
for (i = 0; i <= 14; i++) {
tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
adev->vm_manager.block_size - 9);
/* Send no-retry XNACK on fault to suppress VM fault storm. */
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!amdgpu_noretry);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i, tmp);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
}
static void gfxhub_v2_1_program_invalidation(struct amdgpu_device *adev)
{
unsigned i;
for (i = 0 ; i < 18; ++i) {
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
2 * i, 0xffffffff);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
2 * i, 0x1f);
}
}
int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev)
{
if (amdgpu_sriov_vf(adev)) {
/*
* GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are
* VF copy registers and the vbios post doesn't program them; for
* SRIOV the driver needs to program them
*/
WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE,
adev->gmc.vram_start >> 24);
WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_TOP,
adev->gmc.vram_end >> 24);
}
/* GART Enable. */
gfxhub_v2_1_init_gart_aperture_regs(adev);
gfxhub_v2_1_init_system_aperture_regs(adev);
gfxhub_v2_1_init_tlb_regs(adev);
gfxhub_v2_1_init_cache_regs(adev);
gfxhub_v2_1_enable_system_domain(adev);
gfxhub_v2_1_disable_identity_aperture(adev);
gfxhub_v2_1_setup_vmid_config(adev);
gfxhub_v2_1_program_invalidation(adev);
return 0;
}
void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev)
{
u32 tmp;
u32 i;
/* Disable all tables */
for (i = 0; i < 16; i++)
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i, 0);
/* Setup TLB control */
tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL, 0);
WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp);
/* Setup L2 cache */
WREG32_FIELD15(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, 0);
}
/**
* gfxhub_v2_1_set_fault_enable_default - update GART/VM fault handling
*
* @adev: amdgpu_device pointer
* @value: true redirects VM faults to the default page
*/
void gfxhub_v2_1_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
{
u32 tmp;
tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
if (!value) {
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
CRASH_ON_NO_RETRY_FAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
CRASH_ON_RETRY_FAULT, 1);
}
WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL, tmp);
}
void gfxhub_v2_1_init(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(GC, 0,
mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(GC, 0,
mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
hub->vm_inv_eng0_sem =
SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ACK);
hub->vm_context0_cntl =
SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL);
hub->vm_l2_pro_fault_status =
SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS);
hub->vm_l2_pro_fault_cntl =
SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL);
}
int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev)
{
u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_CNTL);
u32 max_region =
REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
u32 max_num_physical_nodes = 0;
u32 max_physical_node_id = 0;
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
max_num_physical_nodes = 4;
max_physical_node_id = 3;
break;
default:
return -EINVAL;
}
/* PF_MAX_REGION=0 means xgmi is disabled */
if (max_region) {
adev->gmc.xgmi.num_physical_nodes = max_region + 1;
if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
return -EINVAL;
adev->gmc.xgmi.physical_node_id =
REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_LFB_REGION);
if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
return -EINVAL;
adev->gmc.xgmi.node_segment_size = REG_GET_FIELD(
RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_SIZE),
GCMC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
}
return 0;
}
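/*
 * Worked example (illustrative): with PF_MAX_REGION = 3 the hive has
 * max_region + 1 = 4 physical nodes, and PF_LFB_REGION identifies which of
 * those regions belongs to this GPU; PF_MAX_REGION = 0 leaves XGMI disabled.
 */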

View file

@ -19,45 +19,21 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: AMD
*
*/
#ifndef _DMUB_META_H_
#define _DMUB_META_H_
#include "dmub_types.h"
#ifndef __GFXHUB_V2_1_H__
#define __GFXHUB_V2_1_H__
#pragma pack(push, 1)
u64 gfxhub_v2_1_get_fb_location(struct amdgpu_device *adev);
int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev);
void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev);
void gfxhub_v2_1_set_fault_enable_default(struct amdgpu_device *adev,
bool value);
void gfxhub_v2_1_init(struct amdgpu_device *adev);
u64 gfxhub_v2_1_get_mc_fb_offset(struct amdgpu_device *adev);
void gfxhub_v2_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base);
/* Magic value for identifying dmub_fw_meta_info */
#define DMUB_FW_META_MAGIC 0x444D5542
int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev);
/* Offset from the end of the file to the dmub_fw_meta_info */
#define DMUB_FW_META_OFFSET 0x24
/**
* struct dmub_fw_meta_info - metadata associated with fw binary
*
* NOTE: This should be considered a stable API. Fields should
* not be repurposed or reordered. New fields should be
* added instead to extend the structure.
*
* @magic_value: magic value identifying DMUB firmware meta info
* @fw_region_size: size of the firmware state region
* @trace_buffer_size: size of the tracebuffer region
*/
struct dmub_fw_meta_info {
uint32_t magic_value;
uint32_t fw_region_size;
uint32_t trace_buffer_size;
};
/* Ensure that the structure remains 64 bytes. */
union dmub_fw_meta {
struct dmub_fw_meta_info info;
uint8_t reserved[64];
};
#pragma pack(pop)
#endif /* _DMUB_META_H_ */
#endif

View file

@ -45,8 +45,10 @@
#include "nbio_v2_3.h"
#include "gfxhub_v2_0.h"
#include "gfxhub_v2_1.h"
#include "mmhub_v2_0.h"
#include "athub_v2_0.h"
#include "athub_v2_1.h"
/* XXX Move this macro to navi10 header file, which is like vid.h for VI.*/
#define AMDGPU_NUM_OF_VMIDS 8
@ -348,6 +350,24 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
/* flush hdp cache */
adev->nbio.funcs->hdp_flush(adev, NULL);
/* During SRIOV runtime the driver shouldn't access the register through MMIO;
* use the KIQ to do the VM invalidation instead.
*/
if (adev->gfx.kiq.ring.sched.ready &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
!adev->in_gpu_reset) {
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
const unsigned eng = 17;
u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type);
u32 req = hub->vm_inv_eng0_req + eng;
u32 ack = hub->vm_inv_eng0_ack + eng;
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
1 << vmid);
return;
}
mutex_lock(&adev->mman.gtt_window_lock);
if (vmhub == AMDGPU_MMHUB_0) {
@ -666,13 +686,26 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
{
u64 base = 0;
base = gfxhub_v2_0_get_fb_location(adev);
if (adev->asic_type == CHIP_SIENNA_CICHLID)
base = gfxhub_v2_1_get_fb_location(adev);
else
base = gfxhub_v2_0_get_fb_location(adev);
/* add the xgmi offset of the physical node */
base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
amdgpu_gmc_vram_location(adev, &adev->gmc, base);
amdgpu_gmc_gart_location(adev, mc);
/* base offset of vram pages */
adev->vm_manager.vram_base_offset = gfxhub_v2_0_get_mc_fb_offset(adev);
if (adev->asic_type == CHIP_SIENNA_CICHLID)
adev->vm_manager.vram_base_offset = gfxhub_v2_1_get_mc_fb_offset(adev);
else
adev->vm_manager.vram_base_offset = gfxhub_v2_0_get_mc_fb_offset(adev);
/* add the xgmi offset of the physical node */
adev->vm_manager.vram_base_offset +=
adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
}
/**
@ -712,6 +745,7 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
default:
adev->gmc.gart_size = 512ULL << 20;
break;
@ -780,24 +814,32 @@ static int gmc_v10_0_sw_init(void *handle)
int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
gfxhub_v2_0_init(adev);
if (adev->asic_type == CHIP_SIENNA_CICHLID)
gfxhub_v2_1_init(adev);
else
gfxhub_v2_0_init(adev);
mmhub_v2_0_init(adev);
spin_lock_init(&adev->gmc.invalidate_lock);
r = amdgpu_atomfirmware_get_vram_info(adev,
&vram_width, &vram_type, &vram_vendor);
if (!amdgpu_emu_mode)
adev->gmc.vram_width = vram_width;
else
if (adev->asic_type == CHIP_SIENNA_CICHLID && amdgpu_emu_mode == 1) {
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_GDDR6;
adev->gmc.vram_width = 1 * 128; /* numchan * chansize */
} else {
r = amdgpu_atomfirmware_get_vram_info(adev,
&vram_width, &vram_type, &vram_vendor);
adev->gmc.vram_width = vram_width;
adev->gmc.vram_type = vram_type;
adev->gmc.vram_vendor = vram_vendor;
}
adev->gmc.vram_type = vram_type;
adev->gmc.vram_vendor = vram_vendor;
switch (adev->asic_type) {
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
adev->num_vmhubs = 2;
/*
* To fulfill 4-level page support,
@ -836,6 +878,12 @@ static int gmc_v10_0_sw_init(void *handle)
return r;
}
if (adev->gmc.xgmi.supported) {
r = gfxhub_v2_1_get_xgmi_info(adev);
if (r)
return r;
}
r = gmc_v10_0_mc_init(adev);
if (r)
return r;
@ -896,6 +944,7 @@ static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev)
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
break;
default:
break;
@ -922,7 +971,10 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
if (r)
return r;
r = gfxhub_v2_0_gart_enable(adev);
if (adev->asic_type == CHIP_SIENNA_CICHLID)
r = gfxhub_v2_1_gart_enable(adev);
else
r = gfxhub_v2_0_gart_enable(adev);
if (r)
return r;
@ -943,7 +995,10 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
false : true;
gfxhub_v2_0_set_fault_enable_default(adev, value);
if (adev->asic_type == CHIP_SIENNA_CICHLID)
gfxhub_v2_1_set_fault_enable_default(adev, value);
else
gfxhub_v2_0_set_fault_enable_default(adev, value);
mmhub_v2_0_set_fault_enable_default(adev, value);
gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0);
gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
@ -981,7 +1036,10 @@ static int gmc_v10_0_hw_init(void *handle)
*/
static void gmc_v10_0_gart_disable(struct amdgpu_device *adev)
{
gfxhub_v2_0_gart_disable(adev);
if (adev->asic_type == CHIP_SIENNA_CICHLID)
gfxhub_v2_1_gart_disable(adev);
else
gfxhub_v2_0_gart_disable(adev);
mmhub_v2_0_gart_disable(adev);
amdgpu_gart_table_vram_unpin(adev);
}
@ -1052,7 +1110,10 @@ static int gmc_v10_0_set_clockgating_state(void *handle,
if (r)
return r;
return athub_v2_0_set_clockgating(adev, state);
if (adev->asic_type == CHIP_SIENNA_CICHLID)
return athub_v2_1_set_clockgating(adev, state);
else
return athub_v2_0_set_clockgating(adev, state);
}
static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags)
@ -1061,7 +1122,10 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags)
mmhub_v2_0_get_clockgating(adev, flags);
athub_v2_0_get_clockgating(adev, flags);
if (adev->asic_type == CHIP_SIENNA_CICHLID)
athub_v2_1_get_clockgating(adev, flags);
else
athub_v2_0_get_clockgating(adev, flags);
}
static int gmc_v10_0_set_powergating_state(void *handle,

View file

@ -26,6 +26,7 @@
#include "soc15.h"
#include "soc15d.h"
#include "vcn_v1_0.h"
#include "jpeg_v1_0.h"
#include "vcn/vcn_1_0_offset.h"
#include "vcn/vcn_1_0_sh_mask.h"

View file

@ -26,6 +26,7 @@
#include "amdgpu_pm.h"
#include "soc15.h"
#include "soc15d.h"
#include "jpeg_v2_0.h"
#include "vcn/vcn_2_0_0_offset.h"
#include "vcn/vcn_2_0_0_sh_mask.h"
@ -229,9 +230,9 @@ static int jpeg_v2_0_disable_power_gating(struct amdgpu_device *adev)
data = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data);
SOC15_WAIT_ON_RREG(JPEG, 0,
r = SOC15_WAIT_ON_RREG(JPEG, 0,
mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON,
UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r);
UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
if (r) {
DRM_ERROR("amdgpu: JPEG disable power gating failed\n");
@ -260,9 +261,9 @@ static int jpeg_v2_0_enable_power_gating(struct amdgpu_device* adev)
data = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data);
SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_PGFSM_STATUS,
r = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_PGFSM_STATUS,
(2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT),
UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r);
UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
if (r) {
DRM_ERROR("amdgpu: JPEG enable power gating failed\n");
@ -676,10 +677,10 @@ static bool jpeg_v2_0_is_idle(void *handle)
static int jpeg_v2_0_wait_for_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret = 0;
int ret;
SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK, ret);
ret = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
return ret;
}

View file

@ -449,20 +449,20 @@ static bool jpeg_v2_5_is_idle(void *handle)
static int jpeg_v2_5_wait_for_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i, ret = 0;
int i, ret;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
continue;
SOC15_WAIT_ON_RREG(JPEG, i, mmUVD_JRBC_STATUS,
ret = SOC15_WAIT_ON_RREG(JPEG, i, mmUVD_JRBC_STATUS,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK, ret);
UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
if (ret)
return ret;
}
return ret;
return 0;
}
static int jpeg_v2_5_set_clockgating_state(void *handle,

View file

@ -0,0 +1,613 @@
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "amdgpu_jpeg.h"
#include "amdgpu_pm.h"
#include "soc15.h"
#include "soc15d.h"
#include "jpeg_v2_0.h"
#include "vcn/vcn_3_0_0_offset.h"
#include "vcn/vcn_3_0_0_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int jpeg_v3_0_set_powergating_state(void *handle,
enum amd_powergating_state state);
/**
* jpeg_v3_0_early_init - set function pointers
*
* @handle: amdgpu_device pointer
*
* Set ring and irq function pointers
*/
static int jpeg_v3_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->asic_type == CHIP_SIENNA_CICHLID) {
u32 harvest = RREG32_SOC15(JPEG, 0, mmCC_UVD_HARVESTING);
if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
return -ENOENT;
}
adev->jpeg.num_jpeg_inst = 1;
jpeg_v3_0_set_dec_ring_funcs(adev);
jpeg_v3_0_set_irq_funcs(adev);
return 0;
}
/**
* jpeg_v3_0_sw_init - sw init for JPEG block
*
* @handle: amdgpu_device pointer
*
* Load firmware and sw initialization
*/
static int jpeg_v3_0_sw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
int r;
/* JPEG TRAP */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq);
if (r)
return r;
r = amdgpu_jpeg_sw_init(adev);
if (r)
return r;
r = amdgpu_jpeg_resume(adev);
if (r)
return r;
ring = &adev->jpeg.inst->ring_dec;
ring->use_doorbell = true;
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
sprintf(ring->name, "jpeg_dec");
r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
return 0;
}
/**
* jpeg_v3_0_sw_fini - sw fini for JPEG block
*
* @handle: amdgpu_device pointer
*
* JPEG suspend and free up sw allocation
*/
static int jpeg_v3_0_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
r = amdgpu_jpeg_sw_fini(adev);
return r;
}
/**
* jpeg_v3_0_hw_init - start and test JPEG block
*
* @handle: amdgpu_device pointer
*
*/
static int jpeg_v3_0_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
int r;
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
DRM_INFO("JPEG decode initialized successfully.\n");
return 0;
}
/**
* jpeg_v3_0_hw_fini - stop the hardware block
*
* @handle: amdgpu_device pointer
*
* Stop the JPEG block, mark ring as not ready any more
*/
static int jpeg_v3_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
ring = &adev->jpeg.inst->ring_dec;
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
jpeg_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
ring->sched.ready = false;
return 0;
}
/**
* jpeg_v3_0_suspend - suspend JPEG block
*
* @handle: amdgpu_device pointer
*
* HW fini and suspend JPEG block
*/
static int jpeg_v3_0_suspend(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
r = jpeg_v3_0_hw_fini(adev);
if (r)
return r;
r = amdgpu_jpeg_suspend(adev);
return r;
}
/**
* jpeg_v3_0_resume - resume JPEG block
*
* @handle: amdgpu_device pointer
*
* Resume firmware and hw init JPEG block
*/
static int jpeg_v3_0_resume(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
r = amdgpu_jpeg_resume(adev);
if (r)
return r;
r = jpeg_v3_0_hw_init(adev);
return r;
}
static void jpeg_v3_0_disable_clock_gating(struct amdgpu_device* adev)
{
uint32_t data = 0;
data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL);
if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
else
data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK;
data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data);
data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE);
data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
| JPEG_CGC_GATE__JPEG2_DEC_MASK
| JPEG_CGC_GATE__JPEG_ENC_MASK
| JPEG_CGC_GATE__JMCIF_MASK
| JPEG_CGC_GATE__JRBBM_MASK);
WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data);
data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL);
data &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK
| JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK
| JPEG_CGC_CTRL__JMCIF_MODE_MASK
| JPEG_CGC_CTRL__JRBBM_MODE_MASK);
WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data);
}
static void jpeg_v3_0_enable_clock_gating(struct amdgpu_device* adev)
{
uint32_t data = 0;
data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE);
data |= (JPEG_CGC_GATE__JPEG_DEC_MASK
|JPEG_CGC_GATE__JPEG2_DEC_MASK
|JPEG_CGC_GATE__JPEG_ENC_MASK
|JPEG_CGC_GATE__JMCIF_MASK
|JPEG_CGC_GATE__JRBBM_MASK);
WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data);
}
static int jpeg_v3_0_disable_static_power_gating(struct amdgpu_device *adev)
{
if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
uint32_t data = 0;
int r = 0;
data = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data);
r = SOC15_WAIT_ON_RREG(JPEG, 0,
mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON,
UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
if (r) {
DRM_ERROR("amdgpu: JPEG disable power gating failed\n");
return r;
}
}
/* disable anti hang mechanism */
WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), 0,
~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
/* keep the JPEG in static PG mode */
WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), 0,
~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK);
return 0;
}
static int jpeg_v3_0_enable_static_power_gating(struct amdgpu_device* adev)
{
/* enable anti hang mechanism */
WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS),
UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
uint32_t data = 0;
int r = 0;
data = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data);
r = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_PGFSM_STATUS,
(2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT),
UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
if (r) {
DRM_ERROR("amdgpu: JPEG enable power gating failed\n");
return r;
}
}
return 0;
}
/**
* jpeg_v3_0_start - start JPEG block
*
* @adev: amdgpu_device pointer
*
* Setup and start the JPEG block
*/
static int jpeg_v3_0_start(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
int r;
if (adev->pm.dpm_enabled)
amdgpu_dpm_enable_jpeg(adev, true);
/* disable power gating */
r = jpeg_v3_0_disable_static_power_gating(adev);
if (r)
return r;
/* JPEG disable CGC */
jpeg_v3_0_disable_clock_gating(adev);
/* MJPEG global tiling registers */
WREG32_SOC15(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG,
adev->gfx.config.gb_addr_config);
WREG32_SOC15(JPEG, 0, mmJPEG_ENC_GFX10_ADDR_CONFIG,
adev->gfx.config.gb_addr_config);
/* enable JMI channel */
WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL), 0,
~UVD_JMI_CNTL__SOFT_RESET_MASK);
/* enable System Interrupt for JRBC */
WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmJPEG_SYS_INT_EN),
JPEG_SYS_INT_EN__DJRBC_MASK,
~JPEG_SYS_INT_EN__DJRBC_MASK);
WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
lower_32_bits(ring->gpu_addr));
WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
upper_32_bits(ring->gpu_addr));
WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0);
WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0);
WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L);
WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
return 0;
}
/**
* jpeg_v3_0_stop - stop JPEG block
*
* @adev: amdgpu_device pointer
*
* stop the JPEG block
*/
static int jpeg_v3_0_stop(struct amdgpu_device *adev)
{
int r;
/* reset JMI */
WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL),
UVD_JMI_CNTL__SOFT_RESET_MASK,
~UVD_JMI_CNTL__SOFT_RESET_MASK);
jpeg_v3_0_enable_clock_gating(adev);
/* enable power gating */
r = jpeg_v3_0_enable_static_power_gating(adev);
if (r)
return r;
if (adev->pm.dpm_enabled)
amdgpu_dpm_enable_jpeg(adev, false);
return 0;
}
/**
* jpeg_v3_0_dec_ring_get_rptr - get read pointer
*
* @ring: amdgpu_ring pointer
*
* Returns the current hardware read pointer
*/
static uint64_t jpeg_v3_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR);
}
/**
* jpeg_v3_0_dec_ring_get_wptr - get write pointer
*
* @ring: amdgpu_ring pointer
*
* Returns the current hardware write pointer
*/
static uint64_t jpeg_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell)
return adev->wb.wb[ring->wptr_offs];
else
return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
}
/**
* jpeg_v3_0_dec_ring_set_wptr - set write pointer
*
* @ring: amdgpu_ring pointer
*
* Commits the write pointer to the hardware
*/
static void jpeg_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
}
}
static bool jpeg_v3_0_is_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret = 1;
ret &= (((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) &
UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
UVD_JRBC_STATUS__RB_JOB_DONE_MASK));
return ret;
}
static int jpeg_v3_0_wait_for_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
ret = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
return ret;
}
static int jpeg_v3_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
if (!jpeg_v3_0_is_idle(handle))
return -EBUSY;
jpeg_v3_0_enable_clock_gating(adev);
} else {
jpeg_v3_0_disable_clock_gating(adev);
}
return 0;
}
static int jpeg_v3_0_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
if (state == adev->jpeg.cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
ret = jpeg_v3_0_stop(adev);
else
ret = jpeg_v3_0_start(adev);
if (!ret)
adev->jpeg.cur_state = state;
return ret;
}
static int jpeg_v3_0_set_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
enum amdgpu_interrupt_state state)
{
return 0;
}
static int jpeg_v3_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
DRM_DEBUG("IH: JPEG TRAP\n");
switch (entry->src_id) {
case VCN_2_0__SRCID__JPEG_DECODE:
amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
entry->src_id, entry->src_data[0]);
break;
}
return 0;
}
static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = {
.name = "jpeg_v3_0",
.early_init = jpeg_v3_0_early_init,
.late_init = NULL,
.sw_init = jpeg_v3_0_sw_init,
.sw_fini = jpeg_v3_0_sw_fini,
.hw_init = jpeg_v3_0_hw_init,
.hw_fini = jpeg_v3_0_hw_fini,
.suspend = jpeg_v3_0_suspend,
.resume = jpeg_v3_0_resume,
.is_idle = jpeg_v3_0_is_idle,
.wait_for_idle = jpeg_v3_0_wait_for_idle,
.check_soft_reset = NULL,
.pre_soft_reset = NULL,
.soft_reset = NULL,
.post_soft_reset = NULL,
.set_clockgating_state = jpeg_v3_0_set_clockgating_state,
.set_powergating_state = jpeg_v3_0_set_powergating_state,
};
static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
.vmhub = AMDGPU_MMHUB_0,
.get_rptr = jpeg_v3_0_dec_ring_get_rptr,
.get_wptr = jpeg_v3_0_dec_ring_get_wptr,
.set_wptr = jpeg_v3_0_dec_ring_set_wptr,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
8 + /* jpeg_v3_0_dec_ring_emit_vm_flush */
18 + 18 + /* jpeg_v3_0_dec_ring_emit_fence x2 vm fence */
8 + 16,
.emit_ib_size = 22, /* jpeg_v3_0_dec_ring_emit_ib */
.emit_ib = jpeg_v2_0_dec_ring_emit_ib,
.emit_fence = jpeg_v2_0_dec_ring_emit_fence,
.emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
.test_ring = amdgpu_jpeg_dec_ring_test_ring,
.test_ib = amdgpu_jpeg_dec_ring_test_ib,
.insert_nop = jpeg_v2_0_dec_ring_nop,
.insert_start = jpeg_v2_0_dec_ring_insert_start,
.insert_end = jpeg_v2_0_dec_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_jpeg_ring_begin_use,
.end_use = amdgpu_jpeg_ring_end_use,
.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
adev->jpeg.inst->ring_dec.funcs = &jpeg_v3_0_dec_ring_vm_funcs;
DRM_INFO("JPEG decode is enabled in VM mode\n");
}
static const struct amdgpu_irq_src_funcs jpeg_v3_0_irq_funcs = {
.set = jpeg_v3_0_set_interrupt_state,
.process = jpeg_v3_0_process_interrupt,
};
static void jpeg_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
adev->jpeg.inst->irq.num_types = 1;
adev->jpeg.inst->irq.funcs = &jpeg_v3_0_irq_funcs;
}
const struct amdgpu_ip_block_version jpeg_v3_0_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_JPEG,
.major = 3,
.minor = 0,
.rev = 0,
.funcs = &jpeg_v3_0_ip_funcs,
};

View file

@ -0,0 +1,29 @@
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __JPEG_V3_0_H__
#define __JPEG_V3_0_H__
extern const struct amdgpu_ip_block_version jpeg_v3_0_ip_block;
#endif /* __JPEG_V3_0_H__ */

View file

@ -0,0 +1,443 @@
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __MES_API_DEF_H__
#define __MES_API_DEF_H__
#pragma pack(push, 4)
#define MES_API_VERSION 1
/* The driver submits one API (cmd) as a single frame; the command size is the
* same for all APIs to ease debugging and parsing of the ring buffer.
*/
enum { API_FRAME_SIZE_IN_DWORDS = 64 };
/* To avoid a command in the scheduler context being overwritten whenever
* multiple interrupts come in, this creates another queue.
*/
enum { API_NUMBER_OF_COMMAND_MAX = 32 };
enum MES_API_TYPE {
MES_API_TYPE_SCHEDULER = 1,
MES_API_TYPE_MAX
};
enum MES_SCH_API_OPCODE {
MES_SCH_API_SET_HW_RSRC = 0,
MES_SCH_API_SET_SCHEDULING_CONFIG = 1, /* aggregated db, quantums, etc. */
MES_SCH_API_ADD_QUEUE = 2,
MES_SCH_API_REMOVE_QUEUE = 3,
MES_SCH_API_PERFORM_YIELD = 4,
MES_SCH_API_SET_GANG_PRIORITY_LEVEL = 5,
MES_SCH_API_SUSPEND = 6,
MES_SCH_API_RESUME = 7,
MES_SCH_API_RESET = 8,
MES_SCH_API_SET_LOG_BUFFER = 9,
MES_SCH_API_CHANGE_GANG_PRORITY = 10,
MES_SCH_API_QUERY_SCHEDULER_STATUS = 11,
MES_SCH_API_PROGRAM_GDS = 12,
MES_SCH_API_SET_DEBUG_VMID = 13,
MES_SCH_API_MISC = 14,
MES_SCH_API_MAX = 0xFF
};
union MES_API_HEADER {
struct {
uint32_t type : 4; /* 0 - Invalid; 1 - Scheduling; 2 - TBD */
uint32_t opcode : 8;
uint32_t dwsize : 8; /* including header */
uint32_t reserved : 12;
};
uint32_t u32All;
};
enum MES_AMD_PRIORITY_LEVEL {
AMD_PRIORITY_LEVEL_LOW = 0,
AMD_PRIORITY_LEVEL_NORMAL = 1,
AMD_PRIORITY_LEVEL_MEDIUM = 2,
AMD_PRIORITY_LEVEL_HIGH = 3,
AMD_PRIORITY_LEVEL_REALTIME = 4,
AMD_PRIORITY_NUM_LEVELS
};
enum MES_QUEUE_TYPE {
MES_QUEUE_TYPE_GFX,
MES_QUEUE_TYPE_COMPUTE,
MES_QUEUE_TYPE_SDMA,
MES_QUEUE_TYPE_MAX,
};
struct MES_API_STATUS {
uint64_t api_completion_fence_addr;
uint64_t api_completion_fence_value;
};
enum { MAX_COMPUTE_PIPES = 8 };
enum { MAX_GFX_PIPES = 2 };
enum { MAX_SDMA_PIPES = 2 };
enum { MAX_COMPUTE_HQD_PER_PIPE = 8 };
enum { MAX_GFX_HQD_PER_PIPE = 8 };
enum { MAX_SDMA_HQD_PER_PIPE = 10 };
enum { MAX_QUEUES_IN_A_GANG = 8 };
enum VM_HUB_TYPE {
VM_HUB_TYPE_GC = 0,
VM_HUB_TYPE_MM = 1,
VM_HUB_TYPE_MAX,
};
enum { VMID_INVALID = 0xffff };
enum { MAX_VMID_GCHUB = 16 };
enum { MAX_VMID_MMHUB = 16 };
enum MES_LOG_OPERATION {
MES_LOG_OPERATION_CONTEXT_STATE_CHANGE = 0
};
enum MES_LOG_CONTEXT_STATE {
MES_LOG_CONTEXT_STATE_IDLE = 0,
MES_LOG_CONTEXT_STATE_RUNNING = 1,
MES_LOG_CONTEXT_STATE_READY = 2,
MES_LOG_CONTEXT_STATE_READY_STANDBY = 3,
};
struct MES_LOG_CONTEXT_STATE_CHANGE {
void *h_context;
enum MES_LOG_CONTEXT_STATE new_context_state;
};
struct MES_LOG_ENTRY_HEADER {
uint32_t first_free_entry_index;
uint32_t wraparound_count;
uint64_t number_of_entries;
uint64_t reserved[2];
};
struct MES_LOG_ENTRY_DATA {
uint64_t gpu_time_stamp;
uint32_t operation_type; /* operation_type is of MES_LOG_OPERATION type */
uint32_t reserved_operation_type_bits;
union {
struct MES_LOG_CONTEXT_STATE_CHANGE context_state_change;
uint64_t reserved_operation_data[2];
};
};
struct MES_LOG_BUFFER {
struct MES_LOG_ENTRY_HEADER header;
struct MES_LOG_ENTRY_DATA entries[1];
};
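/*
 * Illustrative sketch (assumption, not part of this header): a CPU-side
 * reader could walk the log ring using only the fields above, assuming
 * first_free_entry_index is the next write slot and the buffer has not yet
 * wrapped (wraparound_count == 0).
 */
static inline uint32_t mes_log_count_state_changes(struct MES_LOG_BUFFER *buf)
{
uint32_t i, n = 0;
for (i = 0; i < buf->header.first_free_entry_index; i++)
if (buf->entries[i].operation_type ==
MES_LOG_OPERATION_CONTEXT_STATE_CHANGE)
n++;
return n;
}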
union MESAPI_SET_HW_RESOURCES {
struct {
union MES_API_HEADER header;
uint32_t vmid_mask_mmhub;
uint32_t vmid_mask_gfxhub;
uint32_t gds_size;
uint32_t paging_vmid;
uint32_t compute_hqd_mask[MAX_COMPUTE_PIPES];
uint32_t gfx_hqd_mask[MAX_GFX_PIPES];
uint32_t sdma_hqd_mask[MAX_SDMA_PIPES];
uint32_t agreegated_doorbells[AMD_PRIORITY_NUM_LEVELS];
uint64_t g_sch_ctx_gpu_mc_ptr;
uint64_t query_status_fence_gpu_mc_ptr;
struct MES_API_STATUS api_status;
union {
struct {
uint32_t disable_reset : 1;
uint32_t reserved : 31;
};
uint32_t uint32_t_all;
};
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
};
union MESAPI__ADD_QUEUE {
struct {
union MES_API_HEADER header;
uint32_t process_id;
uint64_t page_table_base_addr;
uint64_t process_va_start;
uint64_t process_va_end;
uint64_t process_quantum;
uint64_t process_context_addr;
uint64_t gang_quantum;
uint64_t gang_context_addr;
uint32_t inprocess_gang_priority;
enum MES_AMD_PRIORITY_LEVEL gang_global_priority_level;
uint32_t doorbell_offset;
uint64_t mqd_addr;
uint64_t wptr_addr;
enum MES_QUEUE_TYPE queue_type;
uint32_t gds_base;
uint32_t gds_size;
uint32_t gws_base;
uint32_t gws_size;
uint32_t oa_mask;
struct {
uint32_t paging : 1;
uint32_t debug_vmid : 4;
uint32_t program_gds : 1;
uint32_t is_gang_suspended : 1;
uint32_t is_tmz_queue : 1;
uint32_t reserved : 24;
};
struct MES_API_STATUS api_status;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
};
union MESAPI__REMOVE_QUEUE {
struct {
union MES_API_HEADER header;
uint32_t doorbell_offset;
uint64_t gang_context_addr;
struct {
uint32_t unmap_legacy_gfx_queue : 1;
uint32_t reserved : 31;
};
struct MES_API_STATUS api_status;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
};
union MESAPI__SET_SCHEDULING_CONFIG {
struct {
union MES_API_HEADER header;
/* Grace period when preempting another priority band for this
* priority band. The value for idle priority band is ignored,
* as it never preempts other bands.
*/
uint64_t grace_period_other_levels[AMD_PRIORITY_NUM_LEVELS];
/* Default quantum for scheduling across processes within
* a priority band.
*/
uint64_t process_quantum_for_level[AMD_PRIORITY_NUM_LEVELS];
/* Default grace period for processes that preempt each other
* within a priority band.
*/
uint64_t process_grace_period_same_level[AMD_PRIORITY_NUM_LEVELS];
/* For normal level this field specifies the target GPU
* percentage in situations when it's starved by the high level.
* Valid values are between 0 and 50, with the default being 10.
*/
uint32_t normal_yield_percent;
struct MES_API_STATUS api_status;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
};
union MESAPI__PERFORM_YIELD {
struct {
union MES_API_HEADER header;
uint32_t dummy;
struct MES_API_STATUS api_status;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
};
union MESAPI__CHANGE_GANG_PRIORITY_LEVEL {
struct {
union MES_API_HEADER header;
uint32_t inprocess_gang_priority;
enum MES_AMD_PRIORITY_LEVEL gang_global_priority_level;
uint64_t gang_quantum;
uint64_t gang_context_addr;
struct MES_API_STATUS api_status;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
};
union MESAPI__SUSPEND {
struct {
union MES_API_HEADER header;
/* true - suspend all gangs; false - suspend a specific gang */
struct {
uint32_t suspend_all_gangs : 1;
uint32_t reserved : 31;
};
/* gang_context_addr is valid only if suspend_all = false */
uint64_t gang_context_addr;
uint64_t suspend_fence_addr;
uint32_t suspend_fence_value;
struct MES_API_STATUS api_status;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
};
union MESAPI__RESUME {
struct {
union MES_API_HEADER header;
/* true - resume all gangs; false - resume the specified gang */
struct {
uint32_t resume_all_gangs : 1;
uint32_t reserved : 31;
};
/* valid only if resume_all_gangs = false */
uint64_t gang_context_addr;
struct MES_API_STATUS api_status;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
};
union MESAPI__RESET {
struct {
union MES_API_HEADER header;
struct {
uint32_t reset_queue : 1;
uint32_t reserved : 31;
};
uint64_t gang_context_addr;
uint32_t doorbell_offset; /* valid only if reset_queue = true */
struct MES_API_STATUS api_status;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
};
union MESAPI__SET_LOGGING_BUFFER {
struct {
union MES_API_HEADER header;
/* There are separate log buffers for each queue type */
enum MES_QUEUE_TYPE log_type;
/* Log buffer GPU Address */
uint64_t logging_buffer_addr;
/* number of entries in the log buffer */
uint32_t number_of_entries;
/* Entry index at which CPU interrupt needs to be signalled */
uint32_t interrupt_entry;
struct MES_API_STATUS api_status;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
};
union MESAPI__QUERY_MES_STATUS {
struct {
union MES_API_HEADER header;
bool mes_healthy; /* 0 - not healthy, 1 - healthy */
struct MES_API_STATUS api_status;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
};
union MESAPI__PROGRAM_GDS {
struct {
union MES_API_HEADER header;
uint64_t process_context_addr;
uint32_t gds_base;
uint32_t gds_size;
uint32_t gws_base;
uint32_t gws_size;
uint32_t oa_mask;
struct MES_API_STATUS api_status;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
};
union MESAPI__SET_DEBUG_VMID {
struct {
union MES_API_HEADER header;
struct MES_API_STATUS api_status;
union {
struct {
uint32_t use_gds : 1;
uint32_t reserved : 31;
} flags;
uint32_t u32All;
};
uint32_t reserved;
uint32_t debug_vmid;
uint64_t process_context_addr;
uint64_t page_table_base_addr;
uint64_t process_va_start;
uint64_t process_va_end;
uint32_t gds_base;
uint32_t gds_size;
uint32_t gws_base;
uint32_t gws_size;
uint32_t oa_mask;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
};
enum MESAPI_MISC_OPCODE {
MESAPI_MISC__MODIFY_REG,
MESAPI_MISC__MAX,
};
enum MODIFY_REG_SUBCODE {
MODIFY_REG__OVERWRITE,
MODIFY_REG__RMW_OR,
MODIFY_REG__RMW_AND,
MODIFY_REG__MAX,
};
enum { MISC_DATA_MAX_SIZE_IN_DWORDS = 20 };
union MESAPI__MISC {
struct {
union MES_API_HEADER header;
enum MESAPI_MISC_OPCODE opcode;
struct MES_API_STATUS api_status;
union {
struct {
enum MODIFY_REG_SUBCODE subcode;
uint32_t reg_offset;
uint32_t reg_value;
} modify_reg;
uint32_t data[MISC_DATA_MAX_SIZE_IN_DWORDS];
};
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
};
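/*
 * Illustrative build-time check (assumption, not part of the original
 * header): every packet union above ends with max_dwords_in_api[], so each
 * API frame should be exactly API_FRAME_SIZE_IN_DWORDS dwords. Assuming the
 * kernel's static_assert() from <linux/build_bug.h> is visible here:
 */
static_assert(sizeof(union MESAPI__ADD_QUEUE) ==
API_FRAME_SIZE_IN_DWORDS * sizeof(uint32_t));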
#pragma pack(pop)
#endif

View file

@ -28,19 +28,165 @@
#include "nv.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "v10_structs.h"
#include "mes_api_def.h"
#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid 0x2820
#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid_BASE_IDX 1
MODULE_FIRMWARE("amdgpu/navi10_mes.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes.bin");
static int mes_v10_1_hw_fini(void *handle);
#define MES_EOP_SIZE 2048
static void mes_v10_1_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs],
ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
BUG();
}
}
static u64 mes_v10_1_ring_get_rptr(struct amdgpu_ring *ring)
{
return ring->adev->wb.wb[ring->rptr_offs];
}
static u64 mes_v10_1_ring_get_wptr(struct amdgpu_ring *ring)
{
u64 wptr;
if (ring->use_doorbell)
wptr = atomic64_read((atomic64_t *)
&ring->adev->wb.wb[ring->wptr_offs]);
else
BUG();
return wptr;
}
static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = {
.type = AMDGPU_RING_TYPE_MES,
.align_mask = 1,
.nop = 0,
.support_64bit_ptrs = true,
.get_rptr = mes_v10_1_ring_get_rptr,
.get_wptr = mes_v10_1_ring_get_wptr,
.set_wptr = mes_v10_1_ring_set_wptr,
.insert_nop = amdgpu_ring_insert_nop,
};
static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
void *pkt, int size)
{
int ndw = size / 4;
signed long r;
union MESAPI__ADD_QUEUE *x_pkt = pkt;
struct amdgpu_device *adev = mes->adev;
struct amdgpu_ring *ring = &mes->ring;
BUG_ON(size % 4 != 0);
if (amdgpu_ring_alloc(ring, ndw))
return -ENOMEM;
amdgpu_ring_write_multiple(ring, pkt, ndw);
amdgpu_ring_commit(ring);
DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
adev->usec_timeout);
if (r < 1) {
DRM_ERROR("MES failed to response msg=%d\n",
x_pkt->header.opcode);
return -ETIMEDOUT;
}
return 0;
}
static int convert_to_mes_queue_type(int queue_type)
{
if (queue_type == AMDGPU_RING_TYPE_GFX)
return MES_QUEUE_TYPE_GFX;
else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
return MES_QUEUE_TYPE_COMPUTE;
else if (queue_type == AMDGPU_RING_TYPE_SDMA)
return MES_QUEUE_TYPE_SDMA;
else
BUG();
return -1;
}
static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
struct mes_add_queue_input *input)
{
return 0;
struct amdgpu_device *adev = mes->adev;
union MESAPI__ADD_QUEUE mes_add_queue_pkt;
memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_add_queue_pkt.process_id = input->process_id;
mes_add_queue_pkt.page_table_base_addr =
input->page_table_base_addr - adev->gmc.vram_start;
mes_add_queue_pkt.process_va_start = input->process_va_start;
mes_add_queue_pkt.process_va_end = input->process_va_end;
mes_add_queue_pkt.process_quantum = input->process_quantum;
mes_add_queue_pkt.process_context_addr = input->process_context_addr;
mes_add_queue_pkt.gang_quantum = input->gang_quantum;
mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
mes_add_queue_pkt.inprocess_gang_priority =
input->inprocess_gang_priority;
mes_add_queue_pkt.gang_global_priority_level =
input->gang_global_priority_level;
mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_add_queue_pkt.mqd_addr = input->mqd_addr;
mes_add_queue_pkt.wptr_addr = input->wptr_addr;
mes_add_queue_pkt.queue_type =
convert_to_mes_queue_type(input->queue_type);
mes_add_queue_pkt.paging = input->paging;
mes_add_queue_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
mes_add_queue_pkt.api_status.api_completion_fence_value =
++mes->ring.fence_drv.sync_seq;
return mes_v10_1_submit_pkt_and_poll_completion(mes,
&mes_add_queue_pkt, sizeof(mes_add_queue_pkt));
}
static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
struct mes_remove_queue_input *input)
{
return 0;
union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
mes_remove_queue_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
mes_remove_queue_pkt.api_status.api_completion_fence_value =
++mes->ring.fence_drv.sync_seq;
return mes_v10_1_submit_pkt_and_poll_completion(mes,
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
}
static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
@ -55,6 +201,68 @@ static int mes_v10_1_resume_gang(struct amdgpu_mes *mes,
return 0;
}
static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes)
{
union MESAPI__QUERY_MES_STATUS mes_status_pkt;
memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_status_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
mes_status_pkt.api_status.api_completion_fence_value =
++mes->ring.fence_drv.sync_seq;
return mes_v10_1_submit_pkt_and_poll_completion(mes,
&mes_status_pkt, sizeof(mes_status_pkt));
}
static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
{
int i;
struct amdgpu_device *adev = mes->adev;
union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;
memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
mes_set_hw_res_pkt.paging_vmid = 0;
mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
mes->query_status_fence_gpu_addr;
for (i = 0; i < MAX_COMPUTE_PIPES; i++)
mes_set_hw_res_pkt.compute_hqd_mask[i] =
mes->compute_hqd_mask[i];
for (i = 0; i < MAX_GFX_PIPES; i++)
mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];
for (i = 0; i < MAX_SDMA_PIPES; i++)
mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
mes_set_hw_res_pkt.agreegated_doorbells[i] =
mes->agreegated_doorbells[i];
mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
mes_set_hw_res_pkt.api_status.api_completion_fence_value =
++mes->ring.fence_drv.sync_seq;
return mes_v10_1_submit_pkt_and_poll_completion(mes,
&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt));
}
static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
.add_hw_queue = mes_v10_1_add_hw_queue,
.remove_hw_queue = mes_v10_1_remove_hw_queue,
@ -68,11 +276,15 @@ static int mes_v10_1_init_microcode(struct amdgpu_device *adev)
char fw_name[30];
int err;
const struct mes_firmware_header_v1_0 *mes_hdr;
struct amdgpu_firmware_info *info;
switch (adev->asic_type) {
case CHIP_NAVI10:
chip_name = "navi10";
break;
case CHIP_SIENNA_CICHLID:
chip_name = "sienna_cichlid";
break;
default:
BUG();
}
@ -100,6 +312,22 @@ static int mes_v10_1_init_microcode(struct amdgpu_device *adev)
le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MES];
info->ucode_id = AMDGPU_UCODE_ID_CP_MES;
info->fw = adev->mes.fw;
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
PAGE_SIZE);
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MES_DATA];
info->ucode_id = AMDGPU_UCODE_ID_CP_MES_DATA;
info->fw = adev->mes.fw;
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
PAGE_SIZE);
}
return 0;
}
@ -267,15 +495,43 @@ static int mes_v10_1_load_microcode(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_MES_MDBOUND_LO, 0x3FFFF);
/* invalidate ICACHE */
data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
break;
default:
data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
break;
}
data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
break;
default:
WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
break;
}
/* prime the ICACHE. */
data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
break;
default:
data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
break;
}
data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
break;
default:
WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
break;
}
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
@ -283,15 +539,377 @@ static int mes_v10_1_load_microcode(struct amdgpu_device *adev)
return 0;
}
static int mes_v10_1_allocate_eop_buf(struct amdgpu_device *adev)
{
int r;
u32 *eop;
r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&adev->mes.eop_gpu_obj,
&adev->mes.eop_gpu_addr,
(void **)&eop);
if (r) {
dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
return r;
}
memset(eop, 0, adev->mes.eop_gpu_obj->tbo.mem.size);
amdgpu_bo_kunmap(adev->mes.eop_gpu_obj);
amdgpu_bo_unreserve(adev->mes.eop_gpu_obj);
return 0;
}
static int mes_v10_1_allocate_mem_slots(struct amdgpu_device *adev)
{
int r;
r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
if (r) {
dev_err(adev->dev,
"(%d) mes sch_ctx_offs wb alloc failed\n", r);
return r;
}
adev->mes.sch_ctx_gpu_addr =
adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4);
adev->mes.sch_ctx_ptr =
(uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs];
r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
if (r) {
dev_err(adev->dev,
"(%d) query_status_fence_offs wb alloc failed\n", r);
return r;
}
adev->mes.query_status_fence_gpu_addr =
adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
adev->mes.query_status_fence_ptr =
(uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];
return 0;
}
static int mes_v10_1_mqd_init(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct v10_compute_mqd *mqd = ring->mqd_ptr;
uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
uint32_t tmp;
mqd->header = 0xC0310800;
mqd->compute_pipelinestat_enable = 0x00000001;
mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
mqd->compute_misc_reserved = 0x00000003;
eop_base_addr = ring->eop_gpu_addr >> 8;
mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
(order_base_2(MES_EOP_SIZE / 4) - 1));
mqd->cp_hqd_eop_control = tmp;
/* enable doorbell? */
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
if (ring->use_doorbell) {
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_OFFSET, ring->doorbell_index);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_EN, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_SOURCE, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_HIT, 0);
}
else
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_EN, 0);
mqd->cp_hqd_pq_doorbell_control = tmp;
/* disable the queue if it's active */
ring->wptr = 0;
mqd->cp_hqd_dequeue_request = 0;
mqd->cp_hqd_pq_rptr = 0;
mqd->cp_hqd_pq_wptr_lo = 0;
mqd->cp_hqd_pq_wptr_hi = 0;
/* set the pointer to the MQD */
mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
/* set MQD vmid to 0 */
tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
mqd->cp_mqd_control = tmp;
/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
hqd_gpu_addr = ring->gpu_addr >> 8;
mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
/* set up the HQD, this is similar to CP_RB0_CNTL */
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
(order_base_2(ring->ring_size / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
mqd->cp_hqd_pq_control = tmp;
/* set the wb address whether it's enabled or not */
wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
tmp = 0;
/* enable the doorbell if requested */
if (ring->use_doorbell) {
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_OFFSET, ring->doorbell_index);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_EN, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_SOURCE, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_HIT, 0);
}
mqd->cp_hqd_pq_doorbell_control = tmp;
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
ring->wptr = 0;
mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
/* set the vmid for the queue */
mqd->cp_hqd_vmid = 0;
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
mqd->cp_hqd_persistent_state = tmp;
/* set MIN_IB_AVAIL_SIZE */
tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
mqd->cp_hqd_ib_control = tmp;
/* activate the queue */
mqd->cp_hqd_active = 1;
return 0;
}
static void mes_v10_1_queue_init_register(struct amdgpu_ring *ring)
{
struct v10_compute_mqd *mqd = ring->mqd_ptr;
struct amdgpu_device *adev = ring->adev;
uint32_t data = 0;
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, 3, 0, 0, 0);
/* set CP_HQD_VMID.VMID = 0. */
data = RREG32_SOC15(GC, 0, mmCP_HQD_VMID);
data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
WREG32_SOC15(GC, 0, mmCP_HQD_VMID, data);
/* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
data = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_EN, 0);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);
/* set CP_MQD_BASE_ADDR/HI with the MQD base address */
WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
/* set CP_MQD_CONTROL.VMID=0 */
data = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, 0);
/* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
/* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
mqd->cp_hqd_pq_rptr_report_addr_lo);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
mqd->cp_hqd_pq_rptr_report_addr_hi);
/* set CP_HQD_PQ_CONTROL */
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
/* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
mqd->cp_hqd_pq_wptr_poll_addr_lo);
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
mqd->cp_hqd_pq_wptr_poll_addr_hi);
/* set CP_HQD_PQ_DOORBELL_CONTROL */
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
mqd->cp_hqd_pq_doorbell_control);
/* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */
WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
/* set CP_HQD_ACTIVE.ACTIVE=1 */
WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
#if 0
static int mes_v10_1_kiq_enable_queue(struct amdgpu_device *adev)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
int r;
if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
return -EINVAL;
r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
if (r) {
DRM_ERROR("Failed to lock KIQ (%d).\n", r);
return r;
}
kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);
r = amdgpu_ring_test_ring(kiq_ring);
if (r) {
DRM_ERROR("kfq enable failed\n");
kiq_ring->sched.ready = false;
}
return r;
}
#endif
static int mes_v10_1_queue_init(struct amdgpu_device *adev)
{
int r;
r = mes_v10_1_mqd_init(&adev->mes.ring);
if (r)
return r;
#if 0
r = mes_v10_1_kiq_enable_queue(adev);
if (r)
return r;
#else
mes_v10_1_queue_init_register(&adev->mes.ring);
#endif
return 0;
}
static int mes_v10_1_ring_init(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring;
int r;
ring = &adev->mes.ring;
ring->funcs = &mes_v10_1_ring_funcs;
ring->me = 3;
ring->pipe = 0;
ring->queue = 0;
ring->ring_obj = NULL;
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.mes_ring << 1;
ring->eop_gpu_addr = adev->mes.eop_gpu_addr;
ring->no_scheduler = true;
sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);
r = amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
return 0;
}
static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev)
{
int r, mqd_size = sizeof(struct v10_compute_mqd);
struct amdgpu_ring *ring = &adev->mes.ring;
if (ring->mqd_obj)
return 0;
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
&ring->mqd_gpu_addr, &ring->mqd_ptr);
if (r) {
dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
return r;
}
/* prepare MQD backup */
adev->mes.mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
if (!adev->mes.mqd_backup)
dev_warn(adev->dev,
"no memory to create MQD backup for ring %s\n",
ring->name);
return 0;
}
static int mes_v10_1_sw_init(void *handle)
{
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->mes.adev = adev;
adev->mes.funcs = &mes_v10_1_funcs;
r = mes_v10_1_init_microcode(adev);
if (r)
return r;
r = mes_v10_1_allocate_eop_buf(adev);
if (r)
return r;
r = mes_v10_1_mqd_sw_init(adev);
if (r)
return r;
r = mes_v10_1_ring_init(adev);
if (r)
return r;
r = mes_v10_1_allocate_mem_slots(adev);
if (r)
return r;
return 0;
}
@ -299,6 +917,19 @@ static int mes_v10_1_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
kfree(adev->mes.mqd_backup);
amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
&adev->mes.ring.mqd_gpu_addr,
&adev->mes.ring.mqd_ptr);
amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj,
&adev->mes.eop_gpu_addr,
NULL);
mes_v10_1_free_microcode(adev);
return 0;
@ -315,14 +946,29 @@ static int mes_v10_1_hw_init(void *handle)
DRM_ERROR("failed to MES fw, r=%d\n", r);
return r;
}
} else {
DRM_ERROR("only support direct fw loading on MES\n");
return -EINVAL;
}
mes_v10_1_enable(adev, true);
r = mes_v10_1_queue_init(adev);
if (r)
goto failure;
r = mes_v10_1_set_hw_resources(&adev->mes);
if (r)
goto failure;
r = mes_v10_1_query_sched_status(&adev->mes);
if (r) {
DRM_ERROR("MES is busy\n");
goto failure;
}
return 0;
failure:
mes_v10_1_hw_fini(adev);
return r;
}
static int mes_v10_1_hw_fini(void *handle)

View file

@ -31,6 +31,11 @@
#include "soc15_common.h"
#define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid 0x064d
#define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid_BASE_IDX 0
#define mmDAGB0_CNTL_MISC2_Sienna_Cichlid 0x0070
#define mmDAGB0_CNTL_MISC2_Sienna_Cichlid_BASE_IDX 0
void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
@ -164,6 +169,10 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL4, tmp);
tmp = mmMMVM_L2_CNTL5_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
WREG32_SOC15(GC, 0, mmMMVM_L2_CNTL5, tmp);
}
static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev)
@ -363,9 +372,16 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
{
uint32_t def, data, def1, data1;
def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);
break;
default:
def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
break;
}
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
@ -388,11 +404,20 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
}
if (def != data)
WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
if (def1 != data1)
WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1);
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
if (def != data)
WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data);
if (def1 != data1)
WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid, data1);
break;
default:
if (def != data)
WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
if (def1 != data1)
WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1);
break;
}
}
static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
@ -400,15 +425,30 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
{
uint32_t def, data;
def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
break;
default:
def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
break;
}
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
else
data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
if (def != data)
WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
if (def != data) {
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data);
break;
default:
WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
break;
}
}
}
int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
@ -421,6 +461,7 @@ int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
mmhub_v2_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
mmhub_v2_0_update_medium_grain_light_sleep(adev,
@ -440,9 +481,16 @@ void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
if (amdgpu_sriov_vf(adev))
*flags = 0;
data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);
break;
default:
data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
break;
}
/* AMD_CG_SUPPORT_MC_MGCG */
if ((data & MM_ATC_L2_MISC_CG__ENABLE_MASK) &&

View file

@ -34,6 +34,9 @@
#define MAX_REARM_RETRY 10
#define mmIH_CHICKEN_Sienna_Cichlid 0x018d
#define mmIH_CHICKEN_Sienna_Cichlid_BASE_IDX 0
static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev);
/**
@ -265,10 +268,20 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) {
if (ih->use_bus_addr) {
ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
ih_chicken = REG_SET_FIELD(ih_chicken,
IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken);
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_Sienna_Cichlid);
ih_chicken = REG_SET_FIELD(ih_chicken,
IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_Sienna_Cichlid, ih_chicken);
break;
default:
ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
ih_chicken = REG_SET_FIELD(ih_chicken,
IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken);
break;
}
}
}

View file

@ -33,6 +33,13 @@
#define smnCPM_CONTROL 0x11180460
#define smnPCIE_CNTL2 0x11180070
#define mmBIF_SDMA2_DOORBELL_RANGE 0x01d6
#define mmBIF_SDMA2_DOORBELL_RANGE_BASE_IDX 2
#define mmBIF_SDMA3_DOORBELL_RANGE 0x01d7
#define mmBIF_SDMA3_DOORBELL_RANGE_BASE_IDX 2
#define mmBIF_MMSCH1_DOORBELL_RANGE 0x01d8
#define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX 2
static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev)
{
@ -81,7 +88,9 @@ static void nbio_v2_3_sdma_doorbell_range(struct amdgpu_device *adev, int instan
int doorbell_size)
{
u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) :
SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
instance == 1 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE) :
instance == 2 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA2_DOORBELL_RANGE) :
SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA3_DOORBELL_RANGE);
u32 doorbell_range = RREG32(reg);
@ -103,7 +112,8 @@ static void nbio_v2_3_sdma_doorbell_range(struct amdgpu_device *adev, int instan
static void nbio_v2_3_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
int doorbell_index, int instance)
{
u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE);
u32 reg = instance ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH1_DOORBELL_RANGE) :
SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE);
u32 doorbell_range = RREG32(reg);

View file

@ -41,6 +41,7 @@
#include "hdp/hdp_5_0_0_offset.h"
#include "hdp/hdp_5_0_0_sh_mask.h"
#include "smuio/smuio_11_0_0_offset.h"
#include "mp/mp_11_0_offset.h"
#include "soc15.h"
#include "soc15_common.h"
@ -52,8 +53,11 @@
#include "navi10_ih.h"
#include "gfx_v10_0.h"
#include "sdma_v5_0.h"
#include "sdma_v5_2.h"
#include "vcn_v2_0.h"
#include "jpeg_v2_0.h"
#include "vcn_v3_0.h"
#include "jpeg_v3_0.h"
#include "dce_virtual.h"
#include "mes_v10_1.h"
#include "mxgpu_nv.h"
@ -188,10 +192,8 @@ static struct soc15_allowed_register_entry nv_allowed_read_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE1)},
{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE2)},
{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE3)},
#if 0 /* TODO: will set it when SDMA header is available */
{ SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_STATUS_REG)},
{ SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_STATUS_REG)},
#endif
{ SOC15_REG_ENTRY(GC, 0, mmCP_STAT)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT1)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT2)},
@ -256,31 +258,6 @@ static int nv_read_register(struct amdgpu_device *adev, u32 se_num,
return -EINVAL;
}
#if 0
static void nv_gpu_pci_config_reset(struct amdgpu_device *adev)
{
u32 i;
dev_info(adev->dev, "GPU pci config reset\n");
/* disable BM */
pci_clear_master(adev->pdev);
/* reset */
amdgpu_pci_config_reset(adev);
udelay(100);
/* wait for asic to come out of reset */
for (i = 0; i < adev->usec_timeout; i++) {
u32 memsize = nbio_v2_3_get_memsize(adev);
if (memsize != 0xffffffff)
break;
udelay(1);
}
}
#endif
static int nv_asic_mode1_reset(struct amdgpu_device *adev)
{
u32 i;
@ -338,15 +315,6 @@ nv_asic_reset_method(struct amdgpu_device *adev)
static int nv_asic_reset(struct amdgpu_device *adev)
{
/* FIXME: it doesn't work since vega10 */
#if 0
amdgpu_atombios_scratch_regs_engine_hung(adev, true);
nv_gpu_pci_config_reset(adev);
amdgpu_atombios_scratch_regs_engine_hung(adev, false);
#endif
int ret = 0;
struct smu_context *smu = &adev->smu;
@ -442,6 +410,9 @@ legacy_init:
case CHIP_NAVI12:
navi12_reg_base_init(adev);
break;
case CHIP_SIENNA_CICHLID:
sienna_cichlid_reg_base_init(adev);
break;
default:
return -EINVAL;
}
@ -515,6 +486,28 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
if (!amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
break;
case CHIP_SIENNA_CICHLID:
amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
else if (amdgpu_device_has_dc_support(adev))
amdgpu_device_ip_block_add(adev, &dm_ip_block);
#endif
amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block);
amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
if (adev->enable_mes)
amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
break;
default:
return -EINVAL;
}
@ -550,7 +543,6 @@ static bool nv_need_full_reset(struct amdgpu_device *adev)
static bool nv_need_reset_on_init(struct amdgpu_device *adev)
{
#if 0
u32 sol_reg;
if (adev->flags & AMD_IS_APU)
@ -562,8 +554,7 @@ static bool nv_need_reset_on_init(struct amdgpu_device *adev)
sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
if (sol_reg)
return true;
#endif
/* TODO: re-enable it when mode1 reset is functional */
return false;
}
@ -592,8 +583,11 @@ static void nv_init_doorbell_index(struct amdgpu_device *adev)
adev->doorbell_index.userqueue_end = AMDGPU_NAVI10_DOORBELL_USERQUEUE_END;
adev->doorbell_index.gfx_ring0 = AMDGPU_NAVI10_DOORBELL_GFX_RING0;
adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1;
adev->doorbell_index.mes_ring = AMDGPU_NAVI10_DOORBELL_MES_RING;
adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0;
adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1;
adev->doorbell_index.sdma_engine[2] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE2;
adev->doorbell_index.sdma_engine[3] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE3;
adev->doorbell_index.ih = AMDGPU_NAVI10_DOORBELL_IH;
adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_NAVI10_DOORBELL64_VCN0_1;
adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_NAVI10_DOORBELL64_VCN2_3;
@ -714,7 +708,8 @@ static int nv_common_early_init(void *handle)
adev->pg_flags = AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
AMD_PG_SUPPORT_JPEG |
AMD_PG_SUPPORT_ATHUB;
AMD_PG_SUPPORT_ATHUB |
AMD_PG_SUPPORT_MMHUB;
/* guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0,
* as a consequence, the rev_id and external_rev_id are wrong.
* workaround it by hardcoding rev_id to 0 (default value).
@ -723,6 +718,23 @@ static int nv_common_early_init(void *handle)
adev->rev_id = 0;
adev->external_rev_id = adev->rev_id + 0xa;
break;
case CHIP_SIENNA_CICHLID:
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_MC_MGCG |
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG |
AMD_CG_SUPPORT_HDP_MGCG |
AMD_CG_SUPPORT_HDP_LS |
AMD_CG_SUPPORT_IH_CG |
AMD_CG_SUPPORT_MC_LS;
adev->pg_flags = AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
AMD_PG_SUPPORT_JPEG |
AMD_PG_SUPPORT_ATHUB;
adev->external_rev_id = adev->rev_id + 0x28;
break;
default:
/* FIXME: not supported yet */
return -EINVAL;
@ -889,6 +901,16 @@ static void nv_update_hdp_mem_power_gating(struct amdgpu_device *adev,
RC_MEM_POWER_DS_EN, enable);
}
/* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to
* be set for SRAM LS/DS/SD */
if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
AMD_CG_SUPPORT_HDP_SD)) {
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
IPH_MEM_POWER_CTRL_EN, 1);
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
RC_MEM_POWER_CTRL_EN, 1);
}
WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
/* restore IPH & RC clock override after clock/power mode changing */
@ -938,6 +960,7 @@ static int nv_common_set_clockgating_state(void *handle,
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
adev->nbio.funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
adev->nbio.funcs->update_medium_grain_light_sleep(adev,

View file

@ -32,4 +32,5 @@ int nv_set_ip_blocks(struct amdgpu_device *adev);
int navi10_reg_base_init(struct amdgpu_device *adev);
int navi14_reg_base_init(struct amdgpu_device *adev);
int navi12_reg_base_init(struct amdgpu_device *adev);
int sienna_cichlid_reg_base_init(struct amdgpu_device *adev);
#endif

View file

@ -110,6 +110,7 @@ enum psp_gfx_cmd_id
GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */
GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */
GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */
GFX_CMD_ID_CLEAR_VF_FW = 0x0000000D, /* Clear VF FW, to be used on VF shutdown. */
/* IDs upto 0x1F are reserved for older programs (Raven, Vega 10/12/20) */
GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */
GFX_CMD_ID_AUTOLOAD_RLC = 0x00000021, /* Indicates all graphics fw loaded, start RLC autoload */
@ -365,4 +366,11 @@ struct psp_gfx_rb_frame
/* total 64 bytes */
};
#define PSP_ERR_UNKNOWN_COMMAND 0x00000100
enum tee_error_code {
TEE_SUCCESS = 0x00000000,
TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A,
};
#endif /* _PSP_TEE_GFX_IF_H_ */

View file

@ -55,6 +55,8 @@ MODULE_FIRMWARE("amdgpu/navi12_ta.bin");
MODULE_FIRMWARE("amdgpu/arcturus_sos.bin");
MODULE_FIRMWARE("amdgpu/arcturus_asd.bin");
MODULE_FIRMWARE("amdgpu/arcturus_ta.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_sos.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_asd.bin");
/* address block */
#define smnMP1_FIRMWARE_FLAGS 0x3010024
@ -95,6 +97,9 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
case CHIP_ARCTURUS:
chip_name = "arcturus";
break;
case CHIP_SIENNA_CICHLID:
chip_name = "sienna_cichlid";
break;
default:
BUG();
}
@ -103,9 +108,11 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
if (err)
return err;
err = psp_init_asd_microcode(psp, chip_name);
if (err)
return err;
if (adev->asic_type != CHIP_SIENNA_CICHLID) {
err = psp_init_asd_microcode(psp, chip_name);
if (err)
return err;
}
switch (adev->asic_type) {
case CHIP_VEGA20:
@ -165,6 +172,8 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
le32_to_cpu(ta_hdr->ta_dtm_offset_bytes);
}
break;
case CHIP_SIENNA_CICHLID:
break;
default:
BUG();
}
@ -243,6 +252,39 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
return ret;
}
static int psp_v11_0_bootloader_load_spl(struct psp_context *psp)
{
int ret;
uint32_t psp_gfxdrv_command_reg = 0;
struct amdgpu_device *adev = psp->adev;
/* Check tOS sign of life register to confirm sys driver and sOS
* have already been loaded.
*/
if (psp_v11_0_is_sos_alive(psp))
return 0;
ret = psp_v11_0_wait_for_bootloader(psp);
if (ret)
return ret;
memset(psp->fw_pri_buf, 0, PSP_1_MEG);
/* Copy PSP SPL binary to memory */
memcpy(psp->fw_pri_buf, psp->spl_start_addr, psp->spl_bin_size);
/* Provide the PSP SPL to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
(uint32_t)(psp->fw_pri_mc_addr >> 20));
psp_gfxdrv_command_reg = PSP_BL__LOAD_TOS_SPL_TABLE;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
psp_gfxdrv_command_reg);
ret = psp_v11_0_wait_for_bootloader(psp);
return ret;
}
static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
{
int ret;
@ -354,7 +396,8 @@ static int psp_v11_0_ring_init(struct psp_context *psp,
struct psp_ring *ring;
struct amdgpu_device *adev = psp->adev;
if (!amdgpu_sriov_vf(adev))
if ((!amdgpu_sriov_vf(adev)) &&
(adev->asic_type != CHIP_SIENNA_CICHLID))
psp_v11_0_reroute_ih(psp);
ring = &psp->km_ring;
@ -555,44 +598,6 @@ static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg)
return ret;
}
static void psp_v11_0_memory_training_fini(struct psp_context *psp)
{
struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
kfree(ctx->sys_cache);
ctx->sys_cache = NULL;
}
static int psp_v11_0_memory_training_init(struct psp_context *psp)
{
int ret;
struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) {
DRM_DEBUG("memory training is not supported!\n");
return 0;
}
ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL);
if (ctx->sys_cache == NULL) {
DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n");
ret = -ENOMEM;
goto Err_out;
}
DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
ctx->train_data_size,
ctx->p2c_train_data_offset,
ctx->c2p_train_data_offset);
ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS;
return 0;
Err_out:
psp_v11_0_memory_training_fini(psp);
return ret;
}
/*
* save and restore process
*/
@ -813,6 +818,7 @@ static int psp_v11_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver)
static const struct psp_funcs psp_v11_0_funcs = {
.init_microcode = psp_v11_0_init_microcode,
.bootloader_load_kdb = psp_v11_0_bootloader_load_kdb,
.bootloader_load_spl = psp_v11_0_bootloader_load_spl,
.bootloader_load_sysdrv = psp_v11_0_bootloader_load_sysdrv,
.bootloader_load_sos = psp_v11_0_bootloader_load_sos,
.ring_init = psp_v11_0_ring_init,
@ -820,8 +826,6 @@ static const struct psp_funcs psp_v11_0_funcs = {
.ring_stop = psp_v11_0_ring_stop,
.ring_destroy = psp_v11_0_ring_destroy,
.mode1_reset = psp_v11_0_mode1_reset,
.mem_training_init = psp_v11_0_memory_training_init,
.mem_training_fini = psp_v11_0_memory_training_fini,
.mem_training = psp_v11_0_memory_training,
.ring_get_wptr = psp_v11_0_ring_get_wptr,
.ring_set_wptr = psp_v11_0_ring_set_wptr,

View file

@ -19,38 +19,24 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: AMD
*
*/
#ifndef _DMUB_CMD_DAL_H_
#define _DMUB_CMD_DAL_H_
#ifndef __SDMA_COMMON_H__
#define __SDMA_COMMON_H__
/*
* Command IDs should be treated as stable ABI.
* Do not reuse or modify IDs.
*/
enum dmub_cmd_psr_type {
DMUB_CMD__PSR_SET_VERSION = 0,
DMUB_CMD__PSR_COPY_SETTINGS = 1,
DMUB_CMD__PSR_ENABLE = 2,
DMUB_CMD__PSR_DISABLE = 3,
DMUB_CMD__PSR_SET_LEVEL = 4,
enum sdma_utcl2_cache_read_policy {
CACHE_READ_POLICY_L2__LRU = 0x00000000,
CACHE_READ_POLICY_L2__STREAM = 0x00000001,
CACHE_READ_POLICY_L2__NOA = 0x00000002,
CACHE_READ_POLICY_L2__DEFAULT = CACHE_READ_POLICY_L2__NOA,
};
enum psr_version {
PSR_VERSION_1 = 0,
PSR_VERSION_UNSUPPORTED = 0xFFFFFFFF,
enum sdma_utcl2_cache_write_policy {
CACHE_WRITE_POLICY_L2__LRU = 0x00000000,
CACHE_WRITE_POLICY_L2__STREAM = 0x00000001,
CACHE_WRITE_POLICY_L2__NOA = 0x00000002,
CACHE_WRITE_POLICY_L2__BYPASS = 0x00000003,
CACHE_WRITE_POLICY_L2__DEFAULT = CACHE_WRITE_POLICY_L2__BYPASS,
};
enum dmub_cmd_abm_type {
DMUB_CMD__ABM_INIT_CONFIG = 0,
DMUB_CMD__ABM_SET_PIPE = 1,
DMUB_CMD__ABM_SET_BACKLIGHT = 2,
DMUB_CMD__ABM_SET_LEVEL = 3,
DMUB_CMD__ABM_SET_AMBIENT_LEVEL = 4,
DMUB_CMD__ABM_SET_PWM_FRAC = 5,
};
#endif /* _DMUB_CMD_DAL_H_ */
#endif /* __SDMA_COMMON_H__ */

View file

@ -505,6 +505,36 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
}
}
static void sdma_v4_0_setup_ulv(struct amdgpu_device *adev)
{
int i;
/*
* The only chips with SDMAv4 and ULV are VG10 and VG20.
* Server SKUs take a different hysteresis setting from other SKUs.
*/
switch (adev->asic_type) {
case CHIP_VEGA10:
if (adev->pdev->device == 0x6860)
break;
return;
case CHIP_VEGA20:
if (adev->pdev->device == 0x66a1)
break;
return;
default:
return;
}
for (i = 0; i < adev->sdma.num_instances; i++) {
uint32_t temp;
temp = RREG32_SDMA(i, mmSDMA0_ULV_CNTL);
temp = REG_SET_FIELD(temp, SDMA0_ULV_CNTL, HYSTERESIS, 0x0);
WREG32_SDMA(i, mmSDMA0_ULV_CNTL, temp);
}
}
static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
{
int err = 0;
@ -529,8 +559,8 @@ static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev)
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
if (adev->sdma.instance[i].fw != NULL)
release_firmware(adev->sdma.instance[i].fw);
release_firmware(adev->sdma.instance[i].fw);
adev->sdma.instance[i].fw = NULL;
/* arcturus shares the same FW memory across
all SDMA instances */
@ -1774,7 +1804,7 @@ static int sdma_v4_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR)
if (adev->flags & AMD_IS_APU)
adev->sdma.num_instances = 1;
else if (adev->asic_type == CHIP_ARCTURUS)
adev->sdma.num_instances = 8;
@ -1813,6 +1843,8 @@ static int sdma_v4_0_late_init(void *handle)
.cb = sdma_v4_0_process_ras_data_cb,
};
sdma_v4_0_setup_ulv(adev);
if (adev->sdma.funcs && adev->sdma.funcs->reset_ras_error_count)
adev->sdma.funcs->reset_ras_error_count(adev);
@ -1912,9 +1944,7 @@ static int sdma_v4_0_hw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs &&
adev->powerplay.pp_funcs->set_powergating_by_smu) ||
(adev->asic_type == CHIP_RENOIR && !adev->in_gpu_reset))
if (adev->flags & AMD_IS_APU)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
if (!amdgpu_sriov_vf(adev))
@ -1941,9 +1971,7 @@ static int sdma_v4_0_hw_fini(void *handle)
sdma_v4_0_ctx_switch_enable(adev, false);
sdma_v4_0_enable(adev, false);
if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs
&& adev->powerplay.pp_funcs->set_powergating_by_smu) ||
adev->asic_type == CHIP_RENOIR)
if (adev->flags & AMD_IS_APU)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);
return 0;
@ -2202,6 +2230,7 @@ static int sdma_v4_0_set_powergating_state(void *handle,
switch (adev->asic_type) {
case CHIP_RAVEN:
case CHIP_RENOIR:
sdma_v4_1_update_power_gating(adev,
state == AMD_PG_STATE_GATE ? true : false);
break;

View file

@ -40,6 +40,7 @@
#include "soc15.h"
#include "navi10_sdma_pkt_open.h"
#include "nbio_v2_3.h"
#include "sdma_common.h"
#include "sdma_v5_0.h"
MODULE_FIRMWARE("amdgpu/navi10_sdma.bin");
@ -1298,8 +1299,12 @@ static int sdma_v5_0_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
for (i = 0; i < adev->sdma.num_instances; i++) {
release_firmware(adev->sdma.instance[i].fw);
adev->sdma.instance[i].fw = NULL;
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
}
return 0;
}

View file

@ -24,21 +24,6 @@
#ifndef __SDMA_V5_0_H__
#define __SDMA_V5_0_H__
enum sdma_v5_0_utcl2_cache_read_policy {
CACHE_READ_POLICY_L2__LRU = 0x00000000,
CACHE_READ_POLICY_L2__STREAM = 0x00000001,
CACHE_READ_POLICY_L2__NOA = 0x00000002,
CACHE_READ_POLICY_L2__DEFAULT = CACHE_READ_POLICY_L2__NOA,
};
enum sdma_v5_0_utcl2_cache_write_policy {
CACHE_WRITE_POLICY_L2__LRU = 0x00000000,
CACHE_WRITE_POLICY_L2__STREAM = 0x00000001,
CACHE_WRITE_POLICY_L2__NOA = 0x00000002,
CACHE_WRITE_POLICY_L2__BYPASS = 0x00000003,
CACHE_WRITE_POLICY_L2__DEFAULT = CACHE_WRITE_POLICY_L2__BYPASS,
};
extern const struct amd_ip_funcs sdma_v5_0_ip_funcs;
extern const struct amdgpu_ip_block_version sdma_v5_0_ip_block;

File diff suppressed because it is too large

View file

@ -0,0 +1,30 @@
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __SDMA_V5_2_H__
#define __SDMA_V5_2_H__
extern const struct amd_ip_funcs sdma_v5_2_ip_funcs;
extern const struct amdgpu_ip_block_version sdma_v5_2_ip_block;
#endif /* __SDMA_V5_2_H__ */

View file

@ -41,9 +41,11 @@
#include "si_dma.h"
#include "dce_v6_0.h"
#include "si.h"
#include "uvd_v3_1.h"
#include "dce_virtual.h"
#include "gca/gfx_6_0_d.h"
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
#include "gmc/gmc_6_0_d.h"
#include "dce/dce_6_0_d.h"
#include "uvd/uvd_4_0_d.h"
@ -973,6 +975,28 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
}
static u32 si_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
{
unsigned long flags;
u32 r;
spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff));
r = RREG32(mmUVD_CTX_DATA);
spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
return r;
}
static void si_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags;
spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff));
WREG32(mmUVD_CTX_DATA, (v));
spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
}
static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = {
{GRBM_STATUS},
{mmGRBM_STATUS2},
@ -1443,8 +1467,8 @@ static int si_common_early_init(void *handle)
adev->pcie_wreg = &si_pcie_wreg;
adev->pciep_rreg = &si_pciep_rreg;
adev->pciep_wreg = &si_pciep_wreg;
adev->uvd_ctx_rreg = NULL;
adev->uvd_ctx_wreg = NULL;
adev->uvd_ctx_rreg = si_uvd_ctx_rreg;
adev->uvd_ctx_wreg = si_uvd_ctx_wreg;
adev->didt_rreg = NULL;
adev->didt_wreg = NULL;
@ -2173,7 +2197,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
else
amdgpu_device_ip_block_add(adev, &dce_v6_0_ip_block);
/* amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); */
amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block);
/* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */
break;
case CHIP_OLAND:
@ -2187,8 +2211,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
else
amdgpu_device_ip_block_add(adev, &dce_v6_4_ip_block);
/* amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); */
amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block);
/* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */
break;
case CHIP_HAINAN:

View file

@ -781,7 +781,7 @@ struct NISLANDS_SMC_SWSTATE
uint8_t levelCount;
uint8_t padding2;
uint8_t padding3;
NISLANDS_SMC_HW_PERFORMANCE_LEVEL levels[1];
NISLANDS_SMC_HW_PERFORMANCE_LEVEL levels[];
};
typedef struct NISLANDS_SMC_SWSTATE NISLANDS_SMC_SWSTATE;

View file

@ -27,6 +27,8 @@
#include "amdgpu_ih.h"
#include "sid.h"
#include "si_ih.h"
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
static void si_ih_set_interrupt_funcs(struct amdgpu_device *adev);

View file

@ -1646,9 +1646,10 @@
/*
* PM4
*/
#define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \
(((reg) >> 2) & 0xFFFF) | \
((n) & 0x3FFF) << 16)
#define PACKET_TYPE0 0
#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \
((reg) & 0xFFFF) | \
((n) & 0x3FFF) << 16)
#define CP_PACKET2 0x80000000
#define PACKET2_PAD_SHIFT 0
#define PACKET2_PAD_MASK (0x3fffffff << 0)
@ -2340,11 +2341,6 @@
# define NI_INPUT_GAMMA_XVYCC_222 3
# define NI_OVL_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 4)
#define IH_RB_WPTR__RB_OVERFLOW_MASK 0x1
#define IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK 0x80000000
#define SRBM_STATUS__IH_BUSY_MASK 0x20000
#define SRBM_SOFT_RESET__SOFT_RESET_IH_MASK 0x400
#define BLACKOUT_MODE_MASK 0x00000007
#define VGA_RENDER_CONTROL 0xC0
#define R_000300_VGA_RENDER_CONTROL 0xC0
@ -2431,18 +2427,6 @@
#define MC_SEQ_MISC0__MT__HBM 0x60000000
#define MC_SEQ_MISC0__MT__DDR3 0xB0000000
#define SRBM_STATUS__MCB_BUSY_MASK 0x200
#define SRBM_STATUS__MCB_BUSY__SHIFT 0x9
#define SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK 0x400
#define SRBM_STATUS__MCB_NON_DISPLAY_BUSY__SHIFT 0xa
#define SRBM_STATUS__MCC_BUSY_MASK 0x800
#define SRBM_STATUS__MCC_BUSY__SHIFT 0xb
#define SRBM_STATUS__MCD_BUSY_MASK 0x1000
#define SRBM_STATUS__MCD_BUSY__SHIFT 0xc
#define SRBM_STATUS__VMC_BUSY_MASK 0x100
#define SRBM_STATUS__VMC_BUSY__SHIFT 0x8
#define GRBM_STATUS__GUI_ACTIVE_MASK 0x80000000
#define CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK 0x4000000
#define CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK 0x800000
@ -2467,8 +2451,6 @@
#define PCIE_BUS_CLK 10000
#define TCLK (PCIE_BUS_CLK / 10)
#define CC_DRM_ID_STRAPS__ATI_REV_ID_MASK 0xf0000000
#define CC_DRM_ID_STRAPS__ATI_REV_ID__SHIFT 0x1c
#define PCIE_PORT_INDEX 0xe
#define PCIE_PORT_DATA 0xf
#define EVERGREEN_PIF_PHY0_INDEX 0x8

View file

@ -0,0 +1,54 @@
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "nv.h"
#include "soc15_common.h"
#include "soc15_hw_ip.h"
#include "sienna_cichlid_ip_offset.h"
int sienna_cichlid_reg_base_init(struct amdgpu_device *adev)
{
/* HW has more IP blocks; only initialize the blocks needed by the driver */
uint32_t i;
for (i = 0 ; i < MAX_INSTANCE ; ++i) {
adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i]));
adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i]));
adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
}
return 0;
}

View file

@ -415,7 +415,8 @@ static int soc15_read_register(struct amdgpu_device *adev, u32 se_num,
*value = 0;
for (i = 0; i < ARRAY_SIZE(soc15_allowed_read_registers); i++) {
en = &soc15_allowed_read_registers[i];
if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
if (adev->reg_offset[en->hwip][en->inst] &&
reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+ en->reg_offset))
continue;
@ -670,14 +671,25 @@ static uint32_t soc15_get_rev_id(struct amdgpu_device *adev)
int soc15_set_ip_blocks(struct amdgpu_device *adev)
{
int r;
/* Set IP register base before any HW register access */
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_RAVEN:
case CHIP_RENOIR:
vega10_reg_base_init(adev);
break;
case CHIP_RENOIR:
if (amdgpu_discovery) {
r = amdgpu_discovery_reg_base_init(adev);
if (r) {
DRM_WARN("failed to init reg base from ip discovery table, "
"fallback to legacy init method\n");
vega10_reg_base_init(adev);
}
}
break;
case CHIP_VEGA20:
vega20_reg_base_init(adev);
break;

View file

@ -50,18 +50,19 @@
#define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \
WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value)
#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask, ret) \
#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask) \
({ int ret = 0; \
do { \
uint32_t old_ = 0; \
uint32_t old_ = 0; \
uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
uint32_t loop = adev->usec_timeout; \
ret = 0; \
while ((tmp_ & (mask)) != (expected_value)) { \
if (old_ != tmp_) { \
loop = adev->usec_timeout; \
old_ = tmp_; \
} else \
udelay(1); \
old_ = tmp_; \
} else \
udelay(1); \
tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
loop--; \
if (!loop) { \
@ -71,7 +72,9 @@
break; \
} \
} \
} while (0)
} while (0); \
ret; \
})
#define WREG32_RLC(reg, value) \
do { \

View file

@ -0,0 +1,793 @@
/*
* Copyright 2020 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Sonny Jiang <sonny.jiang@amd.com>
*/
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_uvd.h"
#include "sid.h"
#include "uvd/uvd_3_1_d.h"
#include "uvd/uvd_3_1_sh_mask.h"
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
/**
* uvd_v3_1_ring_get_rptr - get read pointer
*
* @ring: amdgpu_ring pointer
*
* Returns the current hardware read pointer
*/
static uint64_t uvd_v3_1_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
return RREG32(mmUVD_RBC_RB_RPTR);
}
/**
* uvd_v3_1_ring_get_wptr - get write pointer
*
* @ring: amdgpu_ring pointer
*
* Returns the current hardware write pointer
*/
static uint64_t uvd_v3_1_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
return RREG32(mmUVD_RBC_RB_WPTR);
}
/**
* uvd_v3_1_ring_set_wptr - set write pointer
*
* @ring: amdgpu_ring pointer
*
* Commits the write pointer to the hardware
*/
static void uvd_v3_1_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
}
/**
* uvd_v3_1_ring_emit_ib - execute indirect buffer
*
* @ring: amdgpu_ring pointer
* @ib: indirect buffer to execute
*
* Write ring commands to execute the indirect buffer
*/
static void uvd_v3_1_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
uint32_t flags)
{
amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0));
amdgpu_ring_write(ring, ib->gpu_addr);
amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_SIZE, 0));
amdgpu_ring_write(ring, ib->length_dw);
}
/**
* uvd_v3_1_ring_emit_fence - emit a fence & trap command
*
* @ring: amdgpu_ring pointer
* @fence: fence to emit
*
* Write a fence and a trap command to the ring.
*/
static void uvd_v3_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
unsigned flags)
{
WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
amdgpu_ring_write(ring, seq);
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
amdgpu_ring_write(ring, addr & 0xffffffff);
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
amdgpu_ring_write(ring, 2);
}
/**
* uvd_v3_1_ring_test_ring - register write test
*
* @ring: amdgpu_ring pointer
*
* Test if we can successfully write to the context register
*/
static int uvd_v3_1_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t tmp = 0;
unsigned i;
int r;
WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
return r;
amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(mmUVD_CONTEXT_ID);
if (tmp == 0xDEADBEEF)
break;
udelay(1);
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
return r;
}
static void uvd_v3_1_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
int i;
WARN_ON(ring->wptr % 2 || count % 2);
for (i = 0; i < count / 2; i++) {
amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0));
amdgpu_ring_write(ring, 0);
}
}
static const struct amdgpu_ring_funcs uvd_v3_1_ring_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
.support_64bit_ptrs = false,
.no_user_fence = true,
.get_rptr = uvd_v3_1_ring_get_rptr,
.get_wptr = uvd_v3_1_ring_get_wptr,
.set_wptr = uvd_v3_1_ring_set_wptr,
.parse_cs = amdgpu_uvd_ring_parse_cs,
.emit_frame_size =
14, /* uvd_v3_1_ring_emit_fence x1 no user fence */
.emit_ib_size = 4, /* uvd_v3_1_ring_emit_ib */
.emit_ib = uvd_v3_1_ring_emit_ib,
.emit_fence = uvd_v3_1_ring_emit_fence,
.test_ring = uvd_v3_1_ring_test_ring,
.test_ib = amdgpu_uvd_ring_test_ib,
.insert_nop = uvd_v3_1_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_uvd_ring_begin_use,
.end_use = amdgpu_uvd_ring_end_use,
};
static void uvd_v3_1_set_ring_funcs(struct amdgpu_device *adev)
{
adev->uvd.inst->ring.funcs = &uvd_v3_1_ring_funcs;
}
static void uvd_v3_1_set_dcm(struct amdgpu_device *adev,
bool sw_mode)
{
u32 tmp, tmp2;
WREG32_FIELD(UVD_CGC_GATE, REGS, 0);
tmp = RREG32(mmUVD_CGC_CTRL);
tmp &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
tmp |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK |
(1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT) |
(4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT);
if (sw_mode) {
tmp &= ~0x7ffff800;
tmp2 = UVD_CGC_CTRL2__DYN_OCLK_RAMP_EN_MASK |
UVD_CGC_CTRL2__DYN_RCLK_RAMP_EN_MASK |
(7 << UVD_CGC_CTRL2__GATER_DIV_ID__SHIFT);
} else {
tmp |= 0x7ffff800;
tmp2 = 0;
}
WREG32(mmUVD_CGC_CTRL, tmp);
WREG32_UVD_CTX(ixUVD_CGC_CTRL2, tmp2);
}
/**
* uvd_v3_1_mc_resume - memory controller programming
*
* @adev: amdgpu_device pointer
*
* Let the UVD memory controller know its offsets
*/
static void uvd_v3_1_mc_resume(struct amdgpu_device *adev)
{
uint64_t addr;
uint32_t size;
/* program the VCPU memory controller bits 0-27 */
addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3;
size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3;
WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr);
WREG32(mmUVD_VCPU_CACHE_SIZE0, size);
addr += size;
size = AMDGPU_UVD_HEAP_SIZE >> 3;
WREG32(mmUVD_VCPU_CACHE_OFFSET1, addr);
WREG32(mmUVD_VCPU_CACHE_SIZE1, size);
addr += size;
size = (AMDGPU_UVD_STACK_SIZE +
(AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles)) >> 3;
WREG32(mmUVD_VCPU_CACHE_OFFSET2, addr);
WREG32(mmUVD_VCPU_CACHE_SIZE2, size);
/* bits 28-31 */
addr = (adev->uvd.inst->gpu_addr >> 28) & 0xF;
WREG32(mmUVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
/* bits 32-39 */
addr = (adev->uvd.inst->gpu_addr >> 32) & 0xFF;
WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
}
/**
* uvd_v3_1_fw_validate - FW validation operation
*
* @adev: amdgpu_device pointer
*
* Initiate and check UVD firmware validation.
*/
static int uvd_v3_1_fw_validate(struct amdgpu_device *adev)
{
void *ptr;
uint32_t ucode_len, i;
uint32_t keysel;
ptr = adev->uvd.inst[0].cpu_addr;
ptr += 192 + 16;
memcpy(&ucode_len, ptr, 4);
ptr += ucode_len;
memcpy(&keysel, ptr, 4);
WREG32(mmUVD_FW_START, keysel);
for (i = 0; i < 10; ++i) {
mdelay(10);
if (RREG32(mmUVD_FW_STATUS) & UVD_FW_STATUS__DONE_MASK)
break;
}
if (i == 10)
return -ETIMEDOUT;
if (!(RREG32(mmUVD_FW_STATUS) & UVD_FW_STATUS__PASS_MASK))
return -EINVAL;
for (i = 0; i < 10; ++i) {
mdelay(10);
if (!(RREG32(mmUVD_FW_STATUS) & UVD_FW_STATUS__BUSY_MASK))
break;
}
if (i == 10)
return -ETIMEDOUT;
return 0;
}
/**
* uvd_v3_1_start - start UVD block
*
* @adev: amdgpu_device pointer
*
* Setup and start the UVD block
*/
static int uvd_v3_1_start(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t rb_bufsz;
int i, j, r;
u32 tmp;
/* disable byte swapping */
u32 lmi_swap_cntl = 0;
u32 mp_swap_cntl = 0;
/* set uvd busy */
WREG32_P(mmUVD_STATUS, 1<<2, ~(1<<2));
uvd_v3_1_set_dcm(adev, true);
WREG32(mmUVD_CGC_GATE, 0);
/* take UVD block out of reset */
WREG32_P(mmSRBM_SOFT_RESET, 0, ~SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK);
mdelay(5);
/* enable VCPU clock */
WREG32(mmUVD_VCPU_CNTL, 1 << 9);
/* disable interrupt */
WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1));
#ifdef __BIG_ENDIAN
/* swap (8 in 32) RB and IB */
lmi_swap_cntl = 0xa;
mp_swap_cntl = 0;
#endif
WREG32(mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
WREG32(mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
/* initialize UVD memory controller */
WREG32(mmUVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) |
(1 << 21) | (1 << 9) | (1 << 20));
tmp = RREG32(mmUVD_MPC_CNTL);
WREG32(mmUVD_MPC_CNTL, tmp | 0x10);
WREG32(mmUVD_MPC_SET_MUXA0, 0x40c2040);
WREG32(mmUVD_MPC_SET_MUXA1, 0x0);
WREG32(mmUVD_MPC_SET_MUXB0, 0x40c2040);
WREG32(mmUVD_MPC_SET_MUXB1, 0x0);
WREG32(mmUVD_MPC_SET_ALU, 0);
WREG32(mmUVD_MPC_SET_MUX, 0x88);
tmp = RREG32_UVD_CTX(ixUVD_LMI_CACHE_CTRL);
WREG32_UVD_CTX(ixUVD_LMI_CACHE_CTRL, tmp & (~0x10));
/* enable UMC */
WREG32_P(mmUVD_LMI_CTRL2, 0, ~(1 << 8));
WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK);
WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
mdelay(10);
for (i = 0; i < 10; ++i) {
uint32_t status;
for (j = 0; j < 100; ++j) {
status = RREG32(mmUVD_STATUS);
if (status & 2)
break;
mdelay(10);
}
r = 0;
if (status & 2)
break;
DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
WREG32_P(mmUVD_SOFT_RESET, UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
mdelay(10);
WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
mdelay(10);
r = -1;
}
if (r) {
DRM_ERROR("UVD not responding, giving up!!!\n");
return r;
}
/* enable interrupt */
WREG32_P(mmUVD_MASTINT_EN, 3<<1, ~(3 << 1));
WREG32_P(mmUVD_STATUS, 0, ~(1<<2));
/* force RBC into idle state */
WREG32(mmUVD_RBC_RB_CNTL, 0x11010101);
/* Set the write pointer delay */
WREG32(mmUVD_RBC_RB_WPTR_CNTL, 0);
/* program the 4GB memory segment for rptr and ring buffer */
WREG32(mmUVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) |
(0x7 << 16) | (0x1 << 31));
/* Initialize the ring buffer's read and write pointers */
WREG32(mmUVD_RBC_RB_RPTR, 0x0);
ring->wptr = RREG32(mmUVD_RBC_RB_RPTR);
WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
/* set the ring address */
WREG32(mmUVD_RBC_RB_BASE, ring->gpu_addr);
/* Set ring buffer size */
rb_bufsz = order_base_2(ring->ring_size);
rb_bufsz = (0x1 << 8) | rb_bufsz;
WREG32_P(mmUVD_RBC_RB_CNTL, rb_bufsz, ~0x11f1f);
return 0;
}
/**
* uvd_v3_1_stop - stop UVD block
*
* @adev: amdgpu_device pointer
*
* Stop the UVD block
*/
static void uvd_v3_1_stop(struct amdgpu_device *adev)
{
uint32_t i, j;
uint32_t status;
WREG32(mmUVD_RBC_RB_CNTL, 0x11010101);
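/* wait for UVD_STATUS and the LMI status bits to settle before stalling the UMC and register bus */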
for (i = 0; i < 10; ++i) {
for (j = 0; j < 100; ++j) {
status = RREG32(mmUVD_STATUS);
if (status & 2)
break;
mdelay(1);
}
if (status & 2)
break;
}
for (i = 0; i < 10; ++i) {
for (j = 0; j < 100; ++j) {
status = RREG32(mmUVD_LMI_STATUS);
if (status & 0xf)
break;
mdelay(1);
}
if (status & 0xf)
break;
}
/* Stall UMC and register bus before resetting VCPU */
WREG32_P(mmUVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
for (i = 0; i < 10; ++i) {
for (j = 0; j < 100; ++j) {
status = RREG32(mmUVD_LMI_STATUS);
if (status & 0x240)
break;
mdelay(1);
}
if (status & 0x240)
break;
}
WREG32_P(0x3D49, 0, ~(1 << 2));
WREG32_P(mmUVD_VCPU_CNTL, 0, ~(1 << 9));
/* put LMI, VCPU, RBC etc... into reset */
WREG32(mmUVD_SOFT_RESET, UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
WREG32(mmUVD_STATUS, 0);
uvd_v3_1_set_dcm(adev, false);
}
static int uvd_v3_1_set_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
enum amdgpu_interrupt_state state)
{
return 0;
}
static int uvd_v3_1_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
DRM_DEBUG("IH: UVD TRAP\n");
amdgpu_fence_process(&adev->uvd.inst->ring);
return 0;
}
static const struct amdgpu_irq_src_funcs uvd_v3_1_irq_funcs = {
.set = uvd_v3_1_set_interrupt_state,
.process = uvd_v3_1_process_interrupt,
};
static void uvd_v3_1_set_irq_funcs(struct amdgpu_device *adev)
{
adev->uvd.inst->irq.num_types = 1;
adev->uvd.inst->irq.funcs = &uvd_v3_1_irq_funcs;
}
static int uvd_v3_1_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->uvd.num_uvd_inst = 1;
uvd_v3_1_set_ring_funcs(adev);
uvd_v3_1_set_irq_funcs(adev);
return 0;
}
static int uvd_v3_1_sw_init(void *handle)
{
struct amdgpu_ring *ring;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
/* UVD TRAP */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
if (r)
return r;
r = amdgpu_uvd_sw_init(adev);
if (r)
return r;
ring = &adev->uvd.inst->ring;
sprintf(ring->name, "uvd");
r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0,
AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
r = amdgpu_uvd_resume(adev);
if (r)
return r;
r = amdgpu_uvd_entity_init(adev);
return r;
}
static int uvd_v3_1_sw_fini(void *handle)
{
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
r = amdgpu_uvd_suspend(adev);
if (r)
return r;
return amdgpu_uvd_sw_fini(adev);
}
static void uvd_v3_1_enable_mgcg(struct amdgpu_device *adev,
bool enable)
{
u32 orig, data;
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) {
data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL);
data |= 0x3fff;
WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data);
orig = data = RREG32(mmUVD_CGC_CTRL);
data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
if (orig != data)
WREG32(mmUVD_CGC_CTRL, data);
} else {
data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL);
data &= ~0x3fff;
WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data);
orig = data = RREG32(mmUVD_CGC_CTRL);
data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
if (orig != data)
WREG32(mmUVD_CGC_CTRL, data);
}
}
/**
* uvd_v3_1_hw_init - start and test UVD block
*
* @adev: amdgpu_device pointer
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
static int uvd_v3_1_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int r;
uvd_v3_1_mc_resume(adev);
r = uvd_v3_1_fw_validate(adev);
if (r) {
DRM_ERROR("amdgpu: UVD Firmware validate fail (%d).\n", r);
return r;
}
uvd_v3_1_enable_mgcg(adev, true);
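/* bring the UVD clocks up (values in 10KHz units: 533MHz VCLK, 400MHz DCLK) before starting the block */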
amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
uvd_v3_1_start(adev);
r = amdgpu_ring_test_helper(ring);
if (r) {
DRM_ERROR("amdgpu: UVD ring test fail (%d).\n", r);
goto done;
}
r = amdgpu_ring_alloc(ring, 10);
if (r) {
DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
goto done;
}
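/* program generous semaphore wait/signal timeouts through ring packets */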
tmp = PACKET0(mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
amdgpu_ring_write(ring, tmp);
amdgpu_ring_write(ring, 0xFFFFF);
tmp = PACKET0(mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
amdgpu_ring_write(ring, tmp);
amdgpu_ring_write(ring, 0xFFFFF);
tmp = PACKET0(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
amdgpu_ring_write(ring, tmp);
amdgpu_ring_write(ring, 0xFFFFF);
/* Clear timeout status bits */
amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_TIMEOUT_STATUS, 0));
amdgpu_ring_write(ring, 0x8);
amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0));
amdgpu_ring_write(ring, 3);
amdgpu_ring_commit(ring);
done:
if (!r)
DRM_INFO("UVD initialized successfully.\n");
return r;
}
/**
* uvd_v3_1_hw_fini - stop the hardware block
*
* @adev: amdgpu_device pointer
*
* Stop the UVD block and mark the ring as no longer ready
*/
static int uvd_v3_1_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (RREG32(mmUVD_STATUS) != 0)
uvd_v3_1_stop(adev);
return 0;
}
static int uvd_v3_1_suspend(void *handle)
{
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
r = uvd_v3_1_hw_fini(adev);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
static int uvd_v3_1_resume(void *handle)
{
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
r = amdgpu_uvd_resume(adev);
if (r)
return r;
return uvd_v3_1_hw_init(adev);
}
static bool uvd_v3_1_is_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
}
static int uvd_v3_1_wait_for_idle(void *handle)
{
unsigned i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK))
return 0;
}
return -ETIMEDOUT;
}
static int uvd_v3_1_soft_reset(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
uvd_v3_1_stop(adev);
WREG32_P(mmSRBM_SOFT_RESET, SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK,
~SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK);
mdelay(5);
return uvd_v3_1_start(adev);
}
static int uvd_v3_1_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
return 0;
}
static int uvd_v3_1_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
return 0;
}
static const struct amd_ip_funcs uvd_v3_1_ip_funcs = {
.name = "uvd_v3_1",
.early_init = uvd_v3_1_early_init,
.late_init = NULL,
.sw_init = uvd_v3_1_sw_init,
.sw_fini = uvd_v3_1_sw_fini,
.hw_init = uvd_v3_1_hw_init,
.hw_fini = uvd_v3_1_hw_fini,
.suspend = uvd_v3_1_suspend,
.resume = uvd_v3_1_resume,
.is_idle = uvd_v3_1_is_idle,
.wait_for_idle = uvd_v3_1_wait_for_idle,
.soft_reset = uvd_v3_1_soft_reset,
.set_clockgating_state = uvd_v3_1_set_clockgating_state,
.set_powergating_state = uvd_v3_1_set_powergating_state,
};
const struct amdgpu_ip_block_version uvd_v3_1_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_UVD,
.major = 3,
.minor = 1,
.rev = 0,
.funcs = &uvd_v3_1_ip_funcs,
};

View file

@@ -0,0 +1,29 @@
/*
* Copyright 2020 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __UVD_V3_1_H__
#define __UVD_V3_1_H__
extern const struct amdgpu_ip_block_version uvd_v3_1_ip_block;
#endif

View file

@@ -38,6 +38,7 @@
#include "ivsrcid/vcn/irqsrcs_vcn_1_0.h"
#include "jpeg_v1_0.h"
#include "vcn_v1_0.h"
#define mmUVD_RBC_XX_IB_REG_CHECK_1_0 0x05ab
#define mmUVD_RBC_XX_IB_REG_CHECK_1_0_BASE_IDX 1
@@ -360,68 +361,68 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
/* cache window 0: fw */
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo),
0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi),
0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0,
0xFFFFFFFF, 0);
offset = 0;
} else {
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
lower_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
upper_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0);
offset = size;
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0xFFFFFFFF, 0);
}
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size, 0xFFFFFFFF, 0);
/* cache window 1: stack */
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0,
0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE,
0xFFFFFFFF, 0);
/* cache window 2: context */
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE,
0xFFFFFFFF, 0);
/* VCN global tiling registers */
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_ADDR_CONFIG,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_UDEC_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_DBW_UV_ADDR_CONFIG,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_UDEC_DBW_UV_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_CURR_ADDR_CONFIG,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_CURR_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_CURR_UV_ADDR_CONFIG,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_CURR_UV_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_RECON1_ADDR_CONFIG,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_RECON1_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_RECON1_UV_ADDR_CONFIG,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_RECON1_UV_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_REF_ADDR_CONFIG,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_REF_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_REF_UV_ADDR_CONFIG,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_REF_UV_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
}
@@ -636,9 +637,9 @@ static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t s
reg_data = 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
reg_data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
reg_data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
WREG32_SOC15_DPG_MODE(UVD, 0, mmJPEG_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmJPEG_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel);
WREG32_SOC15_DPG_MODE(UVD, 0, mmJPEG_CGC_GATE, 0, 0xFFFFFFFF, sram_sel);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmJPEG_CGC_GATE, 0, 0xFFFFFFFF, sram_sel);
/* enable sw clock gating control */
if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
@@ -667,22 +668,21 @@ static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t s
UVD_CGC_CTRL__WCB_MODE_MASK |
UVD_CGC_CTRL__VCPU_MODE_MASK |
UVD_CGC_CTRL__SCPU_MODE_MASK);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel);
/* turn off clock gating */
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, sram_sel);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, sram_sel);
/* turn on SUVD clock gating */
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SUVD_CGC_GATE, 1, 0xFFFFFFFF, sram_sel);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SUVD_CGC_GATE, 1, 0xFFFFFFFF, sram_sel);
/* turn on sw mode in UVD_SUVD_CGC_CTRL */
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SUVD_CGC_CTRL, 0, 0xFFFFFFFF, sram_sel);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SUVD_CGC_CTRL, 0, 0xFFFFFFFF, sram_sel);
}
static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
{
uint32_t data = 0;
int ret;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
@@ -698,7 +698,7 @@ static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
| 2 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT);
WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON, 0xFFFFFF, ret);
SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON, 0xFFFFFF);
} else {
data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
| 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
@@ -712,7 +712,7 @@ static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
| 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
| 1 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT);
WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFFFFF, ret);
SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFFFFF);
}
/* polling UVD_PGFSM_STATUS to confirm UVDM_PWR_STATUS, UVDU_PWR_STATUS are 0 (power on) */
@@ -728,7 +728,6 @@ static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
{
uint32_t data = 0;
int ret;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
/* Before power off, this indicator has to be turned on */
@@ -763,7 +762,7 @@ static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
| 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
| 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT
| 2 << UVD_PGFSM_STATUS__UVDW_PWR_STATUS__SHIFT);
SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFFFFF, ret);
SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFFFFF);
}
}
@@ -972,14 +971,14 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK;
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CNTL, tmp, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CNTL, tmp, 0xFFFFFFFF, 0);
/* disable interrupt */
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MASTINT_EN,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MASTINT_EN,
0, UVD_MASTINT_EN__VCPU_EN_MASK, 0);
/* initialize VCN memory controller */
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_CTRL,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_CTRL,
(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
@@ -993,48 +992,48 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
/* swap (8 in 32) RB and IB */
lmi_swap_cntl = 0xa;
#endif
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_CNTL,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MPC_CNTL,
0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUXA0,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MPC_SET_MUXA0,
((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUXB0,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MPC_SET_MUXB0,
((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUX,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MPC_SET_MUX,
((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0xFFFFFFFF, 0);
vcn_v1_0_mc_resume_dpg_mode(adev);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_REG_XX_MASK, 0x10, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, 0x3, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_REG_XX_MASK, 0x10, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, 0x3, 0xFFFFFFFF, 0);
/* boot up the VCPU */
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SOFT_RESET, 0, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SOFT_RESET, 0, 0xFFFFFFFF, 0);
/* enable UMC */
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_CTRL2,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_CTRL2,
0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT,
0xFFFFFFFF, 0);
/* enable master interrupt */
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MASTINT_EN,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MASTINT_EN,
UVD_MASTINT_EN__VCPU_EN_MASK, UVD_MASTINT_EN__VCPU_EN_MASK, 0);
vcn_v1_0_clock_gating_dpg_mode(adev, 1);
/* setup mmUVD_LMI_CTRL */
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_CTRL,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_CTRL,
(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
@@ -1046,11 +1045,11 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
tmp = adev->gfx.config.gb_addr_config;
/* setup VCN global tiling registers */
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_JPEG_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1);
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_JPEG_UV_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_JPEG_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_JPEG_UV_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1);
/* enable System Interrupt for JRBC */
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SYS_INT_EN,
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SYS_INT_EN,
UVD_SYS_INT_EN__UVD_JRBC_EN_MASK, 0xFFFFFFFF, 1);
/* force RBC into idle state */
@@ -1112,15 +1111,15 @@
*/
static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
{
int ret_code, tmp;
int tmp;
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, ret_code);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
UVD_LMI_STATUS__READ_CLEAN_MASK |
UVD_LMI_STATUS__WRITE_CLEAN_MASK |
UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp, ret_code);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp);
/* put VCPU into reset */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
@@ -1129,7 +1128,7 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp, ret_code);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp);
/* disable VCPU clock */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0,
@@ -1153,30 +1152,29 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
static int vcn_v1_0_stop_dpg_mode(struct amdgpu_device *adev)
{
int ret_code = 0;
uint32_t tmp;
/* Wait for power status to be UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF */
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
/* wait for read ptr to be equal to write ptr */
tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF);
tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
tmp = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_JRBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_JRBC_RB_RPTR, tmp, 0xFFFFFFFF);
tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
/* disable dynamic power gating mode */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
@@ -1219,9 +1217,9 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
ret_code = 0;
if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
ret_code = SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
if (!ret_code) {
/* pause DPG non-jpeg */
@@ -1229,7 +1227,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
/* Restore */
ring = &adev->vcn.inst->ring_enc[0];
@@ -1251,7 +1249,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
}
} else {
/* unpause dpg non-jpeg, no need to wait */
@@ -1275,9 +1273,9 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
ret_code = 0;
if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
ret_code = SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
if (!ret_code) {
/* Make sure JPRG Snoop is disabled before sending the pause */
@@ -1290,7 +1288,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
/* Restore */
ring = &adev->jpeg.inst->ring_dec;
@@ -1312,7 +1310,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
}
} else {
/* unpause dpg jpeg, no need to wait */
@@ -1335,10 +1333,10 @@ static bool vcn_v1_0_is_idle(void *handle)
static int vcn_v1_0_wait_for_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret = 0;
int ret;
SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
UVD_STATUS__IDLE, ret);
ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
UVD_STATUS__IDLE);
return ret;
}

View file

@@ -30,6 +30,7 @@
#include "amdgpu_pm.h"
#include "amdgpu_psp.h"
#include "mmsch_v2_0.h"
#include "vcn_v2_0.h"
#include "vcn/vcn_2_0_0_offset.h"
#include "vcn/vcn_2_0_0_sh_mask.h"
@@ -382,91 +383,91 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec
/* cache window 0: fw */
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
if (!indirect) {
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
} else {
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
}
offset = 0;
} else {
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);
offset = size;
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
}
if (!indirect)
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
else
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
/* cache window 1: stack */
if (!indirect) {
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
} else {
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
}
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
/* cache window 2: context */
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
/* non-cache window */
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0),
AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
/* VCN global tiling registers */
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
}
@@ -615,19 +616,19 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
UVD_CGC_CTRL__WCB_MODE_MASK |
UVD_CGC_CTRL__VCPU_MODE_MASK |
UVD_CGC_CTRL__SCPU_MODE_MASK);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
/* turn off clock gating */
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);
/* turn on SUVD clock gating */
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
/* turn on sw mode in UVD_SUVD_CGC_CTRL */
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
}
@@ -696,7 +697,6 @@ static void vcn_v2_0_enable_clock_gating(struct amdgpu_device *adev)
static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev)
{
uint32_t data = 0;
int ret;
if (amdgpu_sriov_vf(adev))
return;
@@ -715,7 +715,7 @@ static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS,
UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0, 0xFFFFF, ret);
UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0, 0xFFFFF);
} else {
data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
| 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
@@ -728,7 +728,7 @@ static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev)
| 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
| 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT);
WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFF, ret);
SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFF);
}
/* polling UVD_PGFSM_STATUS to confirm UVDM_PWR_STATUS,
@@ -746,7 +746,6 @@ static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev)
static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
{
uint32_t data = 0;
int ret;
if (amdgpu_sriov_vf(adev))
return;
@@ -782,7 +781,7 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
| 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
| 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
| 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT);
SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFF, ret);
SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFF);
}
}
@@ -810,11 +809,11 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK;
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
/* disable master interrupt */
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect);
/* setup mmUVD_LMI_CTRL */
@@ -826,28 +825,28 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
0x00100000L);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_MPC_CNTL),
0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_MPC_SET_MUXA0),
((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_MPC_SET_MUXB0),
((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_MPC_SET_MUX),
((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
@@ -855,22 +854,22 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
vcn_v2_0_mc_resume_dpg_mode(adev, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
/* release VCPU reset to boot */
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_SOFT_RESET), 0, 0, indirect);
/* enable LMI MC and UMC channels */
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_CTRL2),
0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT, 0, indirect);
/* enable master interrupt */
WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_MASTINT_EN),
UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
@@ -1098,25 +1097,24 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev)
{
int ret_code = 0;
uint32_t tmp;
/* Wait for power status to be 1 */
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
/* wait for read ptr to be equal to write ptr */
tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF);
tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
/* disable dynamic power gating mode */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
@@ -1138,7 +1136,7 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev)
}
/* wait for uvd idle */
SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r);
r = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
if (r)
return r;
@@ -1146,7 +1144,7 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev)
UVD_LMI_STATUS__READ_CLEAN_MASK |
UVD_LMI_STATUS__WRITE_CLEAN_MASK |
UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp, r);
r = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp);
if (r)
return r;
@@ -1157,7 +1155,7 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev)
tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp, r);
r = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp);
if (r)
return r;
@@ -1208,9 +1206,8 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
ret_code = 0;
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 0x1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
ret_code = SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 0x1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
if (!ret_code) {
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
@@ -1221,7 +1218,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
/* wait for ACK */
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
/* Stall DPG before WPTR/RPTR reset */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
@@ -1258,7 +1255,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
}
} else {
/* unpause dpg, no need to wait */
@@ -1281,10 +1278,10 @@ static bool vcn_v2_0_is_idle(void *handle)
static int vcn_v2_0_wait_for_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret = 0;
int ret;
SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
UVD_STATUS__IDLE, ret);
ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
UVD_STATUS__IDLE);
return ret;
}

View file

@@ -451,91 +451,91 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
/* cache window 0: fw */
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
if (!indirect) {
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
} else {
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
}
offset = 0;
} else {
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
offset = size;
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_OFFSET0),
AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
}
if (!indirect)
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
else
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
/* cache window 1: stack */
if (!indirect) {
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
} else {
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
}
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
/* cache window 2: context */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
/* non-cache window */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_NONCACHE_SIZE0),
AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
/* VCN global tiling registers */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
}
@@ -549,7 +549,6 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev)
{
uint32_t data;
int ret = 0;
int i;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -589,7 +588,7 @@ static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data);
SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, ret);
SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF);
data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
@@ -689,19 +688,19 @@ static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev,
UVD_CGC_CTRL__WCB_MODE_MASK |
UVD_CGC_CTRL__VCPU_MODE_MASK |
UVD_CGC_CTRL__MMSCH_MODE_MASK);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
/* turn off clock gating */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);
/* turn on SUVD clock gating */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
/* turn on sw mode in UVD_SUVD_CGC_CTRL */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
}
@@ -792,11 +791,11 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
/* disable master interrupt */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_MASTINT_EN), 0, 0, indirect);
/* setup mmUVD_LMI_CTRL */
@@ -808,28 +807,28 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
0x00100000L);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_MPC_CNTL),
0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_MPC_SET_MUXA0),
((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_MPC_SET_MUXB0),
((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_MPC_SET_MUX),
((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
@@ -837,26 +836,26 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
/* enable LMI MC and UMC channels */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect);
/* unblock VCPU register access */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
/* enable master interrupt */
WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_MASTINT_EN),
UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
@@ -1302,25 +1301,24 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
{
int ret_code = 0;
uint32_t tmp;
/* Wait for power status to be 1 */
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
/* wait for read ptr to be equal to write ptr */
tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR);
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF);
tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2);
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF);
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
/* disable dynamic power gating mode */
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
@@ -1343,7 +1341,7 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev)
}
/* wait for vcn idle */
SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r);
r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
if (r)
return r;
@@ -1351,7 +1349,7 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev)
UVD_LMI_STATUS__READ_CLEAN_MASK |
UVD_LMI_STATUS__WRITE_CLEAN_MASK |
UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r);
r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
if (r)
return r;
@@ -1362,7 +1360,7 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev)
tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r);
r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
if (r)
return r;
@@ -1412,8 +1410,8 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
if (!ret_code) {
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
@@ -1425,7 +1423,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
/* wait for ACK */
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
/* Stall DPG before WPTR/RPTR reset */
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
@@ -1458,13 +1456,13 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
}
} else {
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
}
adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
}
@@ -1701,8 +1699,8 @@ static int vcn_v2_5_wait_for_idle(void *handle)
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE,
UVD_STATUS__IDLE, ret);
ret = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE,
UVD_STATUS__IDLE);
if (ret)
return ret;
}

File diff suppressed because it is too large

View file

@@ -0,0 +1,29 @@
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __VCN_V3_0_H__
#define __VCN_V3_0_H__
extern const struct amdgpu_ip_block_version vcn_v3_0_ip_block;
#endif /* __VCN_V3_0_H__ */

View file

@ -679,169 +679,175 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0xbf810000, 0x00000000,
};
static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf820001, 0xbf8201c1,
static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbf820001, 0xbf8201cd,
0xb0804004, 0xb978f802,
0x8a788678, 0xb971f803,
0x876eff71, 0x00000400,
0xbf850033, 0x876eff71,
0x00000100, 0xbf840002,
0x8878ff78, 0x00002000,
0x8a77ff77, 0xff000000,
0xb96ef807, 0x876fff6e,
0x02000000, 0x8f6f866f,
0x88776f77, 0x876fff6e,
0x003f8000, 0x8f6f896f,
0x88776f77, 0x8a6eff6e,
0x023f8000, 0xb9eef807,
0xb97af812, 0xb97bf813,
0x8ffa887a, 0xf4051bbd,
0xfa000000, 0xbf8cc07f,
0xf4051ebd, 0xfa000008,
0xbf8cc07f, 0x87ee6e6e,
0xbf840001, 0xbe80206e,
0xb971f803, 0x8771ff71,
0x000001ff, 0xbf850002,
0x806c846c, 0x826d806d,
0x876dff6d, 0x0000ffff,
0x906e8977, 0x876fff6e,
0x003f8000, 0x906e8677,
0x876eff6e, 0x02000000,
0x886e6f6e, 0xb9eef807,
0x87fe7e7e, 0x87ea6a6a,
0xb9f8f802, 0xbe80226c,
0xb971f803, 0x8771ff71,
0x00000100, 0xbf840006,
0xbef60380, 0xb9f60203,
0x876dff6d, 0x0000ffff,
0x80ec886c, 0x82ed806d,
0xbef60380, 0xb9f60283,
0xb972f816, 0xb9762c07,
0x8f769a76, 0x886d766d,
0xb97603c7, 0x8f769976,
0x886d766d, 0xb9760647,
0x8f769876, 0x886d766d,
0xb976f807, 0x8776ff76,
0x00007fff, 0xb9f6f807,
0x8a788678, 0xb96ef801,
0x876eff6e, 0x00000800,
0xbf840003, 0x876eff78,
0x00002000, 0xbf840009,
0xb97bf803, 0x876eff7b,
0x00000400, 0xbf850033,
0x876eff7b, 0x00000100,
0xbf840002, 0x8878ff78,
0x00002000, 0x8a77ff77,
0xff000000, 0xb96ef807,
0x876fff6e, 0x02000000,
0x8f6f866f, 0x88776f77,
0x876fff6e, 0x003f8000,
0x8f6f896f, 0x88776f77,
0x8a6eff6e, 0x023f8000,
0xb9eef807, 0xb97af812,
0xb97bf813, 0x8ffa887a,
0xf4051bbd, 0xfa000000,
0xbf8cc07f, 0xf4051ebd,
0xfa000008, 0xbf8cc07f,
0x87ee6e6e, 0xbf840001,
0xbe80206e, 0xb97bf803,
0x877bff7b, 0x000001ff,
0xbf850002, 0x806c846c,
0x826d806d, 0x876dff6d,
0x0000ffff, 0x906e8977,
0x876fff6e, 0x003f8000,
0x906e8677, 0x876eff6e,
0x02000000, 0x886e6f6e,
0xb9eef807, 0x87fe7e7e,
0x87ea6a6a, 0xb9f8f802,
0xbe80226c, 0x876dff6d,
0x0000ffff, 0xbefa0380,
0xb9fa0283, 0xb97a2c07,
0x8f7a9a7a, 0x886d7a6d,
0xb97a03c7, 0x8f7a997a,
0x886d7a6d, 0xb97a0647,
0x8f7a987a, 0x886d7a6d,
0xb97af807, 0x877aff7a,
0x00007fff, 0xb9faf807,
0xbeee037e, 0xbeef037f,
0xbefe0480, 0xbf900004,
0xbf8e0002, 0xbf88fffe,
0xb97b02dc, 0x8f7b997b,
0x887b7b7f, 0xb97a2a05,
0x807a817a, 0xbf0d997b,
0xbf850002, 0x8f7a897a,
0xbf820001, 0x8f7a8a7a,
0x877bff7f, 0x0000ffff,
0x807aff7a, 0x00000200,
0x807a7e7a, 0x827b807b,
0xf4491c3d, 0xfa000050,
0xf4491d3d, 0xfa000060,
0xf4411e7d, 0xfa000074,
0xbef4037e, 0x8775ff7f,
0x0000ffff, 0x8875ff75,
0x00040000, 0xbef60380,
0xbef703ff, 0x10807fac,
0x8776ff7f, 0x08000000,
0x90768376, 0x88777677,
0x8776ff7f, 0x70000000,
0x90768176, 0x88777677,
0xbefb037c, 0xbefa0380,
0x877aff7f, 0x08000000,
0x907a837a, 0x88777a77,
0x877aff7f, 0x70000000,
0x907a817a, 0x88777a77,
0xbef1037c, 0xbef00380,
0xb97302dc, 0x8f739973,
0x8873737f, 0xb97a2a05,
0x807a817a, 0x907c9973,
0x877c817c, 0xbf06817c,
0xbf850002, 0x8f7a897a,
0xbf820001, 0x8f7a8a7a,
0xb9761e06, 0x8f768a76,
0x807a767a, 0x807aff7a,
0x00000200, 0xbef603ff,
0x01000000, 0xbefe037c,
0xbefc037a, 0xf4611efa,
0xf8000000, 0x807a847a,
0xbefc037e, 0xbefe037c,
0xbefc037a, 0xf4611b3a,
0xf8000000, 0x807a847a,
0xbefc037e, 0xbefe037c,
0xbefc037a, 0xf4611b7a,
0xf8000000, 0x807a847a,
0xbefc037e, 0xbefe037c,
0xbefc037a, 0xf4611bba,
0xf8000000, 0x807a847a,
0xbefc037e, 0xbefe037c,
0xbefc037a, 0xf4611bfa,
0xf8000000, 0x807a847a,
0xbefc037e, 0xbefe037c,
0xbefc037a, 0xf4611e3a,
0xf8000000, 0x807a847a,
0xbefc037e, 0xb971f803,
0xbefe037c, 0xbefc037a,
0xf4611c7a, 0xf8000000,
0x807a847a, 0xbefc037e,
0xbefe037c, 0xbefc037a,
0xf4611cba, 0xf8000000,
0x807a847a, 0xbefc037e,
0xb97bf801, 0xbefe037c,
0xbefc037a, 0xf4611efa,
0xf8000000, 0x807a847a,
0xbefc037e, 0xb97bf814,
0xbefe037c, 0xbefc037a,
0xf4611efa, 0xf8000000,
0x807a847a, 0xbefc037e,
0xb97bf815, 0xbefe037c,
0xbefc037a, 0xf4611efa,
0xf8000000, 0x807a847a,
0xbefc037e, 0x8776ff7f,
0x04000000, 0xbeef0380,
0x886f6f76, 0xb97a2a05,
0x807a817a, 0x907c9973,
0x877c817c, 0xbf06817c,
0xbf850002, 0x8f7a897a,
0xbf820001, 0x8f7a8a7a,
0xb9761e06, 0x8f768a76,
0x807a767a, 0xbef603ff,
0x01000000, 0xbef20374,
0x80747a74, 0x82758075,
0xbefc0380, 0xbf800000,
0xbe802f00, 0xbe822f02,
0xbe842f04, 0xbe862f06,
0xbe882f08, 0xbe8a2f0a,
0xbe8c2f0c, 0xbe8e2f0e,
0xf469003a, 0xfa000000,
0xf469013a, 0xfa000010,
0xf469023a, 0xfa000020,
0xf469033a, 0xfa000030,
0x8074c074, 0x82758075,
0x807c907c, 0xbf0aff7c,
0x00000060, 0xbf85ffea,
0xbe802f00, 0xbe822f02,
0xbe842f04, 0xbe862f06,
0xbe882f08, 0xbe8a2f0a,
0xf469003a, 0xfa000000,
0xf469013a, 0xfa000010,
0xf469023a, 0xfa000020,
0x8074b074, 0x82758075,
0xbef40372, 0xbefa0380,
0x8873737f, 0xb97bf816,
0xba80f816, 0x00000000,
0xbefe03c1, 0x907c9973,
0x877c817c, 0xbf06817c,
0xbf850002, 0xbeff0380,
0xbf820002, 0xbeff03c1,
0xbf82000b, 0xbef603ff,
0x01000000, 0xe0704000,
0x7a5d0000, 0xe0704080,
0x7a5d0100, 0xe0704100,
0x7a5d0200, 0xe0704180,
0x7a5d0300, 0xbf82000a,
0x705d0000, 0xe0704080,
0x705d0100, 0xe0704100,
0x705d0200, 0xe0704180,
0x705d0300, 0xbf82000a,
0xbef603ff, 0x01000000,
0xe0704000, 0x7a5d0000,
0xe0704100, 0x7a5d0100,
0xe0704200, 0x7a5d0200,
0xe0704300, 0x7a5d0300,
0xe0704000, 0x705d0000,
0xe0704100, 0x705d0100,
0xe0704200, 0x705d0200,
0xe0704300, 0x705d0300,
0xb9702a05, 0x80708170,
0xbf0d9973, 0xbf850002,
0x8f708970, 0xbf820001,
0x8f708a70, 0xb97a1e06,
0x8f7a8a7a, 0x80707a70,
0x8070ff70, 0x00000200,
0xbef603ff, 0x01000000,
0xbefe037c, 0xbefc0370,
0xf4611c7a, 0xf8000000,
0x80708470, 0xbefc037e,
0xbefe037c, 0xbefc0370,
0xf4611b3a, 0xf8000000,
0x80708470, 0xbefc037e,
0xbefe037c, 0xbefc0370,
0xf4611b7a, 0xf8000000,
0x80708470, 0xbefc037e,
0xbefe037c, 0xbefc0370,
0xf4611bba, 0xf8000000,
0x80708470, 0xbefc037e,
0xbefe037c, 0xbefc0370,
0xf4611bfa, 0xf8000000,
0x80708470, 0xbefc037e,
0xbefe037c, 0xbefc0370,
0xf4611e3a, 0xf8000000,
0x80708470, 0xbefc037e,
0xb97af803, 0xbefe037c,
0xbefc0370, 0xf4611eba,
0xf8000000, 0x80708470,
0xbefc037e, 0xbefe037c,
0xbefc0370, 0xf4611efa,
0xf8000000, 0x80708470,
0xbefc037e, 0xb971f801,
0xbefe037c, 0xbefc0370,
0xf4611c7a, 0xf8000000,
0x80708470, 0xbefc037e,
0xb971f814, 0xbefe037c,
0xbefc0370, 0xf4611c7a,
0xf8000000, 0x80708470,
0xbefc037e, 0xb971f815,
0xbefe037c, 0xbefc0370,
0xf4611c7a, 0xf8000000,
0x80708470, 0xbefc037e,
0xb9702a05, 0x80708170,
0xbf0d9973, 0xbf850002,
0x8f708970, 0xbf820001,
0x8f708a70, 0xb97a1e06,
0x8f7a8a7a, 0x80707a70,
0xbef603ff, 0x01000000,
0xbefb0374, 0x80747074,
0x82758075, 0xbefc0380,
0xbf800000, 0xbe802f00,
0xbe822f02, 0xbe842f04,
0xbe862f06, 0xbe882f08,
0xbe8a2f0a, 0xbe8c2f0c,
0xbe8e2f0e, 0xf469003a,
0xfa000000, 0xf469013a,
0xfa000010, 0xf469023a,
0xfa000020, 0xf469033a,
0xfa000030, 0x8074c074,
0x82758075, 0x807c907c,
0xbf0aff7c, 0x00000060,
0xbf85ffea, 0xbe802f00,
0xbe822f02, 0xbe842f04,
0xbe862f06, 0xbe882f08,
0xbe8a2f0a, 0xf469003a,
0xfa000000, 0xf469013a,
0xfa000010, 0xf469023a,
0xfa000020, 0x8074b074,
0x82758075, 0xbef4037b,
0xbefe03c1, 0x907c9973,
0x877c817c, 0xbf06817c,
0xbf850002, 0xbeff0380,
0xbf820001, 0xbeff03c1,
0xb9714306, 0x8771c171,
0xbf840046, 0xbf8a0000,
0x8776ff6f, 0x04000000,
0xbf840042, 0x8f718671,
0x8f718271, 0xbef60371,
0xb97a2a05, 0x807a817a,
0x907c9973, 0x877c817c,
0xbf06817c, 0xbf850002,
0x8f7a897a, 0xbf820001,
0x8f7a8a7a, 0xb9761e06,
0x8f768a76, 0x807a767a,
0x807aff7a, 0x00000200,
0x807aff7a, 0x00000080,
0xb97b4306, 0x877bc17b,
0xbf840044, 0xbf8a0000,
0x877aff73, 0x04000000,
0xbf840040, 0x8f7b867b,
0x8f7b827b, 0xbef6037b,
0xb9702a05, 0x80708170,
0xbf0d9973, 0xbf850002,
0x8f708970, 0xbf820001,
0x8f708a70, 0xb97a1e06,
0x8f7a8a7a, 0x80707a70,
0x8070ff70, 0x00000200,
0x8070ff70, 0x00000080,
0xbef603ff, 0x01000000,
0xd7650000, 0x000100c1,
0xd7660000, 0x000200c1,
@ -852,87 +858,86 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf800000, 0xbf800000,
0xbf800000, 0xd8d80000,
0x01000000, 0xbf8c0000,
0xe0704000, 0x7a5d0100,
0x807c037c, 0x807a037a,
0xe0704000, 0x705d0100,
0x807c037c, 0x80700370,
0xd5250000, 0x0001ff00,
0x00000080, 0xbf0a717c,
0x00000080, 0xbf0a7b7c,
0xbf85fff4, 0xbf820011,
0xbe8303ff, 0x00000100,
0xbf800000, 0xbf800000,
0xbf800000, 0xd8d80000,
0x01000000, 0xbf8c0000,
0xe0704000, 0x7a5d0100,
0x807c037c, 0x807a037a,
0xe0704000, 0x705d0100,
0x807c037c, 0x80700370,
0xd5250000, 0x0001ff00,
0x00000100, 0xbf0a717c,
0x00000100, 0xbf0a7b7c,
0xbf85fff4, 0xbefe03c1,
0x907c9973, 0x877c817c,
0xbf06817c, 0xbf850004,
0xbefa03ff, 0x00000200,
0xbef003ff, 0x00000200,
0xbeff0380, 0xbf820003,
0xbefa03ff, 0x00000400,
0xbeff03c1, 0xb9712a05,
0x80718171, 0x8f718271,
0xbef003ff, 0x00000400,
0xbeff03c1, 0xb97b2a05,
0x807b817b, 0x8f7b827b,
0x907c9973, 0x877c817c,
0xbf06817c, 0xbf850017,
0xbef603ff, 0x01000000,
0xbefc0384, 0xbf0a717c,
0xbefc0384, 0xbf0a7b7c,
0xbf840037, 0x7e008700,
0x7e028701, 0x7e048702,
0x7e068703, 0xe0704000,
0x7a5d0000, 0xe0704080,
0x7a5d0100, 0xe0704100,
0x7a5d0200, 0xe0704180,
0x7a5d0300, 0x807c847c,
0x807aff7a, 0x00000200,
0xbf0a717c, 0xbf85ffef,
0x705d0000, 0xe0704080,
0x705d0100, 0xe0704100,
0x705d0200, 0xe0704180,
0x705d0300, 0x807c847c,
0x8070ff70, 0x00000200,
0xbf0a7b7c, 0xbf85ffef,
0xbf820025, 0xbef603ff,
0x01000000, 0xbefc0384,
0xbf0a717c, 0xbf840020,
0xbf0a7b7c, 0xbf840020,
0x7e008700, 0x7e028701,
0x7e048702, 0x7e068703,
0xe0704000, 0x7a5d0000,
0xe0704100, 0x7a5d0100,
0xe0704200, 0x7a5d0200,
0xe0704300, 0x7a5d0300,
0x807c847c, 0x807aff7a,
0x00000400, 0xbf0a717c,
0xbf85ffef, 0xb9711e06,
0x8771c171, 0xbf84000c,
0x8f718371, 0x80717c71,
0xe0704000, 0x705d0000,
0xe0704100, 0x705d0100,
0xe0704200, 0x705d0200,
0xe0704300, 0x705d0300,
0x807c847c, 0x8070ff70,
0x00000400, 0xbf0a7b7c,
0xbf85ffef, 0xb97b1e06,
0x877bc17b, 0xbf84000c,
0x8f7b837b, 0x807b7c7b,
0xbefe03c1, 0xbeff0380,
0x7e008700, 0xe0704000,
0x7a5d0000, 0x807c817c,
0x807aff7a, 0x00000080,
0xbf0a717c, 0xbf85fff8,
0xbf820142, 0xbef4037e,
0x705d0000, 0x807c817c,
0x8070ff70, 0x00000080,
0xbf0a7b7c, 0xbf85fff8,
0xbf82014f, 0xbef4037e,
0x8775ff7f, 0x0000ffff,
0x8875ff75, 0x00040000,
0xbef60380, 0xbef703ff,
0x10807fac, 0x8772ff7f,
0x08000000, 0x90728372,
0x88777277, 0x8772ff7f,
0x70000000, 0x90728172,
0x88777277, 0xb97302dc,
0x8f739973, 0x8873737f,
0x8772ff7f, 0x04000000,
0xbf840036, 0xbefe03c1,
0x907c9973, 0x877c817c,
0x10807fac, 0x876eff7f,
0x08000000, 0x906e836e,
0x88776e77, 0x876eff7f,
0x70000000, 0x906e816e,
0x88776e77, 0xb97202dc,
0x8f729972, 0x8872727f,
0x876eff7f, 0x04000000,
0xbf840034, 0xbefe03c1,
0x907c9972, 0x877c817c,
0xbf06817c, 0xbf850002,
0xbeff0380, 0xbf820001,
0xbeff03c1, 0xb96f4306,
0x876fc16f, 0xbf84002b,
0x876fc16f, 0xbf840029,
0x8f6f866f, 0x8f6f826f,
0xbef6036f, 0xb9782a05,
0x80788178, 0x907c9973,
0x877c817c, 0xbf06817c,
0x80788178, 0xbf0d9972,
0xbf850002, 0x8f788978,
0xbf820001, 0x8f788a78,
0xb9721e06, 0x8f728a72,
0x80787278, 0x8078ff78,
0xb96e1e06, 0x8f6e8a6e,
0x80786e78, 0x8078ff78,
0x00000200, 0x8078ff78,
0x00000080, 0xbef603ff,
0x01000000, 0x907c9973,
0x01000000, 0x907c9972,
0x877c817c, 0xbf06817c,
0xbefc0380, 0xbf850009,
0xe0310000, 0x781d0000,
@ -944,15 +949,15 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x00000100, 0x8078ff78,
0x00000100, 0xbf0a6f7c,
0xbf85fff8, 0xbef80380,
0xbefe03c1, 0x907c9973,
0xbefe03c1, 0x907c9972,
0x877c817c, 0xbf06817c,
0xbf850002, 0xbeff0380,
0xbf820001, 0xbeff03c1,
0xb96f2a05, 0x806f816f,
0x8f6f826f, 0x907c9973,
0x8f6f826f, 0x907c9972,
0x877c817c, 0xbf06817c,
0xbf850021, 0xbef603ff,
0x01000000, 0xbef20378,
0x01000000, 0xbeee0378,
0x8078ff78, 0x00000200,
0xbefc0384, 0xe0304000,
0x785d0000, 0xe0304080,
@ -964,12 +969,12 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x807c847c, 0x8078ff78,
0x00000200, 0xbf0a6f7c,
0xbf85ffee, 0xe0304000,
0x725d0000, 0xe0304080,
0x725d0100, 0xe0304100,
0x725d0200, 0xe0304180,
0x725d0300, 0xbf820032,
0x6e5d0000, 0xe0304080,
0x6e5d0100, 0xe0304100,
0x6e5d0200, 0xe0304180,
0x6e5d0300, 0xbf820032,
0xbef603ff, 0x01000000,
0xbef20378, 0x8078ff78,
0xbeee0378, 0x8078ff78,
0x00000400, 0xbefc0384,
0xe0304000, 0x785d0000,
0xe0304100, 0x785d0100,
@ -989,16 +994,15 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x8078ff78, 0x00000080,
0xbf0a6f7c, 0xbf85fff7,
0xbeff03c1, 0xe0304000,
0x725d0000, 0xe0304100,
0x725d0100, 0xe0304200,
0x725d0200, 0xe0304300,
0x725d0300, 0xbf8c3f70,
0x6e5d0000, 0xe0304100,
0x6e5d0100, 0xe0304200,
0x6e5d0200, 0xe0304300,
0x6e5d0300, 0xbf8c3f70,
0xb9782a05, 0x80788178,
0x907c9973, 0x877c817c,
0xbf06817c, 0xbf850002,
0xbf0d9972, 0xbf850002,
0x8f788978, 0xbf820001,
0x8f788a78, 0xb9721e06,
0x8f728a72, 0x80787278,
0x8f788a78, 0xb96e1e06,
0x8f6e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
0x80f8ff78, 0x00000050,
0xbef603ff, 0x01000000,
@ -1021,23 +1025,22 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbe8c310c, 0xbe8e310e,
0xbf06807c, 0xbf84fff0,
0xb9782a05, 0x80788178,
0x907c9973, 0x877c817c,
0xbf06817c, 0xbf850002,
0xbf0d9972, 0xbf850002,
0x8f788978, 0xbf820001,
0x8f788a78, 0xb9721e06,
0x8f728a72, 0x80787278,
0x8f788a78, 0xb96e1e06,
0x8f6e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
0xbef603ff, 0x01000000,
0xf4211bfa, 0xf0000000,
0x80788478, 0xf4211b3a,
0xf0000000, 0x80788478,
0xf4211b7a, 0xf0000000,
0x80788478, 0xf4211eba,
0xf0000000, 0x80788478,
0xf4211efa, 0xf0000000,
0x80788478, 0xf4211c3a,
0xf0000000, 0x80788478,
0xf4211c7a, 0xf0000000,
0x80788478, 0xf4211eba,
0xf0000000, 0x80788478,
0xf4211efa, 0xf0000000,
0x80788478, 0xf4211e7a,
0xf0000000, 0x80788478,
0xf4211cfa, 0xf0000000,
@ -1046,31 +1049,41 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf8cc07f, 0xb9eef814,
0xf4211bba, 0xf0000000,
0x80788478, 0xbf8cc07f,
0xb9eef815, 0xbef2036d,
0x876dff72, 0x0000ffff,
0xbefc036f, 0xbefe037a,
0xbeff037b, 0x876f71ff,
0x000003ff, 0xb9ef4803,
0xb9f9f816, 0x876f71ff,
0xfffff800, 0x906f8b6f,
0xb9efa2c3, 0xb9f3f801,
0x876fff72, 0xfc000000,
0x906f9a6f, 0x8f6f906f,
0xbef30380, 0x88736f73,
0x876fff72, 0x02000000,
0x906f996f, 0x8f6f8f6f,
0x88736f73, 0x876fff72,
0x01000000, 0x906f986f,
0x8f6f996f, 0x88736f73,
0x876fff70, 0x00800000,
0x906f976f, 0xb9f3f807,
0x87fe7e7e, 0x87ea6a6a,
0xb9f0f802, 0xbf8a0000,
0xbe80226c, 0xbf810000,
0xb9eef815, 0xbefc036f,
0xbefe0370, 0xbeff0371,
0x876f7bff, 0x000003ff,
0xb9ef4803, 0xb9f9f816,
0x876f7bff, 0xfffff800,
0x906f8b6f, 0xb9efa2c3,
0xb9f3f801, 0xb96e2a05,
0x806e816e, 0xbf0d9972,
0xbf850002, 0x8f6e896e,
0xbf820001, 0x8f6e8a6e,
0x806eff6e, 0x00000200,
0x806e746e, 0x826f8075,
0x876fff6f, 0x0000ffff,
0xf4091c37, 0xfa000050,
0xf4091d37, 0xfa000060,
0xf4011e77, 0xfa000074,
0xbf8cc07f, 0x876fff6d,
0xfc000000, 0x906f9a6f,
0x8f6f906f, 0xbeee0380,
0x886e6f6e, 0x876fff6d,
0x02000000, 0x906f996f,
0x8f6f8f6f, 0x886e6f6e,
0x876fff6d, 0x01000000,
0x906f986f, 0x8f6f996f,
0x886e6f6e, 0x876fff7a,
0x00800000, 0x906f976f,
0xb9eef807, 0x876dff6d,
0x0000ffff, 0x87fe7e7e,
0x87ea6a6a, 0xb9faf802,
0xbf8a0000, 0xbe80226c,
0xbf810000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0x00000000,
};
static const uint32_t cwsr_trap_arcturus_hex[] = {
0xbf820001, 0xbf8202c4,
0xb8f8f802, 0x89788678,
@ -1560,3 +1573,399 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0xbf8a0000, 0x95806f6c,
0xbf810000, 0x00000000,
};
static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf820001, 0xbf8201cf,
0xb0804004, 0xb978f802,
0x8a788678, 0xb96ef801,
0x876eff6e, 0x00000800,
0xbf840003, 0x876eff78,
0x00002000, 0xbf840009,
0xb97bf803, 0x876eff7b,
0x00000400, 0xbf85001d,
0x876eff7b, 0x00000100,
0xbf840002, 0x8878ff78,
0x00002000, 0xb97af812,
0xb97bf813, 0x8ffa887a,
0xf4051bbd, 0xfa000000,
0xbf8cc07f, 0xf4051ebd,
0xfa000008, 0xbf8cc07f,
0x87ee6e6e, 0xbf840001,
0xbe80206e, 0xb97bf803,
0x877bff7b, 0x000001ff,
0xbf850002, 0x806c846c,
0x826d806d, 0x876dff6d,
0x0000ffff, 0x87fe7e7e,
0x87ea6a6a, 0xb9f8f802,
0xbe80226c, 0x876dff6d,
0x0000ffff, 0xbefa0380,
0xb9fa0283, 0xbeee037e,
0xbeef037f, 0xbefe0480,
0xbf900004, 0xbf8cc07f,
0xb97b02dc, 0x8f7b997b,
0x887b7b7f, 0xb97a2a05,
0x807a817a, 0xbf0d997b,
0xbf850002, 0x8f7a897a,
0xbf820001, 0x8f7a8a7a,
0x877bff7f, 0x0000ffff,
0x807aff7a, 0x00000200,
0x807a7e7a, 0x827b807b,
0xbef4037e, 0x8775ff7f,
0x0000ffff, 0x8875ff75,
0x00040000, 0xbef60380,
0xbef703ff, 0x10807fac,
0x877aff7f, 0x08000000,
0x907a837a, 0x88777a77,
0x877aff7f, 0x70000000,
0x907a817a, 0x88777a77,
0xbef1037c, 0xbef00380,
0xb97302dc, 0x8f739973,
0x8873737f, 0xbefe03c1,
0x907c9973, 0x877c817c,
0xbf06817c, 0xbf850002,
0xbeff0380, 0xbf820002,
0xbeff03c1, 0xbf82000b,
0xbef603ff, 0x01000000,
0xe0704000, 0x705d0000,
0xe0704080, 0x705d0100,
0xe0704100, 0x705d0200,
0xe0704180, 0x705d0300,
0xbf82000a, 0xbef603ff,
0x01000000, 0xe0704000,
0x705d0000, 0xe0704100,
0x705d0100, 0xe0704200,
0x705d0200, 0xe0704300,
0x705d0300, 0xb9702a05,
0x80708170, 0xbf0d9973,
0xbf850002, 0x8f708970,
0xbf820001, 0x8f708a70,
0xb97a1e06, 0x8f7a8a7a,
0x80707a70, 0x8070ff70,
0x00000200, 0xbef603ff,
0x01000000, 0x7e000280,
0x7e020280, 0x7e040280,
0xbefc0380, 0xd7610002,
0x0000f871, 0x807c817c,
0xd7610002, 0x0000f86c,
0x807c817c, 0xd7610002,
0x0000f86d, 0x807c817c,
0xd7610002, 0x0000f86e,
0x807c817c, 0xd7610002,
0x0000f86f, 0x807c817c,
0xd7610002, 0x0000f878,
0x807c817c, 0xb97af803,
0xd7610002, 0x0000f87a,
0x807c817c, 0xd7610002,
0x0000f87b, 0x807c817c,
0xb971f801, 0xd7610002,
0x0000f871, 0x807c817c,
0xb971f814, 0xd7610002,
0x0000f871, 0x807c817c,
0xb971f815, 0xd7610002,
0x0000f871, 0x807c817c,
0xbeff0380, 0xe0704000,
0x705d0200, 0xb9702a05,
0x80708170, 0xbf0d9973,
0xbf850002, 0x8f708970,
0xbf820001, 0x8f708a70,
0xb97a1e06, 0x8f7a8a7a,
0x80707a70, 0xbef603ff,
0x01000000, 0xbef90380,
0xbefc0380, 0xbf800000,
0xbe802f00, 0xbe822f02,
0xbe842f04, 0xbe862f06,
0xbe882f08, 0xbe8a2f0a,
0xbe8c2f0c, 0xbe8e2f0e,
0xd7610002, 0x0000f200,
0x80798179, 0xd7610002,
0x0000f201, 0x80798179,
0xd7610002, 0x0000f202,
0x80798179, 0xd7610002,
0x0000f203, 0x80798179,
0xd7610002, 0x0000f204,
0x80798179, 0xd7610002,
0x0000f205, 0x80798179,
0xd7610002, 0x0000f206,
0x80798179, 0xd7610002,
0x0000f207, 0x80798179,
0xd7610002, 0x0000f208,
0x80798179, 0xd7610002,
0x0000f209, 0x80798179,
0xd7610002, 0x0000f20a,
0x80798179, 0xd7610002,
0x0000f20b, 0x80798179,
0xd7610002, 0x0000f20c,
0x80798179, 0xd7610002,
0x0000f20d, 0x80798179,
0xd7610002, 0x0000f20e,
0x80798179, 0xd7610002,
0x0000f20f, 0x80798179,
0xbf06a079, 0xbf840006,
0xe0704000, 0x705d0200,
0x8070ff70, 0x00000080,
0xbef90380, 0x7e040280,
0x807c907c, 0xbf0aff7c,
0x00000060, 0xbf85ffbc,
0xbe802f00, 0xbe822f02,
0xbe842f04, 0xbe862f06,
0xbe882f08, 0xbe8a2f0a,
0xd7610002, 0x0000f200,
0x80798179, 0xd7610002,
0x0000f201, 0x80798179,
0xd7610002, 0x0000f202,
0x80798179, 0xd7610002,
0x0000f203, 0x80798179,
0xd7610002, 0x0000f204,
0x80798179, 0xd7610002,
0x0000f205, 0x80798179,
0xd7610002, 0x0000f206,
0x80798179, 0xd7610002,
0x0000f207, 0x80798179,
0xd7610002, 0x0000f208,
0x80798179, 0xd7610002,
0x0000f209, 0x80798179,
0xd7610002, 0x0000f20a,
0x80798179, 0xd7610002,
0x0000f20b, 0x80798179,
0xe0704000, 0x705d0200,
0xbefe03c1, 0x907c9973,
0x877c817c, 0xbf06817c,
0xbf850002, 0xbeff0380,
0xbf820001, 0xbeff03c1,
0xb97b4306, 0x877bc17b,
0xbf840044, 0xbf8a0000,
0x877aff73, 0x04000000,
0xbf840040, 0x8f7b867b,
0x8f7b827b, 0xbef6037b,
0xb9702a05, 0x80708170,
0xbf0d9973, 0xbf850002,
0x8f708970, 0xbf820001,
0x8f708a70, 0xb97a1e06,
0x8f7a8a7a, 0x80707a70,
0x8070ff70, 0x00000200,
0x8070ff70, 0x00000080,
0xbef603ff, 0x01000000,
0xd7650000, 0x000100c1,
0xd7660000, 0x000200c1,
0x16000084, 0x907c9973,
0x877c817c, 0xbf06817c,
0xbefc0380, 0xbf850012,
0xbe8303ff, 0x00000080,
0xbf800000, 0xbf800000,
0xbf800000, 0xd8d80000,
0x01000000, 0xbf8c0000,
0xe0704000, 0x705d0100,
0x807c037c, 0x80700370,
0xd5250000, 0x0001ff00,
0x00000080, 0xbf0a7b7c,
0xbf85fff4, 0xbf820011,
0xbe8303ff, 0x00000100,
0xbf800000, 0xbf800000,
0xbf800000, 0xd8d80000,
0x01000000, 0xbf8c0000,
0xe0704000, 0x705d0100,
0x807c037c, 0x80700370,
0xd5250000, 0x0001ff00,
0x00000100, 0xbf0a7b7c,
0xbf85fff4, 0xbefe03c1,
0x907c9973, 0x877c817c,
0xbf06817c, 0xbf850004,
0xbef003ff, 0x00000200,
0xbeff0380, 0xbf820003,
0xbef003ff, 0x00000400,
0xbeff03c1, 0xb97b2a05,
0x807b817b, 0x8f7b827b,
0x907c9973, 0x877c817c,
0xbf06817c, 0xbf850017,
0xbef603ff, 0x01000000,
0xbefc0384, 0xbf0a7b7c,
0xbf840037, 0x7e008700,
0x7e028701, 0x7e048702,
0x7e068703, 0xe0704000,
0x705d0000, 0xe0704080,
0x705d0100, 0xe0704100,
0x705d0200, 0xe0704180,
0x705d0300, 0x807c847c,
0x8070ff70, 0x00000200,
0xbf0a7b7c, 0xbf85ffef,
0xbf820025, 0xbef603ff,
0x01000000, 0xbefc0384,
0xbf0a7b7c, 0xbf840020,
0x7e008700, 0x7e028701,
0x7e048702, 0x7e068703,
0xe0704000, 0x705d0000,
0xe0704100, 0x705d0100,
0xe0704200, 0x705d0200,
0xe0704300, 0x705d0300,
0x807c847c, 0x8070ff70,
0x00000400, 0xbf0a7b7c,
0xbf85ffef, 0xb97b1e06,
0x877bc17b, 0xbf84000c,
0x8f7b837b, 0x807b7c7b,
0xbefe03c1, 0xbeff0380,
0x7e008700, 0xe0704000,
0x705d0000, 0x807c817c,
0x8070ff70, 0x00000080,
0xbf0a7b7c, 0xbf85fff8,
0xbf82013a, 0xbef4037e,
0x8775ff7f, 0x0000ffff,
0x8875ff75, 0x00040000,
0xbef60380, 0xbef703ff,
0x10807fac, 0x876eff7f,
0x08000000, 0x906e836e,
0x88776e77, 0x876eff7f,
0x70000000, 0x906e816e,
0x88776e77, 0xb97202dc,
0x8f729972, 0x8872727f,
0x876eff7f, 0x04000000,
0xbf840034, 0xbefe03c1,
0x907c9972, 0x877c817c,
0xbf06817c, 0xbf850002,
0xbeff0380, 0xbf820001,
0xbeff03c1, 0xb96f4306,
0x876fc16f, 0xbf840029,
0x8f6f866f, 0x8f6f826f,
0xbef6036f, 0xb9782a05,
0x80788178, 0xbf0d9972,
0xbf850002, 0x8f788978,
0xbf820001, 0x8f788a78,
0xb96e1e06, 0x8f6e8a6e,
0x80786e78, 0x8078ff78,
0x00000200, 0x8078ff78,
0x00000080, 0xbef603ff,
0x01000000, 0x907c9972,
0x877c817c, 0xbf06817c,
0xbefc0380, 0xbf850009,
0xe0310000, 0x781d0000,
0x807cff7c, 0x00000080,
0x8078ff78, 0x00000080,
0xbf0a6f7c, 0xbf85fff8,
0xbf820008, 0xe0310000,
0x781d0000, 0x807cff7c,
0x00000100, 0x8078ff78,
0x00000100, 0xbf0a6f7c,
0xbf85fff8, 0xbef80380,
0xbefe03c1, 0x907c9972,
0x877c817c, 0xbf06817c,
0xbf850002, 0xbeff0380,
0xbf820001, 0xbeff03c1,
0xb96f2a05, 0x806f816f,
0x8f6f826f, 0x907c9972,
0x877c817c, 0xbf06817c,
0xbf850021, 0xbef603ff,
0x01000000, 0xbeee0378,
0x8078ff78, 0x00000200,
0xbefc0384, 0xe0304000,
0x785d0000, 0xe0304080,
0x785d0100, 0xe0304100,
0x785d0200, 0xe0304180,
0x785d0300, 0xbf8c3f70,
0x7e008500, 0x7e028501,
0x7e048502, 0x7e068503,
0x807c847c, 0x8078ff78,
0x00000200, 0xbf0a6f7c,
0xbf85ffee, 0xe0304000,
0x6e5d0000, 0xe0304080,
0x6e5d0100, 0xe0304100,
0x6e5d0200, 0xe0304180,
0x6e5d0300, 0xbf820032,
0xbef603ff, 0x01000000,
0xbeee0378, 0x8078ff78,
0x00000400, 0xbefc0384,
0xe0304000, 0x785d0000,
0xe0304100, 0x785d0100,
0xe0304200, 0x785d0200,
0xe0304300, 0x785d0300,
0xbf8c3f70, 0x7e008500,
0x7e028501, 0x7e048502,
0x7e068503, 0x807c847c,
0x8078ff78, 0x00000400,
0xbf0a6f7c, 0xbf85ffee,
0xb96f1e06, 0x876fc16f,
0xbf84000e, 0x8f6f836f,
0x806f7c6f, 0xbefe03c1,
0xbeff0380, 0xe0304000,
0x785d0000, 0xbf8c3f70,
0x7e008500, 0x807c817c,
0x8078ff78, 0x00000080,
0xbf0a6f7c, 0xbf85fff7,
0xbeff03c1, 0xe0304000,
0x6e5d0000, 0xe0304100,
0x6e5d0100, 0xe0304200,
0x6e5d0200, 0xe0304300,
0x6e5d0300, 0xbf8c3f70,
0xb9782a05, 0x80788178,
0xbf0d9972, 0xbf850002,
0x8f788978, 0xbf820001,
0x8f788a78, 0xb96e1e06,
0x8f6e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
0x80f8ff78, 0x00000050,
0xbef603ff, 0x01000000,
0xbefc03ff, 0x0000006c,
0x80f89078, 0xf429003a,
0xf0000000, 0xbf8cc07f,
0x80fc847c, 0xbf800000,
0xbe803100, 0xbe823102,
0x80f8a078, 0xf42d003a,
0xf0000000, 0xbf8cc07f,
0x80fc887c, 0xbf800000,
0xbe803100, 0xbe823102,
0xbe843104, 0xbe863106,
0x80f8c078, 0xf431003a,
0xf0000000, 0xbf8cc07f,
0x80fc907c, 0xbf800000,
0xbe803100, 0xbe823102,
0xbe843104, 0xbe863106,
0xbe883108, 0xbe8a310a,
0xbe8c310c, 0xbe8e310e,
0xbf06807c, 0xbf84fff0,
0xb9782a05, 0x80788178,
0xbf0d9972, 0xbf850002,
0x8f788978, 0xbf820001,
0x8f788a78, 0xb96e1e06,
0x8f6e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
0xbef603ff, 0x01000000,
0xf4211bfa, 0xf0000000,
0x80788478, 0xf4211b3a,
0xf0000000, 0x80788478,
0xf4211b7a, 0xf0000000,
0x80788478, 0xf4211c3a,
0xf0000000, 0x80788478,
0xf4211c7a, 0xf0000000,
0x80788478, 0xf4211eba,
0xf0000000, 0x80788478,
0xf4211efa, 0xf0000000,
0x80788478, 0xf4211e7a,
0xf0000000, 0x80788478,
0xf4211cfa, 0xf0000000,
0x80788478, 0xf4211bba,
0xf0000000, 0x80788478,
0xbf8cc07f, 0xb9eef814,
0xf4211bba, 0xf0000000,
0x80788478, 0xbf8cc07f,
0xb9eef815, 0xbefc036f,
0xbefe0370, 0xbeff0371,
0x876f7bff, 0x000003ff,
0xb9ef4803, 0x876f7bff,
0xfffff800, 0x906f8b6f,
0xb9efa2c3, 0xb9f3f801,
0xb96e2a05, 0x806e816e,
0xbf0d9972, 0xbf850002,
0x8f6e896e, 0xbf820001,
0x8f6e8a6e, 0x806eff6e,
0x00000200, 0x806e746e,
0x826f8075, 0x876fff6f,
0x0000ffff, 0xf4091c37,
0xfa000050, 0xf4091d37,
0xfa000060, 0xf4011e77,
0xfa000074, 0xbf8cc07f,
0x876dff6d, 0x0000ffff,
0x87fe7e7e, 0x87ea6a6a,
0xb9faf802, 0xbf8a0000,
0xbe80226c, 0xbf810000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0x00000000,
};

View file

@ -20,6 +20,21 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
/* To compile this assembly code:
*
* Navi1x:
* cpp -DASIC_TARGET_NAVI1X=1 cwsr_trap_handler_gfx10.asm -P -o nv1x.sp3
* sp3-nv1x nv1x.sp3 -hex nv1x.hex
*
* Others:
* cpp -DASIC_TARGET_NAVI1X=0 cwsr_trap_handler_gfx10.asm -P -o gfx10.sp3
* sp3-gfx10 gfx10.sp3 -hex gfx10.hex
*/
#define NO_SQC_STORE !ASIC_TARGET_NAVI1X
var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
@ -59,6 +74,8 @@ var SQ_WAVE_IB_STS_RCNT_SIZE = 6
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x003F8000
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF
var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800
var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24
var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27
@ -96,17 +113,19 @@ var s_save_pc_hi = ttmp1
var s_save_exec_lo = ttmp2
var s_save_exec_hi = ttmp3
var s_save_status = ttmp12
var s_save_trapsts = ttmp5
var s_save_xnack_mask = ttmp6
var s_save_trapsts = ttmp15
var s_save_xnack_mask = s_save_trapsts
var s_wave_size = ttmp7
var s_save_buf_rsrc0 = ttmp8
var s_save_buf_rsrc1 = ttmp9
var s_save_buf_rsrc2 = ttmp10
var s_save_buf_rsrc3 = ttmp11
var s_save_mem_offset = ttmp14
var s_save_mem_offset = ttmp4
var s_save_alloc_size = s_save_trapsts
var s_save_tmp = s_save_buf_rsrc2
var s_save_m0 = ttmp15
var s_save_tmp = ttmp14
var s_save_m0 = ttmp5
var s_save_ttmps_lo = s_save_tmp
var s_save_ttmps_hi = s_save_trapsts
var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
@ -128,23 +147,25 @@ var s_restore_spi_init_lo = exec_lo
var s_restore_spi_init_hi = exec_hi
var s_restore_mem_offset = ttmp12
var s_restore_alloc_size = ttmp3
var s_restore_tmp = ttmp6
var s_restore_tmp = ttmp2
var s_restore_mem_offset_save = s_restore_tmp
var s_restore_m0 = s_restore_alloc_size
var s_restore_mode = ttmp7
var s_restore_flat_scratch = ttmp2
var s_restore_flat_scratch = s_restore_tmp
var s_restore_pc_lo = ttmp0
var s_restore_pc_hi = ttmp1
var s_restore_exec_lo = ttmp14
var s_restore_exec_hi = ttmp15
var s_restore_status = ttmp4
var s_restore_trapsts = ttmp5
var s_restore_exec_lo = ttmp4
var s_restore_exec_hi = ttmp5
var s_restore_status = ttmp14
var s_restore_trapsts = ttmp15
var s_restore_xnack_mask = ttmp13
var s_restore_buf_rsrc0 = ttmp8
var s_restore_buf_rsrc1 = ttmp9
var s_restore_buf_rsrc2 = ttmp10
var s_restore_buf_rsrc3 = ttmp11
var s_restore_size = ttmp7
var s_restore_size = ttmp6
var s_restore_ttmps_lo = s_restore_tmp
var s_restore_ttmps_hi = s_restore_alloc_size
shader main
asic(DEFAULT)
@ -159,6 +180,24 @@ L_JUMP_TO_RESTORE:
L_SKIP_RESTORE:
s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK
if SINGLE_STEP_MISSED_WORKAROUND
// No single step exceptions if MODE.DEBUG_EN=0.
s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
s_cbranch_scc0 L_NO_SINGLE_STEP_WORKAROUND
// Second-level trap already handled exception if STATUS.HALT=1.
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
// Prioritize single step exception over context save.
// Second-level trap will halt wave and RFE, re-entering for SAVECTX.
s_cbranch_scc0 L_FETCH_2ND_TRAP
L_NO_SINGLE_STEP_WORKAROUND:
end
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save
s_cbranch_scc1 L_SAVE
@ -170,6 +209,8 @@ L_SKIP_RESTORE:
s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
L_FETCH_2ND_TRAP:
#if ASIC_TARGET_NAVI1X
// Preserve and clear scalar XNACK state before issuing scalar loads.
// Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into
// unused space ttmp11[31:24].
@ -183,6 +224,7 @@ L_FETCH_2ND_TRAP:
s_or_b32 ttmp11, ttmp11, ttmp3
s_andn2_b32 ttmp2, ttmp2, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK)
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
#endif
// Read second-level TBA/TMA from first-level TMA and jump if available.
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
@ -207,6 +249,7 @@ L_NO_NEXT_TRAP:
L_EXCP_CASE:
s_and_b32 ttmp1, ttmp1, 0xFFFF
#if ASIC_TARGET_NAVI1X
// Restore SQ_WAVE_IB_STS.
s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
@ -214,6 +257,7 @@ L_EXCP_CASE:
s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
s_or_b32 ttmp2, ttmp2, ttmp3
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
#endif
// Restore SQ_WAVE_STATUS.
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
@ -223,23 +267,11 @@ L_EXCP_CASE:
s_rfe_b64 [ttmp0, ttmp1]
L_SAVE:
//check whether there is mem_viol
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
s_cbranch_scc0 L_NO_PC_REWIND
//if so, need rewind PC assuming GDS operation gets NACKed
s_mov_b32 s_save_tmp, 0
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit
s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8
s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0
L_NO_PC_REWIND:
s_mov_b32 s_save_tmp, 0
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
s_getreg_b32 s_save_xnack_mask, hwreg(HW_REG_SHADER_XNACK_MASK)
#if ASIC_TARGET_NAVI1X
s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE)
s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
@ -253,6 +285,7 @@ L_NO_PC_REWIND:
s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp
#endif
/* inform SPI the readiness and wait for SPI's go signal */
s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
@ -261,12 +294,31 @@ L_NO_PC_REWIND:
s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
#if ASIC_TARGET_NAVI1X
L_SLEEP:
	// sleep 1 (64 clk) is not enough for 8 waves per SIMD and would cause an
	// SQ hang: the 7th/8th waves cannot win arbitration to execute an instruction
	// while the other waves sit in this sleep loop waiting for wrexec != 0
s_sleep 0x2
s_cbranch_execz L_SLEEP
#else
s_waitcnt lgkmcnt(0)
#endif
// Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
get_wave_size(s_save_ttmps_hi)
get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi)
s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes()
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0
#if ASIC_TARGET_NAVI1X
s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
#endif
/* setup Resource Contants */
s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo
@ -285,95 +337,16 @@ L_SLEEP:
/* global mem offset */
s_mov_b32 s_save_mem_offset, 0x0
s_getreg_b32 s_wave_size, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
s_lshl_b32 s_wave_size, s_wave_size, S_WAVE_SIZE
s_or_b32 s_wave_size, s_save_spi_init_hi, s_wave_size //share s_wave_size with exec_hi, it's at bit25
get_wave_size(s_wave_size)
/* save HW registers */
#if ASIC_TARGET_NAVI1X
// Save and clear vector XNACK state late to free up SGPRs.
s_getreg_b32 s_save_xnack_mask, hwreg(HW_REG_SHADER_XNACK_MASK)
s_setreg_imm32_b32 hwreg(HW_REG_SHADER_XNACK_MASK), 0x0
#endif
L_SAVE_HWREG:
// HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
get_svgpr_size_bytes(s_save_tmp)
s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset)
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_xnack_mask, s_save_buf_rsrc0, s_save_mem_offset)
s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE)
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
s_getreg_b32 s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_LO)
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
s_getreg_b32 s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI)
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
/* the first wave in the threadgroup */
s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
s_mov_b32 s_save_exec_hi, 0x0
s_or_b32 s_save_exec_hi, s_save_tmp, s_save_exec_hi // save first wave bit to s_save_exec_hi.bits[26]
/* save SGPRs */
// Save SGPR before LDS save, then the s0 to s4 can be used during LDS save...
// SGPR SR memory offset : size(VGPR)+size(SVGPR)
get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
get_svgpr_size_bytes(s_save_tmp)
s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
// backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0
s_mov_b32 s_save_xnack_mask, s_save_buf_rsrc0
s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset
s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0
s_mov_b32 m0, 0x0 //SGPR initial index value =0
s_nop 0x0 //Manually inserted wait states
L_SAVE_SGPR_LOOP:
// SGPR is allocated in 16 SGPR granularity
s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0]
s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0]
write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset)
s_add_u32 m0, m0, 16 //next sgpr index
s_cmp_lt_u32 m0, 96 //scc = (m0 < first 96 SGPR) ? 1 : 0
s_cbranch_scc1 L_SAVE_SGPR_LOOP //first 96 SGPR save is complete?
//save the rest 12 SGPR
s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
write_12sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset)
// restore s_save_buf_rsrc0,1
s_mov_b32 s_save_buf_rsrc0, s_save_xnack_mask
/* save first 4 VGPR, then LDS save could use */
// each wave will alloc 4 vgprs at least...
s_mov_b32 s_save_mem_offset, 0
s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
/* save first 4 VGPRs, needed for SGPR save */
s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
s_and_b32 m0, m0, 1
s_cmp_eq_u32 m0, 1
@ -392,7 +365,7 @@ L_SAVE_4VGPR_WAVE32:
buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128
buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2
buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3
s_branch L_SAVE_LDS
s_branch L_SAVE_HWREG
L_SAVE_4VGPR_WAVE64:
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
@ -404,6 +377,116 @@ L_SAVE_4VGPR_WAVE64:
buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
/* save HW registers */
L_SAVE_HWREG:
// HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
get_svgpr_size_bytes(s_save_tmp)
s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
#if NO_SQC_STORE
v_mov_b32 v0, 0x0 //Offset[31:0] from buffer resource
v_mov_b32 v1, 0x0 //Offset[63:32] from buffer resource
v_mov_b32 v2, 0x0 //Set of SGPRs for TCP store
s_mov_b32 m0, 0x0 //Next lane of v2 to write to
#endif
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset)
s_getreg_b32 s_save_tmp, hwreg(HW_REG_TRAPSTS)
write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
// Not used on Sienna_Cichlid but keep layout same for debugger.
write_hwreg_to_mem(s_save_xnack_mask, s_save_buf_rsrc0, s_save_mem_offset)
s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE)
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
s_getreg_b32 s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_LO)
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
s_getreg_b32 s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI)
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
#if NO_SQC_STORE
// Write HWREG/SGPRs with 32 VGPR lanes, wave32 is common case.
s_mov_b32 exec_hi, 0x0
buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
#endif
/* save SGPRs */
// Save SGPR before LDS save, then the s0 to s4 can be used during LDS save...
// SGPR SR memory offset : size(VGPR)+size(SVGPR)
get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
get_svgpr_size_bytes(s_save_tmp)
s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
#if NO_SQC_STORE
s_mov_b32 ttmp13, 0x0 //next VGPR lane to copy SGPR into
#else
// backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0
s_mov_b32 s_save_xnack_mask, s_save_buf_rsrc0
s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset
s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0
#endif
s_mov_b32 m0, 0x0 //SGPR initial index value =0
s_nop 0x0 //Manually inserted wait states
L_SAVE_SGPR_LOOP:
// SGPR is allocated in 16 SGPR granularity
s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0]
s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0]
write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset)
#if NO_SQC_STORE
s_cmp_eq_u32 ttmp13, 0x20 //have 32 VGPR lanes filled?
s_cbranch_scc0 L_SAVE_SGPR_SKIP_TCP_STORE
buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
s_add_u32 s_save_mem_offset, s_save_mem_offset, 0x80
s_mov_b32 ttmp13, 0x0
v_mov_b32 v2, 0x0
L_SAVE_SGPR_SKIP_TCP_STORE:
#endif
s_add_u32 m0, m0, 16 //next sgpr index
s_cmp_lt_u32 m0, 96 //scc = (m0 < first 96 SGPR) ? 1 : 0
s_cbranch_scc1 L_SAVE_SGPR_LOOP //first 96 SGPR save is complete?
//save the rest 12 SGPR
s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
write_12sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset)
#if NO_SQC_STORE
buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
#else
// restore s_save_buf_rsrc0,1
s_mov_b32 s_save_buf_rsrc0, s_save_xnack_mask
#endif
/* save LDS */
L_SAVE_LDS:
@ -423,7 +506,7 @@ L_SAVE_LDS_NORMAL:
s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE
s_barrier //LDS is used? wait for other waves in the same TG
s_and_b32 s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
s_and_b32 s_save_tmp, s_wave_size, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
s_cbranch_scc0 L_SAVE_LDS_DONE
// first wave do LDS save;
@ -598,9 +681,7 @@ L_RESTORE:
s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)
s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE
//determine it is wave32 or wave64
s_getreg_b32 s_restore_size, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
s_lshl_b32 s_restore_size, s_restore_size, S_WAVE_SIZE
s_or_b32 s_restore_size, s_restore_spi_init_hi, s_restore_size
get_wave_size(s_restore_size)
s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
s_cbranch_scc0 L_RESTORE_VGPR
@ -634,7 +715,7 @@ L_RESTORE_LDS_NORMAL:
s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
s_and_b32 m0, m0, 1
s_cmp_eq_u32 m0, 1
s_mov_b32 m0, 0x0
@ -842,38 +923,55 @@ L_RESTORE_HWREG:
s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s_restore_flat_scratch
s_mov_b32 s_restore_tmp, s_restore_pc_hi
s_and_b32 s_restore_pc_hi, s_restore_tmp, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
s_mov_b32 m0, s_restore_m0
s_mov_b32 exec_lo, s_restore_exec_lo
s_mov_b32 exec_hi, s_restore_exec_hi
s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
#if ASIC_TARGET_NAVI1X
s_setreg_b32 hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask
#endif
s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts
s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0
s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
s_and_b32 s_restore_m0, s_restore_tmp, S_SAVE_PC_HI_RCNT_MASK
// Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size)
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes()
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1
s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1
s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
s_waitcnt lgkmcnt(0)
#if ASIC_TARGET_NAVI1X
s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
s_mov_b32 s_restore_mode, 0x0
s_or_b32 s_restore_mode, s_restore_mode, s_restore_m0
s_and_b32 s_restore_m0, s_restore_tmp, S_SAVE_PC_HI_FIRST_REPLAY_MASK
s_mov_b32 s_restore_tmp, 0x0
s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
s_or_b32 s_restore_mode, s_restore_mode, s_restore_m0
s_and_b32 s_restore_m0, s_restore_tmp, S_SAVE_PC_HI_REPLAY_W64H_MASK
s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_REPLAY_W64H_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_REPLAY_W64H_SHIFT
s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT
s_or_b32 s_restore_mode, s_restore_mode, s_restore_m0
s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_mode
s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
#endif
s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
@ -887,32 +985,53 @@ L_END_PGM:
end
function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
#if NO_SQC_STORE
// Copy into VGPR for later TCP store.
v_writelane_b32 v2, s, m0
s_add_u32 m0, m0, 0x1
#else
s_mov_b32 exec_lo, m0
s_mov_b32 m0, s_mem_offset
s_buffer_store_dword s, s_rsrc, m0 glc:1
s_add_u32 s_mem_offset, s_mem_offset, 4
s_mov_b32 m0, exec_lo
#endif
end
function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
#if NO_SQC_STORE
// Copy into VGPR for later TCP store.
for var sgpr_idx = 0; sgpr_idx < 16; sgpr_idx ++
v_writelane_b32 v2, s[sgpr_idx], ttmp13
s_add_u32 ttmp13, ttmp13, 0x1
end
#else
s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1
s_add_u32 s_rsrc[0], s_rsrc[0], 4*16
s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0
#endif
end
function write_12sgpr_to_mem(s, s_rsrc, s_mem_offset)
#if NO_SQC_STORE
// Copy into VGPR for later TCP store.
for var sgpr_idx = 0; sgpr_idx < 12; sgpr_idx ++
v_writelane_b32 v2, s[sgpr_idx], ttmp13
s_add_u32 ttmp13, ttmp13, 0x1
end
#else
s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
s_add_u32 s_rsrc[0], s_rsrc[0], 4*12
s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0
#endif
end
function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1
s_add_u32 s_mem_offset, s_mem_offset, 4
@ -942,9 +1061,7 @@ end
function get_vgpr_size_bytes(s_vgpr_size_byte, s_size)
s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1
s_lshr_b32 m0, s_size, S_WAVE_SIZE
s_and_b32 m0, m0, 1
s_cmp_eq_u32 m0, 1
s_bitcmp1_b32 s_size, S_WAVE_SIZE
s_cbranch_scc1 L_ENABLE_SHIFT_W64
s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+7) //Number of VGPRs = (vgpr_size + 1) * 4 * 32 * 4 (non-zero value)
s_branch L_SHIFT_DONE
@ -965,3 +1082,9 @@ end
function get_hwreg_size_bytes
return 128
end
function get_wave_size(s_reg)
s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE
s_or_b32 s_reg, s_save_spi_init_hi, s_reg //share with exec_hi, it's at bit25
end

View file

@ -678,6 +678,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_NAVI14:
case CHIP_SIENNA_CICHLID:
pcache_info = navi10_cache_info;
num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
break;

View file

@ -46,6 +46,7 @@ extern const struct kfd2kgd_calls gfx_v8_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v9_kfd2kgd;
extern const struct kfd2kgd_calls arcturus_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;
static const struct kfd2kgd_calls *kfd2kgd_funcs[] = {
#ifdef KFD_SUPPORT_IOMMU_V2
@ -72,6 +73,7 @@ static const struct kfd2kgd_calls *kfd2kgd_funcs[] = {
[CHIP_NAVI10] = &gfx_v10_kfd2kgd,
[CHIP_NAVI12] = &gfx_v10_kfd2kgd,
[CHIP_NAVI14] = &gfx_v10_kfd2kgd,
[CHIP_SIENNA_CICHLID] = &gfx_v10_3_kfd2kgd,
};
#ifdef KFD_SUPPORT_IOMMU_V2
@ -458,6 +460,24 @@ static const struct kfd_device_info navi14_device_info = {
.num_sdma_queues_per_engine = 8,
};
static const struct kfd_device_info sienna_cichlid_device_info = {
.asic_family = CHIP_SIENNA_CICHLID,
.asic_name = "sienna_cichlid",
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.doorbell_size = 8,
.ih_ring_entry_size = 8 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_v9,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.needs_iommu_device = false,
.supports_cwsr = true,
.needs_pci_atomics = false,
.num_sdma_engines = 4,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8,
};
/* For each entry, [0] is regular and [1] is virtualisation device. */
static const struct kfd_device_info *kfd_supported_devices[][2] = {
#ifdef KFD_SUPPORT_IOMMU_V2
@ -480,6 +500,7 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = {
[CHIP_NAVI10] = {&navi10_device_info, NULL},
[CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info},
[CHIP_NAVI14] = {&navi14_device_info, NULL},
[CHIP_SIENNA_CICHLID] = {&sienna_cichlid_device_info, &sienna_cichlid_device_info},
};
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
@ -559,6 +580,10 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx9_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
} else if (kfd->device_info->asic_family < CHIP_SIENNA_CICHLID) {
BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_nv1x_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
} else {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx10_hex;
@ -910,6 +935,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm)
if (!p)
return -ESRCH;
WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
r = kfd_process_evict_queues(p);
kfd_unref_process(p);
@ -977,6 +1003,8 @@ int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
/* During process initialization eviction_work.dwork is initialized
* to kfd_evict_bo_worker
*/
WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies",
p->lead_thread->pid, delay_jiffies);
schedule_delayed_work(&p->eviction_work, delay_jiffies);
out:
kfd_unref_process(p);
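
The WARN() calls added in the two hunks above are gated on a debug_evictions flag, so eviction reporting stays silent unless explicitly turned on. As a hedged sketch only, and not necessarily the exact declaration amdgpu uses, such a toggle is typically exposed as a runtime-writable boolean module parameter, with an extern declaration in a shared header so files like kfd_device.c can test it:

	/* Hedged sketch: a runtime-writable debug toggle, off by default. */
	bool debug_evictions;	/* declared "extern bool debug_evictions;" in a shared header */
	module_param(debug_evictions, bool, 0644);
	MODULE_PARM_DESC(debug_evictions,
		"Enable verbose eviction messages (false = default)");
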

View file

@ -137,7 +137,7 @@ void program_sh_mem_settings(struct device_queue_manager *dqm,
qpd->sh_mem_bases);
}
void increment_queue_count(struct device_queue_manager *dqm,
static void increment_queue_count(struct device_queue_manager *dqm,
enum kfd_queue_type type)
{
dqm->active_queue_count++;
@ -145,7 +145,7 @@ void increment_queue_count(struct device_queue_manager *dqm,
dqm->active_cp_queue_count++;
}
void decrement_queue_count(struct device_queue_manager *dqm,
static void decrement_queue_count(struct device_queue_manager *dqm,
enum kfd_queue_type type)
{
dqm->active_queue_count--;
@ -153,6 +153,30 @@ void decrement_queue_count(struct device_queue_manager *dqm,
dqm->active_cp_queue_count--;
}
int read_sdma_queue_counter(uint64_t q_rptr, uint64_t *val)
{
int ret;
uint64_t tmp = 0;
if (!val)
return -EINVAL;
/*
* SDMA activity counter is stored at queue's RPTR + 0x8 location.
*/
if (!access_ok((const void __user *)(q_rptr +
sizeof(uint64_t)), sizeof(uint64_t))) {
pr_err("Can't access sdma queue activity counter\n");
return -EFAULT;
}
ret = get_user(tmp, (uint64_t *)(q_rptr + sizeof(uint64_t)));
if (!ret) {
*val = tmp;
}
return ret;
}
static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{
struct kfd_dev *dev = qpd->dqm->dev;
@ -487,6 +511,7 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
if (retval == -ETIME)
qpd->reset_wavefronts = true;
mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
list_del(&q->list);
@ -521,9 +546,23 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q)
{
int retval;
uint64_t sdma_val = 0;
struct kfd_process_device *pdd = qpd_to_pdd(qpd);
/* Get the SDMA queue stats */
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
retval = read_sdma_queue_counter((uint64_t)q->properties.read_ptr,
&sdma_val);
if (retval)
pr_err("Failed to read SDMA queue counter for queue: %d\n",
q->properties.queue_id);
}
dqm_lock(dqm);
retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
if (!retval)
pdd->sdma_past_activity_counter += sdma_val;
dqm_unlock(dqm);
return retval;
@ -1428,6 +1467,18 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
{
int retval;
struct mqd_manager *mqd_mgr;
uint64_t sdma_val = 0;
struct kfd_process_device *pdd = qpd_to_pdd(qpd);
/* Get the SDMA queue stats */
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
retval = read_sdma_queue_counter((uint64_t)q->properties.read_ptr,
&sdma_val);
if (retval)
pr_err("Failed to read SDMA queue counter for queue: %d\n",
q->properties.queue_id);
}
retval = 0;
@ -1449,10 +1500,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
deallocate_doorbell(qpd, q);
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
deallocate_sdma_queue(dqm, q);
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
deallocate_sdma_queue(dqm, q);
pdd->sdma_past_activity_counter += sdma_val;
}
list_del(&q->list);
qpd->queue_count--;
@ -1886,6 +1938,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_NAVI14:
case CHIP_SIENNA_CICHLID:
device_queue_manager_init_v10_navi10(&dqm->asic_ops);
break;
default:

Some files were not shown because too many files have changed in this diff