Merge tag 'amd-drm-next-5.9-2020-07-17' of git://people.freedesktop.org/~agd5f/linux into drm-next

amd-drm-next-5.9-2020-07-17:

amdgpu:
- SI UVD/VCE clock support
- Updates for Sienna Cichlid
- Expose drm rotation property
- Atomfirmware updates for renoir
- updates to GPUVM hub handling for different register layouts
- swSMU restructuring and cleanups
- RAS fixes
- DC fixes
- mode1 reset support for Sienna Cichlid
- Add support for Navy Flounder GPUs

amdkfd:
- Add SMI events watch interface

UAPI:
- Add amdkfd SMI events watch interface
  Userspace which uses this interface:
  2235ede34c

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200717132022.4014-1-alexander.deucher@amd.com
This commit is contained in:
Dave Airlie 2020-07-23 15:38:10 +10:00
commit 2067391195
158 changed files with 5277 additions and 3134 deletions

View file

@ -193,6 +193,7 @@ static const bool debug_evictions; /* = false */
#endif
extern int amdgpu_tmz;
extern int amdgpu_reset_method;
#ifdef CONFIG_DRM_AMDGPU_SI
extern int amdgpu_si_support;
@ -1010,10 +1011,10 @@ int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
uint32_t *buf, size_t size, bool write);
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg,
uint32_t acc_flags);
void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
uint32_t acc_flags);
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags);
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags);
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
@ -1032,8 +1033,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
*/
#define AMDGPU_REGS_NO_KIQ (1<<1)
#define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
#define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
#define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
#define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg))
#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v))
@ -1041,9 +1042,9 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
#define RREG32(reg) amdgpu_device_rreg(adev, (reg), 0)
#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_device_rreg(adev, (reg), 0))
#define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)
#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0)
#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0))
#define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 0)
#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
#define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
#define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
@ -1080,7 +1081,16 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
tmp_ |= ((val) & ~(mask)); \
WREG32_PLL(reg, tmp_); \
} while (0)
#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", amdgpu_device_rreg((adev), (reg), false))
#define WREG32_SMC_P(_Reg, _Val, _Mask) \
do { \
u32 tmp = RREG32_SMC(_Reg); \
tmp &= (_Mask); \
tmp |= ((_Val) & ~(_Mask)); \
WREG32_SMC(_Reg, tmp); \
} while (0)
#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", amdgpu_mm_rreg((adev), (reg), false))
#define RREG32_IO(reg) amdgpu_io_rreg(adev, (reg))
#define WREG32_IO(reg, v) amdgpu_io_wreg(adev, (reg), (v))

View file

@ -31,8 +31,6 @@
#include "amdgpu_xgmi.h"
#include <uapi/linux/kfd_ioctl.h>
static const unsigned int compute_vmid_bitmap = 0xFF00;
/* Total memory size in system memory and all GPU VRAM. Used to
* estimate worst case amount of memory to reserve for page tables
*/
@ -113,7 +111,9 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
if (adev->kfd.dev) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap = compute_vmid_bitmap,
.compute_vmid_bitmap =
((1 << AMDGPU_NUM_VMID) - 1) -
((1 << adev->vm_manager.first_kfd_vmid) - 1),
.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
.gpuvm_size = min(adev->vm_manager.max_pfn
@ -637,10 +637,8 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{
if (adev->kfd.dev) {
if ((1 << vmid) & compute_vmid_bitmap)
return true;
}
if (adev->kfd.dev)
return vmid >= adev->vm_manager.first_kfd_vmid;
return false;
}

View file

@ -111,6 +111,7 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
union igp_info {
struct atom_integrated_system_info_v1_11 v11;
struct atom_integrated_system_info_v1_12 v12;
};
union umc_info {
@ -214,6 +215,15 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
if (vram_type)
*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
break;
case 12:
mem_channel_number = igp_info->v12.umachannelnumber;
/* channel width is 64 */
if (vram_width)
*vram_width = mem_channel_number * 64;
mem_type = igp_info->v12.memorytype;
if (vram_type)
*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
break;
default:
return -EINVAL;
}

View file

@ -1343,27 +1343,37 @@ static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched)
static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring)
{
struct amdgpu_job *job;
struct drm_sched_job *s_job;
struct drm_sched_job *s_job, *tmp;
uint32_t preempt_seq;
struct dma_fence *fence, **ptr;
struct amdgpu_fence_driver *drv = &ring->fence_drv;
struct drm_gpu_scheduler *sched = &ring->sched;
bool preempted = true;
if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
return;
preempt_seq = le32_to_cpu(*(drv->cpu_addr + 2));
if (preempt_seq <= atomic_read(&drv->last_seq))
return;
if (preempt_seq <= atomic_read(&drv->last_seq)) {
preempted = false;
goto no_preempt;
}
preempt_seq &= drv->num_fences_mask;
ptr = &drv->fences[preempt_seq];
fence = rcu_dereference_protected(*ptr, 1);
no_preempt:
spin_lock(&sched->job_list_lock);
list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
if (dma_fence_is_signaled(&s_job->s_fence->finished)) {
/* remove job from ring_mirror_list */
list_del_init(&s_job->node);
sched->ops->free_job(s_job);
continue;
}
job = to_amdgpu_job(s_job);
if (job->fence == fence)
if (preempted && job->fence == fence)
/* mark the job as preempted */
job->preemption_status |= AMDGPU_IB_PREEMPTED;
}
@ -1461,10 +1471,12 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val)
}
if (is_support_sw_smu(adev)) {
ret = smu_get_dpm_freq_range(&adev->smu, SMU_SCLK, &min_freq, &max_freq, true);
ret = smu_get_dpm_freq_range(&adev->smu, SMU_SCLK, &min_freq, &max_freq);
if (ret || val > max_freq || val < min_freq)
return -EINVAL;
ret = smu_set_soft_freq_range(&adev->smu, SMU_SCLK, (uint32_t)val, (uint32_t)val, true);
ret = smu_set_soft_freq_range(&adev->smu, SMU_SCLK, (uint32_t)val, (uint32_t)val);
} else {
return 0;
}
pm_runtime_mark_last_busy(adev->ddev->dev);

View file

@ -81,6 +81,7 @@ MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navy_flounder_gpu_info.bin");
#define AMDGPU_RESUME_MS 2000
@ -114,6 +115,7 @@ const char *amdgpu_asic_name[] = {
"NAVI14",
"NAVI12",
"SIENNA_CICHLID",
"NAVY_FLOUNDER",
"LAST",
};
@ -301,10 +303,10 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
}
/*
* device register access helper functions.
* MMIO register access helper functions.
*/
/**
* amdgpu_device_rreg - read a register
* amdgpu_mm_rreg - read a memory mapped IO register
*
* @adev: amdgpu_device pointer
* @reg: dword aligned register offset
@ -312,8 +314,8 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
*
* Returns the 32 bit value from the offset specified.
*/
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg,
uint32_t acc_flags)
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
uint32_t acc_flags)
{
uint32_t ret;
@ -322,9 +324,15 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg,
if ((reg * 4) < adev->rmmio_size)
ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
else
ret = adev->pcie_rreg(adev, (reg * 4));
trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
else {
unsigned long flags;
spin_lock_irqsave(&adev->mmio_idx_lock, flags);
writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
}
trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
return ret;
}
@ -370,19 +378,24 @@ void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
BUG();
}
void static inline amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_t reg,
uint32_t v, uint32_t acc_flags)
void static inline amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags)
{
trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
if ((reg * 4) < adev->rmmio_size)
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
else
adev->pcie_wreg(adev, (reg * 4), v);
else {
unsigned long flags;
spin_lock_irqsave(&adev->mmio_idx_lock, flags);
writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
}
}
/**
* amdgpu_device_wreg - write to a register
* amdgpu_mm_wreg - write to a memory mapped IO register
*
* @adev: amdgpu_device pointer
* @reg: dword aligned register offset
@ -391,13 +404,13 @@ void static inline amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_
*
* Writes the value specified to the offset specified.
*/
void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags)
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags)
{
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
return amdgpu_kiq_wreg(adev, reg, v);
amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
}
/*
@ -416,7 +429,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t
return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
}
amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
}
/**
@ -1621,6 +1634,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
case CHIP_SIENNA_CICHLID:
chip_name = "sienna_cichlid";
break;
case CHIP_NAVY_FLOUNDER:
chip_name = "navy_flounder";
break;
}
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
@ -1722,6 +1738,12 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
amdgpu_device_enable_virtual_display(adev);
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
return r;
}
switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
case CHIP_VERDE:
@ -1788,6 +1810,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
adev->family = AMDGPU_FAMILY_NV;
r = nv_set_ip_blocks(adev);
@ -1801,31 +1824,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
amdgpu_amdkfd_device_probe(adev);
if (amdgpu_sriov_vf(adev)) {
/* handle vbios stuff prior full access mode for new handshake */
if (adev->virt.req_init_data_ver == 1) {
if (!amdgpu_get_bios(adev)) {
DRM_ERROR("failed to get vbios\n");
return -EINVAL;
}
r = amdgpu_atombios_init(adev);
if (r) {
dev_err(adev->dev, "amdgpu_atombios_init failed\n");
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
return r;
}
}
}
/* we need to send REQ_GPU here for legacy handshaker otherwise the vbios
* will not be prepared by host for this VF */
if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) {
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
return r;
}
adev->pm.pp_feature = amdgpu_pp_feature_mask;
if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
@ -1857,10 +1855,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
if (r)
return r;
/* skip vbios handling for new handshake */
if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1)
continue;
/* Read BIOS */
if (!amdgpu_get_bios(adev))
return -EINVAL;
@ -1987,12 +1981,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (r)
return r;
if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) {
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
return -EAGAIN;
}
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
@ -2474,18 +2462,21 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
continue;
/* displays are handled separately */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
/* XXX handle errors */
r = adev->ip_blocks[i].version->funcs->suspend(adev);
/* XXX handle errors */
if (r) {
DRM_ERROR("suspend of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
adev->ip_blocks[i].status.hw = false;
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
continue;
/* XXX handle errors */
r = adev->ip_blocks[i].version->funcs->suspend(adev);
/* XXX handle errors */
if (r) {
DRM_ERROR("suspend of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
adev->ip_blocks[i].status.hw = false;
}
return 0;
@ -2817,6 +2808,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
#endif
#if defined(CONFIG_DRM_AMD_DC_DCN3_0)
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
#endif
return amdgpu_dc != 0;
#endif
@ -3324,6 +3316,9 @@ fence_driver_init:
queue_delayed_work(system_wq, &adev->delayed_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
if (amdgpu_sriov_vf(adev))
flush_delayed_work(&adev->delayed_init_work);
r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
if (r) {
dev_err(adev->dev, "Could not create amdgpu device attr\n");
@ -3951,6 +3946,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
break;
default:
goto disabled;
@ -4256,18 +4252,19 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_hive_info *hive = NULL;
struct amdgpu_device *tmp_adev = NULL;
int i, r = 0;
bool in_ras_intr = amdgpu_ras_intr_triggered();
bool use_baco =
(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
true : false;
bool need_emergency_restart = false;
bool audio_suspended = false;
/**
* Special case: RAS triggered and full reset isn't supported
*/
need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
/*
* Flush RAM to disk so that after reboot
* the user can read log and see why the system rebooted.
*/
if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
DRM_WARN("Emergency reboot.");
ksys_sync_helper();
@ -4275,7 +4272,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
}
dev_info(adev->dev, "GPU %s begin!\n",
(in_ras_intr && !use_baco) ? "jobs stop":"reset");
need_emergency_restart ? "jobs stop":"reset");
/*
* Here we trylock to avoid chain of resets executing from
@ -4347,7 +4344,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
amdgpu_fbdev_set_suspend(tmp_adev, 1);
/* disable ras on ALL IPs */
if (!(in_ras_intr && !use_baco) &&
if (!need_emergency_restart &&
amdgpu_device_ip_need_full_reset(tmp_adev))
amdgpu_ras_suspend(tmp_adev);
@ -4359,12 +4356,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
drm_sched_stop(&ring->sched, job ? &job->base : NULL);
if (in_ras_intr && !use_baco)
if (need_emergency_restart)
amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
}
}
if (in_ras_intr && !use_baco)
if (need_emergency_restart)
goto skip_sched_resume;
/*
@ -4441,7 +4438,7 @@ skip_hw_reset:
skip_sched_resume:
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
/*unlock kfd: SRIOV would do it separately */
if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
amdgpu_amdkfd_post_reset(tmp_adev);
if (audio_suspended)
amdgpu_device_resume_display_audio(tmp_adev);

View file

@ -133,7 +133,7 @@ static int hw_id_map[MAX_HWIP] = {
static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary)
{
uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
uint64_t pos = vram_size - adev->discovery_tmr_size;
uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
adev->discovery_tmr_size, false);

View file

@ -24,7 +24,8 @@
#ifndef __AMDGPU_DISCOVERY__
#define __AMDGPU_DISCOVERY__
#define DISCOVERY_TMR_SIZE (64 << 10)
#define DISCOVERY_TMR_SIZE (4 << 10)
#define DISCOVERY_TMR_OFFSET (64 << 10)
void amdgpu_discovery_fini(struct amdgpu_device *adev);
int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev);

View file

@ -911,8 +911,7 @@ int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low)
if (is_support_sw_smu(adev)) {
ret = smu_get_dpm_freq_range(&adev->smu, SMU_GFXCLK,
low ? &clk_freq : NULL,
!low ? &clk_freq : NULL,
true);
!low ? &clk_freq : NULL);
if (ret)
return 0;
return clk_freq * 100;
@ -929,8 +928,7 @@ int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low)
if (is_support_sw_smu(adev)) {
ret = smu_get_dpm_freq_range(&adev->smu, SMU_UCLK,
low ? &clk_freq : NULL,
!low ? &clk_freq : NULL,
true);
!low ? &clk_freq : NULL);
if (ret)
return 0;
return clk_freq * 100;
@ -1141,6 +1139,26 @@ int amdgpu_dpm_baco_reset(struct amdgpu_device *adev)
return 0;
}
bool amdgpu_dpm_is_mode1_reset_supported(struct amdgpu_device *adev)
{
struct smu_context *smu = &adev->smu;
if (is_support_sw_smu(adev))
return smu_mode1_reset_is_support(smu);
return false;
}
int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev)
{
struct smu_context *smu = &adev->smu;
if (is_support_sw_smu(adev))
return smu_mode1_reset(smu);
return -EOPNOTSUPP;
}
int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev,
enum PP_SMC_POWER_PROFILE type,
bool en)

View file

@ -529,6 +529,9 @@ int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev);
bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev);
bool amdgpu_dpm_is_mode1_reset_supported(struct amdgpu_device *adev);
int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev);
int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
enum pp_mp1_state mp1_state);

View file

@ -149,6 +149,7 @@ int amdgpu_mes = 0;
int amdgpu_noretry;
int amdgpu_force_asic_type = -1;
int amdgpu_tmz = 0;
int amdgpu_reset_method = -1; /* auto */
struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@ -757,6 +758,13 @@ module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444);
MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)");
module_param_named(tmz, amdgpu_tmz, int, 0444);
/**
* DOC: reset_method (int)
* GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)
*/
MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
module_param_named(reset_method, amdgpu_reset_method, int, 0444);
static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},

View file

@ -424,9 +424,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
ring->fence_drv.irq_type = irq_type;
ring->fence_drv.initialized = true;
DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr "
"0x%016llx, cpu addr 0x%p\n", ring->name,
ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr);
DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr 0x%016llx\n",
ring->name, ring->fence_drv.gpu_addr);
return 0;
}
@ -782,8 +781,10 @@ int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
if (amdgpu_sriov_vf(adev))
return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list_sriov, 1);
return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2);
return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list_sriov,
ARRAY_SIZE(amdgpu_debugfs_fence_list_sriov));
return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list,
ARRAY_SIZE(amdgpu_debugfs_fence_list));
#else
return 0;
#endif

View file

@ -930,7 +930,8 @@ static const struct drm_info_list amdgpu_debugfs_gem_list[] = {
int amdgpu_debugfs_gem_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_gem_list, 1);
return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_gem_list,
ARRAY_SIZE(amdgpu_debugfs_gem_list));
#endif
return 0;
}

View file

@ -83,6 +83,15 @@ struct amdgpu_vmhub {
uint32_t vm_context0_cntl;
uint32_t vm_l2_pro_fault_status;
uint32_t vm_l2_pro_fault_cntl;
/*
* store the register distances between two continuous context domain
* and invalidation engine.
*/
uint32_t ctx_distance;
uint32_t ctx_addr_distance; /* include LO32/HI32 */
uint32_t eng_distance;
uint32_t eng_addr_distance; /* include LO32/HI32 */
};
/*

View file

@ -468,7 +468,8 @@ static const struct drm_info_list amdgpu_debugfs_sa_list[] = {
int amdgpu_debugfs_sa_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_sa_list, 1);
return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_sa_list,
ARRAY_SIZE(amdgpu_debugfs_sa_list));
#else
return 0;
#endif

View file

@ -574,6 +574,9 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
INIT_LIST_HEAD(&id_mgr->ids_lru);
atomic_set(&id_mgr->reserved_vmid_num, 0);
/* manage only VMIDs not used by KFD */
id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
/* skip over VMID 0, since it is the system VM */
for (j = 1; j < id_mgr->num_ids; ++j) {
amdgpu_vmid_reset(adev, i, j);

View file

@ -37,7 +37,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
memset(&ti, 0, sizeof(struct amdgpu_task_info));
if (amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
if (amdgpu_gpu_recovery &&
amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
DRM_ERROR("ring %s timeout, but soft recovered\n",
s_job->sched->name);
return;

View file

@ -1105,7 +1105,7 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
}
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask, true);
ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
@ -1173,7 +1173,7 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
}
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask, true);
ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
@ -1241,7 +1241,7 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
}
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask, true);
ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask);
else
@ -1311,7 +1311,7 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
}
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask, true);
ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask);
else
@ -1381,7 +1381,7 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
}
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask, true);
ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask);
else
@ -1451,7 +1451,7 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
}
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask, true);
ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
else
@ -2960,7 +2960,7 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
if (r)
return r;
return snprintf(buf, PAGE_SIZE, "%d\n", sclk * 10 * 1000);
return snprintf(buf, PAGE_SIZE, "%u\n", sclk * 10 * 1000);
}
static ssize_t amdgpu_hwmon_show_sclk_label(struct device *dev,
@ -2997,7 +2997,7 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
if (r)
return r;
return snprintf(buf, PAGE_SIZE, "%d\n", mclk * 10 * 1000);
return snprintf(buf, PAGE_SIZE, "%u\n", mclk * 10 * 1000);
}
static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
@ -3558,21 +3558,34 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
{
int ret = 0;
ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
if (ret)
DRM_ERROR("Dpm %s uvd failed, ret = %d. \n",
enable ? "enable" : "disable", ret);
if (adev->family == AMDGPU_FAMILY_SI) {
mutex_lock(&adev->pm.mutex);
if (enable) {
adev->pm.dpm.uvd_active = true;
adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD;
} else {
adev->pm.dpm.uvd_active = false;
}
mutex_unlock(&adev->pm.mutex);
/* enable/disable Low Memory PState for UVD (4k videos) */
if (adev->asic_type == CHIP_STONEY &&
adev->uvd.decode_image_width >= WIDTH_4K) {
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
amdgpu_pm_compute_clocks(adev);
} else {
ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
if (ret)
DRM_ERROR("Dpm %s uvd failed, ret = %d. \n",
enable ? "enable" : "disable", ret);
if (hwmgr && hwmgr->hwmgr_func &&
hwmgr->hwmgr_func->update_nbdpm_pstate)
hwmgr->hwmgr_func->update_nbdpm_pstate(hwmgr,
!enable,
true);
/* enable/disable Low Memory PState for UVD (4k videos) */
if (adev->asic_type == CHIP_STONEY &&
adev->uvd.decode_image_width >= WIDTH_4K) {
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
if (hwmgr && hwmgr->hwmgr_func &&
hwmgr->hwmgr_func->update_nbdpm_pstate)
hwmgr->hwmgr_func->update_nbdpm_pstate(hwmgr,
!enable,
true);
}
}
}
@ -3580,10 +3593,24 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
{
int ret = 0;
ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
if (ret)
DRM_ERROR("Dpm %s vce failed, ret = %d. \n",
enable ? "enable" : "disable", ret);
if (adev->family == AMDGPU_FAMILY_SI) {
mutex_lock(&adev->pm.mutex);
if (enable) {
adev->pm.dpm.vce_active = true;
/* XXX select vce level based on ring/task */
adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
} else {
adev->pm.dpm.vce_active = false;
}
mutex_unlock(&adev->pm.mutex);
amdgpu_pm_compute_clocks(adev);
} else {
ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
if (ret)
DRM_ERROR("Dpm %s vce failed, ret = %d. \n",
enable ? "enable" : "disable", ret);
}
}
void amdgpu_pm_print_power_states(struct amdgpu_device *adev)

View file

@ -99,6 +99,7 @@ static int psp_early_init(void *handle)
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
psp_v11_0_set_psp_funcs(psp);
psp->autoload_supported = true;
break;
@ -430,6 +431,52 @@ static int psp_tmr_load(struct psp_context *psp)
return ret;
}
static void psp_prep_tmr_unload_cmd_buf(struct psp_context *psp,
struct psp_gfx_cmd_resp *cmd)
{
if (amdgpu_sriov_vf(psp->adev))
cmd->cmd_id = GFX_CMD_ID_DESTROY_VMR;
else
cmd->cmd_id = GFX_CMD_ID_DESTROY_TMR;
}
static int psp_tmr_unload(struct psp_context *psp)
{
int ret;
struct psp_gfx_cmd_resp *cmd;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
psp_prep_tmr_unload_cmd_buf(psp, cmd);
DRM_INFO("free PSP TMR buffer\n");
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
kfree(cmd);
return ret;
}
static int psp_tmr_terminate(struct psp_context *psp)
{
int ret;
void *tmr_buf;
void **pptr;
ret = psp_tmr_unload(psp);
if (ret)
return ret;
/* free TMR memory buffer */
pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr);
return 0;
}
static void psp_prep_asd_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
uint64_t asd_mc, uint32_t size)
{
@ -452,7 +499,9 @@ static int psp_asd_load(struct psp_context *psp)
* add workaround to bypass it for sriov now.
* TODO: add version check to make it common
*/
if (amdgpu_sriov_vf(psp->adev) || (psp->adev->asic_type == CHIP_SIENNA_CICHLID))
if (amdgpu_sriov_vf(psp->adev) ||
(psp->adev->asic_type == CHIP_SIENNA_CICHLID) ||
(psp->adev->asic_type == CHIP_NAVY_FLOUNDER))
return 0;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
@ -1717,7 +1766,8 @@ static int psp_np_fw_load(struct psp_context *psp)
continue;
if (psp->autoload_supported &&
adev->asic_type == CHIP_SIENNA_CICHLID &&
(adev->asic_type == CHIP_SIENNA_CICHLID ||
adev->asic_type == CHIP_NAVY_FLOUNDER) &&
(ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 ||
ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 ||
ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3))
@ -1866,8 +1916,6 @@ static int psp_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct psp_context *psp = &adev->psp;
void *tmr_buf;
void **pptr;
int ret;
if (psp->adev->psp.ta_fw) {
@ -1883,10 +1931,9 @@ static int psp_hw_fini(void *handle)
return ret;
}
psp_tmr_terminate(psp);
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr);
amdgpu_bo_free_kernel(&psp->fw_pri_bo,
&psp->fw_pri_mc_addr, &psp->fw_pri_buf);
amdgpu_bo_free_kernel(&psp->fence_buf_bo,
@ -1933,6 +1980,18 @@ static int psp_suspend(void *handle)
}
}
ret = psp_asd_unload(psp);
if (ret) {
DRM_ERROR("Failed to unload asd\n");
return ret;
}
ret = psp_tmr_terminate(psp);
if (ret) {
DRM_ERROR("Failed to terminate tmr\n");
return ret;
}
ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
if (ret) {
DRM_ERROR("PSP ring stop failed\n");
@ -2222,6 +2281,107 @@ out:
return err;
}
int parse_ta_bin_descriptor(struct psp_context *psp,
const struct ta_fw_bin_desc *desc,
const struct ta_firmware_header_v2_0 *ta_hdr)
{
uint8_t *ucode_start_addr = NULL;
if (!psp || !desc || !ta_hdr)
return -EINVAL;
ucode_start_addr = (uint8_t *)ta_hdr +
le32_to_cpu(desc->offset_bytes) +
le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
switch (desc->fw_type) {
case TA_FW_TYPE_PSP_ASD:
psp->asd_fw_version = le32_to_cpu(desc->fw_version);
psp->asd_feature_version = le32_to_cpu(desc->fw_version);
psp->asd_ucode_size = le32_to_cpu(desc->size_bytes);
psp->asd_start_addr = ucode_start_addr;
break;
case TA_FW_TYPE_PSP_XGMI:
psp->ta_xgmi_ucode_version = le32_to_cpu(desc->fw_version);
psp->ta_xgmi_ucode_size = le32_to_cpu(desc->size_bytes);
psp->ta_xgmi_start_addr = ucode_start_addr;
break;
case TA_FW_TYPE_PSP_RAS:
psp->ta_ras_ucode_version = le32_to_cpu(desc->fw_version);
psp->ta_ras_ucode_size = le32_to_cpu(desc->size_bytes);
psp->ta_ras_start_addr = ucode_start_addr;
break;
case TA_FW_TYPE_PSP_HDCP:
psp->ta_hdcp_ucode_version = le32_to_cpu(desc->fw_version);
psp->ta_hdcp_ucode_size = le32_to_cpu(desc->size_bytes);
psp->ta_hdcp_start_addr = ucode_start_addr;
break;
case TA_FW_TYPE_PSP_DTM:
psp->ta_dtm_ucode_version = le32_to_cpu(desc->fw_version);
psp->ta_dtm_ucode_size = le32_to_cpu(desc->size_bytes);
psp->ta_dtm_start_addr = ucode_start_addr;
break;
default:
dev_warn(psp->adev->dev, "Unsupported TA type: %d\n", desc->fw_type);
break;
}
return 0;
}
int psp_init_ta_microcode(struct psp_context *psp,
const char *chip_name)
{
struct amdgpu_device *adev = psp->adev;
char fw_name[30];
const struct ta_firmware_header_v2_0 *ta_hdr;
int err = 0;
int ta_index = 0;
if (!chip_name) {
dev_err(adev->dev, "invalid chip name for ta microcode\n");
return -EINVAL;
}
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
if (err)
goto out;
err = amdgpu_ucode_validate(adev->psp.ta_fw);
if (err)
goto out;
ta_hdr = (const struct ta_firmware_header_v2_0 *)adev->psp.ta_fw->data;
if (le16_to_cpu(ta_hdr->header.header_version_major) != 2) {
dev_err(adev->dev, "unsupported TA header version\n");
err = -EINVAL;
goto out;
}
if (le32_to_cpu(ta_hdr->ta_fw_bin_count) >= UCODE_MAX_TA_PACKAGING) {
dev_err(adev->dev, "packed TA count exceeds maximum limit\n");
err = -EINVAL;
goto out;
}
for (ta_index = 0; ta_index < le32_to_cpu(ta_hdr->ta_fw_bin_count); ta_index++) {
err = parse_ta_bin_descriptor(psp,
&ta_hdr->ta_fw_bin[ta_index],
ta_hdr);
if (err)
goto out;
}
return 0;
out:
dev_err(adev->dev, "fail to initialize ta microcode\n");
release_firmware(adev->psp.ta_fw);
adev->psp.ta_fw = NULL;
return err;
}
static int psp_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{

View file

@ -371,4 +371,6 @@ int psp_init_asd_microcode(struct psp_context *psp,
const char *chip_name);
int psp_init_sos_microcode(struct psp_context *psp,
const char *chip_name);
int psp_init_ta_microcode(struct psp_context *psp,
const char *chip_name);
#endif

View file

@ -2131,3 +2131,14 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
amdgpu_ras_reset_gpu(adev);
}
}
bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev)
{
if (adev->asic_type == CHIP_VEGA20 &&
adev->pm.fw_version <= 0x283400) {
return !(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) &&
amdgpu_ras_intr_triggered();
}
return false;
}

View file

@ -633,4 +633,5 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready);
bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev);
#endif

View file

@ -35,7 +35,7 @@
#define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished)
TRACE_EVENT(amdgpu_device_rreg,
TRACE_EVENT(amdgpu_mm_rreg,
TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
TP_ARGS(did, reg, value),
TP_STRUCT__entry(
@ -54,7 +54,7 @@ TRACE_EVENT(amdgpu_device_rreg,
(unsigned long)__entry->value)
);
TRACE_EVENT(amdgpu_device_wreg,
TRACE_EVENT(amdgpu_mm_wreg,
TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
TP_ARGS(did, reg, value),
TP_STRUCT__entry(

View file

@ -1869,7 +1869,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
adev->discovery_tmr_size =
amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
if (!adev->discovery_tmr_size)
adev->discovery_tmr_size = DISCOVERY_TMR_SIZE;
adev->discovery_tmr_size = DISCOVERY_TMR_OFFSET;
if (mem_train_support) {
/* reserve vram for mem train according to TMR location */

View file

@ -390,11 +390,11 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
if (!load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
return AMDGPU_FW_LOAD_PSP;
default:
DRM_ERROR("Unknown firmware load type\n");
}

View file

@ -124,6 +124,29 @@ struct ta_firmware_header_v1_0 {
uint32_t ta_dtm_size_bytes;
};
enum ta_fw_type {
TA_FW_TYPE_UNKOWN,
TA_FW_TYPE_PSP_ASD,
TA_FW_TYPE_PSP_XGMI,
TA_FW_TYPE_PSP_RAS,
TA_FW_TYPE_PSP_HDCP,
TA_FW_TYPE_PSP_DTM,
};
struct ta_fw_bin_desc {
uint32_t fw_type;
uint32_t fw_version;
uint32_t offset_bytes;
uint32_t size_bytes;
};
/* version_major=2, version_minor=0 */
struct ta_firmware_header_v2_0 {
struct common_firmware_header header;
uint32_t ta_fw_bin_count;
struct ta_fw_bin_desc ta_fw_bin[];
};
/* version_major=1, version_minor=0 */
struct gfx_firmware_header_v1_0 {
struct common_firmware_header header;
@ -276,6 +299,7 @@ union amdgpu_firmware_header {
struct psp_firmware_header_v1_1 psp_v1_1;
struct psp_firmware_header_v1_3 psp_v1_3;
struct ta_firmware_header_v1_0 ta;
struct ta_firmware_header_v2_0 ta_v2_0;
struct gfx_firmware_header_v1_0 gfx;
struct rlc_firmware_header_v1_0 rlc;
struct rlc_firmware_header_v2_0 rlc_v2_0;
@ -288,6 +312,8 @@ union amdgpu_firmware_header {
uint8_t raw[0x100];
};
#define UCODE_MAX_TA_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct ta_fw_bin_desc))
/*
* fw loading support
*/

View file

@ -43,6 +43,7 @@
#define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin"
#define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin"
#define FIRMWARE_SIENNA_CICHLID "amdgpu/sienna_cichlid_vcn.bin"
#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
@ -53,6 +54,7 @@ MODULE_FIRMWARE(FIRMWARE_NAVI10);
MODULE_FIRMWARE(FIRMWARE_NAVI14);
MODULE_FIRMWARE(FIRMWARE_NAVI12);
MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID);
MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
@ -115,6 +117,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
adev->vcn.indirect_sram = true;
break;
case CHIP_NAVY_FLOUNDER:
fw_name = FIRMWARE_NAVY_FLOUNDER;
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
adev->vcn.indirect_sram = true;
break;
default:
return -EINVAL;
}
@ -421,6 +429,10 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
unsigned i;
int r;
/* VCN in SRIOV does not support direct register read/write */
if (amdgpu_sriov_vf(adev))
return 0;
WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)

View file

@ -27,6 +27,9 @@
#include "amdgpu.h"
#include "amdgpu_ras.h"
#include "vi.h"
#include "soc15.h"
#include "nv.h"
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
{
@ -513,6 +516,31 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
if (is_virtual_machine()) /* passthrough mode exclus sriov mod */
adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
}
/* we have the ability to check now */
if (amdgpu_sriov_vf(adev)) {
switch (adev->asic_type) {
case CHIP_TONGA:
case CHIP_FIJI:
vi_set_virt_ops(adev);
break;
case CHIP_VEGA10:
case CHIP_VEGA20:
case CHIP_ARCTURUS:
soc15_set_virt_ops(adev);
break;
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
nv_set_virt_ops(adev);
/* try send GPU_INIT_DATA request to host */
amdgpu_virt_request_init_data(adev);
break;
default: /* other chip doesn't support SRIOV */
DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
break;
}
}
}
static bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev)

View file

@ -324,6 +324,7 @@ struct amdgpu_vm {
struct amdgpu_vm_manager {
/* Handling of VMIDs */
struct amdgpu_vmid_mgr id_mgr[AMDGPU_MAX_VMHUBS];
unsigned int first_kfd_vmid;
/* Handling of VM fences */
u64 fence_context;

View file

@ -73,6 +73,7 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev,
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
athub_v2_1_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
athub_v2_1_update_medium_grain_light_sleep(adev,

View file

@ -1326,6 +1326,14 @@ cik_asic_reset_method(struct amdgpu_device *adev)
{
bool baco_reset;
if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
amdgpu_reset_method == AMD_RESET_METHOD_BACO)
return amdgpu_reset_method;
if (amdgpu_reset_method != -1)
dev_warn(adev->dev, "Specified reset:%d isn't supported, using AUTO instead.\n",
amdgpu_reset_method);
switch (adev->asic_type) {
case CHIP_BONAIRE:
case CHIP_HAWAII:

View file

@ -54,8 +54,6 @@
#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001
#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003
#define AMDGPU_NUM_OF_VMIDS 8
#define PIPEID(x) ((x) << 0)
#define MEID(x) ((x) << 2)
#define VMID(x) ((x) << 4)

View file

@ -145,6 +145,13 @@ MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec2.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_rlc.bin");
MODULE_FIRMWARE("amdgpu/navy_flounder_ce.bin");
MODULE_FIRMWARE("amdgpu/navy_flounder_pfp.bin");
MODULE_FIRMWARE("amdgpu/navy_flounder_me.bin");
MODULE_FIRMWARE("amdgpu/navy_flounder_mec.bin");
MODULE_FIRMWARE("amdgpu/navy_flounder_mec2.bin");
MODULE_FIRMWARE("amdgpu/navy_flounder_rlc.bin");
static const struct soc15_reg_golden golden_settings_gc_10_1[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
@ -3114,6 +3121,48 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] =
/* Pending on emulation bring up */
};
static const struct soc15_reg_golden golden_settings_gc_10_3_2[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffffffff, 0x00070104),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_START_PHASE, 0x000000ff, 0x00000004),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xe07df47f, 0x00180070),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xffffffff, 0x010b0000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};
#define DEFAULT_SH_MEM_CONFIG \
((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
(SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
@ -3302,6 +3351,12 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_10_3_sienna_cichlid,
(const u32)ARRAY_SIZE(golden_settings_gc_10_3_sienna_cichlid));
break;
case CHIP_NAVY_FLOUNDER:
soc15_program_register_sequence(adev,
golden_settings_gc_10_3_2,
(const u32)ARRAY_SIZE(golden_settings_gc_10_3_2));
break;
default:
break;
}
@ -3483,6 +3538,7 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
adev->gfx.cp_fw_write_wait = true;
break;
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
adev->gfx.cp_fw_write_wait = true;
break;
default:
@ -3578,6 +3634,9 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
case CHIP_SIENNA_CICHLID:
chip_name = "sienna_cichlid";
break;
case CHIP_NAVY_FLOUNDER:
chip_name = "navy_flounder";
break;
default:
BUG();
}
@ -4108,6 +4167,7 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
break;
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@ -4230,6 +4290,7 @@ static int gfx_v10_0_sw_init(void *handle)
adev->gfx.mec.num_queue_per_pipe = 8;
break;
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
adev->gfx.me.num_queue_per_pipe = 1;
@ -4484,7 +4545,8 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
/* for ASICs that integrates GFX v10.3
* pa_sc_tile_steering_override should be set to 0 */
if (adev->asic_type == CHIP_SIENNA_CICHLID)
if (adev->asic_type == CHIP_SIENNA_CICHLID ||
adev->asic_type == CHIP_NAVY_FLOUNDER)
return 0;
/* init num_sc */
@ -4512,8 +4574,6 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
}
#define DEFAULT_SH_MEM_BASES (0x6000)
#define FIRST_COMPUTE_VMID (8)
#define LAST_COMPUTE_VMID (16)
static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
{
@ -4529,7 +4589,7 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
mutex_lock(&adev->srbm_mutex);
for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
nv_grbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
@ -4540,7 +4600,7 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
/* Initialize all compute VMIDs to have no GDS, GWS, or OA
acccess. These should be enabled by FW for target VMIDs. */
for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
@ -4712,12 +4772,19 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* csib */
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI,
adev->gfx.rlc.clear_state_gpu_addr >> 32);
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO,
adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
if (adev->asic_type == CHIP_NAVI12) {
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI,
adev->gfx.rlc.clear_state_gpu_addr >> 32);
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO,
adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
} else {
WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI,
adev->gfx.rlc.clear_state_gpu_addr >> 32);
WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO,
adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
}
return 0;
}
@ -5325,7 +5392,12 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
if (adev->asic_type == CHIP_NAVI12) {
WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
} else {
WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
}
for (i = 0; i < adev->usec_timeout; i++) {
if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0)
@ -5710,6 +5782,7 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
DOORBELL_RANGE_LOWER_Sienna_Cichlid, ring->doorbell_index);
WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
@ -5842,6 +5915,7 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
if (enable) {
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0);
break;
default:
@ -5851,6 +5925,7 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
} else {
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid,
(CP_MEC_CNTL__MEC_ME1_HALT_MASK |
CP_MEC_CNTL__MEC_ME2_HALT_MASK));
@ -5944,6 +6019,7 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
/* tell RLC which is KIQ queue */
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
@ -6647,6 +6723,7 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev)
* has been remapped to mmVGT_ESGS_RING_SIZE */
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid);
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, 0);
WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern);
@ -6685,6 +6762,7 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
/* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */
data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) <<
GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
@ -6975,6 +7053,7 @@ static int gfx_v10_0_soft_reset(void *handle)
tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY_Sienna_Cichlid))
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
GRBM_SOFT_RESET,
@ -7073,6 +7152,7 @@ static int gfx_v10_0_early_init(void *handle)
adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X;
break;
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid;
break;
default:
@ -7125,6 +7205,7 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);
/* wait for RLC_SAFE_MODE */
@ -7156,6 +7237,7 @@ static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev)
data = RLC_SAFE_MODE__CMD_MASK;
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);
break;
default:
@ -7468,6 +7550,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
gfx_v10_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
@ -7483,12 +7566,12 @@ static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags)
int data;
/* AMD_CG_SUPPORT_GFX_MGCG */
data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
*flags |= AMD_CG_SUPPORT_GFX_MGCG;
/* AMD_CG_SUPPORT_GFX_CGCG */
data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
*flags |= AMD_CG_SUPPORT_GFX_CGCG;
@ -7497,17 +7580,17 @@ static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags)
*flags |= AMD_CG_SUPPORT_GFX_CGLS;
/* AMD_CG_SUPPORT_GFX_RLC_LS */
data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
/* AMD_CG_SUPPORT_GFX_CP_LS */
data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
/* AMD_CG_SUPPORT_GFX_3D_CGCG */
data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
@ -7685,14 +7768,9 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
u64 seq, unsigned flags)
{
struct amdgpu_device *adev = ring->adev;
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
/* Interrupt not work fine on GFX10.1 model yet. Use fallback instead */
if (adev->pdev->device == 0x50)
int_sel = false;
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
@ -7843,12 +7921,17 @@ static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *kiq_ring = &kiq->ring;
unsigned long flags;
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size))
spin_lock_irqsave(&kiq->ring_lock, flags);
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
spin_unlock_irqrestore(&kiq->ring_lock, flags);
return -ENOMEM;
}
/* assert preemption condition */
amdgpu_ring_set_preempt_cond_exec(ring, false);
@ -7859,6 +7942,8 @@ static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
++ring->trail_seq);
amdgpu_ring_commit(kiq_ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
/* poll the trailing fence */
for (i = 0; i < adev->usec_timeout; i++) {
if (ring->trail_seq ==
@ -8567,6 +8652,7 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs;
break;
case CHIP_NAVI12:

View file

@ -1850,8 +1850,6 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
*
*/
#define DEFAULT_SH_MEM_BASES (0x6000)
#define FIRST_COMPUTE_VMID (8)
#define LAST_COMPUTE_VMID (16)
static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
{
int i;
@ -1869,7 +1867,7 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT;
mutex_lock(&adev->srbm_mutex);
for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
cik_srbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
WREG32(mmSH_MEM_CONFIG, sh_mem_config);
@ -1882,7 +1880,7 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
/* Initialize all compute VMIDs to have no GDS, GWS, or OA
acccess. These should be enabled by FW for target VMIDs. */
for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
WREG32(amdgpu_gds_reg_offset[i].gws, 0);

View file

@ -3686,8 +3686,6 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
*
*/
#define DEFAULT_SH_MEM_BASES (0x6000)
#define FIRST_COMPUTE_VMID (8)
#define LAST_COMPUTE_VMID (16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
int i;
@ -3710,7 +3708,7 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
SH_MEM_CONFIG__PRIVATE_ATC_MASK;
mutex_lock(&adev->srbm_mutex);
for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
vi_srbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
WREG32(mmSH_MEM_CONFIG, sh_mem_config);
@ -3723,7 +3721,7 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
/* Initialize all compute VMIDs to have no GDS, GWS, or OA
acccess. These should be enabled by FW for target VMIDs. */
for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
WREG32(amdgpu_gds_reg_offset[i].gws, 0);

View file

@ -2463,8 +2463,6 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
}
#define DEFAULT_SH_MEM_BASES (0x6000)
#define FIRST_COMPUTE_VMID (8)
#define LAST_COMPUTE_VMID (16)
static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
{
int i;
@ -2484,7 +2482,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
mutex_lock(&adev->srbm_mutex);
for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
soc15_grbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
@ -2495,7 +2493,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
/* Initialize all compute VMIDs to have no GDS, GWS, or OA
acccess. These should be enabled by FW for target VMIDs. */
for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);

View file

@ -38,15 +38,15 @@ u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev)
void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
/* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */
int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
- mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
offset * vmid, lower_32_bits(page_table_base));
hub->ctx_addr_distance * vmid,
lower_32_bits(page_table_base));
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
offset * vmid, upper_32_bits(page_table_base));
hub->ctx_addr_distance * vmid,
upper_32_bits(page_table_base));
}
static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
@ -207,6 +207,7 @@ static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
unsigned num_level, block_size;
uint32_t tmp;
int i;
@ -245,25 +246,31 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!amdgpu_noretry);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i, tmp);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2,
upper_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
i * hub->ctx_addr_distance,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
}
static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
unsigned i;
for (i = 0 ; i < 18; ++i) {
WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
2 * i, 0xffffffff);
i * hub->eng_addr_distance, 0xffffffff);
WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
2 * i, 0x1f);
i * hub->eng_addr_distance, 0x1f);
}
}
@ -299,12 +306,14 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
u32 tmp;
u32 i;
/* Disable all tables */
for (i = 0; i < 16; i++)
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL, i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL,
i * hub->ctx_distance, 0);
/* Setup TLB control */
tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL);
@ -360,7 +369,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
CRASH_ON_NO_RETRY_FAULT, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
CRASH_ON_RETRY_FAULT, 1);
}
}
WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp);
}
@ -386,4 +395,11 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev)
SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS);
hub->vm_l2_pro_fault_cntl =
SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
hub->ctx_distance = mmVM_CONTEXT1_CNTL - mmVM_CONTEXT0_CNTL;
hub->ctx_addr_distance = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
hub->eng_distance = mmVM_INVALIDATE_ENG1_REQ - mmVM_INVALIDATE_ENG0_REQ;
hub->eng_addr_distance = mmVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
}

View file

@ -49,15 +49,15 @@ u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev)
void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
/* two registers distance between mmGCVM_CONTEXT0_* to mmGCVM_CONTEXT1_* */
int offset = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
- mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
offset * vmid, lower_32_bits(page_table_base));
hub->ctx_addr_distance * vmid,
lower_32_bits(page_table_base));
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
offset * vmid, upper_32_bits(page_table_base));
hub->ctx_addr_distance * vmid,
upper_32_bits(page_table_base));
}
static void gfxhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev)
@ -218,6 +218,7 @@ static void gfxhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev)
static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
int i;
uint32_t tmp;
@ -247,25 +248,31 @@ static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!amdgpu_noretry);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i, tmp);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2,
upper_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
i * hub->ctx_addr_distance,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
}
static void gfxhub_v2_0_program_invalidation(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
unsigned i;
for (i = 0 ; i < 18; ++i) {
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
2 * i, 0xffffffff);
i * hub->eng_addr_distance, 0xffffffff);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
2 * i, 0x1f);
i * hub->eng_addr_distance, 0x1f);
}
}
@ -287,12 +294,14 @@ int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev)
void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
u32 tmp;
u32 i;
/* Disable all tables */
for (i = 0; i < 16; i++)
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL,
i * hub->ctx_distance, 0);
/* Setup TLB control */
tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL);
@ -373,4 +382,12 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev)
SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS);
hub->vm_l2_pro_fault_cntl =
SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL);
hub->ctx_distance = mmGCVM_CONTEXT1_CNTL - mmGCVM_CONTEXT0_CNTL;
hub->ctx_addr_distance = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
hub->eng_distance = mmGCVM_INVALIDATE_ENG1_REQ -
mmGCVM_INVALIDATE_ENG0_REQ;
hub->eng_addr_distance = mmGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
}

View file

@ -49,15 +49,15 @@ u64 gfxhub_v2_1_get_mc_fb_offset(struct amdgpu_device *adev)
void gfxhub_v2_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
/* two registers distance between mmGCVM_CONTEXT0_* to mmGCVM_CONTEXT1_* */
int offset = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
- mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
offset * vmid, lower_32_bits(page_table_base));
hub->ctx_addr_distance * vmid,
lower_32_bits(page_table_base));
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
offset * vmid, upper_32_bits(page_table_base));
hub->ctx_addr_distance * vmid,
upper_32_bits(page_table_base));
}
static void gfxhub_v2_1_init_gart_aperture_regs(struct amdgpu_device *adev)
@ -207,6 +207,7 @@ static void gfxhub_v2_1_disable_identity_aperture(struct amdgpu_device *adev)
static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
int i;
uint32_t tmp;
@ -236,25 +237,31 @@ static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!amdgpu_noretry);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i, tmp);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2,
upper_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
i * hub->ctx_addr_distance,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
}
static void gfxhub_v2_1_program_invalidation(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
unsigned i;
for (i = 0 ; i < 18; ++i) {
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
2 * i, 0xffffffff);
i * hub->eng_addr_distance, 0xffffffff);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
2 * i, 0x1f);
i * hub->eng_addr_distance, 0x1f);
}
}
@ -288,12 +295,14 @@ int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev)
void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
u32 tmp;
u32 i;
/* Disable all tables */
for (i = 0; i < 16; i++)
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i, 0);
WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL,
i * hub->ctx_distance, 0);
/* Setup TLB control */
tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL);
@ -372,6 +381,14 @@ void gfxhub_v2_1_init(struct amdgpu_device *adev)
SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS);
hub->vm_l2_pro_fault_cntl =
SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL);
hub->ctx_distance = mmGCVM_CONTEXT1_CNTL - mmGCVM_CONTEXT0_CNTL;
hub->ctx_addr_distance = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
hub->eng_distance = mmGCVM_INVALIDATE_ENG1_REQ -
mmGCVM_INVALIDATE_ENG0_REQ;
hub->eng_addr_distance = mmGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
}
int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev)

View file

@ -49,8 +49,6 @@
#include "mmhub_v2_0.h"
#include "athub_v2_0.h"
#include "athub_v2_1.h"
/* XXX Move this macro to navi10 header file, which is like vid.h for VI.*/
#define AMDGPU_NUM_OF_VMIDS 8
#if 0
static const struct soc15_reg_golden golden_settings_navi10_hdp[] =
@ -88,7 +86,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
/* MM HUB */
hub = &adev->vmhub[AMDGPU_MMHUB_0];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
reg = hub->vm_context0_cntl + hub->ctx_distance * i;
tmp = RREG32(reg);
tmp &= ~bits[AMDGPU_MMHUB_0];
WREG32(reg, tmp);
@ -97,7 +95,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
/* GFX HUB */
hub = &adev->vmhub[AMDGPU_GFXHUB_0];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
reg = hub->vm_context0_cntl + hub->ctx_distance * i;
tmp = RREG32(reg);
tmp &= ~bits[AMDGPU_GFXHUB_0];
WREG32(reg, tmp);
@ -107,7 +105,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
/* MM HUB */
hub = &adev->vmhub[AMDGPU_MMHUB_0];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
reg = hub->vm_context0_cntl + hub->ctx_distance * i;
tmp = RREG32(reg);
tmp |= bits[AMDGPU_MMHUB_0];
WREG32(reg, tmp);
@ -116,7 +114,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
/* GFX HUB */
hub = &adev->vmhub[AMDGPU_GFXHUB_0];
for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
reg = hub->vm_context0_cntl + hub->ctx_distance * i;
tmp = RREG32(reg);
tmp |= bits[AMDGPU_GFXHUB_0];
WREG32(reg, tmp);
@ -285,7 +283,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
if (use_semaphore) {
for (i = 0; i < adev->usec_timeout; i++) {
/* a read return value of 1 means semaphore acuqire */
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
hub->eng_distance * eng);
if (tmp & 0x1)
break;
udelay(1);
@ -295,18 +294,19 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
}
WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
WREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
/*
* Issue a dummy read to wait for the ACK register to be cleared
* to avoid a false ACK due to the new fast GRBM interface.
*/
if (vmhub == AMDGPU_GFXHUB_0)
RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
RREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng);
/* Wait for ACK with a delay.*/
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
hub->eng_distance * eng);
tmp &= 1 << vmid;
if (tmp)
break;
@ -320,7 +320,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
* add semaphore release after invalidation,
* write with 0 means semaphore release
*/
WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);
WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
hub->eng_distance * eng, 0);
spin_unlock(&adev->gmc.invalidate_lock);
@ -360,8 +361,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
const unsigned eng = 17;
u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type);
u32 req = hub->vm_inv_eng0_req + eng;
u32 ack = hub->vm_inv_eng0_ack + eng;
u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
1 << vmid);
@ -504,16 +505,21 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
if (use_semaphore)
/* a read return value of 1 means semaphore acuqire */
amdgpu_ring_emit_reg_wait(ring,
hub->vm_inv_eng0_sem + eng, 0x1, 0x1);
hub->vm_inv_eng0_sem +
hub->eng_distance * eng, 0x1, 0x1);
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
(hub->ctx_addr_distance * vmid),
lower_32_bits(pd_addr));
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
(hub->ctx_addr_distance * vmid),
upper_32_bits(pd_addr));
amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
hub->vm_inv_eng0_ack + eng,
amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
hub->eng_distance * eng,
hub->vm_inv_eng0_ack +
hub->eng_distance * eng,
req, 1 << vmid);
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
@ -522,7 +528,8 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
* add semaphore release after invalidation,
* write with 0 means semaphore release
*/
amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0);
amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
hub->eng_distance * eng, 0);
return pd_addr;
}
@ -686,7 +693,8 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
{
u64 base = 0;
if (adev->asic_type == CHIP_SIENNA_CICHLID)
if (adev->asic_type == CHIP_SIENNA_CICHLID ||
adev->asic_type == CHIP_NAVY_FLOUNDER)
base = gfxhub_v2_1_get_fb_location(adev);
else
base = gfxhub_v2_0_get_fb_location(adev);
@ -698,7 +706,8 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
amdgpu_gmc_gart_location(adev, mc);
/* base offset of vram pages */
if (adev->asic_type == CHIP_SIENNA_CICHLID)
if (adev->asic_type == CHIP_SIENNA_CICHLID ||
adev->asic_type == CHIP_NAVY_FLOUNDER)
adev->vm_manager.vram_base_offset = gfxhub_v2_1_get_mc_fb_offset(adev);
else
adev->vm_manager.vram_base_offset = gfxhub_v2_0_get_mc_fb_offset(adev);
@ -746,6 +755,7 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
default:
adev->gmc.gart_size = 512ULL << 20;
break;
@ -814,7 +824,8 @@ static int gmc_v10_0_sw_init(void *handle)
int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->asic_type == CHIP_SIENNA_CICHLID)
if (adev->asic_type == CHIP_SIENNA_CICHLID ||
adev->asic_type == CHIP_NAVY_FLOUNDER)
gfxhub_v2_1_init(adev);
else
gfxhub_v2_0_init(adev);
@ -840,6 +851,7 @@ static int gmc_v10_0_sw_init(void *handle)
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
adev->num_vmhubs = 2;
/*
* To fulfill 4-level page support,
@ -905,8 +917,7 @@ static int gmc_v10_0_sw_init(void *handle)
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
adev->vm_manager.first_kfd_vmid = 8;
amdgpu_vm_manager_init(adev);
@ -945,6 +956,7 @@ static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev)
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
break;
default:
break;
@ -971,7 +983,8 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
if (r)
return r;
if (adev->asic_type == CHIP_SIENNA_CICHLID)
if (adev->asic_type == CHIP_SIENNA_CICHLID ||
adev->asic_type == CHIP_NAVY_FLOUNDER)
r = gfxhub_v2_1_gart_enable(adev);
else
r = gfxhub_v2_0_gart_enable(adev);
@ -995,7 +1008,8 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
false : true;
if (adev->asic_type == CHIP_SIENNA_CICHLID)
if (adev->asic_type == CHIP_SIENNA_CICHLID ||
adev->asic_type == CHIP_NAVY_FLOUNDER)
gfxhub_v2_1_set_fault_enable_default(adev, value);
else
gfxhub_v2_0_set_fault_enable_default(adev, value);
@ -1036,7 +1050,8 @@ static int gmc_v10_0_hw_init(void *handle)
*/
static void gmc_v10_0_gart_disable(struct amdgpu_device *adev)
{
if (adev->asic_type == CHIP_SIENNA_CICHLID)
if (adev->asic_type == CHIP_SIENNA_CICHLID ||
adev->asic_type == CHIP_NAVY_FLOUNDER)
gfxhub_v2_1_gart_disable(adev);
else
gfxhub_v2_0_gart_disable(adev);
@ -1110,7 +1125,8 @@ static int gmc_v10_0_set_clockgating_state(void *handle,
if (r)
return r;
if (adev->asic_type == CHIP_SIENNA_CICHLID)
if (adev->asic_type == CHIP_SIENNA_CICHLID ||
adev->asic_type == CHIP_NAVY_FLOUNDER)
return athub_v2_1_set_clockgating(adev, state);
else
return athub_v2_0_set_clockgating(adev, state);
@ -1122,7 +1138,8 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags)
mmhub_v2_0_get_clockgating(adev, flags);
if (adev->asic_type == CHIP_SIENNA_CICHLID)
if (adev->asic_type == CHIP_SIENNA_CICHLID ||
adev->asic_type == CHIP_NAVY_FLOUNDER)
athub_v2_1_get_clockgating(adev, flags);
else
athub_v2_0_get_clockgating(adev, flags);

View file

@ -878,7 +878,7 @@ static int gmc_v6_0_sw_init(void *handle)
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
adev->vm_manager.first_kfd_vmid = 8;
amdgpu_vm_manager_init(adev);
/* base offset of vram pages */

View file

@ -1052,7 +1052,7 @@ static int gmc_v7_0_sw_init(void *handle)
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
adev->vm_manager.first_kfd_vmid = 8;
amdgpu_vm_manager_init(adev);
/* base offset of vram pages */

View file

@ -1177,7 +1177,7 @@ static int gmc_v8_0_sw_init(void *handle)
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
adev->vm_manager.first_kfd_vmid = 8;
amdgpu_vm_manager_init(adev);
/* base offset of vram pages */

View file

@ -68,9 +68,6 @@
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L
/* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/
#define AMDGPU_NUM_OF_VMIDS 8
static const u32 golden_settings_vega10_hdp[] =
{
0xf64, 0x0fffffff, 0x00000000,
@ -505,11 +502,11 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
if (adev->gfx.kiq.ring.sched.ready &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
!adev->in_gpu_reset) {
uint32_t req = hub->vm_inv_eng0_req + eng;
uint32_t ack = hub->vm_inv_eng0_ack + eng;
uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
1 << vmid);
1 << vmid);
return;
}
@ -526,7 +523,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
if (use_semaphore) {
for (j = 0; j < adev->usec_timeout; j++) {
/* a read return value of 1 means semaphore acuqire */
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
hub->eng_distance * eng);
if (tmp & 0x1)
break;
udelay(1);
@ -537,7 +535,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
}
do {
WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
WREG32_NO_KIQ(hub->vm_inv_eng0_req +
hub->eng_distance * eng, inv_req);
/*
* Issue a dummy read to wait for the ACK register to
@ -545,10 +544,12 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
* GRBM interface.
*/
if (vmhub == AMDGPU_GFXHUB_0)
RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
RREG32_NO_KIQ(hub->vm_inv_eng0_req +
hub->eng_distance * eng);
for (j = 0; j < adev->usec_timeout; j++) {
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
hub->eng_distance * eng);
if (tmp & (1 << vmid))
break;
udelay(1);
@ -564,7 +565,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
* add semaphore release after invalidation,
* write with 0 means semaphore release
*/
WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);
WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
hub->eng_distance * eng, 0);
spin_unlock(&adev->gmc.invalidate_lock);
@ -679,16 +681,21 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
if (use_semaphore)
/* a read return value of 1 means semaphore acuqire */
amdgpu_ring_emit_reg_wait(ring,
hub->vm_inv_eng0_sem + eng, 0x1, 0x1);
hub->vm_inv_eng0_sem +
hub->eng_distance * eng, 0x1, 0x1);
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
(hub->ctx_addr_distance * vmid),
lower_32_bits(pd_addr));
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
(hub->ctx_addr_distance * vmid),
upper_32_bits(pd_addr));
amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
hub->vm_inv_eng0_ack + eng,
amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
hub->eng_distance * eng,
hub->vm_inv_eng0_ack +
hub->eng_distance * eng,
req, 1 << vmid);
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
@ -697,7 +704,8 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
* add semaphore release after invalidation,
* write with 0 means semaphore release
*/
amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0);
amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
hub->eng_distance * eng, 0);
return pd_addr;
}
@ -1248,12 +1256,15 @@ static int gmc_v9_0_sw_init(void *handle)
/*
* number of VMs
* VMID 0 is reserved for System
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
* amdgpu graphics/compute will use VMIDs 1..n-1
* amdkfd will use VMIDs n..15
*
* The first KFD VMID is 8 for GPUs with graphics, 3 for
* compute-only GPUs. On compute-only GPUs that leaves 2 VMIDs
* for video processing.
*/
adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
adev->vm_manager.id_mgr[AMDGPU_MMHUB_1].num_ids = AMDGPU_NUM_OF_VMIDS;
adev->vm_manager.first_kfd_vmid =
adev->asic_type == CHIP_ARCTURUS ? 3 : 8;
amdgpu_vm_manager_init(adev);

View file

@ -377,7 +377,7 @@ static void jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring *ring,
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* wait for register write */
data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
data1 = lower_32_bits(pd_addr);
mask = 0xffffffff;
jpeg_v1_0_decode_ring_emit_reg_wait(ring, data0, data1, mask);

View file

@ -629,7 +629,7 @@ void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* wait for register write */
data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
data1 = lower_32_bits(pd_addr);
mask = 0xffffffff;
jpeg_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);

View file

@ -54,15 +54,15 @@ u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
/* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */
int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
- mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
offset * vmid, lower_32_bits(page_table_base));
hub->ctx_addr_distance * vmid,
lower_32_bits(page_table_base));
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
offset * vmid, upper_32_bits(page_table_base));
hub->ctx_addr_distance * vmid,
upper_32_bits(page_table_base));
}
static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
@ -230,6 +230,7 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
unsigned num_level, block_size;
uint32_t tmp;
int i;
@ -268,25 +269,31 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!amdgpu_noretry);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i, tmp);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2,
upper_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
i * hub->ctx_addr_distance,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
}
static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
unsigned i;
for (i = 0; i < 18; ++i) {
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
2 * i, 0xffffffff);
i * hub->eng_addr_distance, 0xffffffff);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
2 * i, 0x1f);
i * hub->eng_addr_distance, 0x1f);
}
}
@ -333,12 +340,14 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
u32 tmp;
u32 i;
/* Disable all tables */
for (i = 0; i < 16; i++)
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL, i, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL,
i * hub->ctx_distance, 0);
/* Setup TLB control */
tmp = RREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL);
@ -429,6 +438,12 @@ void mmhub_v1_0_init(struct amdgpu_device *adev)
hub->vm_l2_pro_fault_cntl =
SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
hub->ctx_distance = mmVM_CONTEXT1_CNTL - mmVM_CONTEXT0_CNTL;
hub->ctx_addr_distance = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
hub->eng_distance = mmVM_INVALIDATE_ENG1_REQ - mmVM_INVALIDATE_ENG0_REQ;
hub->eng_addr_distance = mmVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
}
static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,

View file

@ -39,15 +39,15 @@
void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
/* two registers distance between mmMMVM_CONTEXT0_* to mmMMVM_CONTEXT1_* */
int offset = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
- mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
offset * vmid, lower_32_bits(page_table_base));
hub->ctx_addr_distance * vmid,
lower_32_bits(page_table_base));
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
offset * vmid, upper_32_bits(page_table_base));
hub->ctx_addr_distance * vmid,
upper_32_bits(page_table_base));
}
static void mmhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev)
@ -209,6 +209,7 @@ static void mmhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev)
static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
int i;
uint32_t tmp;
@ -239,25 +240,31 @@ static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!amdgpu_noretry);
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i, tmp);
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2,
upper_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
i * hub->ctx_addr_distance,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
}
static void mmhub_v2_0_program_invalidation(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
unsigned i;
for (i = 0; i < 18; ++i) {
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
2 * i, 0xffffffff);
i * hub->eng_addr_distance, 0xffffffff);
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
2 * i, 0x1f);
i * hub->eng_addr_distance, 0x1f);
}
}
@ -279,12 +286,14 @@ int mmhub_v2_0_gart_enable(struct amdgpu_device *adev)
void mmhub_v2_0_gart_disable(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
u32 tmp;
u32 i;
/* Disable all tables */
for (i = 0; i < 16; i++)
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, i, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL,
i * hub->ctx_distance, 0);
/* Setup TLB control */
tmp = RREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL);
@ -365,6 +374,13 @@ void mmhub_v2_0_init(struct amdgpu_device *adev)
hub->vm_l2_pro_fault_cntl =
SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL);
hub->ctx_distance = mmMMVM_CONTEXT1_CNTL - mmMMVM_CONTEXT0_CNTL;
hub->ctx_addr_distance = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
hub->eng_distance = mmMMVM_INVALIDATE_ENG1_REQ -
mmMMVM_INVALIDATE_ENG0_REQ;
hub->eng_addr_distance = mmMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
}
static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
@ -374,6 +390,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);
break;
@ -406,6 +423,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
if (def != data)
WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data);
if (def1 != data1)
@ -427,6 +445,7 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
break;
default:
@ -442,6 +461,7 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
if (def != data) {
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data);
break;
default:
@ -462,6 +482,7 @@ int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
mmhub_v2_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
mmhub_v2_0_update_medium_grain_light_sleep(adev,
@ -483,6 +504,7 @@ void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);
break;

View file

@ -57,20 +57,16 @@ u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev)
static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int hubid,
uint32_t vmid, uint64_t value)
{
/* two registers distance between mmVML2VC0_VM_CONTEXT0_* to
* mmVML2VC0_VM_CONTEXT1_*
*/
int dist = mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
- mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
WREG32_SOC15_OFFSET(MMHUB, 0,
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
hub->ctx_addr_distance * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
lower_32_bits(value));
WREG32_SOC15_OFFSET(MMHUB, 0,
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
hub->ctx_addr_distance * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
upper_32_bits(value));
}
@ -301,6 +297,7 @@ static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev,
static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
uint32_t tmp;
int i;
@ -335,21 +332,25 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!amdgpu_noretry);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i,
tmp);
hubid * MMHUB_INSTANCE_REGISTER_OFFSET +
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(MMHUB, 0,
mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0);
hubid * MMHUB_INSTANCE_REGISTER_OFFSET +
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(MMHUB, 0,
mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0);
hubid * MMHUB_INSTANCE_REGISTER_OFFSET +
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(MMHUB, 0,
mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET +
i * hub->ctx_addr_distance,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(MMHUB, 0,
mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET +
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
}
@ -357,16 +358,19 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
static void mmhub_v9_4_program_invalidation(struct amdgpu_device *adev,
int hubid)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
unsigned i;
for (i = 0; i < 18; ++i) {
WREG32_SOC15_OFFSET(MMHUB, 0,
mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET +
i * hub->eng_addr_distance,
0xffffffff);
WREG32_SOC15_OFFSET(MMHUB, 0,
mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET +
i * hub->eng_addr_distance,
0x1f);
}
}
@ -395,6 +399,7 @@ int mmhub_v9_4_gart_enable(struct amdgpu_device *adev)
void mmhub_v9_4_gart_disable(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
u32 tmp;
u32 i, j;
@ -404,7 +409,7 @@ void mmhub_v9_4_gart_disable(struct amdgpu_device *adev)
WREG32_SOC15_OFFSET(MMHUB, 0,
mmVML2VC0_VM_CONTEXT0_CNTL,
j * MMHUB_INSTANCE_REGISTER_OFFSET +
i, 0);
i * hub->ctx_distance, 0);
/* Setup TLB control */
tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
@ -534,6 +539,15 @@ void mmhub_v9_4_init(struct amdgpu_device *adev)
SOC15_REG_OFFSET(MMHUB, 0,
mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL) +
i * MMHUB_INSTANCE_REGISTER_OFFSET;
hub[i]->ctx_distance = mmVML2VC0_VM_CONTEXT1_CNTL -
mmVML2VC0_VM_CONTEXT0_CNTL;
hub[i]->ctx_addr_distance = mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
hub[i]->eng_distance = mmVML2VC0_VM_INVALIDATE_ENG1_REQ -
mmVML2VC0_VM_INVALIDATE_ENG0_REQ;
hub[i]->eng_addr_distance = mmVML2VC0_VM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
}
}

View file

@ -0,0 +1,130 @@
/*
* Copyright 2020 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __MMSCH_V3_0_H__
#define __MMSCH_V3_0_H__
#include "amdgpu_vcn.h"
#define MMSCH_VERSION_MAJOR 3
#define MMSCH_VERSION_MINOR 0
#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR)
enum mmsch_v3_0_command_type {
MMSCH_COMMAND__DIRECT_REG_WRITE = 0,
MMSCH_COMMAND__DIRECT_REG_POLLING = 2,
MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE = 3,
MMSCH_COMMAND__INDIRECT_REG_WRITE = 8,
MMSCH_COMMAND__END = 0xf
};
struct mmsch_v3_0_table_info {
uint32_t init_status;
uint32_t table_offset;
uint32_t table_size;
};
struct mmsch_v3_0_init_header {
uint32_t version;
uint32_t total_size;
struct mmsch_v3_0_table_info inst[AMDGPU_MAX_VCN_INSTANCES];
};
struct mmsch_v3_0_cmd_direct_reg_header {
uint32_t reg_offset : 28;
uint32_t command_type : 4;
};
struct mmsch_v3_0_cmd_indirect_reg_header {
uint32_t reg_offset : 20;
uint32_t reg_idx_space : 8;
uint32_t command_type : 4;
};
struct mmsch_v3_0_cmd_direct_write {
struct mmsch_v3_0_cmd_direct_reg_header cmd_header;
uint32_t reg_value;
};
struct mmsch_v3_0_cmd_direct_read_modify_write {
struct mmsch_v3_0_cmd_direct_reg_header cmd_header;
uint32_t write_data;
uint32_t mask_value;
};
struct mmsch_v3_0_cmd_direct_polling {
struct mmsch_v3_0_cmd_direct_reg_header cmd_header;
uint32_t mask_value;
uint32_t wait_value;
};
struct mmsch_v3_0_cmd_end {
struct mmsch_v3_0_cmd_direct_reg_header cmd_header;
};
struct mmsch_v3_0_cmd_indirect_write {
struct mmsch_v3_0_cmd_indirect_reg_header cmd_header;
uint32_t reg_value;
};
#define MMSCH_V3_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
size = sizeof(struct mmsch_v3_0_cmd_direct_read_modify_write); \
size_dw = size / 4; \
direct_rd_mod_wt.cmd_header.reg_offset = reg; \
direct_rd_mod_wt.mask_value = mask; \
direct_rd_mod_wt.write_data = data; \
memcpy((void *)table_loc, &direct_rd_mod_wt, size); \
table_loc += size_dw; \
table_size += size_dw; \
}
#define MMSCH_V3_0_INSERT_DIRECT_WT(reg, value) { \
size = sizeof(struct mmsch_v3_0_cmd_direct_write); \
size_dw = size / 4; \
direct_wt.cmd_header.reg_offset = reg; \
direct_wt.reg_value = value; \
memcpy((void *)table_loc, &direct_wt, size); \
table_loc += size_dw; \
table_size += size_dw; \
}
#define MMSCH_V3_0_INSERT_DIRECT_POLL(reg, mask, wait) { \
size = sizeof(struct mmsch_v3_0_cmd_direct_polling); \
size_dw = size / 4; \
direct_poll.cmd_header.reg_offset = reg; \
direct_poll.mask_value = mask; \
direct_poll.wait_value = wait; \
memcpy((void *)table_loc, &direct_poll, size); \
table_loc += size_dw; \
table_size += size_dw; \
}
#define MMSCH_V3_0_INSERT_END() { \
size = sizeof(struct mmsch_v3_0_cmd_end); \
size_dw = size / 4; \
memcpy((void *)table_loc, &end, size); \
table_loc += size_dw; \
table_size += size_dw; \
}
#endif

View file

@ -270,6 +270,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
if (ih->use_bus_addr) {
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_Sienna_Cichlid);
ih_chicken = REG_SET_FIELD(ih_chicken,
IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);

View file

@ -265,17 +265,21 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev)
amdgpu_atombios_scratch_regs_engine_hung(adev, true);
dev_info(adev->dev, "GPU mode1 reset\n");
/* disable BM */
pci_clear_master(adev->pdev);
pci_save_state(adev->pdev);
ret = psp_gpu_reset(adev);
if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
dev_info(adev->dev, "GPU smu mode1 reset\n");
ret = amdgpu_dpm_mode1_reset(adev);
} else {
dev_info(adev->dev, "GPU psp mode1 reset\n");
ret = psp_gpu_reset(adev);
}
if (ret)
dev_err(adev->dev, "GPU mode1 reset failed\n");
pci_restore_state(adev->pdev);
/* wait for asic to come out of reset */
@ -307,7 +311,15 @@ nv_asic_reset_method(struct amdgpu_device *adev)
{
struct smu_context *smu = &adev->smu;
if (!amdgpu_sriov_vf(adev) && smu_baco_is_support(smu))
if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
amdgpu_reset_method == AMD_RESET_METHOD_BACO)
return amdgpu_reset_method;
if (amdgpu_reset_method != -1)
dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
amdgpu_reset_method);
if (smu_baco_is_support(smu))
return AMD_RESET_METHOD_BACO;
else
return AMD_RESET_METHOD_MODE1;
@ -319,15 +331,16 @@ static int nv_asic_reset(struct amdgpu_device *adev)
struct smu_context *smu = &adev->smu;
if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
dev_info(adev->dev, "GPU BACO reset\n");
ret = smu_baco_enter(smu);
if (ret)
return ret;
ret = smu_baco_exit(smu);
if (ret)
return ret;
} else {
} else
ret = nv_asic_mode1_reset(adev);
}
return ret;
}
@ -411,6 +424,7 @@ legacy_init:
navi12_reg_base_init(adev);
break;
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
sienna_cichlid_reg_base_init(adev);
break;
default:
@ -420,6 +434,11 @@ legacy_init:
return 0;
}
void nv_set_virt_ops(struct amdgpu_device *adev)
{
adev->virt.ops = &xgpu_nv_virt_ops;
}
int nv_set_ip_blocks(struct amdgpu_device *adev)
{
int r;
@ -427,12 +446,6 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
adev->nbio.funcs = &nbio_v2_3_funcs;
adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg;
if (amdgpu_sriov_vf(adev)) {
adev->virt.ops = &xgpu_nv_virt_ops;
/* try send GPU_INIT_DATA request to host */
amdgpu_virt_request_init_data(adev);
}
/* Set IP register base before any HW register access */
r = nv_reg_base_init(adev);
if (r)
@ -504,10 +517,35 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block);
amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
if (!amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
if (adev->enable_mes)
amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
break;
case CHIP_NAVY_FLOUNDER:
amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
is_support_sw_smu(adev))
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
else if (amdgpu_device_has_dc_support(adev))
amdgpu_device_ip_block_add(adev, &dm_ip_block);
#endif
amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block);
amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
is_support_sw_smu(adev))
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
break;
default:
return -EINVAL;
}
@ -708,8 +746,7 @@ static int nv_common_early_init(void *handle)
adev->pg_flags = AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
AMD_PG_SUPPORT_JPEG |
AMD_PG_SUPPORT_ATHUB |
AMD_PG_SUPPORT_MMHUB;
AMD_PG_SUPPORT_ATHUB;
/* guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0,
* as a consequence, the rev_id and external_rev_id are wrong.
* workaround it by hardcoding rev_id to 0 (default value).
@ -732,9 +769,34 @@ static int nv_common_early_init(void *handle)
adev->pg_flags = AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
AMD_PG_SUPPORT_JPEG |
AMD_PG_SUPPORT_ATHUB;
AMD_PG_SUPPORT_ATHUB |
AMD_PG_SUPPORT_MMHUB;
if (amdgpu_sriov_vf(adev)) {
/* hypervisor control CG and PG enablement */
adev->cg_flags = 0;
adev->pg_flags = 0;
}
adev->external_rev_id = adev->rev_id + 0x28;
break;
case CHIP_NAVY_FLOUNDER:
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG |
AMD_CG_SUPPORT_MC_MGCG |
AMD_CG_SUPPORT_MC_LS |
AMD_CG_SUPPORT_HDP_MGCG |
AMD_CG_SUPPORT_HDP_LS |
AMD_CG_SUPPORT_IH_CG;
adev->pg_flags = AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
AMD_PG_SUPPORT_JPEG |
AMD_PG_SUPPORT_ATHUB |
AMD_PG_SUPPORT_MMHUB;
adev->external_rev_id = adev->rev_id + 0x32;
break;
default:
/* FIXME: not supported yet */
return -EINVAL;
@ -961,6 +1023,7 @@ static int nv_common_set_clockgating_state(void *handle,
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
adev->nbio.funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
adev->nbio.funcs->update_medium_grain_light_sleep(adev,

View file

@ -28,6 +28,7 @@
void nv_grbm_select(struct amdgpu_device *adev,
u32 me, u32 pipe, u32 queue, u32 vmid);
void nv_set_virt_ops(struct amdgpu_device *adev);
int nv_set_ip_blocks(struct amdgpu_device *adev);
int navi10_reg_base_init(struct amdgpu_device *adev);
int navi14_reg_base_init(struct amdgpu_device *adev);

View file

@ -57,6 +57,8 @@ MODULE_FIRMWARE("amdgpu/arcturus_asd.bin");
MODULE_FIRMWARE("amdgpu/arcturus_ta.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_sos.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_asd.bin");
MODULE_FIRMWARE("amdgpu/navy_flounder_sos.bin");
MODULE_FIRMWARE("amdgpu/navy_flounder_asd.bin");
/* address block */
#define smnMP1_FIRMWARE_FLAGS 0x3010024
@ -100,6 +102,9 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
case CHIP_SIENNA_CICHLID:
chip_name = "sienna_cichlid";
break;
case CHIP_NAVY_FLOUNDER:
chip_name = "navy_flounder";
break;
default:
BUG();
}
@ -108,7 +113,8 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
if (err)
return err;
if (adev->asic_type != CHIP_SIENNA_CICHLID) {
if (adev->asic_type != CHIP_SIENNA_CICHLID &&
adev->asic_type != CHIP_NAVY_FLOUNDER) {
err = psp_init_asd_microcode(psp, chip_name);
if (err)
return err;
@ -173,6 +179,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
}
break;
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
break;
default:
BUG();
@ -397,7 +404,8 @@ static int psp_v11_0_ring_init(struct psp_context *psp,
struct amdgpu_device *adev = psp->adev;
if ((!amdgpu_sriov_vf(adev)) &&
(adev->asic_type != CHIP_SIENNA_CICHLID))
(adev->asic_type != CHIP_SIENNA_CICHLID) &&
(adev->asic_type != CHIP_NAVY_FLOUNDER))
psp_v11_0_reroute_ih(psp);
ring = &psp->km_ring;

View file

@ -315,30 +315,20 @@ static uint64_t sdma_v5_0_ring_get_rptr(struct amdgpu_ring *ring)
static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u64 *wptr = NULL;
uint64_t local_wptr = 0;
u64 wptr;
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]);
DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr);
*wptr = (*wptr) >> 2;
DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr);
wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
} else {
u32 lowbit, highbit;
wptr = &local_wptr;
lowbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2;
highbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
ring->me, highbit, lowbit);
*wptr = highbit;
*wptr = (*wptr) << 32;
*wptr |= lowbit;
wptr = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI));
wptr = wptr << 32;
wptr |= RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR));
DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", ring->me, wptr);
}
return *wptr;
return wptr >> 2;
}
/**
@ -485,7 +475,6 @@ static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
unsigned flags)
{
struct amdgpu_device *adev = ring->adev;
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
/* write the fence */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) |
@ -508,8 +497,7 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
amdgpu_ring_write(ring, upper_32_bits(seq));
}
/* Interrupt not work fine on GFX10.1 model yet. Use fallback instead */
if ((flags & AMDGPU_FENCE_FLAG_INT) && adev->pdev->device != 0x50) {
if (flags & AMDGPU_FENCE_FLAG_INT) {
/* generate an interrupt */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
@ -887,10 +875,6 @@ static int sdma_v5_0_start(struct amdgpu_device *adev)
r = sdma_v5_0_load_microcode(adev);
if (r)
return r;
/* The value of mmSDMA_F32_CNTL is invalid the moment after loading fw */
if (amdgpu_emu_mode == 1 && adev->pdev->device == 0x4d)
msleep(1000);
}
/* unhalt the MEs */

View file

@ -45,6 +45,7 @@
#include "sdma_v5_2.h"
MODULE_FIRMWARE("amdgpu/sienna_cichlid_sdma.bin");
MODULE_FIRMWARE("amdgpu/navy_flounder_sdma.bin");
#define SDMA1_REG_OFFSET 0x600
#define SDMA3_REG_OFFSET 0x400
@ -85,6 +86,7 @@ static void sdma_v5_2_init_golden_registers(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
break;
default:
break;
@ -152,6 +154,9 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev)
case CHIP_SIENNA_CICHLID:
chip_name = "sienna_cichlid";
break;
case CHIP_NAVY_FLOUNDER:
chip_name = "navy_flounder";
break;
default:
BUG();
}
@ -167,7 +172,8 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev)
goto out;
for (i = 1; i < adev->sdma.num_instances; i++) {
if (adev->asic_type == CHIP_SIENNA_CICHLID) {
if (adev->asic_type == CHIP_SIENNA_CICHLID ||
adev->asic_type == CHIP_NAVY_FLOUNDER) {
memcpy((void*)&adev->sdma.instance[i],
(void*)&adev->sdma.instance[0],
sizeof(struct amdgpu_sdma_instance));
@ -262,30 +268,20 @@ static uint64_t sdma_v5_2_ring_get_rptr(struct amdgpu_ring *ring)
static uint64_t sdma_v5_2_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u64 *wptr = NULL;
uint64_t local_wptr = 0;
u64 wptr;
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]);
DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr);
*wptr = (*wptr) >> 2;
DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr);
wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
} else {
u32 lowbit, highbit;
wptr = &local_wptr;
lowbit = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2;
highbit = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
ring->me, highbit, lowbit);
*wptr = highbit;
*wptr = (*wptr) << 32;
*wptr |= lowbit;
wptr = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI));
wptr = wptr << 32;
wptr |= RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR));
DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", ring->me, wptr);
}
return *wptr;
return wptr >> 2;
}
/**
@ -417,7 +413,6 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
unsigned flags)
{
struct amdgpu_device *adev = ring->adev;
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
/* write the fence */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) |
@ -440,8 +435,7 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
amdgpu_ring_write(ring, upper_32_bits(seq));
}
/* Interrupt not work fine on GFX10.1 model yet. Use fallback instead */
if ((flags & AMDGPU_FENCE_FLAG_INT) && adev->pdev->device != 0x50) {
if (flags & AMDGPU_FENCE_FLAG_INT) {
/* generate an interrupt */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
@ -1167,7 +1161,16 @@ static int sdma_v5_2_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->sdma.num_instances = 4;
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
adev->sdma.num_instances = 4;
break;
case CHIP_NAVY_FLOUNDER:
adev->sdma.num_instances = 2;
break;
default:
break;
}
sdma_v5_2_set_ring_funcs(adev);
sdma_v5_2_set_buffer_funcs(adev);
@ -1177,6 +1180,40 @@ static int sdma_v5_2_early_init(void *handle)
return 0;
}
static unsigned sdma_v5_2_seq_to_irq_id(int seq_num)
{
switch (seq_num) {
case 0:
return SOC15_IH_CLIENTID_SDMA0;
case 1:
return SOC15_IH_CLIENTID_SDMA1;
case 2:
return SOC15_IH_CLIENTID_SDMA2;
case 3:
return SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid;
default:
break;
}
return -EINVAL;
}
static unsigned sdma_v5_2_seq_to_trap_id(int seq_num)
{
switch (seq_num) {
case 0:
return SDMA0_5_0__SRCID__SDMA_TRAP;
case 1:
return SDMA1_5_0__SRCID__SDMA_TRAP;
case 2:
return SDMA2_5_0__SRCID__SDMA_TRAP;
case 3:
return SDMA3_5_0__SRCID__SDMA_TRAP;
default:
break;
}
return -EINVAL;
}
static int sdma_v5_2_sw_init(void *handle)
{
struct amdgpu_ring *ring;
@ -1184,32 +1221,13 @@ static int sdma_v5_2_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0,
SDMA0_5_0__SRCID__SDMA_TRAP,
&adev->sdma.trap_irq);
if (r)
return r;
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1,
SDMA1_5_0__SRCID__SDMA_TRAP,
&adev->sdma.trap_irq);
if (r)
return r;
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA2,
SDMA2_5_0__SRCID__SDMA_TRAP,
&adev->sdma.trap_irq);
if (r)
return r;
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid,
SDMA3_5_0__SRCID__SDMA_TRAP,
&adev->sdma.trap_irq);
if (r)
return r;
for (i = 0; i < adev->sdma.num_instances; i++) {
r = amdgpu_irq_add_id(adev, sdma_v5_2_seq_to_irq_id(i),
sdma_v5_2_seq_to_trap_id(i),
&adev->sdma.trap_irq);
if (r)
return r;
}
r = sdma_v5_2_init_microcode(adev);
if (r) {
@ -1545,6 +1563,7 @@ static int sdma_v5_2_set_clockgating_state(void *handle,
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
sdma_v5_2_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
sdma_v5_2_update_medium_grain_light_sleep(adev,
@ -1572,7 +1591,7 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u32 *flags)
*flags = 0;
/* AMD_CG_SUPPORT_SDMA_LS */
data = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL));
data = RREG32_KIQ(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL));
if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
*flags |= AMD_CG_SUPPORT_SDMA_LS;
}

View file

@ -1229,6 +1229,11 @@ static bool si_asic_supports_baco(struct amdgpu_device *adev)
static enum amd_reset_method
si_asic_reset_method(struct amdgpu_device *adev)
{
if (amdgpu_reset_method != AMD_RESET_METHOD_LEGACY &&
amdgpu_reset_method != -1)
dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
amdgpu_reset_method);
return AMD_RESET_METHOD_LEGACY;
}
@ -1267,12 +1272,6 @@ static u32 si_get_xclk(struct amdgpu_device *adev)
return reference_clock;
}
//xxx:not implemented
static int si_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
{
return 0;
}
static void si_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
if (!ring || !ring->funcs->emit_wreg) {
@ -1428,6 +1427,358 @@ static uint64_t si_get_pcie_replay_count(struct amdgpu_device *adev)
return (nak_r + nak_g);
}
static int si_uvd_send_upll_ctlreq(struct amdgpu_device *adev,
unsigned cg_upll_func_cntl)
{
unsigned i;
/* Make sure UPLL_CTLREQ is deasserted */
WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);
mdelay(10);
/* Assert UPLL_CTLREQ */
WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
/* Wait for CTLACK and CTLACK2 to get asserted */
for (i = 0; i < SI_MAX_CTLACKS_ASSERTION_WAIT; ++i) {
uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
if ((RREG32(cg_upll_func_cntl) & mask) == mask)
break;
mdelay(10);
}
/* Deassert UPLL_CTLREQ */
WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);
if (i == SI_MAX_CTLACKS_ASSERTION_WAIT) {
DRM_ERROR("Timeout setting UVD clocks!\n");
return -ETIMEDOUT;
}
return 0;
}
static unsigned si_uvd_calc_upll_post_div(unsigned vco_freq,
unsigned target_freq,
unsigned pd_min,
unsigned pd_even)
{
unsigned post_div = vco_freq / target_freq;
/* Adjust to post divider minimum value */
if (post_div < pd_min)
post_div = pd_min;
/* We alway need a frequency less than or equal the target */
if ((vco_freq / post_div) > target_freq)
post_div += 1;
/* Post dividers above a certain value must be even */
if (post_div > pd_even && post_div % 2)
post_div += 1;
return post_div;
}
/**
* si_calc_upll_dividers - calc UPLL clock dividers
*
* @adev: amdgpu_device pointer
* @vclk: wanted VCLK
* @dclk: wanted DCLK
* @vco_min: minimum VCO frequency
* @vco_max: maximum VCO frequency
* @fb_factor: factor to multiply vco freq with
* @fb_mask: limit and bitmask for feedback divider
* @pd_min: post divider minimum
* @pd_max: post divider maximum
* @pd_even: post divider must be even above this value
* @optimal_fb_div: resulting feedback divider
* @optimal_vclk_div: resulting vclk post divider
* @optimal_dclk_div: resulting dclk post divider
*
* Calculate dividers for UVDs UPLL (except APUs).
* Returns zero on success; -EINVAL on error.
*/
static int si_calc_upll_dividers(struct amdgpu_device *adev,
unsigned vclk, unsigned dclk,
unsigned vco_min, unsigned vco_max,
unsigned fb_factor, unsigned fb_mask,
unsigned pd_min, unsigned pd_max,
unsigned pd_even,
unsigned *optimal_fb_div,
unsigned *optimal_vclk_div,
unsigned *optimal_dclk_div)
{
unsigned vco_freq, ref_freq = adev->clock.spll.reference_freq;
/* Start off with something large */
unsigned optimal_score = ~0;
/* Loop through vco from low to high */
vco_min = max(max(vco_min, vclk), dclk);
for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {
uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
unsigned vclk_div, dclk_div, score;
do_div(fb_div, ref_freq);
/* fb div out of range ? */
if (fb_div > fb_mask)
break; /* It can oly get worse */
fb_div &= fb_mask;
/* Calc vclk divider with current vco freq */
vclk_div = si_uvd_calc_upll_post_div(vco_freq, vclk,
pd_min, pd_even);
if (vclk_div > pd_max)
break; /* vco is too big, it has to stop */
/* Calc dclk divider with current vco freq */
dclk_div = si_uvd_calc_upll_post_div(vco_freq, dclk,
pd_min, pd_even);
if (dclk_div > pd_max)
break; /* vco is too big, it has to stop */
/* Calc score with current vco freq */
score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);
/* Determine if this vco setting is better than current optimal settings */
if (score < optimal_score) {
*optimal_fb_div = fb_div;
*optimal_vclk_div = vclk_div;
*optimal_dclk_div = dclk_div;
optimal_score = score;
if (optimal_score == 0)
break; /* It can't get better than this */
}
}
/* Did we found a valid setup ? */
if (optimal_score == ~0)
return -EINVAL;
return 0;
}
static int si_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
{
unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
int r;
/* Bypass vclk and dclk with bclk */
WREG32_P(CG_UPLL_FUNC_CNTL_2,
VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
/* Put PLL in bypass mode */
WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
if (!vclk || !dclk) {
/* Keep the Bypass mode */
return 0;
}
r = si_calc_upll_dividers(adev, vclk, dclk, 125000, 250000,
16384, 0x03FFFFFF, 0, 128, 5,
&fb_div, &vclk_div, &dclk_div);
if (r)
return r;
/* Set RESET_ANTI_MUX to 0 */
WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
/* Set VCO_MODE to 1 */
WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
/* Disable sleep mode */
WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
/* Deassert UPLL_RESET */
WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
mdelay(1);
r = si_uvd_send_upll_ctlreq(adev, CG_UPLL_FUNC_CNTL);
if (r)
return r;
/* Assert UPLL_RESET again */
WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
/* Disable spread spectrum. */
WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
/* Set feedback divider */
WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
/* Set ref divider to 0 */
WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
if (fb_div < 307200)
WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
else
WREG32_P(CG_UPLL_FUNC_CNTL_4,
UPLL_SPARE_ISPARE9,
~UPLL_SPARE_ISPARE9);
/* Set PDIV_A and PDIV_B */
WREG32_P(CG_UPLL_FUNC_CNTL_2,
UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
/* Give the PLL some time to settle */
mdelay(15);
/* Deassert PLL_RESET */
WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
mdelay(15);
/* Switch from bypass mode to normal mode */
WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
r = si_uvd_send_upll_ctlreq(adev, CG_UPLL_FUNC_CNTL);
if (r)
return r;
/* Switch VCLK and DCLK selection */
WREG32_P(CG_UPLL_FUNC_CNTL_2,
VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
mdelay(100);
return 0;
}
static int si_vce_send_vcepll_ctlreq(struct amdgpu_device *adev)
{
unsigned i;
/* Make sure VCEPLL_CTLREQ is deasserted */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
mdelay(10);
/* Assert UPLL_CTLREQ */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
/* Wait for CTLACK and CTLACK2 to get asserted */
for (i = 0; i < SI_MAX_CTLACKS_ASSERTION_WAIT; ++i) {
uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
break;
mdelay(10);
}
/* Deassert UPLL_CTLREQ */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
if (i == SI_MAX_CTLACKS_ASSERTION_WAIT) {
DRM_ERROR("Timeout setting UVD clocks!\n");
return -ETIMEDOUT;
}
return 0;
}
static int si_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
{
unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
int r;
/* Bypass evclk and ecclk with bclk */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
/* Put PLL in bypass mode */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
~VCEPLL_BYPASS_EN_MASK);
if (!evclk || !ecclk) {
/* Keep the Bypass mode, put PLL to sleep */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
~VCEPLL_SLEEP_MASK);
return 0;
}
r = si_calc_upll_dividers(adev, evclk, ecclk, 125000, 250000,
16384, 0x03FFFFFF, 0, 128, 5,
&fb_div, &evclk_div, &ecclk_div);
if (r)
return r;
/* Set RESET_ANTI_MUX to 0 */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
/* Set VCO_MODE to 1 */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
~VCEPLL_VCO_MODE_MASK);
/* Toggle VCEPLL_SLEEP to 1 then back to 0 */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
~VCEPLL_SLEEP_MASK);
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
/* Deassert VCEPLL_RESET */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
mdelay(1);
r = si_vce_send_vcepll_ctlreq(adev);
if (r)
return r;
/* Assert VCEPLL_RESET again */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
/* Disable spread spectrum. */
WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
/* Set feedback divider */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3,
VCEPLL_FB_DIV(fb_div),
~VCEPLL_FB_DIV_MASK);
/* Set ref divider to 0 */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
/* Set PDIV_A and PDIV_B */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
/* Give the PLL some time to settle */
mdelay(15);
/* Deassert PLL_RESET */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
mdelay(15);
/* Switch from bypass mode to normal mode */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
r = si_vce_send_vcepll_ctlreq(adev);
if (r)
return r;
/* Switch VCLK and DCLK selection */
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
mdelay(100);
return 0;
}
static const struct amdgpu_asic_funcs si_asic_funcs =
{
.read_disabled_bios = &si_read_disabled_bios,
@ -1438,7 +1789,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
.set_vga_state = &si_vga_set_state,
.get_xclk = &si_get_xclk,
.set_uvd_clocks = &si_set_uvd_clocks,
.set_vce_clocks = NULL,
.set_vce_clocks = &si_set_vce_clocks,
.get_pcie_lanes = &si_get_pcie_lanes,
.set_pcie_lanes = &si_set_pcie_lanes,
.get_config_memsize = &si_get_config_memsize,

View file

@ -6953,6 +6953,24 @@ static int si_power_control_set_level(struct amdgpu_device *adev)
return 0;
}
static void si_set_vce_clock(struct amdgpu_device *adev,
struct amdgpu_ps *new_rps,
struct amdgpu_ps *old_rps)
{
if ((old_rps->evclk != new_rps->evclk) ||
(old_rps->ecclk != new_rps->ecclk)) {
/* Turn the clocks on when encoding, off otherwise */
if (new_rps->evclk || new_rps->ecclk) {
/* Place holder for future VCE1.0 porting to amdgpu
vce_v1_0_enable_mgcg(adev, false, false);*/
} else {
/* Place holder for future VCE1.0 porting to amdgpu
vce_v1_0_enable_mgcg(adev, true, false);
amdgpu_asic_set_vce_clocks(adev, new_rps->evclk, new_rps->ecclk);*/
}
}
}
static int si_dpm_set_power_state(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@ -7029,6 +7047,7 @@ static int si_dpm_set_power_state(void *handle)
return ret;
}
ni_set_uvd_clock_after_set_eng_clock(adev, new_ps, old_ps);
si_set_vce_clock(adev, new_ps, old_ps);
if (eg_pi->pcie_performance_request)
si_notify_link_speed_change_after_state_change(adev, new_ps, old_ps);
ret = si_set_power_state_conditionally_enable_ulv(adev, new_ps);

View file

@ -121,7 +121,6 @@
#define CURSOR_UPDATE_LOCK (1 << 16)
#define CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24)
#define AMDGPU_NUM_OF_VMIDS 8
#define SI_CRTC0_REGISTER_OFFSET 0
#define SI_CRTC1_REGISTER_OFFSET 0x300
#define SI_CRTC2_REGISTER_OFFSET 0x2600

View file

@ -47,8 +47,7 @@
#define SI_MAX_LDS_NUM 0xFFFF
#define SI_MAX_TCC 16
#define SI_MAX_TCC_MASK 0xFFFF
#define AMDGPU_NUM_OF_VMIDS 8
#define SI_MAX_CTLACKS_ASSERTION_WAIT 100
/* SMC IND accessor regs */
#define SMC_IND_INDEX_0 0x80
@ -2460,4 +2459,36 @@
#define MC_VM_FB_OFFSET 0x81a
/* Discrete VCE clocks */
#define CG_VCEPLL_FUNC_CNTL 0xc0030600
#define VCEPLL_RESET_MASK 0x00000001
#define VCEPLL_SLEEP_MASK 0x00000002
#define VCEPLL_BYPASS_EN_MASK 0x00000004
#define VCEPLL_CTLREQ_MASK 0x00000008
#define VCEPLL_VCO_MODE_MASK 0x00000600
#define VCEPLL_REF_DIV_MASK 0x003F0000
#define VCEPLL_CTLACK_MASK 0x40000000
#define VCEPLL_CTLACK2_MASK 0x80000000
#define CG_VCEPLL_FUNC_CNTL_2 0xc0030601
#define VCEPLL_PDIV_A(x) ((x) << 0)
#define VCEPLL_PDIV_A_MASK 0x0000007F
#define VCEPLL_PDIV_B(x) ((x) << 8)
#define VCEPLL_PDIV_B_MASK 0x00007F00
#define EVCLK_SRC_SEL(x) ((x) << 20)
#define EVCLK_SRC_SEL_MASK 0x01F00000
#define ECCLK_SRC_SEL(x) ((x) << 25)
#define ECCLK_SRC_SEL_MASK 0x3E000000
#define CG_VCEPLL_FUNC_CNTL_3 0xc0030602
#define VCEPLL_FB_DIV(x) ((x) << 0)
#define VCEPLL_FB_DIV_MASK 0x01FFFFFF
#define CG_VCEPLL_FUNC_CNTL_4 0xc0030603
#define CG_VCEPLL_FUNC_CNTL_5 0xc0030604
#define CG_VCEPLL_SPREAD_SPECTRUM 0xc0030606
#define VCEPLL_SSEN_MASK 0x00000001
#endif

View file

@ -532,6 +532,15 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
bool baco_reset = false;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
amdgpu_reset_method == AMD_RESET_METHOD_BACO)
return amdgpu_reset_method;
if (amdgpu_reset_method != -1)
dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
amdgpu_reset_method);
switch (adev->asic_type) {
case CHIP_RAVEN:
case CHIP_RENOIR:
@ -669,7 +678,7 @@ static uint32_t soc15_get_rev_id(struct amdgpu_device *adev)
return adev->nbio.funcs->get_rev_id(adev);
}
int soc15_set_ip_blocks(struct amdgpu_device *adev)
static void soc15_reg_base_init(struct amdgpu_device *adev)
{
int r;
@ -681,6 +690,8 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
vega10_reg_base_init(adev);
break;
case CHIP_RENOIR:
/* It's safe to do ip discovery here for Renior,
* it doesn't support SRIOV. */
if (amdgpu_discovery) {
r = amdgpu_discovery_reg_base_init(adev);
if (r) {
@ -697,8 +708,26 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
arct_reg_base_init(adev);
break;
default:
return -EINVAL;
DRM_ERROR("Unsupported asic type: %d!\n", adev->asic_type);
break;
}
}
void soc15_set_virt_ops(struct amdgpu_device *adev)
{
adev->virt.ops = &xgpu_ai_virt_ops;
/* init soc15 reg base early enough so we can
* request request full access for sriov before
* set_ip_blocks. */
soc15_reg_base_init(adev);
}
int soc15_set_ip_blocks(struct amdgpu_device *adev)
{
/* for bare metal case */
if (!amdgpu_sriov_vf(adev))
soc15_reg_base_init(adev);
if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS)
adev->gmc.xgmi.supported = true;
@ -722,9 +751,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
adev->rev_id = soc15_get_rev_id(adev);
if (amdgpu_sriov_vf(adev))
adev->virt.ops = &xgpu_ai_virt_ops;
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:

View file

@ -90,6 +90,7 @@ struct soc15_ras_field_entry {
void soc15_grbm_select(struct amdgpu_device *adev,
u32 me, u32 pipe, u32 queue, u32 vmid);
void soc15_set_virt_ops(struct amdgpu_device *adev);
int soc15_set_ip_blocks(struct amdgpu_device *adev);
void soc15_program_register_sequence(struct amdgpu_device *adev,

View file

@ -1375,7 +1375,7 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* wait for reg writes */
data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
data1 = lower_32_bits(pd_addr);
mask = 0xffffffff;
uvd_v7_0_ring_emit_reg_wait(ring, data0, data1, mask);
@ -1417,7 +1417,8 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* wait for reg writes */
uvd_v7_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
uvd_v7_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
vmid * hub->ctx_addr_distance,
lower_32_bits(pd_addr), 0xffffffff);
}

View file

@ -991,7 +991,8 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* wait for reg writes */
vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
vmid * hub->ctx_addr_distance,
lower_32_bits(pd_addr), 0xffffffff);
}

View file

@ -1539,7 +1539,7 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* wait for register write */
data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
data1 = lower_32_bits(pd_addr);
mask = 0xffffffff;
vcn_v1_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);
@ -1679,7 +1679,8 @@ static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* wait for reg writes */
vcn_v1_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
vcn_v1_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
vmid * hub->ctx_addr_distance,
lower_32_bits(pd_addr), 0xffffffff);
}

View file

@ -1505,7 +1505,7 @@ void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* wait for register write */
data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
data1 = lower_32_bits(pd_addr);
mask = 0xffffffff;
vcn_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);
@ -1660,7 +1660,8 @@ void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* wait for reg writes */
vcn_v2_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
vcn_v2_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
vmid * hub->ctx_addr_distance,
lower_32_bits(pd_addr), 0xffffffff);
}

View file

@ -28,6 +28,7 @@
#include "soc15.h"
#include "soc15d.h"
#include "vcn_v2_0.h"
#include "mmsch_v3_0.h"
#include "vcn/vcn_3_0_0_offset.h"
#include "vcn/vcn_3_0_0_sh_mask.h"
@ -48,6 +49,17 @@
#define VCN_INSTANCES_SIENNA_CICHLID 2
static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN,
SOC15_IH_CLIENTID_VCN1
};
static int amdgpu_ucode_id_vcns[] = {
AMDGPU_UCODE_ID_VCN,
AMDGPU_UCODE_ID_VCN1
};
static int vcn_v3_0_start_sriov(struct amdgpu_device *adev);
static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev);
@ -56,10 +68,8 @@ static int vcn_v3_0_set_powergating_state(void *handle,
static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
int inst_idx, struct dpg_pause_state *new_state);
static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN,
SOC15_IH_CLIENTID_VCN1
};
static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring);
static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring);
/**
* vcn_v3_0_early_init - set function pointers
@ -71,25 +81,33 @@ static int amdgpu_ih_clientid_vcns[] = {
static int vcn_v3_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->asic_type == CHIP_SIENNA_CICHLID) {
u32 harvest;
int i;
if (amdgpu_sriov_vf(adev)) {
adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING);
if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
adev->vcn.harvest_config |= 1 << i;
}
adev->vcn.harvest_config = 0;
adev->vcn.num_enc_rings = 1;
if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
AMDGPU_VCN_HARVEST_VCN1))
/* both instances are harvested, disable the block */
return -ENOENT;
} else
adev->vcn.num_vcn_inst = 1;
} else {
if (adev->asic_type == CHIP_SIENNA_CICHLID) {
u32 harvest;
int i;
adev->vcn.num_enc_rings = 2;
adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING);
if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
adev->vcn.harvest_config |= 1 << i;
}
if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
AMDGPU_VCN_HARVEST_VCN1))
/* both instances are harvested, disable the block */
return -ENOENT;
} else
adev->vcn.num_vcn_inst = 1;
adev->vcn.num_enc_rings = 2;
}
vcn_v3_0_set_dec_ring_funcs(adev);
vcn_v3_0_set_enc_ring_funcs(adev);
@ -109,6 +127,7 @@ static int vcn_v3_0_sw_init(void *handle)
{
struct amdgpu_ring *ring;
int i, j, r;
int vcn_doorbell_index = 0;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
r = amdgpu_vcn_sw_init(adev);
@ -136,6 +155,12 @@ static int vcn_v3_0_sw_init(void *handle)
if (r)
return r;
if (amdgpu_sriov_vf(adev)) {
vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1;
/* get DWORD offset */
vcn_doorbell_index = vcn_doorbell_index << 1;
}
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
if (adev->vcn.harvest_config & (1 << i))
continue;
@ -166,7 +191,13 @@ static int vcn_v3_0_sw_init(void *handle)
ring = &adev->vcn.inst[i].ring_dec;
ring->use_doorbell = true;
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;
if (amdgpu_sriov_vf(adev)) {
ring->doorbell_index = vcn_doorbell_index;
/* NOTE: increment so next VCN engine use next DOORBELL DWORD */
vcn_doorbell_index++;
} else {
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;
}
if (i != 0)
ring->no_scheduler = true;
sprintf(ring->name, "vcn_dec_%d", i);
@ -184,7 +215,13 @@ static int vcn_v3_0_sw_init(void *handle)
ring = &adev->vcn.inst[i].ring_enc[j];
ring->use_doorbell = true;
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
if (amdgpu_sriov_vf(adev)) {
ring->doorbell_index = vcn_doorbell_index;
/* NOTE: increment so next VCN engine use next DOORBELL DWORD */
vcn_doorbell_index++;
} else {
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
}
if (i != 1)
ring->no_scheduler = true;
sprintf(ring->name, "vcn_enc_%d.%d", i, j);
@ -195,6 +232,11 @@ static int vcn_v3_0_sw_init(void *handle)
}
}
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
}
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode;
@ -213,6 +255,9 @@ static int vcn_v3_0_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
if (amdgpu_sriov_vf(adev))
amdgpu_virt_free_mm_table(adev);
r = amdgpu_vcn_suspend(adev);
if (r)
return r;
@ -235,24 +280,50 @@ static int vcn_v3_0_hw_init(void *handle)
struct amdgpu_ring *ring;
int i, j, r;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
ring = &adev->vcn.inst[i].ring_dec;
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
ring->doorbell_index, i);
r = amdgpu_ring_test_helper(ring);
if (amdgpu_sriov_vf(adev)) {
r = vcn_v3_0_start_sriov(adev);
if (r)
goto done;
for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
ring = &adev->vcn.inst[i].ring_enc[j];
/* initialize VCN dec and enc ring buffers */
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
ring = &adev->vcn.inst[i].ring_dec;
ring->wptr = 0;
ring->wptr_old = 0;
vcn_v3_0_dec_ring_set_wptr(ring);
ring->sched.ready = true;
for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
ring = &adev->vcn.inst[i].ring_enc[j];
ring->wptr = 0;
ring->wptr_old = 0;
vcn_v3_0_enc_ring_set_wptr(ring);
ring->sched.ready = true;
}
}
} else {
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
ring = &adev->vcn.inst[i].ring_dec;
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
ring->doorbell_index, i);
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
ring = &adev->vcn.inst[i].ring_enc[j];
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
}
}
@ -283,11 +354,13 @@ static int vcn_v3_0_hw_fini(void *handle)
ring = &adev->vcn.inst[i].ring_dec;
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
(adev->vcn.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(VCN, i, mmUVD_STATUS)))
vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
if (!amdgpu_sriov_vf(adev)) {
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
(adev->vcn.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(VCN, i, mmUVD_STATUS))) {
vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
}
}
ring->sched.ready = false;
for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
@ -1137,6 +1210,221 @@ static int vcn_v3_0_start(struct amdgpu_device *adev)
return 0;
}
static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
{
int i, j;
struct amdgpu_ring *ring;
uint64_t cache_addr;
uint64_t rb_addr;
uint64_t ctx_addr;
uint32_t param, resp, expected;
uint32_t offset, cache_size;
uint32_t tmp, timeout;
uint32_t id;
struct amdgpu_mm_table *table = &adev->virt.mm_table;
uint32_t *table_loc;
uint32_t table_size;
uint32_t size, size_dw;
struct mmsch_v3_0_cmd_direct_write
direct_wt = { {0} };
struct mmsch_v3_0_cmd_direct_read_modify_write
direct_rd_mod_wt = { {0} };
struct mmsch_v3_0_cmd_direct_polling
direct_poll = { {0} };
struct mmsch_v3_0_cmd_end end = { {0} };
struct mmsch_v3_0_init_header header;
direct_wt.cmd_header.command_type =
MMSCH_COMMAND__DIRECT_REG_WRITE;
direct_rd_mod_wt.cmd_header.command_type =
MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
direct_poll.cmd_header.command_type =
MMSCH_COMMAND__DIRECT_REG_POLLING;
end.cmd_header.command_type =
MMSCH_COMMAND__END;
header.version = MMSCH_VERSION;
header.total_size = sizeof(struct mmsch_v3_0_init_header) >> 2;
for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) {
header.inst[i].init_status = 0;
header.inst[i].table_offset = 0;
header.inst[i].table_size = 0;
}
table_loc = (uint32_t *)table->cpu_addr;
table_loc += header.total_size;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
if (adev->vcn.harvest_config & (1 << i))
continue;
table_size = 0;
MMSCH_V3_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_STATUS),
~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
id = amdgpu_ucode_id_vcns[i];
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
adev->firmware.ucode[id].tmr_mc_addr_lo);
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
adev->firmware.ucode[id].tmr_mc_addr_hi);
offset = 0;
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_VCPU_CACHE_OFFSET0),
0);
} else {
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[i].gpu_addr));
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst[i].gpu_addr));
offset = cache_size;
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_VCPU_CACHE_OFFSET0),
AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
}
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_VCPU_CACHE_SIZE0),
cache_size);
cache_addr = adev->vcn.inst[i].gpu_addr + offset;
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
lower_32_bits(cache_addr));
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
upper_32_bits(cache_addr));
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_VCPU_CACHE_OFFSET1),
0);
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_VCPU_CACHE_SIZE1),
AMDGPU_VCN_STACK_SIZE);
cache_addr = adev->vcn.inst[i].gpu_addr + offset +
AMDGPU_VCN_STACK_SIZE;
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
lower_32_bits(cache_addr));
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
upper_32_bits(cache_addr));
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_VCPU_CACHE_OFFSET2),
0);
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_VCPU_CACHE_SIZE2),
AMDGPU_VCN_CONTEXT_SIZE);
for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
ring = &adev->vcn.inst[i].ring_enc[j];
ring->wptr = 0;
rb_addr = ring->gpu_addr;
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_RB_BASE_LO),
lower_32_bits(rb_addr));
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_RB_BASE_HI),
upper_32_bits(rb_addr));
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_RB_SIZE),
ring->ring_size / 4);
}
ring = &adev->vcn.inst[i].ring_dec;
ring->wptr = 0;
rb_addr = ring->gpu_addr;
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
lower_32_bits(rb_addr));
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
upper_32_bits(rb_addr));
/* force RBC into idle state */
tmp = order_base_2(ring->ring_size);
tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, tmp);
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
mmUVD_RBC_RB_CNTL),
tmp);
/* add end packet */
MMSCH_V3_0_INSERT_END();
/* refine header */
header.inst[i].init_status = 1;
header.inst[i].table_offset = header.total_size;
header.inst[i].table_size = table_size;
header.total_size += table_size;
}
/* Update init table header in memory */
size = sizeof(struct mmsch_v3_0_init_header);
table_loc = (uint32_t *)table->cpu_addr;
memcpy((void *)table_loc, &header, size);
/* message MMSCH (in VCN[0]) to initialize this client
* 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
* of memory descriptor location
*/
ctx_addr = table->gpu_addr;
WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
/* 2, update vmid of descriptor */
tmp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID);
tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
/* use domain0 for MM scheduler */
tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
WREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID, tmp);
/* 3, notify mmsch about the size of this descriptor */
size = header.total_size;
WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_SIZE, size);
/* 4, set resp to zero */
WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
/* 5, kick off the initialization and wait until
* MMSCH_VF_MAILBOX_RESP becomes non-zero
*/
param = 0x10000001;
WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_HOST, param);
tmp = 0;
timeout = 1000;
resp = 0;
expected = param + 1;
while (resp != expected) {
resp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
if (resp == expected)
break;
udelay(10);
tmp = tmp + 10;
if (tmp >= timeout) {
DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
" waiting for mmMMSCH_VF_MAILBOX_RESP "\
"(expected=0x%08x, readback=0x%08x)\n",
tmp, expected, resp);
return -EBUSY;
}
}
return 0;
}
static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
{
uint32_t tmp;
@ -1575,6 +1863,15 @@ static int vcn_v3_0_set_powergating_state(void *handle,
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
/* for SRIOV, guest should not control VCN Power-gating
* MMSCH FW should control Power-gating and clock-gating
* guest should avoid touching CGC and PG
*/
if (amdgpu_sriov_vf(adev)) {
adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
return 0;
}
if(state == adev->vcn.cur_state)
return 0;

View file

@ -710,6 +710,14 @@ vi_asic_reset_method(struct amdgpu_device *adev)
{
bool baco_reset;
if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
amdgpu_reset_method == AMD_RESET_METHOD_BACO)
return amdgpu_reset_method;
if (amdgpu_reset_method != -1)
dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
amdgpu_reset_method);
switch (adev->asic_type) {
case CHIP_FIJI:
case CHIP_TONGA:
@ -1705,11 +1713,13 @@ static const struct amdgpu_ip_block_version vi_common_ip_block =
.funcs = &vi_common_ip_funcs,
};
void vi_set_virt_ops(struct amdgpu_device *adev)
{
adev->virt.ops = &xgpu_vi_virt_ops;
}
int vi_set_ip_blocks(struct amdgpu_device *adev)
{
if (amdgpu_sriov_vf(adev))
adev->virt.ops = &xgpu_vi_virt_ops;
switch (adev->asic_type) {
case CHIP_TOPAZ:
/* topaz has no DCE, UVD, VCE */

View file

@ -28,6 +28,7 @@
void vi_srbm_select(struct amdgpu_device *adev,
u32 me, u32 pipe, u32 queue, u32 vmid);
void vi_set_virt_ops(struct amdgpu_device *adev);
int vi_set_ip_blocks(struct amdgpu_device *adev);
void legacy_doorbell_index_init(struct amdgpu_device *adev);

View file

@ -67,8 +67,6 @@
#define HPD4_REGISTER_OFFSET (0x18b8 - 0x1898)
#define HPD5_REGISTER_OFFSET (0x18c0 - 0x1898)
#define AMDGPU_NUM_OF_VMIDS 8
#define PIPEID(x) ((x) << 0)
#define MEID(x) ((x) << 2)
#define VMID(x) ((x) << 4)

View file

@ -53,6 +53,7 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_int_process_v9.o \
$(AMDKFD_PATH)/kfd_dbgdev.o \
$(AMDKFD_PATH)/kfd_dbgmgr.o \
$(AMDKFD_PATH)/kfd_smi_events.o \
$(AMDKFD_PATH)/kfd_crat.o
ifneq ($(CONFIG_AMD_IOMMU_V2),)

View file

@ -24,6 +24,7 @@
#include "kfd_events.h"
#include "cik_int.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
static bool cik_event_interrupt_isr(struct kfd_dev *dev,
const uint32_t *ih_ring_entry,
@ -107,6 +108,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) {
struct kfd_vm_fault_info info;
kfd_smi_event_update_vmfault(dev, pasid);
kfd_process_vm_fault(dev->dqm, pasid);
memset(&info, 0, sizeof(info));

View file

@ -39,6 +39,7 @@
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
@ -1740,6 +1741,20 @@ err_unlock:
return r;
}
/* Handle requests for watching SMI events */
static int kfd_ioctl_smi_events(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_smi_events_args *args = data;
struct kfd_dev *dev;
dev = kfd_device_by_id(args->gpuid);
if (!dev)
return -EINVAL;
return kfd_smi_event_open(dev, &args->anon_fd);
}
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
.cmd_drv = 0, .name = #ioctl}
@ -1835,6 +1850,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
kfd_ioctl_alloc_queue_gws, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
kfd_ioctl_smi_events, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)

View file

@ -679,6 +679,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
case CHIP_NAVI12:
case CHIP_NAVI14:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
pcache_info = navi10_cache_info;
num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
break;

View file

@ -74,6 +74,7 @@ static const struct kfd2kgd_calls *kfd2kgd_funcs[] = {
[CHIP_NAVI12] = &gfx_v10_kfd2kgd,
[CHIP_NAVI14] = &gfx_v10_kfd2kgd,
[CHIP_SIENNA_CICHLID] = &gfx_v10_3_kfd2kgd,
[CHIP_NAVY_FLOUNDER] = &gfx_v10_3_kfd2kgd,
};
#ifdef KFD_SUPPORT_IOMMU_V2
@ -478,6 +479,24 @@ static const struct kfd_device_info sienna_cichlid_device_info = {
.num_sdma_queues_per_engine = 8,
};
static const struct kfd_device_info navy_flounder_device_info = {
.asic_family = CHIP_NAVY_FLOUNDER,
.asic_name = "navy_flounder",
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.doorbell_size = 8,
.ih_ring_entry_size = 8 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_v9,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.needs_iommu_device = false,
.supports_cwsr = true,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8,
};
/* For each entry, [0] is regular and [1] is virtualisation device. */
static const struct kfd_device_info *kfd_supported_devices[][2] = {
#ifdef KFD_SUPPORT_IOMMU_V2
@ -501,6 +520,7 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = {
[CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info},
[CHIP_NAVI14] = {&navi14_device_info, NULL},
[CHIP_SIENNA_CICHLID] = {&sienna_cichlid_device_info, &sienna_cichlid_device_info},
[CHIP_NAVY_FLOUNDER] = {&navy_flounder_device_info, &navy_flounder_device_info},
};
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
@ -602,15 +622,24 @@ static int kfd_gws_init(struct kfd_dev *kfd)
return 0;
if (hws_gws_support
|| (kfd->device_info->asic_family >= CHIP_VEGA10
|| (kfd->device_info->asic_family == CHIP_VEGA10
&& kfd->mec2_fw_version >= 0x81b3)
|| (kfd->device_info->asic_family >= CHIP_VEGA12
&& kfd->device_info->asic_family <= CHIP_RAVEN
&& kfd->mec2_fw_version >= 0x1b3))
&& kfd->mec2_fw_version >= 0x1b3)
|| (kfd->device_info->asic_family == CHIP_ARCTURUS
&& kfd->mec2_fw_version >= 0x30))
ret = amdgpu_amdkfd_alloc_gws(kfd->kgd,
amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws);
return ret;
}
static void kfd_smi_init(struct kfd_dev *dev) {
INIT_LIST_HEAD(&dev->smi_clients);
spin_lock_init(&dev->smi_lock);
}
bool kgd2kfd_device_init(struct kfd_dev *kfd,
struct drm_device *ddev,
const struct kgd2kfd_shared_resources *gpu_resources)
@ -725,6 +754,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto kfd_topology_add_device_error;
}
kfd_smi_init(kfd);
kfd->init_complete = true;
dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
kfd->pdev->device);

View file

@ -1939,6 +1939,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
case CHIP_NAVI12:
case CHIP_NAVI14:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
device_queue_manager_init_v10_navi10(&dqm->asic_ops);
break;
default:

View file

@ -416,6 +416,7 @@ int kfd_init_apertures(struct kfd_process *process)
case CHIP_NAVI12:
case CHIP_NAVI14:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
kfd_init_apertures_v9(pdd, id);
break;
default:

View file

@ -24,6 +24,7 @@
#include "kfd_events.h"
#include "soc15_int.h"
#include "kfd_device_queue_manager.h"
#include "kfd_smi_events.h"
static bool event_interrupt_isr_v9(struct kfd_dev *dev,
const uint32_t *ih_ring_entry,
@ -117,6 +118,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
info.prot_read = ring_id & 0x10;
info.prot_write = ring_id & 0x20;
kfd_smi_event_update_vmfault(dev, pasid);
kfd_process_vm_fault(dev->dqm, pasid);
kfd_signal_vm_fault_event(dev, pasid, &info);
}

View file

@ -113,7 +113,7 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
if (q->format == KFD_QUEUE_FORMAT_AQL) {
m->cp_hqd_aql_control =

View file

@ -160,7 +160,7 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
if (q->format == KFD_QUEUE_FORMAT_AQL) {
m->cp_hqd_aql_control =

View file

@ -117,7 +117,7 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
set_priority(m, q);
m->cp_hqd_eop_rptr = 1 << CP_HQD_EOP_RPTR__INIT_FETCHER__SHIFT;

View file

@ -246,6 +246,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
case CHIP_NAVI12:
case CHIP_NAVI14:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
pm->pmf = &kfd_v9_pm_funcs;
break;
default:

View file

@ -39,7 +39,7 @@ static int pm_map_process_v9(struct packet_manager *pm,
packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
sizeof(struct pm4_mes_map_process));
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
packet->bitfields2.process_quantum = 1;
packet->bitfields2.process_quantum = 10;
packet->bitfields2.pasid = qpd->pqm->process->pasid;
packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;

View file

@ -50,7 +50,7 @@ static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer,
packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
sizeof(struct pm4_mes_map_process));
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
packet->bitfields2.process_quantum = 1;
packet->bitfields2.process_quantum = 10;
packet->bitfields2.pasid = qpd->pqm->process->pasid;
packet->bitfields3.page_table_base = qpd->page_table_base;
packet->bitfields10.gds_size = qpd->gds_size;

View file

@ -25,7 +25,7 @@
#include "amdgpu_ids.h"
static unsigned int pasid_bits = 16;
static const struct kfd2kgd_calls *kfd2kgd;
static bool pasids_allocated; /* = false */
bool kfd_set_pasid_limit(unsigned int new_limit)
{
@ -33,7 +33,7 @@ bool kfd_set_pasid_limit(unsigned int new_limit)
return false;
if (new_limit < (1U << pasid_bits)) {
if (kfd2kgd)
if (pasids_allocated)
/* We've already allocated user PASIDs, too late to
* change the limit
*/
@ -53,32 +53,17 @@ unsigned int kfd_get_pasid_limit(void)
unsigned int kfd_pasid_alloc(void)
{
int r;
int r = amdgpu_pasid_alloc(pasid_bits);
/* Find the first best KFD device for calling KGD */
if (!kfd2kgd) {
struct kfd_dev *dev = NULL;
unsigned int i = 0;
while ((kfd_topology_enum_kfd_devices(i, &dev)) == 0) {
if (dev && dev->kfd2kgd) {
kfd2kgd = dev->kfd2kgd;
break;
}
i++;
}
if (!kfd2kgd)
return false;
if (r > 0) {
pasids_allocated = true;
return r;
}
r = amdgpu_pasid_alloc(pasid_bits);
return r > 0 ? r : 0;
return 0;
}
void kfd_pasid_free(unsigned int pasid)
{
if (kfd2kgd)
amdgpu_pasid_free(pasid);
amdgpu_pasid_free(pasid);
}

View file

@ -97,7 +97,7 @@
* Size of the per-process TBA+TMA buffer: 2 pages
*
* The first page is the TBA used for the CWSR ISA code. The second
* page is used as TMA for daisy changing a user-mode trap handler.
* page is used as TMA for user-mode trap handler setup in daisy-chain mode.
*/
#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
#define KFD_CWSR_TMA_OFFSET PAGE_SIZE
@ -157,29 +157,19 @@ extern int debug_largebar;
*/
extern int ignore_crat;
/*
* Set sh_mem_config.retry_disable on Vega10
*/
/* Set sh_mem_config.retry_disable on GFX v9 */
extern int amdgpu_noretry;
/*
* Halt if HWS hang is detected
*/
/* Halt if HWS hang is detected */
extern int halt_if_hws_hang;
/*
* Whether MEC FW support GWS barriers
*/
/* Whether MEC FW support GWS barriers */
extern bool hws_gws_support;
/*
* Queue preemption timeout in ms
*/
/* Queue preemption timeout in ms */
extern int queue_preemption_timeout_ms;
/*
* Enable eviction debug messages
*/
/* Enable eviction debug messages */
extern bool debug_evictions;
enum cache_policy {
@ -301,7 +291,7 @@ struct kfd_dev {
/* xGMI */
uint64_t hive_id;
/* UUID */
uint64_t unique_id;
@ -313,8 +303,12 @@ struct kfd_dev {
/* Compute Profile ref. count */
atomic_t compute_profile;
/* Global GWS resource shared b/t processes*/
/* Global GWS resource shared between processes */
void *gws;
/* Clients watching SMI events */
struct list_head smi_clients;
spinlock_t smi_lock;
};
enum kfd_mempool {
@ -329,7 +323,7 @@ void kfd_chardev_exit(void);
struct device *kfd_chardev(void);
/**
* enum kfd_unmap_queues_filter
* enum kfd_unmap_queues_filter - Enum for queue filters.
*
* @KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: Preempts single queue.
*
@ -348,15 +342,17 @@ enum kfd_unmap_queues_filter {
};
/**
* enum kfd_queue_type
* enum kfd_queue_type - Enum for various queue types.
*
* @KFD_QUEUE_TYPE_COMPUTE: Regular user mode queue type.
*
* @KFD_QUEUE_TYPE_SDMA: Sdma user mode queue type.
* @KFD_QUEUE_TYPE_SDMA: SDMA user mode queue type.
*
* @KFD_QUEUE_TYPE_HIQ: HIQ queue type.
*
* @KFD_QUEUE_TYPE_DIQ: DIQ queue type.
*
* @KFD_QUEUE_TYPE_SDMA_XGMI: Special SDMA queue for XGMI interface.
*/
enum kfd_queue_type {
KFD_QUEUE_TYPE_COMPUTE,
@ -402,9 +398,9 @@ enum KFD_QUEUE_PRIORITY {
*
* @write_ptr: Defines the number of dwords written to the ring buffer.
*
* @doorbell_ptr: This field aim is to notify the H/W of new packet written to
* the queue ring buffer. This field should be similar to write_ptr and the
* user should update this field after he updated the write_ptr.
* @doorbell_ptr: Notifies the H/W of new packet written to the queue ring
* buffer. This field should be similar to write_ptr and the user should
* update this field after updating the write_ptr.
*
* @doorbell_off: The doorbell offset in the doorbell pci-bar.
*
@ -473,7 +469,7 @@ struct queue_properties {
*
* @list: Queue linked list.
*
* @mqd: The queue MQD.
* @mqd: The queue MQD (memory queue descriptor).
*
* @mqd_mem_obj: The MQD local gpu memory object.
*
@ -482,7 +478,7 @@ struct queue_properties {
* @properties: The queue properties.
*
* @mec: Used only in no cp scheduling mode and identifies to micro engine id
* that the queue should be execute on.
* that the queue should be executed on.
*
* @pipe: Used only in no cp scheduling mode and identifies the queue's pipe
* id.
@ -523,9 +519,6 @@ struct queue {
struct kobject kobj;
};
/*
* Please read the kfd_mqd_manager.h description.
*/
enum KFD_MQD_TYPE {
KFD_MQD_TYPE_HIQ = 0, /* for hiq */
KFD_MQD_TYPE_CP, /* for cp queues and diq */
@ -583,9 +576,7 @@ struct qcm_process_device {
*/
bool mapped_gws_queue;
/*
* All the memory management data should be here too
*/
/* All the memory management data should be here too */
uint64_t gds_context_area;
/* Contains page table flags such as AMDGPU_PTE_VALID since gfx9 */
uint64_t page_table_base;
@ -785,11 +776,13 @@ extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
extern struct srcu_struct kfd_processes_srcu;
/**
* Ioctl function type.
* typedef amdkfd_ioctl_t - typedef for ioctl function pointer.
*
* \param filep pointer to file structure.
* \param p amdkfd process pointer.
* \param data pointer to arg that was copied from user.
* @filep: pointer to file structure.
* @p: amdkfd process pointer.
* @data: pointer to arg that was copied from user.
*
* Return: returns ioctl completion code.
*/
typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p,
void *data);

View file

@ -0,0 +1,227 @@
/*
* Copyright 2020 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/poll.h>
#include <linux/wait.h>
#include <linux/anon_inodes.h>
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_vm.h"
#include "kfd_priv.h"
#include "kfd_smi_events.h"
struct kfd_smi_client {
struct list_head list;
struct kfifo fifo;
wait_queue_head_t wait_queue;
/* events enabled */
uint64_t events;
struct kfd_dev *dev;
spinlock_t lock;
};
#define MAX_KFIFO_SIZE 1024
static __poll_t kfd_smi_ev_poll(struct file *, struct poll_table_struct *);
static ssize_t kfd_smi_ev_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t kfd_smi_ev_write(struct file *, const char __user *, size_t,
loff_t *);
static int kfd_smi_ev_release(struct inode *, struct file *);
static const char kfd_smi_name[] = "kfd_smi_ev";
static const struct file_operations kfd_smi_ev_fops = {
.owner = THIS_MODULE,
.poll = kfd_smi_ev_poll,
.read = kfd_smi_ev_read,
.write = kfd_smi_ev_write,
.release = kfd_smi_ev_release
};
static __poll_t kfd_smi_ev_poll(struct file *filep,
struct poll_table_struct *wait)
{
struct kfd_smi_client *client = filep->private_data;
__poll_t mask = 0;
poll_wait(filep, &client->wait_queue, wait);
spin_lock(&client->lock);
if (!kfifo_is_empty(&client->fifo))
mask = EPOLLIN | EPOLLRDNORM;
spin_unlock(&client->lock);
return mask;
}
static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
size_t size, loff_t *offset)
{
int ret;
size_t to_copy;
struct kfd_smi_client *client = filep->private_data;
unsigned char *buf;
buf = kmalloc(MAX_KFIFO_SIZE * sizeof(*buf), GFP_KERNEL);
if (!buf)
return -ENOMEM;
/* kfifo_to_user can sleep so we can't use spinlock protection around
* it. Instead, we kfifo out as spinlocked then copy them to the user.
*/
spin_lock(&client->lock);
to_copy = kfifo_len(&client->fifo);
if (!to_copy) {
spin_unlock(&client->lock);
ret = -EAGAIN;
goto ret_err;
}
to_copy = min3(size, sizeof(buf), to_copy);
ret = kfifo_out(&client->fifo, buf, to_copy);
spin_unlock(&client->lock);
if (ret <= 0) {
ret = -EAGAIN;
goto ret_err;
}
ret = copy_to_user(user, buf, to_copy);
if (ret) {
ret = -EFAULT;
goto ret_err;
}
kfree(buf);
return to_copy;
ret_err:
kfree(buf);
return ret;
}
static ssize_t kfd_smi_ev_write(struct file *filep, const char __user *user,
size_t size, loff_t *offset)
{
struct kfd_smi_client *client = filep->private_data;
uint64_t events;
if (!access_ok(user, size) || size < sizeof(events))
return -EFAULT;
if (copy_from_user(&events, user, sizeof(events)))
return -EFAULT;
WRITE_ONCE(client->events, events);
return sizeof(events);
}
static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
{
struct kfd_smi_client *client = filep->private_data;
struct kfd_dev *dev = client->dev;
spin_lock(&dev->smi_lock);
list_del_rcu(&client->list);
spin_unlock(&dev->smi_lock);
synchronize_rcu();
kfifo_free(&client->fifo);
kfree(client);
return 0;
}
void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
{
struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
struct amdgpu_task_info task_info;
/* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */
/* 16 bytes event + 1 byte space + 25 bytes msg + 1 byte \n = 43
*/
char fifo_in[43];
struct kfd_smi_client *client;
int len;
if (list_empty(&dev->smi_clients))
return;
memset(&task_info, 0, sizeof(struct amdgpu_task_info));
amdgpu_vm_get_task_info(adev, pasid, &task_info);
/* Report VM faults from user applications, not retry from kernel */
if (!task_info.pid)
return;
len = snprintf(fifo_in, 43, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
task_info.pid, task_info.task_name);
rcu_read_lock();
list_for_each_entry_rcu(client, &dev->smi_clients, list) {
if (!(READ_ONCE(client->events) & KFD_SMI_EVENT_VMFAULT))
continue;
spin_lock(&client->lock);
if (kfifo_avail(&client->fifo) >= len) {
kfifo_in(&client->fifo, fifo_in, len);
wake_up_all(&client->wait_queue);
}
else
pr_debug("smi_event(vmfault): no space left\n");
spin_unlock(&client->lock);
}
rcu_read_unlock();
}
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
{
struct kfd_smi_client *client;
int ret;
client = kzalloc(sizeof(struct kfd_smi_client), GFP_KERNEL);
if (!client)
return -ENOMEM;
INIT_LIST_HEAD(&client->list);
ret = kfifo_alloc(&client->fifo, MAX_KFIFO_SIZE, GFP_KERNEL);
if (ret) {
kfree(client);
return ret;
}
ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client,
O_RDWR);
if (ret < 0) {
kfifo_free(&client->fifo);
kfree(client);
return ret;
}
*fd = ret;
init_waitqueue_head(&client->wait_queue);
spin_lock_init(&client->lock);
client->events = 0;
client->dev = dev;
spin_lock(&dev->smi_lock);
list_add_rcu(&client->list, &dev->smi_clients);
spin_unlock(&dev->smi_lock);
return 0;
}

View file

@ -0,0 +1,29 @@
/*
* Copyright 2020 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef KFD_SMI_EVENTS_H_INCLUDED
#define KFD_SMI_EVENTS_H_INCLUDED
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
#endif

View file

@ -1374,6 +1374,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
case CHIP_NAVI12:
case CHIP_NAVI14:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);

View file

@ -978,6 +978,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
/* Update the actual used number of crtc */
adev->mode_info.num_crtc = adev->dm.display_indexes_num;
/* create fake encoders for MST */
dm_dp_create_fake_mst_encoders(adev);
/* TODO: Add_display_info? */
/* TODO use dynamic cursor width */
@ -1001,6 +1004,12 @@ error:
static void amdgpu_dm_fini(struct amdgpu_device *adev)
{
int i;
for (i = 0; i < adev->dm.display_indexes_num; i++) {
drm_encoder_cleanup(&adev->dm.mst_encoders[i].base);
}
amdgpu_dm_audio_fini(adev);
amdgpu_dm_destroy_drm_device(&adev->dm);
@ -1076,6 +1085,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
case CHIP_RENOIR:
#if defined(CONFIG_DRM_AMD_DC_DCN3_0)
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
#endif
return 0;
case CHIP_NAVI12:
@ -1175,6 +1185,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
break;
#if defined(CONFIG_DRM_AMD_DC_DCN3_0)
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
dmub_asic = DMUB_ASIC_DCN30;
fw_name_dmub = FIRMWARE_SIENNA_CICHLID_DMUB;
break;
@ -2019,6 +2030,7 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
struct amdgpu_display_manager *dm;
struct drm_connector *conn_base;
struct amdgpu_device *adev;
struct dc_link *link = NULL;
static const u8 pre_computed_values[] = {
50, 51, 52, 53, 55, 56, 57, 58, 59, 61, 62, 63, 65, 66, 68, 69,
71, 72, 74, 75, 77, 79, 81, 82, 84, 86, 88, 90, 92, 94, 96, 98};
@ -2026,6 +2038,10 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
if (!aconnector || !aconnector->dc_link)
return;
link = aconnector->dc_link;
if (link->connector_signal != SIGNAL_TYPE_EDP)
return;
conn_base = &aconnector->base;
adev = conn_base->dev->dev_private;
dm = &adev->dm;
@ -3216,6 +3232,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
case CHIP_RENOIR:
#if defined(CONFIG_DRM_AMD_DC_DCN3_0)
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
#endif
if (dcn10_register_irq_handlers(dm->adev)) {
DRM_ERROR("DM: Failed to initialize IRQ\n");
@ -3373,6 +3390,7 @@ static int dm_early_init(void *handle)
case CHIP_NAVI12:
#if defined(CONFIG_DRM_AMD_DC_DCN3_0)
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
#endif
adev->mode_info.num_crtc = 6;
adev->mode_info.num_hpd = 6;
@ -3696,6 +3714,7 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev,
adev->asic_type == CHIP_NAVI12 ||
#if defined(CONFIG_DRM_AMD_DC_DCN3_0)
adev->asic_type == CHIP_SIENNA_CICHLID ||
adev->asic_type == CHIP_NAVY_FLOUNDER ||
#endif
adev->asic_type == CHIP_RENOIR ||
adev->asic_type == CHIP_RAVEN) {
@ -3717,9 +3736,9 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev,
tiling_info->gfx9.shaderEnable = 1;
#ifdef CONFIG_DRM_AMD_DC_DCN3_0
if (adev->asic_type == CHIP_SIENNA_CICHLID)
if (adev->asic_type == CHIP_SIENNA_CICHLID ||
adev->asic_type == CHIP_NAVY_FLOUNDER)
tiling_info->gfx9.num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs;
#endif
ret = fill_plane_dcc_attributes(adev, afb, format, rotation,
plane_size, tiling_info,
@ -4556,24 +4575,20 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket, false, false);
if (stream->link->psr_settings.psr_feature_enabled) {
struct dc *core_dc = stream->link->ctx->dc;
if (dc_is_dmcu_initialized(core_dc)) {
//
// should decide stream support vsc sdp colorimetry capability
// before building vsc info packet
//
stream->use_vsc_sdp_for_colorimetry = false;
if (aconnector->dc_sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
stream->use_vsc_sdp_for_colorimetry =
aconnector->dc_sink->is_vsc_sdp_colorimetry_supported;
} else {
if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED)
stream->use_vsc_sdp_for_colorimetry = true;
}
mod_build_vsc_infopacket(stream, &stream->vsc_infopacket);
if (stream->link->psr_settings.psr_feature_enabled) {
//
// should decide stream support vsc sdp colorimetry capability
// before building vsc info packet
//
stream->use_vsc_sdp_for_colorimetry = false;
if (aconnector->dc_sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
stream->use_vsc_sdp_for_colorimetry =
aconnector->dc_sink->is_vsc_sdp_colorimetry_supported;
} else {
if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED)
stream->use_vsc_sdp_for_colorimetry = true;
}
mod_build_vsc_infopacket(stream, &stream->vsc_infopacket);
}
finish:
dc_sink_release(sink);
@ -4639,7 +4654,6 @@ dm_crtc_duplicate_state(struct drm_crtc *crtc)
}
state->active_planes = cur->active_planes;
state->interrupts_enabled = cur->interrupts_enabled;
state->vrr_params = cur->vrr_params;
state->vrr_infopacket = cur->vrr_infopacket;
state->abm_level = cur->abm_level;
@ -5300,29 +5314,19 @@ static int count_crtc_active_planes(struct drm_crtc_state *new_crtc_state)
return num_active;
}
/*
* Sets whether interrupts should be enabled on a specific CRTC.
* We require that the stream be enabled and that there exist active
* DC planes on the stream.
*/
static void
dm_update_crtc_interrupt_state(struct drm_crtc *crtc,
struct drm_crtc_state *new_crtc_state)
static void dm_update_crtc_active_planes(struct drm_crtc *crtc,
struct drm_crtc_state *new_crtc_state)
{
struct dm_crtc_state *dm_new_crtc_state =
to_dm_crtc_state(new_crtc_state);
dm_new_crtc_state->active_planes = 0;
dm_new_crtc_state->interrupts_enabled = false;
if (!dm_new_crtc_state->stream)
return;
dm_new_crtc_state->active_planes =
count_crtc_active_planes(new_crtc_state);
dm_new_crtc_state->interrupts_enabled =
dm_new_crtc_state->active_planes > 0;
}
static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc,
@ -5333,13 +5337,7 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc,
struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(state);
int ret = -EINVAL;
/*
* Update interrupt state for the CRTC. This needs to happen whenever
* the CRTC has changed or whenever any of its planes have changed.
* Atomic check satisfies both of these requirements since the CRTC
* is added to the state by DRM during drm_atomic_helper_check_planes.
*/
dm_update_crtc_interrupt_state(crtc, state);
dm_update_crtc_active_planes(crtc, state);
if (unlikely(!dm_crtc_state->stream &&
modeset_required(state, NULL, dm_crtc_state->stream))) {
@ -5864,6 +5862,7 @@ static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
uint32_t formats[32];
int num_formats;
int res = -EPERM;
unsigned int supported_rotations;
num_formats = get_plane_formats(plane, plane_cap, formats,
ARRAY_SIZE(formats));
@ -5898,6 +5897,13 @@ static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
DRM_COLOR_YCBCR_BT709, DRM_COLOR_YCBCR_LIMITED_RANGE);
}
supported_rotations =
DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 |
DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270;
drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
supported_rotations);
drm_plane_helper_add(plane, &dm_plane_helper_funcs);
/* Create (reset) the plane state */
@ -6442,8 +6448,10 @@ static void manage_dm_interrupts(struct amdgpu_device *adev,
bool enable)
{
/*
* this is not correct translation but will work as soon as VBLANK
* constant is the same as PFLIP
* We have no guarantee that the frontend index maps to the same
* backend index - some even map to more than one.
*
* TODO: Use a different interrupt or check DC itself for the mapping.
*/
int irq_type =
amdgpu_display_crtc_idx_to_irq_type(
@ -6466,6 +6474,19 @@ static void manage_dm_interrupts(struct amdgpu_device *adev,
}
}
static void dm_update_pflip_irq_state(struct amdgpu_device *adev,
struct amdgpu_crtc *acrtc)
{
int irq_type =
amdgpu_display_crtc_idx_to_irq_type(adev, acrtc->crtc_id);
/**
* This reads the current state for the IRQ and force reapplies
* the setting to hardware.
*/
amdgpu_irq_update(adev, &adev->pageflip_irq, irq_type);
}
static bool
is_scaling_state_different(const struct dm_connector_state *dm_state,
const struct dm_connector_state *old_dm_state)
@ -7050,7 +7071,16 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
usleep_range(1000, 1100);
}
if (acrtc_attach->base.state->event) {
/**
* Prepare the flip event for the pageflip interrupt to handle.
*
* This only works in the case where we've already turned on the
* appropriate hardware blocks (eg. HUBP) so in the transition case
* from 0 -> n planes we have to skip a hardware generated event
* and rely on sending it from software.
*/
if (acrtc_attach->base.state->event &&
acrtc_state->active_planes > 0) {
drm_crtc_vblank_get(pcrtc);
spin_lock_irqsave(&pcrtc->dev->event_lock, flags);
@ -7119,6 +7149,24 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
&bundle->stream_update,
dc_state);
/**
* Enable or disable the interrupts on the backend.
*
* Most pipes are put into power gating when unused.
*
* When power gating is enabled on a pipe we lose the
* interrupt enablement state when power gating is disabled.
*
* So we need to update the IRQ control state in hardware
* whenever the pipe turns on (since it could be previously
* power gated) or off (since some pipes can't be power gated
* on some ASICs).
*/
if (dm_old_crtc_state->active_planes != acrtc_state->active_planes)
dm_update_pflip_irq_state(
(struct amdgpu_device *)dev->dev_private,
acrtc_attach);
if ((acrtc_state->update_type > UPDATE_TYPE_FAST) &&
acrtc_state->stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED &&
!acrtc_state->stream->link->psr_settings.psr_feature_enabled)
@ -7219,64 +7267,6 @@ static void amdgpu_dm_commit_audio(struct drm_device *dev,
}
}
/*
* Enable interrupts on CRTCs that are newly active, undergone
* a modeset, or have active planes again.
*
* Done in two passes, based on the for_modeset flag:
* Pass 1: For CRTCs going through modeset
* Pass 2: For CRTCs going from 0 to n active planes
*
* Interrupts can only be enabled after the planes are programmed,
* so this requires a two-pass approach since we don't want to
* just defer the interrupts until after commit planes every time.
*/
static void amdgpu_dm_enable_crtc_interrupts(struct drm_device *dev,
struct drm_atomic_state *state,
bool for_modeset)
{
struct amdgpu_device *adev = dev->dev_private;
struct drm_crtc *crtc;
struct drm_crtc_state *old_crtc_state, *new_crtc_state;
int i;
#ifdef CONFIG_DEBUG_FS
enum amdgpu_dm_pipe_crc_source source;
#endif
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
new_crtc_state, i) {
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
struct dm_crtc_state *dm_new_crtc_state =
to_dm_crtc_state(new_crtc_state);
struct dm_crtc_state *dm_old_crtc_state =
to_dm_crtc_state(old_crtc_state);
bool modeset = drm_atomic_crtc_needs_modeset(new_crtc_state);
bool run_pass;
run_pass = (for_modeset && modeset) ||
(!for_modeset && !modeset &&
!dm_old_crtc_state->interrupts_enabled);
if (!run_pass)
continue;
if (!dm_new_crtc_state->interrupts_enabled)
continue;
manage_dm_interrupts(adev, acrtc, true);
#ifdef CONFIG_DEBUG_FS
/* The stream has changed so CRC capture needs to re-enabled. */
source = dm_new_crtc_state->crc_src;
if (amdgpu_dm_is_valid_crc_source(source)) {
amdgpu_dm_crtc_configure_crc_source(
crtc, dm_new_crtc_state,
dm_new_crtc_state->crc_src);
}
#endif
}
}
/*
* amdgpu_dm_crtc_copy_transient_flags - copy mirrored flags from DRM to DC
* @crtc_state: the DRM CRTC state
@ -7316,12 +7306,10 @@ static int amdgpu_dm_atomic_commit(struct drm_device *dev,
* in atomic check.
*/
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
struct dm_crtc_state *dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
struct dm_crtc_state *dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
if (dm_old_crtc_state->interrupts_enabled &&
(!dm_new_crtc_state->interrupts_enabled ||
if (old_crtc_state->active &&
(!new_crtc_state->active ||
drm_atomic_crtc_needs_modeset(new_crtc_state)))
manage_dm_interrupts(adev, acrtc, false);
}
@ -7600,8 +7588,34 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
dm_new_crtc_state);
}
/* Enable interrupts for CRTCs going through a modeset. */
amdgpu_dm_enable_crtc_interrupts(dev, state, true);
/**
* Enable interrupts for CRTCs that are newly enabled or went through
* a modeset. It was intentionally deferred until after the front end
* state was modified to wait until the OTG was on and so the IRQ
* handlers didn't access stale or invalid state.
*/
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
if (new_crtc_state->active &&
(!old_crtc_state->active ||
drm_atomic_crtc_needs_modeset(new_crtc_state))) {
manage_dm_interrupts(adev, acrtc, true);
#ifdef CONFIG_DEBUG_FS
/**
* Frontend may have changed so reapply the CRC capture
* settings for the stream.
*/
dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
if (amdgpu_dm_is_valid_crc_source(dm_new_crtc_state->crc_src)) {
amdgpu_dm_crtc_configure_crc_source(
crtc, dm_new_crtc_state,
dm_new_crtc_state->crc_src);
}
#endif
}
}
for_each_new_crtc_in_state(state, crtc, new_crtc_state, j)
if (new_crtc_state->async_flip)
@ -7616,9 +7630,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
dm, crtc, wait_for_vblank);
}
/* Enable interrupts for CRTCs going from 0 to n active planes. */
amdgpu_dm_enable_crtc_interrupts(dev, state, false);
/* Update audio instances for each connector. */
amdgpu_dm_commit_audio(dev, state);

View file

@ -43,6 +43,9 @@
*/
#define AMDGPU_DM_MAX_DISPLAY_INDEX 31
#define AMDGPU_DM_MAX_CRTC 6
/*
#include "include/amdgpu_dal_power_if.h"
#include "amdgpu_dm_irq.h"
@ -330,6 +333,13 @@ struct amdgpu_display_manager {
* available in FW
*/
const struct gpu_info_soc_bounding_box_v1_0 *soc_bounding_box;
/**
* @mst_encoders:
*
* fake encoders used for DP MST.
*/
struct amdgpu_encoder mst_encoders[AMDGPU_DM_MAX_CRTC];
};
struct amdgpu_dm_connector {
@ -358,7 +368,6 @@ struct amdgpu_dm_connector {
struct amdgpu_dm_dp_aux dm_dp_aux;
struct drm_dp_mst_port *port;
struct amdgpu_dm_connector *mst_port;
struct amdgpu_encoder *mst_encoder;
struct drm_dp_aux *dsc_aux;
/* TODO see if we can merge with ddc_bus or make a dm_connector */
@ -405,7 +414,6 @@ struct dm_crtc_state {
int update_type;
int active_planes;
bool interrupts_enabled;
int crc_skip_count;
enum amdgpu_dm_pipe_crc_source crc_src;

View file

@ -46,6 +46,89 @@ struct dmub_debugfs_trace_entry {
uint32_t param1;
};
/* parse_write_buffer_into_params - Helper function to parse debugfs write buffer into an array
*
* Function takes in attributes passed to debugfs write entry
* and writes into param array.
* The user passes max_param_num to identify maximum number of
* parameters that could be parsed.
*
*/
static int parse_write_buffer_into_params(char *wr_buf, uint32_t wr_buf_size,
long *param, const char __user *buf,
int max_param_num,
uint8_t *param_nums)
{
char *wr_buf_ptr = NULL;
uint32_t wr_buf_count = 0;
int r;
char *sub_str = NULL;
const char delimiter[3] = {' ', '\n', '\0'};
uint8_t param_index = 0;
*param_nums = 0;
wr_buf_ptr = wr_buf;
r = copy_from_user(wr_buf_ptr, buf, wr_buf_size);
/* r is bytes not be copied */
if (r >= wr_buf_size) {
DRM_DEBUG_DRIVER("user data not be read\n");
return -EINVAL;
}
/* check number of parameters. isspace could not differ space and \n */
while ((*wr_buf_ptr != 0xa) && (wr_buf_count < wr_buf_size)) {
/* skip space*/
while (isspace(*wr_buf_ptr) && (wr_buf_count < wr_buf_size)) {
wr_buf_ptr++;
wr_buf_count++;
}
if (wr_buf_count == wr_buf_size)
break;
/* skip non-space*/
while ((!isspace(*wr_buf_ptr)) && (wr_buf_count < wr_buf_size)) {
wr_buf_ptr++;
wr_buf_count++;
}
(*param_nums)++;
if (wr_buf_count == wr_buf_size)
break;
}
if (*param_nums > max_param_num)
*param_nums = max_param_num;
;
wr_buf_ptr = wr_buf; /* reset buf pointer */
wr_buf_count = 0; /* number of char already checked */
while (isspace(*wr_buf_ptr) && (wr_buf_count < wr_buf_size)) {
wr_buf_ptr++;
wr_buf_count++;
}
while (param_index < *param_nums) {
/* after strsep, wr_buf_ptr will be moved to after space */
sub_str = strsep(&wr_buf_ptr, delimiter);
r = kstrtol(sub_str, 16, &(param[param_index]));
if (r)
DRM_DEBUG_DRIVER("string to int convert error code: %d\n", r);
param_index++;
}
return 0;
}
/* function description
* get/ set DP configuration: lane_count, link_rate, spread_spectrum
*
@ -161,15 +244,11 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf,
struct dc *dc = (struct dc *)link->dc;
struct dc_link_settings prefer_link_settings;
char *wr_buf = NULL;
char *wr_buf_ptr = NULL;
const uint32_t wr_buf_size = 40;
int r;
int bytes_from_user;
char *sub_str;
/* 0: lane_count; 1: link_rate */
uint8_t param_index = 0;
int max_param_num = 2;
uint8_t param_nums = 0;
long param[2];
const char delimiter[3] = {' ', '\n', '\0'};
bool valid_input = false;
if (size == 0)
@ -177,35 +256,20 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf,
wr_buf = kcalloc(wr_buf_size, sizeof(char), GFP_KERNEL);
if (!wr_buf)
return -EINVAL;
wr_buf_ptr = wr_buf;
return -ENOSPC;
r = copy_from_user(wr_buf_ptr, buf, wr_buf_size);
/* r is bytes not be copied */
if (r >= wr_buf_size) {
if (parse_write_buffer_into_params(wr_buf, wr_buf_size,
(long *)param, buf,
max_param_num,
&param_nums)) {
kfree(wr_buf);
DRM_DEBUG_DRIVER("user data not read\n");
return -EINVAL;
}
bytes_from_user = wr_buf_size - r;
while (isspace(*wr_buf_ptr))
wr_buf_ptr++;
while ((*wr_buf_ptr != '\0') && (param_index < 2)) {
sub_str = strsep(&wr_buf_ptr, delimiter);
r = kstrtol(sub_str, 16, &param[param_index]);
if (r)
DRM_DEBUG_DRIVER("string to int convert error code: %d\n", r);
param_index++;
while (isspace(*wr_buf_ptr))
wr_buf_ptr++;
if (param_nums <= 0) {
kfree(wr_buf);
DRM_DEBUG_DRIVER("user data not be read\n");
return -EINVAL;
}
switch (param[0]) {
@ -233,7 +297,7 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf,
if (!valid_input) {
kfree(wr_buf);
DRM_DEBUG_DRIVER("Invalid Input value No HW will be programmed\n");
return bytes_from_user;
return size;
}
/* save user force lane_count, link_rate to preferred settings
@ -246,7 +310,7 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf,
dc_link_set_preferred_link_settings(dc, &prefer_link_settings, link);
kfree(wr_buf);
return bytes_from_user;
return size;
}
/* function: get current DP PHY settings: voltage swing, pre-emphasis,
@ -337,51 +401,34 @@ static ssize_t dp_phy_settings_write(struct file *f, const char __user *buf,
struct dc_link *link = connector->dc_link;
struct dc *dc = (struct dc *)link->dc;
char *wr_buf = NULL;
char *wr_buf_ptr = NULL;
uint32_t wr_buf_size = 40;
int r;
int bytes_from_user;
char *sub_str;
uint8_t param_index = 0;
long param[3];
const char delimiter[3] = {' ', '\n', '\0'};
bool use_prefer_link_setting;
struct link_training_settings link_lane_settings;
int max_param_num = 3;
uint8_t param_nums = 0;
int r = 0;
if (size == 0)
return 0;
return -EINVAL;
wr_buf = kcalloc(wr_buf_size, sizeof(char), GFP_KERNEL);
if (!wr_buf)
return 0;
wr_buf_ptr = wr_buf;
return -ENOSPC;
r = copy_from_user(wr_buf_ptr, buf, wr_buf_size);
/* r is bytes not be copied */
if (r >= wr_buf_size) {
if (parse_write_buffer_into_params(wr_buf, wr_buf_size,
(long *)param, buf,
max_param_num,
&param_nums)) {
kfree(wr_buf);
DRM_DEBUG_DRIVER("user data not be read\n");
return 0;
return -EINVAL;
}
bytes_from_user = wr_buf_size - r;
while (isspace(*wr_buf_ptr))
wr_buf_ptr++;
while ((*wr_buf_ptr != '\0') && (param_index < 3)) {
sub_str = strsep(&wr_buf_ptr, delimiter);
r = kstrtol(sub_str, 16, &param[param_index]);
if (r)
DRM_DEBUG_DRIVER("string to int convert error code: %d\n", r);
param_index++;
while (isspace(*wr_buf_ptr))
wr_buf_ptr++;
if (param_nums <= 0) {
kfree(wr_buf);
DRM_DEBUG_DRIVER("user data not be read\n");
return -EINVAL;
}
if ((param[0] > VOLTAGE_SWING_MAX_LEVEL) ||
@ -389,7 +436,7 @@ static ssize_t dp_phy_settings_write(struct file *f, const char __user *buf,
(param[2] > POST_CURSOR2_MAX_LEVEL)) {
kfree(wr_buf);
DRM_DEBUG_DRIVER("Invalid Input No HW will be programmed\n");
return bytes_from_user;
return size;
}
/* get link settings: lane count, link rate */
@ -429,7 +476,7 @@ static ssize_t dp_phy_settings_write(struct file *f, const char __user *buf,
dc_link_set_drive_settings(dc, &link_lane_settings, link);
kfree(wr_buf);
return bytes_from_user;
return size;
}
/* function description
@ -496,19 +543,13 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us
struct amdgpu_dm_connector *connector = file_inode(f)->i_private;
struct dc_link *link = connector->dc_link;
char *wr_buf = NULL;
char *wr_buf_ptr = NULL;
uint32_t wr_buf_size = 100;
uint32_t wr_buf_count = 0;
int r;
int bytes_from_user;
char *sub_str = NULL;
uint8_t param_index = 0;
uint8_t param_nums = 0;
long param[11] = {0x0};
const char delimiter[3] = {' ', '\n', '\0'};
int max_param_num = 11;
enum dp_test_pattern test_pattern = DP_TEST_PATTERN_UNSUPPORTED;
bool disable_hpd = false;
bool valid_test_pattern = false;
uint8_t param_nums = 0;
/* init with defalut 80bit custom pattern */
uint8_t custom_pattern[10] = {
0x1f, 0x7c, 0xf0, 0xc1, 0x07,
@ -522,70 +563,26 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us
int i;
if (size == 0)
return 0;
return -EINVAL;
wr_buf = kcalloc(wr_buf_size, sizeof(char), GFP_KERNEL);
if (!wr_buf)
return 0;
wr_buf_ptr = wr_buf;
return -ENOSPC;
r = copy_from_user(wr_buf_ptr, buf, wr_buf_size);
if (parse_write_buffer_into_params(wr_buf, wr_buf_size,
(long *)param, buf,
max_param_num,
&param_nums)) {
kfree(wr_buf);
return -EINVAL;
}
/* r is bytes not be copied */
if (r >= wr_buf_size) {
if (param_nums <= 0) {
kfree(wr_buf);
DRM_DEBUG_DRIVER("user data not be read\n");
return 0;
return -EINVAL;
}
bytes_from_user = wr_buf_size - r;
/* check number of parameters. isspace could not differ space and \n */
while ((*wr_buf_ptr != 0xa) && (wr_buf_count < wr_buf_size)) {
/* skip space*/
while (isspace(*wr_buf_ptr) && (wr_buf_count < wr_buf_size)) {
wr_buf_ptr++;
wr_buf_count++;
}
if (wr_buf_count == wr_buf_size)
break;
/* skip non-space*/
while ((!isspace(*wr_buf_ptr)) && (wr_buf_count < wr_buf_size)) {
wr_buf_ptr++;
wr_buf_count++;
}
param_nums++;
if (wr_buf_count == wr_buf_size)
break;
}
/* max 11 parameters */
if (param_nums > 11)
param_nums = 11;
wr_buf_ptr = wr_buf; /* reset buf pinter */
wr_buf_count = 0; /* number of char already checked */
while (isspace(*wr_buf_ptr) && (wr_buf_count < wr_buf_size)) {
wr_buf_ptr++;
wr_buf_count++;
}
while (param_index < param_nums) {
/* after strsep, wr_buf_ptr will be moved to after space */
sub_str = strsep(&wr_buf_ptr, delimiter);
r = kstrtol(sub_str, 16, &param[param_index]);
if (r)
DRM_DEBUG_DRIVER("string to int convert error code: %d\n", r);
param_index++;
}
test_pattern = param[0];
@ -618,7 +615,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us
if (!valid_test_pattern) {
kfree(wr_buf);
DRM_DEBUG_DRIVER("Invalid Test Pattern Parameters\n");
return bytes_from_user;
return size;
}
if (test_pattern == DP_TEST_PATTERN_80BIT_CUSTOM) {
@ -685,7 +682,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us
kfree(wr_buf);
return bytes_from_user;
return size;
}
/**

View file

@ -95,7 +95,6 @@ dm_dp_mst_connector_destroy(struct drm_connector *connector)
{
struct amdgpu_dm_connector *aconnector =
to_amdgpu_dm_connector(connector);
struct amdgpu_encoder *amdgpu_encoder = aconnector->mst_encoder;
if (aconnector->dc_sink) {
dc_link_remove_remote_sink(aconnector->dc_link,
@ -105,8 +104,6 @@ dm_dp_mst_connector_destroy(struct drm_connector *connector)
kfree(aconnector->edid);
drm_encoder_cleanup(&amdgpu_encoder->base);
kfree(amdgpu_encoder);
drm_connector_cleanup(connector);
drm_dp_mst_put_port_malloc(aconnector->port);
kfree(aconnector);
@ -243,7 +240,11 @@ static struct drm_encoder *
dm_mst_atomic_best_encoder(struct drm_connector *connector,
struct drm_connector_state *connector_state)
{
return &to_amdgpu_dm_connector(connector)->mst_encoder->base;
struct drm_device *dev = connector->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(connector_state->crtc);
return &adev->dm.mst_encoders[acrtc->crtc_id].base;
}
static int
@ -306,31 +307,27 @@ static const struct drm_encoder_funcs amdgpu_dm_encoder_funcs = {
.destroy = amdgpu_dm_encoder_destroy,
};
static struct amdgpu_encoder *
dm_dp_create_fake_mst_encoder(struct amdgpu_dm_connector *connector)
void
dm_dp_create_fake_mst_encoders(struct amdgpu_device *adev)
{
struct drm_device *dev = connector->base.dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder;
struct drm_encoder *encoder;
struct drm_device *dev = adev->ddev;
int i;
amdgpu_encoder = kzalloc(sizeof(*amdgpu_encoder), GFP_KERNEL);
if (!amdgpu_encoder)
return NULL;
for (i = 0; i < adev->dm.display_indexes_num; i++) {
struct amdgpu_encoder *amdgpu_encoder = &adev->dm.mst_encoders[i];
struct drm_encoder *encoder = &amdgpu_encoder->base;
encoder = &amdgpu_encoder->base;
encoder->possible_crtcs = amdgpu_dm_get_encoder_crtc_mask(adev);
encoder->possible_crtcs = amdgpu_dm_get_encoder_crtc_mask(adev);
drm_encoder_init(
dev,
&amdgpu_encoder->base,
&amdgpu_dm_encoder_funcs,
DRM_MODE_ENCODER_DPMST,
NULL);
drm_encoder_init(
dev,
&amdgpu_encoder->base,
&amdgpu_dm_encoder_funcs,
DRM_MODE_ENCODER_DPMST,
NULL);
drm_encoder_helper_add(encoder, &amdgpu_dm_encoder_helper_funcs);
return amdgpu_encoder;
drm_encoder_helper_add(encoder, &amdgpu_dm_encoder_helper_funcs);
}
}
static struct drm_connector *
@ -343,6 +340,7 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_dm_connector *aconnector;
struct drm_connector *connector;
int i;
aconnector = kzalloc(sizeof(*aconnector), GFP_KERNEL);
if (!aconnector)
@ -369,9 +367,10 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
master->dc_link,
master->connector_id);
aconnector->mst_encoder = dm_dp_create_fake_mst_encoder(master);
drm_connector_attach_encoder(&aconnector->base,
&aconnector->mst_encoder->base);
for (i = 0; i < adev->dm.display_indexes_num; i++) {
drm_connector_attach_encoder(&aconnector->base,
&adev->dm.mst_encoders[i].base);
}
connector->max_bpc_property = master->base.max_bpc_property;
if (connector->max_bpc_property)

View file

@ -35,6 +35,9 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
struct amdgpu_dm_connector *aconnector,
int link_index);
void
dm_dp_create_fake_mst_encoders(struct amdgpu_device *adev);
#if defined(CONFIG_DRM_AMD_DC_DCN)
bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
struct dc_state *dc_state);

View file

@ -174,9 +174,6 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
case FAMILY_NV:
#if defined(CONFIG_DRM_AMD_DC_DCN3_0)
if (ASICREV_IS_SIENNA_CICHLID_P(asic_id.hw_internal_rev)) {
/* TODO: to add SIENNA_CICHLID clk_mgr support, once CLK IP header files are available,
* for now use DCN3AG clk mgr.
*/
dcn3_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
break;
}

View file

@ -26,6 +26,7 @@
#include "core_types.h"
#include "clk_mgr_internal.h"
#include "reg_helper.h"
#include <linux/delay.h>
#define MAX_INSTANCE 5
#define MAX_SEGMENT 5
@ -68,10 +69,42 @@ static const struct IP_BASE MP1_BASE = { { { { 0x00016000, 0, 0, 0, 0 } },
#define VBIOSSMC_MSG_SetDispclkFreq 0x4
#define VBIOSSMC_MSG_SetDprefclkFreq 0x5
#define VBIOSSMC_Status_BUSY 0x0
#define VBIOSSMC_Result_OK 0x1
#define VBIOSSMC_Result_Failed 0xFF
#define VBIOSSMC_Result_UnknownCmd 0xFE
#define VBIOSSMC_Result_CmdRejectedPrereq 0xFD
#define VBIOSSMC_Result_CmdRejectedBusy 0xFC
/*
* Function to be used instead of REG_WAIT macro because the wait ends when
* the register is NOT EQUAL to zero, and because the translation in msg_if.h
* won't work with REG_WAIT.
*/
static uint32_t rv1_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries)
{
uint32_t res_val = VBIOSSMC_Status_BUSY;
do {
res_val = REG_READ(MP1_SMN_C2PMSG_91);
if (res_val != VBIOSSMC_Status_BUSY)
break;
if (delay_us >= 1000)
msleep(delay_us/1000);
else if (delay_us > 0)
udelay(delay_us);
} while (max_retries--);
return res_val;
}
int rv1_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, unsigned int msg_id, unsigned int param)
{
uint32_t result;
/* First clear response register */
REG_WRITE(MP1_SMN_C2PMSG_91, 0);
REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY);
/* Set the parameter register for the SMU message, unit is Mhz */
REG_WRITE(MP1_SMN_C2PMSG_83, param);
@ -79,7 +112,9 @@ int rv1_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, unsigned
/* Trigger the message transaction by writing the message ID */
REG_WRITE(MP1_SMN_C2PMSG_67, msg_id);
REG_WAIT(MP1_SMN_C2PMSG_91, CONTENT, 1, 10, 200000);
result = rv1_smu_wait_for_response(clk_mgr, 10, 1000);
ASSERT(result == VBIOSSMC_Result_OK);
/* Actual dispclk set is returned in the parameter register */
return REG_READ(MP1_SMN_C2PMSG_83);

View file

@ -184,13 +184,6 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base,
pp_smu->set_display_count(&pp_smu->pp_smu, display_count);
}
if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, clk_mgr_base->clks.phyclk_khz)) {
clk_mgr_base->clks.phyclk_khz = new_clocks->phyclk_khz;
if (pp_smu && pp_smu->set_voltage_by_freq)
pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PHYCLK, clk_mgr_base->clks.phyclk_khz / 1000);
}
if (dc->debug.force_min_dcfclk_mhz > 0)
new_clocks->dcfclk_khz = (new_clocks->dcfclk_khz > (dc->debug.force_min_dcfclk_mhz * 1000)) ?
new_clocks->dcfclk_khz : (dc->debug.force_min_dcfclk_mhz * 1000);
@ -417,8 +410,6 @@ static bool dcn2_are_clock_states_equal(struct dc_clocks *a,
return false;
else if (a->dcfclk_deep_sleep_khz != b->dcfclk_deep_sleep_khz)
return false;
else if (a->phyclk_khz != b->phyclk_khz)
return false;
else if (a->dramclk_khz != b->dramclk_khz)
return false;
else if (a->p_state_change_support != b->p_state_change_support)
@ -427,6 +418,31 @@ static bool dcn2_are_clock_states_equal(struct dc_clocks *a,
return true;
}
/* Notify clk_mgr of a change in link rate, update phyclk frequency if necessary */
static void dcn2_notify_link_rate_change(struct clk_mgr *clk_mgr_base, struct dc_link *link)
{
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
unsigned int i, max_phyclk_req = 0;
struct pp_smu_funcs_nv *pp_smu = NULL;
if (!clk_mgr->pp_smu || !clk_mgr->pp_smu->nv_funcs.set_voltage_by_freq)
return;
pp_smu = &clk_mgr->pp_smu->nv_funcs;
clk_mgr->cur_phyclk_req_table[link->link_index] = link->cur_link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ;
for (i = 0; i < MAX_PIPES * 2; i++) {
if (clk_mgr->cur_phyclk_req_table[i] > max_phyclk_req)
max_phyclk_req = clk_mgr->cur_phyclk_req_table[i];
}
if (max_phyclk_req != clk_mgr_base->clks.phyclk_khz) {
clk_mgr_base->clks.phyclk_khz = max_phyclk_req;
pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PHYCLK, clk_mgr_base->clks.phyclk_khz / 1000);
}
}
static struct clk_mgr_funcs dcn2_funcs = {
.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
.update_clocks = dcn2_update_clocks,
@ -434,6 +450,7 @@ static struct clk_mgr_funcs dcn2_funcs = {
.enable_pme_wa = dcn2_enable_pme_wa,
.get_clock = dcn2_get_clock,
.are_clock_states_equal = dcn2_are_clock_states_equal,
.notify_link_rate_change = dcn2_notify_link_rate_change,
};

View file

@ -136,11 +136,6 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base,
}
}
if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, clk_mgr_base->clks.phyclk_khz)) {
clk_mgr_base->clks.phyclk_khz = new_clocks->phyclk_khz;
rn_vbios_smu_set_phyclk(clk_mgr, clk_mgr_base->clks.phyclk_khz);
}
if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz)) {
clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz;
rn_vbios_smu_set_hard_min_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_khz);
@ -496,13 +491,33 @@ static bool rn_are_clock_states_equal(struct dc_clocks *a,
}
/* Notify clk_mgr of a change in link rate, update phyclk frequency if necessary */
static void rn_notify_link_rate_change(struct clk_mgr *clk_mgr_base, struct dc_link *link)
{
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
unsigned int i, max_phyclk_req = 0;
clk_mgr->cur_phyclk_req_table[link->link_index] = link->cur_link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ;
for (i = 0; i < MAX_PIPES * 2; i++) {
if (clk_mgr->cur_phyclk_req_table[i] > max_phyclk_req)
max_phyclk_req = clk_mgr->cur_phyclk_req_table[i];
}
if (max_phyclk_req != clk_mgr_base->clks.phyclk_khz) {
clk_mgr_base->clks.phyclk_khz = max_phyclk_req;
rn_vbios_smu_set_phyclk(clk_mgr, clk_mgr_base->clks.phyclk_khz);
}
}
static struct clk_mgr_funcs dcn21_funcs = {
.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
.update_clocks = rn_update_clocks,
.init_clocks = rn_init_clocks,
.enable_pme_wa = rn_enable_pme_wa,
.are_clock_states_equal = rn_are_clock_states_equal,
.notify_wm_ranges = rn_notify_wm_ranges
.notify_wm_ranges = rn_notify_wm_ranges,
.notify_link_rate_change = rn_notify_link_rate_change,
};
static struct clk_bw_params rn_bw_params = {
@ -619,6 +634,42 @@ static struct wm_table lpddr4_wm_table = {
}
};
static struct wm_table lpddr4_wm_table_with_disabled_ppt = {
.entries = {
{
.wm_inst = WM_A,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 8.32,
.sr_enter_plus_exit_time_us = 9.38,
.valid = true,
},
{
.wm_inst = WM_B,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 9.82,
.sr_enter_plus_exit_time_us = 11.196,
.valid = true,
},
{
.wm_inst = WM_C,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 9.89,
.sr_enter_plus_exit_time_us = 11.24,
.valid = true,
},
{
.wm_inst = WM_D,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 9.748,
.sr_enter_plus_exit_time_us = 11.102,
.valid = true,
},
}
};
static unsigned int find_dcfclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage)
{
@ -723,6 +774,7 @@ void rn_clk_mgr_construct(
struct clk_log_info log_info = {0};
clk_mgr->smu_ver = rn_vbios_smu_get_smu_version(clk_mgr);
clk_mgr->periodic_retraining_disabled = rn_vbios_smu_is_periodic_retraining_disabled(clk_mgr);
/* SMU Version 55.51.0 and up no longer have an issue
* that needs to limit minimum dispclk */
@ -737,7 +789,11 @@ void rn_clk_mgr_construct(
clk_mgr->base.dentist_vco_freq_khz = 3600000;
if (ctx->dc_bios->integrated_info->memory_type == LpDdr4MemType) {
rn_bw_params.wm_table = lpddr4_wm_table;
if (clk_mgr->periodic_retraining_disabled) {
rn_bw_params.wm_table = lpddr4_wm_table_with_disabled_ppt;
} else {
rn_bw_params.wm_table = lpddr4_wm_table;
}
} else {
rn_bw_params.wm_table = ddr4_wm_table;
}

View file

@ -26,6 +26,7 @@
#include "core_types.h"
#include "clk_mgr_internal.h"
#include "reg_helper.h"
#include <linux/delay.h>
#include "renoir_ip_offset.h"
@ -51,12 +52,46 @@
#define VBIOSSMC_MSG_GetFclkFrequency 0xB
#define VBIOSSMC_MSG_SetDisplayCount 0xC
#define VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown 0xD
#define VBIOSSMC_MSG_UpdatePmeRestore 0xE
#define VBIOSSMC_MSG_UpdatePmeRestore 0xE
#define VBIOSSMC_MSG_IsPeriodicRetrainingDisabled 0xF
#define VBIOSSMC_Status_BUSY 0x0
#define VBIOSSMC_Result_OK 0x1
#define VBIOSSMC_Result_Failed 0xFF
#define VBIOSSMC_Result_UnknownCmd 0xFE
#define VBIOSSMC_Result_CmdRejectedPrereq 0xFD
#define VBIOSSMC_Result_CmdRejectedBusy 0xFC
/*
* Function to be used instead of REG_WAIT macro because the wait ends when
* the register is NOT EQUAL to zero, and because the translation in msg_if.h
* won't work with REG_WAIT.
*/
static uint32_t rn_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries)
{
uint32_t res_val = VBIOSSMC_Status_BUSY;
do {
res_val = REG_READ(MP1_SMN_C2PMSG_91);
if (res_val != VBIOSSMC_Status_BUSY)
break;
if (delay_us >= 1000)
msleep(delay_us/1000);
else if (delay_us > 0)
udelay(delay_us);
} while (max_retries--);
return res_val;
}
int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, unsigned int msg_id, unsigned int param)
{
uint32_t result;
/* First clear response register */
REG_WRITE(MP1_SMN_C2PMSG_91, 0);
REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY);
/* Set the parameter register for the SMU message, unit is Mhz */
REG_WRITE(MP1_SMN_C2PMSG_83, param);
@ -64,7 +99,9 @@ int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, unsigned
/* Trigger the message transaction by writing the message ID */
REG_WRITE(MP1_SMN_C2PMSG_67, msg_id);
REG_WAIT(MP1_SMN_C2PMSG_91, CONTENT, 1, 10, 200000);
result = rn_smu_wait_for_response(clk_mgr, 10, 1000);
ASSERT(result == VBIOSSMC_Result_OK || result == VBIOSSMC_Result_UnknownCmd);
/* Actual dispclk set is returned in the parameter register */
return REG_READ(MP1_SMN_C2PMSG_83);
@ -196,3 +233,11 @@ void rn_vbios_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr)
VBIOSSMC_MSG_UpdatePmeRestore,
0);
}
int rn_vbios_smu_is_periodic_retraining_disabled(struct clk_mgr_internal *clk_mgr)
{
return rn_vbios_smu_send_msg_with_param(
clk_mgr,
VBIOSSMC_MSG_IsPeriodicRetrainingDisabled,
0);
}

View file

@ -36,5 +36,6 @@ int rn_vbios_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_
void rn_vbios_smu_set_dcn_low_power_state(struct clk_mgr_internal *clk_mgr, int display_count);
void rn_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable);
void rn_vbios_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr);
int rn_vbios_smu_is_periodic_retraining_disabled(struct clk_mgr_internal *clk_mgr);
#endif /* DAL_DC_DCN10_RV1_CLK_MGR_VBIOS_SMU_H_ */

View file

@ -260,11 +260,6 @@ static void dcn3_update_clocks(struct clk_mgr *clk_mgr_base,
if (enter_display_off == safe_to_lower)
dcn30_smu_set_num_of_displays(clk_mgr, display_count);
if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, clk_mgr_base->clks.phyclk_khz)) {
clk_mgr_base->clks.phyclk_khz = new_clocks->phyclk_khz;
dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_PHYCLK, clk_mgr_base->clks.phyclk_khz / 1000);
}
if (dc->debug.force_min_dcfclk_mhz > 0)
new_clocks->dcfclk_khz = (new_clocks->dcfclk_khz > (dc->debug.force_min_dcfclk_mhz * 1000)) ?
new_clocks->dcfclk_khz : (dc->debug.force_min_dcfclk_mhz * 1000);
@ -344,16 +339,12 @@ static void dcn3_update_clocks(struct clk_mgr *clk_mgr_base,
static void dcn3_notify_wm_ranges(struct clk_mgr *clk_mgr_base)
{
unsigned int i;
long long table_addr;
WatermarksExternal_t *table;
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
WatermarksExternal_t *table = (WatermarksExternal_t *) clk_mgr->wm_range_table;
if (!clk_mgr->smu_present)
return;
/* need physical address of table to give to PMFW */
table = (WatermarksExternal_t *) dm_helpers_allocate_gpu_mem(clk_mgr->base.ctx, DC_MEM_ALLOC_TYPE_GART, sizeof(WatermarksExternal_t), &table_addr);
if (!table)
// should log failure
return;
@ -371,11 +362,9 @@ static void dcn3_notify_wm_ranges(struct clk_mgr *clk_mgr_base)
table->Watermarks.WatermarkRow[WM_DCEFCLK][i].Flags = clk_mgr->base.bw_params->wm_table.nv_entries[i].pmfw_breakdown.wm_type;
}
dcn30_smu_set_dram_addr_high(clk_mgr, table_addr >> 32);
dcn30_smu_set_dram_addr_low(clk_mgr, table_addr & 0xFFFFFFFF);
dcn30_smu_set_dram_addr_high(clk_mgr, clk_mgr->wm_range_table_addr >> 32);
dcn30_smu_set_dram_addr_low(clk_mgr, clk_mgr->wm_range_table_addr & 0xFFFFFFFF);
dcn30_smu_transfer_wm_table_dram_2_smu(clk_mgr);
dm_helpers_free_gpu_mem(clk_mgr->base.ctx, DC_MEM_ALLOC_TYPE_GART, table);
}
/* Set min memclk to minimum, either constrained by the current mode or DPM0 */
@ -437,8 +426,6 @@ static bool dcn3_are_clock_states_equal(struct dc_clocks *a,
return false;
else if (a->dcfclk_deep_sleep_khz != b->dcfclk_deep_sleep_khz)
return false;
else if (a->phyclk_khz != b->phyclk_khz)
return false;
else if (a->dramclk_khz != b->dramclk_khz)
return false;
else if (a->p_state_change_support != b->p_state_change_support)
@ -457,6 +444,28 @@ static void dcn3_enable_pme_wa(struct clk_mgr *clk_mgr_base)
dcn30_smu_set_pme_workaround(clk_mgr);
}
/* Notify clk_mgr of a change in link rate, update phyclk frequency if necessary */
static void dcn30_notify_link_rate_change(struct clk_mgr *clk_mgr_base, struct dc_link *link)
{
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
unsigned int i, max_phyclk_req = clk_mgr_base->bw_params->clk_table.entries[0].phyclk_mhz * 1000;
if (!clk_mgr->smu_present)
return;
clk_mgr->cur_phyclk_req_table[link->link_index] = link->cur_link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ;
for (i = 0; i < MAX_PIPES * 2; i++) {
if (clk_mgr->cur_phyclk_req_table[i] > max_phyclk_req)
max_phyclk_req = clk_mgr->cur_phyclk_req_table[i];
}
if (max_phyclk_req != clk_mgr_base->clks.phyclk_khz) {
clk_mgr_base->clks.phyclk_khz = max_phyclk_req;
dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_PHYCLK, clk_mgr_base->clks.phyclk_khz / 1000);
}
}
static struct clk_mgr_funcs dcn3_funcs = {
.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
.update_clocks = dcn3_update_clocks,
@ -466,7 +475,8 @@ static struct clk_mgr_funcs dcn3_funcs = {
.set_hard_max_memclk = dcn3_set_hard_max_memclk,
.get_memclk_states_from_smu = dcn3_get_memclk_states_from_smu,
.are_clock_states_equal = dcn3_are_clock_states_equal,
.enable_pme_wa = dcn3_enable_pme_wa
.enable_pme_wa = dcn3_enable_pme_wa,
.notify_link_rate_change = dcn30_notify_link_rate_change,
};
static void dcn3_init_clocks_fpga(struct clk_mgr *clk_mgr)
@ -534,10 +544,19 @@ void dcn3_clk_mgr_construct(
dce_clock_read_ss_info(clk_mgr);
clk_mgr->base.bw_params = kzalloc(sizeof(*clk_mgr->base.bw_params), GFP_KERNEL);
/* need physical address of table to give to PMFW */
clk_mgr->wm_range_table = dm_helpers_allocate_gpu_mem(clk_mgr->base.ctx,
DC_MEM_ALLOC_TYPE_GART, sizeof(WatermarksExternal_t),
&clk_mgr->wm_range_table_addr);
}
void dcn3_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr)
{
if (clk_mgr->base.bw_params)
kfree(clk_mgr->base.bw_params);
if (clk_mgr->wm_range_table)
dm_helpers_free_gpu_mem(clk_mgr->base.ctx, DC_MEM_ALLOC_TYPE_GART,
clk_mgr->wm_range_table);
}

Some files were not shown because too many files have changed in this diff Show more