Merge branch 'drm-next-5.3' of git://people.freedesktop.org/~agd5f/linux into drm-next
New stuff for 5.3:
- Add new thermal sensors for vega asics
- Various RAS fixes
- Add sysfs interface for memory interface utilization
- Use HMM rather than mmu notifier for user pages
- Expose xgmi topology via kfd
- SR-IOV fixes
- Fixes for manual driver reload
- Add unique identifier for vega asics
- Clean up user fence handling with UVD/VCE/VCN blocks
- Convert DC to use core bpc attribute rather than a custom one
- Add GWS support for KFD
- Vega powerplay improvements
- Add CRC support for DCE 12
- SR-IOV support for new security policy
- Various cleanups

From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190529220944.14464-1-alexander.deucher@amd.com

alistair/sunxi64-5.4-dsi
commit
91c1ead6ae
|
@ -70,6 +70,26 @@ Interrupt Handling
|
||||||
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
|
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
|
||||||
:internal:
|
:internal:
|
||||||
|
|
||||||
|
AMDGPU XGMI Support
|
||||||
|
===================
|
||||||
|
|
||||||
|
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
|
||||||
|
:doc: AMDGPU XGMI Support
|
||||||
|
|
||||||
|
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
|
||||||
|
:internal:
|
||||||
|
|
||||||
|
AMDGPU RAS debugfs control interface
|
||||||
|
====================================
|
||||||
|
|
||||||
|
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
|
||||||
|
:doc: AMDGPU RAS debugfs control interface
|
||||||
|
|
||||||
|
|
||||||
|
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
|
||||||
|
:internal:
|
||||||
|
|
||||||
|
|
||||||
GPU Power/Thermal Controls and Monitoring
|
GPU Power/Thermal Controls and Monitoring
|
||||||
=========================================
|
=========================================
|
||||||
|
|
||||||
|
|
|
@ -27,10 +27,11 @@ config DRM_AMDGPU_CIK
|
||||||
config DRM_AMDGPU_USERPTR
|
config DRM_AMDGPU_USERPTR
|
||||||
bool "Always enable userptr write support"
|
bool "Always enable userptr write support"
|
||||||
depends on DRM_AMDGPU
|
depends on DRM_AMDGPU
|
||||||
select MMU_NOTIFIER
|
depends on ARCH_HAS_HMM
|
||||||
|
select HMM_MIRROR
|
||||||
help
|
help
|
||||||
This option selects CONFIG_MMU_NOTIFIER if it isn't already
|
This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
|
||||||
selected to enable full userptr support.
|
isn't already selected to enable full userptr support.
|
||||||
|
|
||||||
config DRM_AMDGPU_GART_DEBUGFS
|
config DRM_AMDGPU_GART_DEBUGFS
|
||||||
bool "Allow GART access through debugfs"
|
bool "Allow GART access through debugfs"
|
||||||
|
|
|
@ -49,7 +49,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
|
||||||
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
|
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
|
||||||
amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
|
amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
|
||||||
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
|
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
|
||||||
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
|
amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
|
||||||
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
|
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
|
||||||
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
|
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
|
||||||
amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
|
amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
|
||||||
|
@ -173,7 +173,7 @@ endif
|
||||||
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
|
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
|
||||||
amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
|
amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
|
||||||
amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
|
amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
|
||||||
amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
|
amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
|
||||||
|
|
||||||
include $(FULL_AMD_PATH)/powerplay/Makefile
|
include $(FULL_AMD_PATH)/powerplay/Makefile
|
||||||
|
|
||||||
|
|
|
@ -118,7 +118,6 @@ extern int amdgpu_disp_priority;
|
||||||
extern int amdgpu_hw_i2c;
|
extern int amdgpu_hw_i2c;
|
||||||
extern int amdgpu_pcie_gen2;
|
extern int amdgpu_pcie_gen2;
|
||||||
extern int amdgpu_msi;
|
extern int amdgpu_msi;
|
||||||
extern int amdgpu_lockup_timeout;
|
|
||||||
extern int amdgpu_dpm;
|
extern int amdgpu_dpm;
|
||||||
extern int amdgpu_fw_load_type;
|
extern int amdgpu_fw_load_type;
|
||||||
extern int amdgpu_aspm;
|
extern int amdgpu_aspm;
|
||||||
|
@ -211,6 +210,7 @@ struct amdgpu_irq_src;
|
||||||
struct amdgpu_fpriv;
|
struct amdgpu_fpriv;
|
||||||
struct amdgpu_bo_va_mapping;
|
struct amdgpu_bo_va_mapping;
|
||||||
struct amdgpu_atif;
|
struct amdgpu_atif;
|
||||||
|
struct kfd_vm_fault_info;
|
||||||
|
|
||||||
enum amdgpu_cp_irq {
|
enum amdgpu_cp_irq {
|
||||||
AMDGPU_CP_IRQ_GFX_EOP = 0,
|
AMDGPU_CP_IRQ_GFX_EOP = 0,
|
||||||
|
@ -415,6 +415,7 @@ struct amdgpu_fpriv {
|
||||||
};
|
};
|
||||||
|
|
||||||
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
|
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
|
||||||
|
int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev);
|
||||||
|
|
||||||
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||||
unsigned size, struct amdgpu_ib *ib);
|
unsigned size, struct amdgpu_ib *ib);
|
||||||
|
@ -558,6 +559,8 @@ struct amdgpu_asic_funcs {
|
||||||
uint64_t *count1);
|
uint64_t *count1);
|
||||||
/* do we need to reset the asic at init time (e.g., kexec) */
|
/* do we need to reset the asic at init time (e.g., kexec) */
|
||||||
bool (*need_reset_on_init)(struct amdgpu_device *adev);
|
bool (*need_reset_on_init)(struct amdgpu_device *adev);
|
||||||
|
/* PCIe replay counter */
|
||||||
|
uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -639,6 +642,11 @@ struct nbio_hdp_flush_reg {
|
||||||
u32 ref_and_mask_sdma1;
|
u32 ref_and_mask_sdma1;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct amdgpu_mmio_remap {
|
||||||
|
u32 reg_offset;
|
||||||
|
resource_size_t bus_addr;
|
||||||
|
};
|
||||||
|
|
||||||
struct amdgpu_nbio_funcs {
|
struct amdgpu_nbio_funcs {
|
||||||
const struct nbio_hdp_flush_reg *hdp_flush_reg;
|
const struct nbio_hdp_flush_reg *hdp_flush_reg;
|
||||||
u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
|
u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
|
||||||
|
@ -666,6 +674,7 @@ struct amdgpu_nbio_funcs {
|
||||||
void (*ih_control)(struct amdgpu_device *adev);
|
void (*ih_control)(struct amdgpu_device *adev);
|
||||||
void (*init_registers)(struct amdgpu_device *adev);
|
void (*init_registers)(struct amdgpu_device *adev);
|
||||||
void (*detect_hw_virt)(struct amdgpu_device *adev);
|
void (*detect_hw_virt)(struct amdgpu_device *adev);
|
||||||
|
void (*remap_hdp_registers)(struct amdgpu_device *adev);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct amdgpu_df_funcs {
|
struct amdgpu_df_funcs {
|
||||||
|
@ -680,6 +689,12 @@ struct amdgpu_df_funcs {
|
||||||
u32 *flags);
|
u32 *flags);
|
||||||
void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
|
void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
|
||||||
bool enable);
|
bool enable);
|
||||||
|
int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
|
||||||
|
int is_enable);
|
||||||
|
int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
|
||||||
|
int is_disable);
|
||||||
|
void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
|
||||||
|
uint64_t *count);
|
||||||
};
|
};
|
||||||
/* Define the HW IP blocks that will be used in the driver; add more if necessary */
|
/* Define the HW IP blocks that will be used in the driver; add more if necessary */
|
||||||
enum amd_hw_ip_block_type {
|
enum amd_hw_ip_block_type {
|
||||||
|
@ -764,6 +779,7 @@ struct amdgpu_device {
|
||||||
void __iomem *rmmio;
|
void __iomem *rmmio;
|
||||||
/* protects concurrent MM_INDEX/DATA based register access */
|
/* protects concurrent MM_INDEX/DATA based register access */
|
||||||
spinlock_t mmio_idx_lock;
|
spinlock_t mmio_idx_lock;
|
||||||
|
struct amdgpu_mmio_remap rmmio_remap;
|
||||||
/* protects concurrent SMC based register access */
|
/* protects concurrent SMC based register access */
|
||||||
spinlock_t smc_idx_lock;
|
spinlock_t smc_idx_lock;
|
||||||
amdgpu_rreg_t smc_rreg;
|
amdgpu_rreg_t smc_rreg;
|
||||||
|
@ -936,6 +952,13 @@ struct amdgpu_device {
|
||||||
struct work_struct xgmi_reset_work;
|
struct work_struct xgmi_reset_work;
|
||||||
|
|
||||||
bool in_baco_reset;
|
bool in_baco_reset;
|
||||||
|
|
||||||
|
long gfx_timeout;
|
||||||
|
long sdma_timeout;
|
||||||
|
long video_timeout;
|
||||||
|
long compute_timeout;
|
||||||
|
|
||||||
|
uint64_t unique_id;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
|
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
|
||||||
|
@ -1065,6 +1088,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
|
||||||
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
|
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
|
||||||
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
|
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
|
||||||
#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
|
#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
|
||||||
|
#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))
|
||||||
|
|
||||||
/* Common functions */
|
/* Common functions */
|
||||||
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
|
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
|
||||||
|
@ -1081,6 +1105,9 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
|
||||||
const u32 array_size);
|
const u32 array_size);
|
||||||
|
|
||||||
bool amdgpu_device_is_px(struct drm_device *dev);
|
bool amdgpu_device_is_px(struct drm_device *dev);
|
||||||
|
bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
|
||||||
|
struct amdgpu_device *peer_adev);
|
||||||
|
|
||||||
/* atpx handler */
|
/* atpx handler */
|
||||||
#if defined(CONFIG_VGA_SWITCHEROO)
|
#if defined(CONFIG_VGA_SWITCHEROO)
|
||||||
void amdgpu_register_atpx_handler(void);
|
void amdgpu_register_atpx_handler(void);
|
||||||
|
|
|
@ -25,8 +25,10 @@
|
||||||
#include <drm/drmP.h>
|
#include <drm/drmP.h>
|
||||||
#include "amdgpu.h"
|
#include "amdgpu.h"
|
||||||
#include "amdgpu_gfx.h"
|
#include "amdgpu_gfx.h"
|
||||||
|
#include "amdgpu_dma_buf.h"
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
#include <linux/dma-buf.h>
|
#include <linux/dma-buf.h>
|
||||||
|
#include "amdgpu_xgmi.h"
|
||||||
|
|
||||||
static const unsigned int compute_vmid_bitmap = 0xFF00;
|
static const unsigned int compute_vmid_bitmap = 0xFF00;
|
||||||
|
|
||||||
|
@ -148,7 +150,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
||||||
};
|
};
|
||||||
|
|
||||||
/* this is going to have a few of the MSBs set that we need to
|
/* this is going to have a few of the MSBs set that we need to
|
||||||
* clear */
|
* clear
|
||||||
|
*/
|
||||||
bitmap_complement(gpu_resources.queue_bitmap,
|
bitmap_complement(gpu_resources.queue_bitmap,
|
||||||
adev->gfx.mec.queue_bitmap,
|
adev->gfx.mec.queue_bitmap,
|
||||||
KGD_MAX_QUEUES);
|
KGD_MAX_QUEUES);
|
||||||
|
@ -162,7 +165,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
||||||
gpu_resources.queue_bitmap);
|
gpu_resources.queue_bitmap);
|
||||||
|
|
||||||
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
|
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
|
||||||
* nbits is not compile time constant */
|
* nbits is not compile time constant
|
||||||
|
*/
|
||||||
last_valid_bit = 1 /* only first MEC can have compute queues */
|
last_valid_bit = 1 /* only first MEC can have compute queues */
|
||||||
* adev->gfx.mec.num_pipe_per_mec
|
* adev->gfx.mec.num_pipe_per_mec
|
||||||
* adev->gfx.mec.num_queue_per_pipe;
|
* adev->gfx.mec.num_queue_per_pipe;
|
||||||
|
@ -335,6 +339,40 @@ void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
|
||||||
amdgpu_bo_unref(&(bo));
|
amdgpu_bo_unref(&(bo));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
|
||||||
|
void **mem_obj)
|
||||||
|
{
|
||||||
|
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||||
|
struct amdgpu_bo *bo = NULL;
|
||||||
|
struct amdgpu_bo_param bp;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
memset(&bp, 0, sizeof(bp));
|
||||||
|
bp.size = size;
|
||||||
|
bp.byte_align = 1;
|
||||||
|
bp.domain = AMDGPU_GEM_DOMAIN_GWS;
|
||||||
|
bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
|
||||||
|
bp.type = ttm_bo_type_device;
|
||||||
|
bp.resv = NULL;
|
||||||
|
|
||||||
|
r = amdgpu_bo_create(adev, &bp, &bo);
|
||||||
|
if (r) {
|
||||||
|
dev_err(adev->dev,
|
||||||
|
"failed to allocate gws BO for amdkfd (%d)\n", r);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
*mem_obj = bo;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
|
||||||
|
{
|
||||||
|
struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;
|
||||||
|
|
||||||
|
amdgpu_bo_unref(&bo);
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
|
uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
|
||||||
enum kgd_engine_type type)
|
enum kgd_engine_type type)
|
||||||
{
|
{
|
||||||
|
@ -518,6 +556,34 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
|
||||||
|
|
||||||
return adev->gmc.xgmi.hive_id;
|
return adev->gmc.xgmi.hive_id;
|
||||||
}
|
}
|
||||||
|
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
|
||||||
|
{
|
||||||
|
struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
|
||||||
|
struct amdgpu_device *adev = (struct amdgpu_device *)dst;
|
||||||
|
int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
|
||||||
|
|
||||||
|
if (ret < 0) {
|
||||||
|
DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
|
||||||
|
adev->gmc.xgmi.physical_node_id,
|
||||||
|
peer_adev->gmc.xgmi.physical_node_id, ret);
|
||||||
|
ret = 0;
|
||||||
|
}
|
||||||
|
return (uint8_t)ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
|
||||||
|
{
|
||||||
|
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||||
|
|
||||||
|
return adev->rmmio_remap.bus_addr;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd)
|
||||||
|
{
|
||||||
|
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||||
|
|
||||||
|
return adev->gds.gws_size;
|
||||||
|
}
|
||||||
|
|
||||||
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
|
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
|
||||||
uint32_t vmid, uint64_t gpu_addr,
|
uint32_t vmid, uint64_t gpu_addr,
|
||||||
|
|
|
@ -61,7 +61,6 @@ struct kgd_mem {
|
||||||
|
|
||||||
atomic_t invalid;
|
atomic_t invalid;
|
||||||
struct amdkfd_process_info *process_info;
|
struct amdkfd_process_info *process_info;
|
||||||
struct page **user_pages;
|
|
||||||
|
|
||||||
struct amdgpu_sync sync;
|
struct amdgpu_sync sync;
|
||||||
|
|
||||||
|
@ -154,6 +153,10 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
|
||||||
void **mem_obj, uint64_t *gpu_addr,
|
void **mem_obj, uint64_t *gpu_addr,
|
||||||
void **cpu_ptr, bool mqd_gfx9);
|
void **cpu_ptr, bool mqd_gfx9);
|
||||||
void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
|
void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
|
||||||
|
int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj);
|
||||||
|
void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj);
|
||||||
|
int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem);
|
||||||
|
int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
|
||||||
uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
|
uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
|
||||||
enum kgd_engine_type type);
|
enum kgd_engine_type type);
|
||||||
void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
|
void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
|
||||||
|
@ -169,6 +172,9 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
|
||||||
uint32_t *flags);
|
uint32_t *flags);
|
||||||
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
|
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
|
||||||
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
|
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
|
||||||
|
uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
|
||||||
|
uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd);
|
||||||
|
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
|
||||||
|
|
||||||
#define read_user_wptr(mmptr, wptr, dst) \
|
#define read_user_wptr(mmptr, wptr, dst) \
|
||||||
({ \
|
({ \
|
||||||
|
|
|
@ -310,7 +310,7 @@ static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
|
||||||
retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
|
retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
|
||||||
m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
|
m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
|
||||||
|
|
||||||
pr_debug("kfd: sdma base address: 0x%x\n", retval);
|
pr_debug("sdma base address: 0x%x\n", retval);
|
||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
|
@ -266,7 +266,7 @@ static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
|
||||||
|
|
||||||
retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
|
retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
|
||||||
m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
|
m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
|
||||||
pr_debug("kfd: sdma base address: 0x%x\n", retval);
|
pr_debug("sdma base address: 0x%x\n", retval);
|
||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
|
@ -225,8 +225,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
|
||||||
|
|
||||||
lock_srbm(kgd, 0, 0, 0, vmid);
|
lock_srbm(kgd, 0, 0, 0, vmid);
|
||||||
|
|
||||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
|
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
|
||||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
|
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
|
||||||
/* APE1 no longer exists on GFX9 */
|
/* APE1 no longer exists on GFX9 */
|
||||||
|
|
||||||
unlock_srbm(kgd);
|
unlock_srbm(kgd);
|
||||||
|
@ -369,7 +369,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
|
||||||
value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
|
value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
|
||||||
value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
|
value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
|
||||||
((mec << 5) | (pipe << 3) | queue_id | 0x80));
|
((mec << 5) | (pipe << 3) | queue_id | 0x80));
|
||||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
|
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
|
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
|
||||||
|
@ -378,13 +378,13 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
|
||||||
|
|
||||||
for (reg = hqd_base;
|
for (reg = hqd_base;
|
||||||
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
|
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
|
||||||
WREG32(reg, mqd_hqd[reg - hqd_base]);
|
WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
|
||||||
|
|
||||||
|
|
||||||
/* Activate doorbell logic before triggering WPTR poll. */
|
/* Activate doorbell logic before triggering WPTR poll. */
|
||||||
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
|
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
|
||||||
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
|
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
|
||||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
|
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
|
||||||
|
|
||||||
if (wptr) {
|
if (wptr) {
|
||||||
/* Don't read wptr with get_user because the user
|
/* Don't read wptr with get_user because the user
|
||||||
|
@ -413,25 +413,25 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
|
||||||
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
|
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
|
||||||
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
|
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
|
||||||
|
|
||||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
|
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
|
||||||
lower_32_bits(guessed_wptr));
|
lower_32_bits(guessed_wptr));
|
||||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
|
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
|
||||||
upper_32_bits(guessed_wptr));
|
upper_32_bits(guessed_wptr));
|
||||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
|
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
|
||||||
lower_32_bits((uintptr_t)wptr));
|
lower_32_bits((uintptr_t)wptr));
|
||||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
|
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
|
||||||
upper_32_bits((uintptr_t)wptr));
|
upper_32_bits((uintptr_t)wptr));
|
||||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
|
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
|
||||||
get_queue_mask(adev, pipe_id, queue_id));
|
get_queue_mask(adev, pipe_id, queue_id));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Start the EOP fetcher */
|
/* Start the EOP fetcher */
|
||||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
|
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
|
||||||
REG_SET_FIELD(m->cp_hqd_eop_rptr,
|
REG_SET_FIELD(m->cp_hqd_eop_rptr,
|
||||||
CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
|
CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
|
||||||
|
|
||||||
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
|
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
|
||||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
|
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
|
||||||
|
|
||||||
release_queue(kgd);
|
release_queue(kgd);
|
||||||
|
|
||||||
|
@ -633,7 +633,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
|
||||||
acquire_queue(kgd, pipe_id, queue_id);
|
acquire_queue(kgd, pipe_id, queue_id);
|
||||||
|
|
||||||
if (m->cp_hqd_vmid == 0)
|
if (m->cp_hqd_vmid == 0)
|
||||||
WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
|
WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
|
||||||
|
|
||||||
switch (reset_type) {
|
switch (reset_type) {
|
||||||
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
|
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
|
||||||
|
@ -647,7 +647,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
|
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
|
||||||
|
|
||||||
end_jiffies = (utimeout * HZ / 1000) + jiffies;
|
end_jiffies = (utimeout * HZ / 1000) + jiffies;
|
||||||
while (true) {
|
while (true) {
|
||||||
|
@ -726,29 +726,8 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
|
||||||
return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
|
return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
|
static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
|
||||||
{
|
uint32_t flush_type)
|
||||||
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
|
|
||||||
|
|
||||||
/* Use legacy mode tlb invalidation.
|
|
||||||
*
|
|
||||||
* Currently on Raven the code below is broken for anything but
|
|
||||||
* legacy mode due to a MMHUB power gating problem. A workaround
|
|
||||||
* is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
|
|
||||||
* == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
|
|
||||||
* bit.
|
|
||||||
*
|
|
||||||
* TODO 1: agree on the right set of invalidation registers for
|
|
||||||
* KFD use. Use the last one for now. Invalidate both GC and
|
|
||||||
* MMHUB.
|
|
||||||
*
|
|
||||||
* TODO 2: support range-based invalidation, requires kfd2kgd
|
|
||||||
* interface change
|
|
||||||
*/
|
|
||||||
amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
|
|
||||||
{
|
{
|
||||||
signed long r;
|
signed long r;
|
||||||
uint32_t seq;
|
uint32_t seq;
|
||||||
|
@ -761,7 +740,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
|
||||||
PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
|
PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
|
||||||
PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
|
PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
|
||||||
PACKET3_INVALIDATE_TLBS_PASID(pasid) |
|
PACKET3_INVALIDATE_TLBS_PASID(pasid) |
|
||||||
PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */
|
PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
|
||||||
amdgpu_fence_emit_polling(ring, &seq);
|
amdgpu_fence_emit_polling(ring, &seq);
|
||||||
amdgpu_ring_commit(ring);
|
amdgpu_ring_commit(ring);
|
||||||
spin_unlock(&adev->gfx.kiq.ring_lock);
|
spin_unlock(&adev->gfx.kiq.ring_lock);
|
||||||
|
@ -780,12 +759,16 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
|
||||||
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
|
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
|
||||||
int vmid;
|
int vmid;
|
||||||
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
|
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
|
||||||
|
uint32_t flush_type = 0;
|
||||||
|
|
||||||
if (adev->in_gpu_reset)
|
if (adev->in_gpu_reset)
|
||||||
return -EIO;
|
return -EIO;
|
||||||
|
if (adev->gmc.xgmi.num_physical_nodes &&
|
||||||
|
adev->asic_type == CHIP_VEGA20)
|
||||||
|
flush_type = 2;
|
||||||
|
|
||||||
if (ring->sched.ready)
|
if (ring->sched.ready)
|
||||||
return invalidate_tlbs_with_kiq(adev, pasid);
|
return invalidate_tlbs_with_kiq(adev, pasid, flush_type);
|
||||||
|
|
||||||
for (vmid = 0; vmid < 16; vmid++) {
|
for (vmid = 0; vmid < 16; vmid++) {
|
||||||
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
|
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
|
||||||
|
@ -793,7 +776,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
|
||||||
if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
|
if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
|
||||||
if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
|
if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
|
||||||
== pasid) {
|
== pasid) {
|
||||||
write_vmid_invalidate_request(kgd, vmid);
|
amdgpu_gmc_flush_gpu_tlb(adev, vmid,
|
||||||
|
flush_type);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -811,7 +795,22 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
write_vmid_invalidate_request(kgd, vmid);
|
/* Use legacy mode tlb invalidation.
|
||||||
|
*
|
||||||
|
* Currently on Raven the code below is broken for anything but
|
||||||
|
* legacy mode due to a MMHUB power gating problem. A workaround
|
||||||
|
* is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
|
||||||
|
* == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
|
||||||
|
* bit.
|
||||||
|
*
|
||||||
|
* TODO 1: agree on the right set of invalidation registers for
|
||||||
|
* KFD use. Use the last one for now. Invalidate both GC and
|
||||||
|
* MMHUB.
|
||||||
|
*
|
||||||
|
* TODO 2: support range-based invalidation, requires kfd2kgd
|
||||||
|
* interface change
|
||||||
|
*/
|
||||||
|
amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -838,7 +837,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
|
||||||
|
|
||||||
mutex_lock(&adev->grbm_idx_mutex);
|
mutex_lock(&adev->grbm_idx_mutex);
|
||||||
|
|
||||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
|
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
|
||||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
|
WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
|
||||||
|
|
||||||
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
|
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
|
||||||
|
@ -848,7 +847,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
|
||||||
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
|
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
|
||||||
SE_BROADCAST_WRITES, 1);
|
SE_BROADCAST_WRITES, 1);
|
||||||
|
|
||||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
|
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
|
||||||
mutex_unlock(&adev->grbm_idx_mutex);
|
mutex_unlock(&adev->grbm_idx_mutex);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -30,6 +30,7 @@
|
||||||
#include "amdgpu_object.h"
|
#include "amdgpu_object.h"
|
||||||
#include "amdgpu_vm.h"
|
#include "amdgpu_vm.h"
|
||||||
#include "amdgpu_amdkfd.h"
|
#include "amdgpu_amdkfd.h"
|
||||||
|
#include "amdgpu_dma_buf.h"
|
||||||
|
|
||||||
/* Special VM and GART address alignment needed for VI pre-Fiji due to
|
/* Special VM and GART address alignment needed for VI pre-Fiji due to
|
||||||
* a HW bug.
|
* a HW bug.
|
||||||
|
@ -456,6 +457,17 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
|
||||||
mutex_unlock(&process_info->lock);
|
mutex_unlock(&process_info->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
|
||||||
|
struct amdkfd_process_info *process_info)
|
||||||
|
{
|
||||||
|
struct ttm_validate_buffer *bo_list_entry;
|
||||||
|
|
||||||
|
bo_list_entry = &mem->validate_list;
|
||||||
|
mutex_lock(&process_info->lock);
|
||||||
|
list_del(&bo_list_entry->head);
|
||||||
|
mutex_unlock(&process_info->lock);
|
||||||
|
}
|
||||||
|
|
||||||
/* Initializes user pages. It registers the MMU notifier and validates
|
/* Initializes user pages. It registers the MMU notifier and validates
|
||||||
* the userptr BO in the GTT domain.
|
* the userptr BO in the GTT domain.
|
||||||
*
|
*
|
||||||
|
@ -491,28 +503,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If no restore worker is running concurrently, user_pages
|
ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
|
||||||
* should not be allocated
|
|
||||||
*/
|
|
||||||
WARN(mem->user_pages, "Leaking user_pages array");
|
|
||||||
|
|
||||||
mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
|
|
||||||
sizeof(struct page *),
|
|
||||||
GFP_KERNEL | __GFP_ZERO);
|
|
||||||
if (!mem->user_pages) {
|
|
||||||
pr_err("%s: Failed to allocate pages array\n", __func__);
|
|
||||||
ret = -ENOMEM;
|
|
||||||
goto unregister_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
|
|
||||||
if (ret) {
|
if (ret) {
|
||||||
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
|
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
|
||||||
goto free_out;
|
goto unregister_out;
|
||||||
}
|
}
|
||||||
|
|
||||||
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
|
|
||||||
|
|
||||||
ret = amdgpu_bo_reserve(bo, true);
|
ret = amdgpu_bo_reserve(bo, true);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
pr_err("%s: Failed to reserve BO\n", __func__);
|
pr_err("%s: Failed to reserve BO\n", __func__);
|
||||||
|
@ -525,11 +521,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
|
||||||
amdgpu_bo_unreserve(bo);
|
amdgpu_bo_unreserve(bo);
|
||||||
|
|
||||||
release_out:
|
release_out:
|
||||||
if (ret)
|
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
|
||||||
release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
|
|
||||||
free_out:
|
|
||||||
kvfree(mem->user_pages);
|
|
||||||
mem->user_pages = NULL;
|
|
||||||
unregister_out:
|
unregister_out:
|
||||||
if (ret)
|
if (ret)
|
||||||
amdgpu_mn_unregister(bo);
|
amdgpu_mn_unregister(bo);
|
||||||
|
@ -588,7 +580,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
|
||||||
ctx->kfd_bo.priority = 0;
|
ctx->kfd_bo.priority = 0;
|
||||||
ctx->kfd_bo.tv.bo = &bo->tbo;
|
ctx->kfd_bo.tv.bo = &bo->tbo;
|
||||||
ctx->kfd_bo.tv.num_shared = 1;
|
ctx->kfd_bo.tv.num_shared = 1;
|
||||||
ctx->kfd_bo.user_pages = NULL;
|
|
||||||
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
|
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
|
||||||
|
|
||||||
amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
|
amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
|
||||||
|
@ -652,7 +643,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
|
||||||
ctx->kfd_bo.priority = 0;
|
ctx->kfd_bo.priority = 0;
|
||||||
ctx->kfd_bo.tv.bo = &bo->tbo;
|
ctx->kfd_bo.tv.bo = &bo->tbo;
|
||||||
ctx->kfd_bo.tv.num_shared = 1;
|
ctx->kfd_bo.tv.num_shared = 1;
|
||||||
ctx->kfd_bo.user_pages = NULL;
|
|
||||||
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
|
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
|
||||||
|
|
||||||
i = 0;
|
i = 0;
|
||||||
|
@ -896,6 +886,9 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
|
||||||
AMDGPU_FENCE_OWNER_KFD, false);
|
AMDGPU_FENCE_OWNER_KFD, false);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto wait_pd_fail;
|
goto wait_pd_fail;
|
||||||
|
ret = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1);
|
||||||
|
if (ret)
|
||||||
|
goto reserve_shared_fail;
|
||||||
amdgpu_bo_fence(vm->root.base.bo,
|
amdgpu_bo_fence(vm->root.base.bo,
|
||||||
&vm->process_info->eviction_fence->base, true);
|
&vm->process_info->eviction_fence->base, true);
|
||||||
amdgpu_bo_unreserve(vm->root.base.bo);
|
amdgpu_bo_unreserve(vm->root.base.bo);
|
||||||
|
@ -909,6 +902,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
reserve_shared_fail:
|
||||||
wait_pd_fail:
|
wait_pd_fail:
|
||||||
validate_pd_fail:
|
validate_pd_fail:
|
||||||
amdgpu_bo_unreserve(vm->root.base.bo);
|
amdgpu_bo_unreserve(vm->root.base.bo);
|
||||||
|
@ -1109,7 +1103,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||||
if (!offset || !*offset)
|
if (!offset || !*offset)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
user_addr = *offset;
|
user_addr = *offset;
|
||||||
} else if (flags & ALLOC_MEM_FLAGS_DOORBELL) {
|
} else if (flags & (ALLOC_MEM_FLAGS_DOORBELL |
|
||||||
|
ALLOC_MEM_FLAGS_MMIO_REMAP)) {
|
||||||
domain = AMDGPU_GEM_DOMAIN_GTT;
|
domain = AMDGPU_GEM_DOMAIN_GTT;
|
||||||
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
|
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
|
||||||
bo_type = ttm_bo_type_sg;
|
bo_type = ttm_bo_type_sg;
|
||||||
|
@ -1199,12 +1194,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||||
|
|
||||||
if (user_addr) {
|
if (user_addr) {
|
||||||
ret = init_user_pages(*mem, current->mm, user_addr);
|
ret = init_user_pages(*mem, current->mm, user_addr);
|
||||||
if (ret) {
|
if (ret)
|
||||||
mutex_lock(&avm->process_info->lock);
|
|
||||||
list_del(&(*mem)->validate_list.head);
|
|
||||||
mutex_unlock(&avm->process_info->lock);
|
|
||||||
goto allocate_init_user_pages_failed;
|
goto allocate_init_user_pages_failed;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (offset)
|
if (offset)
|
||||||
|
@ -1213,6 +1204,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
allocate_init_user_pages_failed:
|
allocate_init_user_pages_failed:
|
||||||
|
remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
|
||||||
amdgpu_bo_unref(&bo);
|
amdgpu_bo_unref(&bo);
|
||||||
/* Don't unreserve system mem limit twice */
|
/* Don't unreserve system mem limit twice */
|
||||||
goto err_reserve_limit;
|
goto err_reserve_limit;
|
||||||
|
@ -1262,15 +1254,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
|
||||||
list_del(&bo_list_entry->head);
|
list_del(&bo_list_entry->head);
|
||||||
mutex_unlock(&process_info->lock);
|
mutex_unlock(&process_info->lock);
|
||||||
|
|
||||||
/* Free user pages if necessary */
|
|
||||||
if (mem->user_pages) {
|
|
||||||
pr_debug("%s: Freeing user_pages array\n", __func__);
|
|
||||||
if (mem->user_pages[0])
|
|
||||||
release_pages(mem->user_pages,
|
|
||||||
mem->bo->tbo.ttm->num_pages);
|
|
||||||
kvfree(mem->user_pages);
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
|
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
|
||||||
if (unlikely(ret))
|
if (unlikely(ret))
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -1294,8 +1277,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
|
||||||
/* Free the sync object */
|
/* Free the sync object */
|
||||||
amdgpu_sync_free(&mem->sync);
|
amdgpu_sync_free(&mem->sync);
|
||||||
|
|
||||||
/* If the SG is not NULL, it's one we created for a doorbell
|
/* If the SG is not NULL, it's one we created for a doorbell or mmio
|
||||||
* BO. We need to free it.
|
* remap BO. We need to free it.
|
||||||
*/
|
*/
|
||||||
if (mem->bo->tbo.sg) {
|
if (mem->bo->tbo.sg) {
|
||||||
sg_free_table(mem->bo->tbo.sg);
|
sg_free_table(mem->bo->tbo.sg);
|
||||||
|
@ -1409,7 +1392,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
|
||||||
ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
|
ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
|
||||||
is_invalid_userptr);
|
is_invalid_userptr);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
pr_err("Failed to map radeon bo to gpuvm\n");
|
pr_err("Failed to map bo to gpuvm\n");
|
||||||
goto map_bo_to_gpuvm_failed;
|
goto map_bo_to_gpuvm_failed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1744,25 +1727,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
|
||||||
|
|
||||||
bo = mem->bo;
|
bo = mem->bo;
|
||||||
|
|
||||||
if (!mem->user_pages) {
|
|
||||||
mem->user_pages =
|
|
||||||
kvmalloc_array(bo->tbo.ttm->num_pages,
|
|
||||||
sizeof(struct page *),
|
|
||||||
GFP_KERNEL | __GFP_ZERO);
|
|
||||||
if (!mem->user_pages) {
|
|
||||||
pr_err("%s: Failed to allocate pages array\n",
|
|
||||||
__func__);
|
|
||||||
return -ENOMEM;
|
|
||||||
}
|
|
||||||
} else if (mem->user_pages[0]) {
|
|
||||||
release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Get updated user pages */
|
/* Get updated user pages */
|
||||||
ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
|
ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
|
||||||
mem->user_pages);
|
bo->tbo.ttm->pages);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
mem->user_pages[0] = NULL;
|
bo->tbo.ttm->pages[0] = NULL;
|
||||||
pr_info("%s: Failed to get user pages: %d\n",
|
pr_info("%s: Failed to get user pages: %d\n",
|
||||||
__func__, ret);
|
__func__, ret);
|
||||||
/* Pretend it succeeded. It will fail later
|
/* Pretend it succeeded. It will fail later
|
||||||
|
@ -1771,17 +1740,28 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
|
||||||
* stalled user mode queues.
|
* stalled user mode queues.
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Mark the BO as valid unless it was invalidated
|
|
||||||
* again concurrently
|
|
||||||
*/
|
|
||||||
if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
|
|
||||||
return -EAGAIN;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Remove invalid userptr BOs from hmm track list
|
||||||
|
*
|
||||||
|
* Stop HMM from tracking the userptr update
|
||||||
|
*/
|
||||||
|
static void untrack_invalid_user_pages(struct amdkfd_process_info *process_info)
|
||||||
|
{
|
||||||
|
struct kgd_mem *mem, *tmp_mem;
|
||||||
|
struct amdgpu_bo *bo;
|
||||||
|
|
||||||
|
list_for_each_entry_safe(mem, tmp_mem,
|
||||||
|
&process_info->userptr_inval_list,
|
||||||
|
validate_list.head) {
|
||||||
|
bo = mem->bo;
|
||||||
|
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Validate invalid userptr BOs
|
/* Validate invalid userptr BOs
|
||||||
*
|
*
|
||||||
* Validates BOs on the userptr_inval_list, and moves them back to the
|
* Validates BOs on the userptr_inval_list, and moves them back to the
|
||||||
|
@ -1806,7 +1786,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
|
||||||
GFP_KERNEL);
|
GFP_KERNEL);
|
||||||
if (!pd_bo_list_entries) {
|
if (!pd_bo_list_entries) {
|
||||||
pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
|
pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
|
||||||
return -ENOMEM;
|
ret = -ENOMEM;
|
||||||
|
goto out_no_mem;
|
||||||
}
|
}
|
||||||
|
|
||||||
INIT_LIST_HEAD(&resv_list);
|
INIT_LIST_HEAD(&resv_list);
|
||||||
|
@ -1830,7 +1811,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
|
||||||
ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
|
ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
|
||||||
WARN(!list_empty(&duplicates), "Duplicates should be empty");
|
WARN(!list_empty(&duplicates), "Duplicates should be empty");
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out;
|
goto out_free;
|
||||||
|
|
||||||
amdgpu_sync_create(&sync);
|
amdgpu_sync_create(&sync);
|
||||||
|
|
||||||
|
@ -1846,10 +1827,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
|
||||||
|
|
||||||
bo = mem->bo;
|
bo = mem->bo;
|
||||||
|
|
||||||
/* Copy pages array and validate the BO if we got user pages */
|
/* Validate the BO if we got user pages */
|
||||||
if (mem->user_pages[0]) {
|
if (bo->tbo.ttm->pages[0]) {
|
||||||
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
|
|
||||||
mem->user_pages);
|
|
||||||
amdgpu_bo_placement_from_domain(bo, mem->domain);
|
amdgpu_bo_placement_from_domain(bo, mem->domain);
|
||||||
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
|
@ -1858,16 +1837,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Validate succeeded, now the BO owns the pages, free
|
|
||||||
* our copy of the pointer array. Put this BO back on
|
|
||||||
* the userptr_valid_list. If we need to revalidate
|
|
||||||
* it, we need to start from scratch.
|
|
||||||
*/
|
|
||||||
kvfree(mem->user_pages);
|
|
||||||
mem->user_pages = NULL;
|
|
||||||
list_move_tail(&mem->validate_list.head,
|
list_move_tail(&mem->validate_list.head,
|
||||||
&process_info->userptr_valid_list);
|
&process_info->userptr_valid_list);
|
||||||
|
|
||||||
|
/* Stop HMM from tracking the userptr update. We don't check the return
|
||||||
|
* value for concurrent CPU page table update because we will
|
||||||
|
* reschedule the restore worker if process_info->evicted_bos
|
||||||
|
* is updated.
|
||||||
|
*/
|
||||||
|
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
|
||||||
|
|
||||||
/* Update mapping. If the BO was not validated
|
/* Update mapping. If the BO was not validated
|
||||||
* (because we couldn't get user pages), this will
|
* (because we couldn't get user pages), this will
|
||||||
* clear the page table entries, which will result in
|
* clear the page table entries, which will result in
|
||||||
|
@ -1897,8 +1876,9 @@ unreserve_out:
|
||||||
ttm_eu_backoff_reservation(&ticket, &resv_list);
|
ttm_eu_backoff_reservation(&ticket, &resv_list);
|
||||||
amdgpu_sync_wait(&sync, false);
|
amdgpu_sync_wait(&sync, false);
|
||||||
amdgpu_sync_free(&sync);
|
amdgpu_sync_free(&sync);
|
||||||
out:
|
out_free:
|
||||||
kfree(pd_bo_list_entries);
|
kfree(pd_bo_list_entries);
|
||||||
|
out_no_mem:
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -1963,7 +1943,9 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
|
||||||
* hanging. No point trying again.
|
* hanging. No point trying again.
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
unlock_out:
|
unlock_out:
|
||||||
|
untrack_invalid_user_pages(process_info);
|
||||||
mutex_unlock(&process_info->lock);
|
mutex_unlock(&process_info->lock);
|
||||||
mmput(mm);
|
mmput(mm);
|
||||||
put_task_struct(usertask);
|
put_task_struct(usertask);
|
||||||
|
@ -2130,3 +2112,88 @@ ttm_reserve_fail:
|
||||||
kfree(pd_bo_list);
|
kfree(pd_bo_list);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
|
||||||
|
{
|
||||||
|
struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
|
||||||
|
struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (!info || !gws)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
|
||||||
|
if (!*mem)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
mutex_init(&(*mem)->lock);
|
||||||
|
(*mem)->bo = amdgpu_bo_ref(gws_bo);
|
||||||
|
(*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
|
||||||
|
(*mem)->process_info = process_info;
|
||||||
|
add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
|
||||||
|
amdgpu_sync_create(&(*mem)->sync);
|
||||||
|
|
||||||
|
|
||||||
|
/* Validate gws bo the first time it is added to process */
|
||||||
|
mutex_lock(&(*mem)->process_info->lock);
|
||||||
|
ret = amdgpu_bo_reserve(gws_bo, false);
|
||||||
|
if (unlikely(ret)) {
|
||||||
|
pr_err("Reserve gws bo failed %d\n", ret);
|
||||||
|
goto bo_reservation_failure;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
|
||||||
|
if (ret) {
|
||||||
|
pr_err("GWS BO validate failed %d\n", ret);
|
||||||
|
goto bo_validation_failure;
|
||||||
|
}
|
||||||
|
/* GWS resource is shared between amdgpu and amdkfd
|
||||||
|
* Add process eviction fence to bo so they can
|
||||||
|
* evict each other.
|
||||||
|
*/
|
||||||
|
amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
|
||||||
|
amdgpu_bo_unreserve(gws_bo);
|
||||||
|
mutex_unlock(&(*mem)->process_info->lock);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
bo_validation_failure:
|
||||||
|
amdgpu_bo_unreserve(gws_bo);
|
||||||
|
bo_reservation_failure:
|
||||||
|
mutex_unlock(&(*mem)->process_info->lock);
|
||||||
|
amdgpu_sync_free(&(*mem)->sync);
|
||||||
|
remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
|
||||||
|
amdgpu_bo_unref(&gws_bo);
|
||||||
|
mutex_destroy(&(*mem)->lock);
|
||||||
|
kfree(*mem);
|
||||||
|
*mem = NULL;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
|
||||||
|
struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
|
||||||
|
struct amdgpu_bo *gws_bo = kgd_mem->bo;
|
||||||
|
|
||||||
|
/* Remove BO from process's validate list so restore worker won't touch
|
||||||
|
* it anymore
|
||||||
|
*/
|
||||||
|
remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
|
||||||
|
|
||||||
|
ret = amdgpu_bo_reserve(gws_bo, false);
|
||||||
|
if (unlikely(ret)) {
|
||||||
|
pr_err("Reserve gws bo failed %d\n", ret);
|
||||||
|
//TODO add BO back to validate_list?
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
amdgpu_amdkfd_remove_eviction_fence(gws_bo,
|
||||||
|
process_info->eviction_fence);
|
||||||
|
amdgpu_bo_unreserve(gws_bo);
|
||||||
|
amdgpu_sync_free(&kgd_mem->sync);
|
||||||
|
amdgpu_bo_unref(&gws_bo);
|
||||||
|
mutex_destroy(&kgd_mem->lock);
|
||||||
|
kfree(mem);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
|
@ -81,9 +81,9 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
kref_init(&list->refcount);
|
kref_init(&list->refcount);
|
||||||
list->gds_obj = adev->gds.gds_gfx_bo;
|
list->gds_obj = NULL;
|
||||||
list->gws_obj = adev->gds.gws_gfx_bo;
|
list->gws_obj = NULL;
|
||||||
list->oa_obj = adev->gds.oa_gfx_bo;
|
list->oa_obj = NULL;
|
||||||
|
|
||||||
array = amdgpu_bo_list_array_entry(list, 0);
|
array = amdgpu_bo_list_array_entry(list, 0);
|
||||||
memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
|
memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
|
||||||
|
|
|
@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry {
|
||||||
struct amdgpu_bo_va *bo_va;
|
struct amdgpu_bo_va *bo_va;
|
||||||
uint32_t priority;
|
uint32_t priority;
|
||||||
struct page **user_pages;
|
struct page **user_pages;
|
||||||
int user_invalidated;
|
bool user_invalidated;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct amdgpu_bo_list {
|
struct amdgpu_bo_list {
|
||||||
|
|
|
@ -52,7 +52,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
|
||||||
p->uf_entry.tv.bo = &bo->tbo;
|
p->uf_entry.tv.bo = &bo->tbo;
|
||||||
/* One for TTM and one for the CS job */
|
/* One for TTM and one for the CS job */
|
||||||
p->uf_entry.tv.num_shared = 2;
|
p->uf_entry.tv.num_shared = 2;
|
||||||
p->uf_entry.user_pages = NULL;
|
|
||||||
|
|
||||||
drm_gem_object_put_unlocked(gobj);
|
drm_gem_object_put_unlocked(gobj);
|
||||||
|
|
||||||
|
@ -542,14 +541,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
|
||||||
if (usermm && usermm != current->mm)
|
if (usermm && usermm != current->mm)
|
||||||
return -EPERM;
|
return -EPERM;
|
||||||
|
|
||||||
/* Check if we have user pages and nobody bound the BO already */
|
if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
|
||||||
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
|
lobj->user_invalidated && lobj->user_pages) {
|
||||||
lobj->user_pages) {
|
|
||||||
amdgpu_bo_placement_from_domain(bo,
|
amdgpu_bo_placement_from_domain(bo,
|
||||||
AMDGPU_GEM_DOMAIN_CPU);
|
AMDGPU_GEM_DOMAIN_CPU);
|
||||||
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||||
if (r)
|
if (r)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
|
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
|
||||||
lobj->user_pages);
|
lobj->user_pages);
|
||||||
binding_userptr = true;
|
binding_userptr = true;
|
||||||
|
@ -580,7 +579,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
|
||||||
struct amdgpu_bo *gds;
|
struct amdgpu_bo *gds;
|
||||||
struct amdgpu_bo *gws;
|
struct amdgpu_bo *gws;
|
||||||
struct amdgpu_bo *oa;
|
struct amdgpu_bo *oa;
|
||||||
unsigned tries = 10;
|
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
INIT_LIST_HEAD(&p->validated);
|
INIT_LIST_HEAD(&p->validated);
|
||||||
|
@ -616,79 +614,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
|
||||||
if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
|
if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
|
||||||
list_add(&p->uf_entry.tv.head, &p->validated);
|
list_add(&p->uf_entry.tv.head, &p->validated);
|
||||||
|
|
||||||
while (1) {
|
/* Get userptr backing pages. If pages are updated after being registered
|
||||||
struct list_head need_pages;
|
* in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
|
||||||
|
* amdgpu_ttm_backend_bind() to flush and invalidate new pages
|
||||||
|
*/
|
||||||
|
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
|
||||||
|
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
|
||||||
|
bool userpage_invalidated = false;
|
||||||
|
int i;
|
||||||
|
|
||||||
r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
|
e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
|
||||||
&duplicates);
|
sizeof(struct page *),
|
||||||
if (unlikely(r != 0)) {
|
GFP_KERNEL | __GFP_ZERO);
|
||||||
if (r != -ERESTARTSYS)
|
if (!e->user_pages) {
|
||||||
DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
|
DRM_ERROR("calloc failure\n");
|
||||||
goto error_free_pages;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
|
|
||||||
INIT_LIST_HEAD(&need_pages);
|
r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages);
|
||||||
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
|
if (r) {
|
||||||
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
|
kvfree(e->user_pages);
|
||||||
|
e->user_pages = NULL;
|
||||||
if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
|
return r;
|
||||||
&e->user_invalidated) && e->user_pages) {
|
|
||||||
|
|
||||||
/* We acquired a page array, but somebody
|
|
||||||
* invalidated it. Free it and try again
|
|
||||||
*/
|
|
||||||
release_pages(e->user_pages,
|
|
||||||
bo->tbo.ttm->num_pages);
|
|
||||||
kvfree(e->user_pages);
|
|
||||||
e->user_pages = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
|
|
||||||
!e->user_pages) {
|
|
||||||
list_del(&e->tv.head);
|
|
||||||
list_add(&e->tv.head, &need_pages);
|
|
||||||
|
|
||||||
amdgpu_bo_unreserve(bo);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (list_empty(&need_pages))
|
for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
|
||||||
break;
|
if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
|
||||||
|
userpage_invalidated = true;
|
||||||
/* Unreserve everything again. */
|
break;
|
||||||
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
|
|
||||||
|
|
||||||
/* We tried too many times, just abort */
|
|
||||||
if (!--tries) {
|
|
||||||
r = -EDEADLK;
|
|
||||||
DRM_ERROR("deadlock in %s\n", __func__);
|
|
||||||
goto error_free_pages;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Fill the page arrays for all userptrs. */
|
|
||||||
list_for_each_entry(e, &need_pages, tv.head) {
|
|
||||||
struct ttm_tt *ttm = e->tv.bo->ttm;
|
|
||||||
|
|
||||||
e->user_pages = kvmalloc_array(ttm->num_pages,
|
|
||||||
sizeof(struct page*),
|
|
||||||
GFP_KERNEL | __GFP_ZERO);
|
|
||||||
if (!e->user_pages) {
|
|
||||||
r = -ENOMEM;
|
|
||||||
DRM_ERROR("calloc failure in %s\n", __func__);
|
|
||||||
goto error_free_pages;
|
|
||||||
}
|
|
||||||
|
|
||||||
r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
|
|
||||||
if (r) {
|
|
||||||
DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
|
|
||||||
kvfree(e->user_pages);
|
|
||||||
e->user_pages = NULL;
|
|
||||||
goto error_free_pages;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
e->user_invalidated = userpage_invalidated;
|
||||||
|
}
|
||||||
|
|
||||||
/* And try again. */
|
r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
|
||||||
list_splice(&need_pages, &p->validated);
|
&duplicates);
|
||||||
|
if (unlikely(r != 0)) {
|
||||||
|
if (r != -ERESTARTSYS)
|
||||||
|
DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
|
||||||
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
|
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
|
||||||
|
@ -757,17 +721,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
|
||||||
error_validate:
|
error_validate:
|
||||||
if (r)
|
if (r)
|
||||||
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
|
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
|
||||||
|
out:
|
||||||
error_free_pages:
|
|
||||||
|
|
||||||
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
|
|
||||||
if (!e->user_pages)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
|
|
||||||
kvfree(e->user_pages);
|
|
||||||
}
|
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1054,11 +1008,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
|
||||||
j++;
|
j++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* UVD & VCE fw doesn't support user fences */
|
/* MM engine doesn't support user fences */
|
||||||
ring = to_amdgpu_ring(parser->entity->rq->sched);
|
ring = to_amdgpu_ring(parser->entity->rq->sched);
|
||||||
if (parser->job->uf_addr && (
|
if (parser->job->uf_addr && ring->funcs->no_user_fence)
|
||||||
ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
|
|
||||||
ring->funcs->type == AMDGPU_RING_TYPE_VCE))
|
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
|
return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
|
||||||
|
@ -1328,7 +1280,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
|
||||||
struct amdgpu_bo_list_entry *e;
|
struct amdgpu_bo_list_entry *e;
|
||||||
struct amdgpu_job *job;
|
struct amdgpu_job *job;
|
||||||
uint64_t seq;
|
uint64_t seq;
|
||||||
|
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
job = p->job;
|
job = p->job;
|
||||||
|
@ -1338,15 +1289,23 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
|
||||||
if (r)
|
if (r)
|
||||||
goto error_unlock;
|
goto error_unlock;
|
||||||
|
|
||||||
/* No memory allocation is allowed while holding the mn lock */
|
/* No memory allocation is allowed while holding the mn lock.
|
||||||
|
* p->mn is held until amdgpu_cs_submit is finished and the fence is added
|
||||||
|
* to BOs.
|
||||||
|
*/
|
||||||
amdgpu_mn_lock(p->mn);
|
amdgpu_mn_lock(p->mn);
|
||||||
|
|
||||||
|
/* If userptrs are invalidated after amdgpu_cs_parser_bos(), return
|
||||||
|
* -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
|
||||||
|
*/
|
||||||
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
|
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
|
||||||
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
|
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
|
||||||
|
|
||||||
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
|
r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
|
||||||
r = -ERESTARTSYS;
|
}
|
||||||
goto error_abort;
|
if (r) {
|
||||||
}
|
r = -EAGAIN;
|
||||||
|
goto error_abort;
|
||||||
}
|
}
|
||||||
|
|
||||||
job->owner = p->filp;
|
job->owner = p->filp;
|
||||||
|
@ -1442,6 +1401,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
||||||
|
|
||||||
out:
|
out:
|
||||||
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
|
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -97,6 +97,28 @@ static const char *amdgpu_asic_name[] = {
|
||||||
"LAST",
|
"LAST",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DOC: pcie_replay_count
|
||||||
|
*
|
||||||
|
* The amdgpu driver provides a sysfs API for reporting the total number
|
||||||
|
* of PCIe replays (NAKs).
|
||||||
|
* The file pcie_replay_count is used for this and returns the total
|
||||||
|
* number of replays as a sum of the NAKs generated and NAKs received
|
||||||
|
*/
|
||||||
|
|
||||||
|
static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
|
||||||
|
struct device_attribute *attr, char *buf)
|
||||||
|
{
|
||||||
|
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||||
|
struct amdgpu_device *adev = ddev->dev_private;
|
||||||
|
uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
|
||||||
|
|
||||||
|
return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
|
||||||
|
amdgpu_device_get_pcie_replay_count, NULL);
|
||||||
|
|
||||||
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
|
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -910,8 +932,10 @@ def_value:
|
||||||
* Validates certain module parameters and updates
|
* Validates certain module parameters and updates
|
||||||
* the associated values used by the driver (all asics).
|
* the associated values used by the driver (all asics).
|
||||||
*/
|
*/
|
||||||
static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
|
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
|
||||||
{
|
{
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
if (amdgpu_sched_jobs < 4) {
|
if (amdgpu_sched_jobs < 4) {
|
||||||
dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
|
dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
|
||||||
amdgpu_sched_jobs);
|
amdgpu_sched_jobs);
|
||||||
|
@ -956,12 +980,15 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
|
||||||
amdgpu_vram_page_split = 1024;
|
amdgpu_vram_page_split = 1024;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (amdgpu_lockup_timeout == 0) {
|
ret = amdgpu_device_get_job_timeout_settings(adev);
|
||||||
dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n");
|
if (ret) {
|
||||||
amdgpu_lockup_timeout = 10000;
|
dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
|
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1505,12 +1532,26 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
|
||||||
r = amdgpu_virt_request_full_gpu(adev, true);
|
r = amdgpu_virt_request_full_gpu(adev, true);
|
||||||
if (r)
|
if (r)
|
||||||
return -EAGAIN;
|
return -EAGAIN;
|
||||||
|
|
||||||
|
/* query the reg access mode at the very beginning */
|
||||||
|
amdgpu_virt_init_reg_access_mode(adev);
|
||||||
}
|
}
|
||||||
|
|
||||||
adev->pm.pp_feature = amdgpu_pp_feature_mask;
|
adev->pm.pp_feature = amdgpu_pp_feature_mask;
|
||||||
if (amdgpu_sriov_vf(adev))
|
if (amdgpu_sriov_vf(adev))
|
||||||
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
|
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
|
||||||
|
|
||||||
|
/* Read BIOS */
|
||||||
|
if (!amdgpu_get_bios(adev))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
r = amdgpu_atombios_init(adev);
|
||||||
|
if (r) {
|
||||||
|
dev_err(adev->dev, "amdgpu_atombios_init failed\n");
|
||||||
|
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
for (i = 0; i < adev->num_ip_blocks; i++) {
|
for (i = 0; i < adev->num_ip_blocks; i++) {
|
||||||
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
|
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
|
||||||
DRM_ERROR("disabled ip block: %d <%s>\n",
|
DRM_ERROR("disabled ip block: %d <%s>\n",
|
||||||
|
@ -1550,6 +1591,7 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
|
||||||
if (adev->ip_blocks[i].status.hw)
|
if (adev->ip_blocks[i].status.hw)
|
||||||
continue;
|
continue;
|
||||||
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
|
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
|
||||||
|
(amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
|
||||||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
|
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
|
||||||
r = adev->ip_blocks[i].version->funcs->hw_init(adev);
|
r = adev->ip_blocks[i].version->funcs->hw_init(adev);
|
||||||
if (r) {
|
if (r) {
|
||||||
|
@ -2473,7 +2515,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
||||||
mutex_init(&adev->lock_reset);
|
mutex_init(&adev->lock_reset);
|
||||||
mutex_init(&adev->virt.dpm_mutex);
|
mutex_init(&adev->virt.dpm_mutex);
|
||||||
|
|
||||||
amdgpu_device_check_arguments(adev);
|
r = amdgpu_device_check_arguments(adev);
|
||||||
|
if (r)
|
||||||
|
return r;
|
||||||
|
|
||||||
spin_lock_init(&adev->mmio_idx_lock);
|
spin_lock_init(&adev->mmio_idx_lock);
|
||||||
spin_lock_init(&adev->smc_idx_lock);
|
spin_lock_init(&adev->smc_idx_lock);
|
||||||
|
@ -2558,19 +2602,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
||||||
goto fence_driver_init;
|
goto fence_driver_init;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Read BIOS */
|
|
||||||
if (!amdgpu_get_bios(adev)) {
|
|
||||||
r = -EINVAL;
|
|
||||||
goto failed;
|
|
||||||
}
|
|
||||||
|
|
||||||
r = amdgpu_atombios_init(adev);
|
|
||||||
if (r) {
|
|
||||||
dev_err(adev->dev, "amdgpu_atombios_init failed\n");
|
|
||||||
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
|
|
||||||
goto failed;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* detect if we are with an SRIOV vbios */
|
/* detect if we are with an SRIOV vbios */
|
||||||
amdgpu_device_detect_sriov_bios(adev);
|
amdgpu_device_detect_sriov_bios(adev);
|
||||||
|
|
||||||
|
@ -2672,6 +2703,10 @@ fence_driver_init:
|
||||||
if (r)
|
if (r)
|
||||||
DRM_ERROR("registering pm debugfs failed (%d).\n", r);
|
DRM_ERROR("registering pm debugfs failed (%d).\n", r);
|
||||||
|
|
||||||
|
r = amdgpu_ucode_sysfs_init(adev);
|
||||||
|
if (r)
|
||||||
|
DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
|
||||||
|
|
||||||
r = amdgpu_debugfs_gem_init(adev);
|
r = amdgpu_debugfs_gem_init(adev);
|
||||||
if (r)
|
if (r)
|
||||||
DRM_ERROR("registering gem debugfs failed (%d).\n", r);
|
DRM_ERROR("registering gem debugfs failed (%d).\n", r);
|
||||||
|
@ -2712,7 +2747,13 @@ fence_driver_init:
|
||||||
}
|
}
|
||||||
|
|
||||||
/* must succeed. */
|
/* must succeed. */
|
||||||
amdgpu_ras_post_init(adev);
|
amdgpu_ras_resume(adev);
|
||||||
|
|
||||||
|
r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
|
||||||
|
if (r) {
|
||||||
|
dev_err(adev->dev, "Could not create pcie_replay_count");
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
@ -2777,6 +2818,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
|
||||||
adev->rmmio = NULL;
|
adev->rmmio = NULL;
|
||||||
amdgpu_device_doorbell_fini(adev);
|
amdgpu_device_doorbell_fini(adev);
|
||||||
amdgpu_debugfs_regs_cleanup(adev);
|
amdgpu_debugfs_regs_cleanup(adev);
|
||||||
|
device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
|
||||||
|
amdgpu_ucode_sysfs_fini(adev);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -2857,6 +2900,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
|
||||||
|
|
||||||
amdgpu_amdkfd_suspend(adev);
|
amdgpu_amdkfd_suspend(adev);
|
||||||
|
|
||||||
|
amdgpu_ras_suspend(adev);
|
||||||
|
|
||||||
r = amdgpu_device_ip_suspend_phase1(adev);
|
r = amdgpu_device_ip_suspend_phase1(adev);
|
||||||
|
|
||||||
/* evict vram memory */
|
/* evict vram memory */
|
||||||
|
@ -2977,6 +3022,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
|
||||||
|
|
||||||
drm_kms_helper_poll_enable(dev);
|
drm_kms_helper_poll_enable(dev);
|
||||||
|
|
||||||
|
amdgpu_ras_resume(adev);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Most of the connector probing functions try to acquire runtime pm
|
* Most of the connector probing functions try to acquire runtime pm
|
||||||
* refs to ensure that the GPU is powered on when connector polling is
|
* refs to ensure that the GPU is powered on when connector polling is
|
||||||
|
@ -3455,6 +3502,13 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
|
||||||
if (vram_lost)
|
if (vram_lost)
|
||||||
amdgpu_device_fill_reset_magic(tmp_adev);
|
amdgpu_device_fill_reset_magic(tmp_adev);
|
||||||
|
|
||||||
|
r = amdgpu_device_ip_late_init(tmp_adev);
|
||||||
|
if (r)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
/* must succeed. */
|
||||||
|
amdgpu_ras_resume(tmp_adev);
|
||||||
|
|
||||||
/* Update PSP FW topology after reset */
|
/* Update PSP FW topology after reset */
|
||||||
if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
|
if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
|
||||||
r = amdgpu_xgmi_update_topology(hive, tmp_adev);
|
r = amdgpu_xgmi_update_topology(hive, tmp_adev);
|
||||||
|
@ -3695,43 +3749,6 @@ skip_hw_reset:
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev,
|
|
||||||
enum pci_bus_speed *speed,
|
|
||||||
enum pcie_link_width *width)
|
|
||||||
{
|
|
||||||
struct pci_dev *pdev = adev->pdev;
|
|
||||||
enum pci_bus_speed cur_speed;
|
|
||||||
enum pcie_link_width cur_width;
|
|
||||||
u32 ret = 1;
|
|
||||||
|
|
||||||
*speed = PCI_SPEED_UNKNOWN;
|
|
||||||
*width = PCIE_LNK_WIDTH_UNKNOWN;
|
|
||||||
|
|
||||||
while (pdev) {
|
|
||||||
cur_speed = pcie_get_speed_cap(pdev);
|
|
||||||
cur_width = pcie_get_width_cap(pdev);
|
|
||||||
ret = pcie_bandwidth_available(adev->pdev, NULL,
|
|
||||||
NULL, &cur_width);
|
|
||||||
if (!ret)
|
|
||||||
cur_width = PCIE_LNK_WIDTH_RESRV;
|
|
||||||
|
|
||||||
if (cur_speed != PCI_SPEED_UNKNOWN) {
|
|
||||||
if (*speed == PCI_SPEED_UNKNOWN)
|
|
||||||
*speed = cur_speed;
|
|
||||||
else if (cur_speed < *speed)
|
|
||||||
*speed = cur_speed;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) {
|
|
||||||
if (*width == PCIE_LNK_WIDTH_UNKNOWN)
|
|
||||||
*width = cur_width;
|
|
||||||
else if (cur_width < *width)
|
|
||||||
*width = cur_width;
|
|
||||||
}
|
|
||||||
pdev = pci_upstream_bridge(pdev);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
|
* amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
|
||||||
*
|
*
|
||||||
|
@ -3765,8 +3782,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
|
||||||
if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
|
if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap,
|
pcie_bandwidth_available(adev->pdev, NULL,
|
||||||
&platform_link_width);
|
&platform_speed_cap, &platform_link_width);
|
||||||
|
|
||||||
if (adev->pm.pcie_gen_mask == 0) {
|
if (adev->pm.pcie_gen_mask == 0) {
|
||||||
/* asic caps */
|
/* asic caps */
|
||||||
|
|
|
@ -631,10 +631,6 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
|
||||||
amdgpu_dither_enum_list, sz);
|
amdgpu_dither_enum_list, sz);
|
||||||
|
|
||||||
if (amdgpu_device_has_dc_support(adev)) {
|
if (amdgpu_device_has_dc_support(adev)) {
|
||||||
adev->mode_info.max_bpc_property =
|
|
||||||
drm_property_create_range(adev->ddev, 0, "max bpc", 8, 16);
|
|
||||||
if (!adev->mode_info.max_bpc_property)
|
|
||||||
return -ENOMEM;
|
|
||||||
adev->mode_info.abm_level_property =
|
adev->mode_info.abm_level_property =
|
||||||
drm_property_create_range(adev->ddev, 0,
|
drm_property_create_range(adev->ddev, 0,
|
||||||
"abm level", 0, 4);
|
"abm level", 0, 4);
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright 2012 Advanced Micro Devices, Inc.
|
* Copyright 2019 Advanced Micro Devices, Inc.
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
@ -103,7 +103,8 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
|
||||||
* Returns:
|
* Returns:
|
||||||
* 0 on success or a negative error code on failure.
|
* 0 on success or a negative error code on failure.
|
||||||
*/
|
*/
|
||||||
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
|
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
|
||||||
|
struct vm_area_struct *vma)
|
||||||
{
|
{
|
||||||
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
|
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
|
||||||
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
|
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
|
||||||
|
@ -137,6 +138,235 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
__reservation_object_make_exclusive(struct reservation_object *obj)
|
||||||
|
{
|
||||||
|
struct dma_fence **fences;
|
||||||
|
unsigned int count;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
if (!reservation_object_get_list(obj)) /* no shared fences to convert */
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences);
|
||||||
|
if (r)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
if (count == 0) {
|
||||||
|
/* Now that was unexpected. */
|
||||||
|
} else if (count == 1) {
|
||||||
|
reservation_object_add_excl_fence(obj, fences[0]);
|
||||||
|
dma_fence_put(fences[0]);
|
||||||
|
kfree(fences);
|
||||||
|
} else {
|
||||||
|
struct dma_fence_array *array;
|
||||||
|
|
||||||
|
array = dma_fence_array_create(count, fences,
|
||||||
|
dma_fence_context_alloc(1), 0,
|
||||||
|
false);
|
||||||
|
if (!array)
|
||||||
|
goto err_fences_put;
|
||||||
|
|
||||||
|
reservation_object_add_excl_fence(obj, &array->base);
|
||||||
|
dma_fence_put(&array->base);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
err_fences_put:
|
||||||
|
while (count--)
|
||||||
|
dma_fence_put(fences[count]);
|
||||||
|
kfree(fences);
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation
|
||||||
|
* @dma_buf: Shared DMA buffer
|
||||||
|
* @attach: DMA-buf attachment
|
||||||
|
*
|
||||||
|
* Makes sure that the shared DMA buffer can be accessed by the target device.
|
||||||
|
* For now, simply pins it to the GTT domain, where it should be accessible by
|
||||||
|
* all DMA devices.
|
||||||
|
*
|
||||||
|
* Returns:
|
||||||
|
* 0 on success or a negative error code on failure.
|
||||||
|
*/
|
||||||
|
static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf,
|
||||||
|
struct dma_buf_attachment *attach)
|
||||||
|
{
|
||||||
|
struct drm_gem_object *obj = dma_buf->priv;
|
||||||
|
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
|
||||||
|
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
|
||||||
|
long r;
|
||||||
|
|
||||||
|
r = drm_gem_map_attach(dma_buf, attach);
|
||||||
|
if (r)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
r = amdgpu_bo_reserve(bo, false);
|
||||||
|
if (unlikely(r != 0))
|
||||||
|
goto error_detach;
|
||||||
|
|
||||||
|
|
||||||
|
if (attach->dev->driver != adev->dev->driver) {
|
||||||
|
/*
|
||||||
|
* We only create shared fences for internal use, but importers
|
||||||
|
* of the dmabuf rely on exclusive fences for implicitly
|
||||||
|
* tracking write hazards. As any of the current fences may
|
||||||
|
* correspond to a write, we need to convert all existing
|
||||||
|
* fences on the reservation object into a single exclusive
|
||||||
|
* fence.
|
||||||
|
*/
|
||||||
|
r = __reservation_object_make_exclusive(bo->tbo.resv);
|
||||||
|
if (r)
|
||||||
|
goto error_unreserve;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* pin buffer into GTT */
|
||||||
|
r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
|
||||||
|
if (r)
|
||||||
|
goto error_unreserve;
|
||||||
|
|
||||||
|
if (attach->dev->driver != adev->dev->driver)
|
||||||
|
bo->prime_shared_count++;
|
||||||
|
|
||||||
|
error_unreserve:
|
||||||
|
amdgpu_bo_unreserve(bo);
|
||||||
|
|
||||||
|
error_detach:
|
||||||
|
if (r)
|
||||||
|
drm_gem_map_detach(dma_buf, attach);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* amdgpu_dma_buf_map_detach - &dma_buf_ops.detach implementation
|
||||||
|
* @dma_buf: Shared DMA buffer
|
||||||
|
* @attach: DMA-buf attachment
|
||||||
|
*
|
||||||
|
* This is called when a shared DMA buffer no longer needs to be accessible by
|
||||||
|
* another device. For now, simply unpins the buffer from GTT.
|
||||||
|
*/
|
||||||
|
static void amdgpu_dma_buf_map_detach(struct dma_buf *dma_buf,
|
||||||
|
struct dma_buf_attachment *attach)
|
||||||
|
{
|
||||||
|
struct drm_gem_object *obj = dma_buf->priv;
|
||||||
|
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
|
||||||
|
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
ret = amdgpu_bo_reserve(bo, true);
|
||||||
|
if (unlikely(ret != 0))
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
amdgpu_bo_unpin(bo);
|
||||||
|
if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
|
||||||
|
bo->prime_shared_count--;
|
||||||
|
amdgpu_bo_unreserve(bo);
|
||||||
|
|
||||||
|
error:
|
||||||
|
drm_gem_map_detach(dma_buf, attach);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation
|
||||||
|
* @obj: GEM BO
|
||||||
|
*
|
||||||
|
* Returns:
|
||||||
|
* The BO's reservation object.
|
||||||
|
*/
|
||||||
|
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
|
||||||
|
{
|
||||||
|
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
|
||||||
|
|
||||||
|
return bo->tbo.resv;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
|
||||||
|
* @dma_buf: Shared DMA buffer
|
||||||
|
* @direction: Direction of DMA transfer
|
||||||
|
*
|
||||||
|
* This is called before CPU access to the shared DMA buffer's memory. If it's
|
||||||
|
* a read access, the buffer is moved to the GTT domain if possible, for optimal
|
||||||
|
* CPU read performance.
|
||||||
|
*
|
||||||
|
* Returns:
|
||||||
|
* 0 on success or a negative error code on failure.
|
||||||
|
*/
|
||||||
|
static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
|
||||||
|
enum dma_data_direction direction)
|
||||||
|
{
|
||||||
|
struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
|
||||||
|
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
|
||||||
|
struct ttm_operation_ctx ctx = { true, false };
|
||||||
|
u32 domain = amdgpu_display_supported_domains(adev);
|
||||||
|
int ret;
|
||||||
|
bool reads = (direction == DMA_BIDIRECTIONAL ||
|
||||||
|
direction == DMA_FROM_DEVICE);
|
||||||
|
|
||||||
|
if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* move to gtt */
|
||||||
|
ret = amdgpu_bo_reserve(bo, false);
|
||||||
|
if (unlikely(ret != 0))
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
|
||||||
|
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
|
||||||
|
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
amdgpu_bo_unreserve(bo);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
const struct dma_buf_ops amdgpu_dmabuf_ops = {
|
||||||
|
.attach = amdgpu_dma_buf_map_attach,
|
||||||
|
.detach = amdgpu_dma_buf_map_detach,
|
||||||
|
.map_dma_buf = drm_gem_map_dma_buf,
|
||||||
|
.unmap_dma_buf = drm_gem_unmap_dma_buf,
|
||||||
|
.release = drm_gem_dmabuf_release,
|
||||||
|
.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
|
||||||
|
.mmap = drm_gem_dmabuf_mmap,
|
||||||
|
.vmap = drm_gem_dmabuf_vmap,
|
||||||
|
.vunmap = drm_gem_dmabuf_vunmap,
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
|
||||||
|
* @dev: DRM device
|
||||||
|
* @gobj: GEM BO
|
||||||
|
* @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
|
||||||
|
*
|
||||||
|
* The main work is done by the &drm_gem_prime_export helper, which in turn
|
||||||
|
* uses &amdgpu_gem_prime_res_obj.
|
||||||
|
*
|
||||||
|
* Returns:
|
||||||
|
* Shared DMA buffer representing the GEM BO from the given device.
|
||||||
|
*/
|
||||||
|
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
|
||||||
|
struct drm_gem_object *gobj,
|
||||||
|
int flags)
|
||||||
|
{
|
||||||
|
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
|
||||||
|
struct dma_buf *buf;
|
||||||
|
|
||||||
|
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
|
||||||
|
bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
|
||||||
|
return ERR_PTR(-EPERM);
|
||||||
|
|
||||||
|
buf = drm_gem_prime_export(dev, gobj, flags);
|
||||||
|
if (!IS_ERR(buf)) {
|
||||||
|
buf->file->f_mapping = dev->anon_inode->i_mapping;
|
||||||
|
buf->ops = &amdgpu_dmabuf_ops;
|
||||||
|
}
|
||||||
|
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
|
* amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
|
||||||
* implementation
|
* implementation
|
||||||
|
@ -188,235 +418,6 @@ error:
|
||||||
return ERR_PTR(ret);
|
return ERR_PTR(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
|
||||||
__reservation_object_make_exclusive(struct reservation_object *obj)
|
|
||||||
{
|
|
||||||
struct dma_fence **fences;
|
|
||||||
unsigned int count;
|
|
||||||
int r;
|
|
||||||
|
|
||||||
if (!reservation_object_get_list(obj)) /* no shared fences to convert */
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences);
|
|
||||||
if (r)
|
|
||||||
return r;
|
|
||||||
|
|
||||||
if (count == 0) {
|
|
||||||
/* Now that was unexpected. */
|
|
||||||
} else if (count == 1) {
|
|
||||||
reservation_object_add_excl_fence(obj, fences[0]);
|
|
||||||
dma_fence_put(fences[0]);
|
|
||||||
kfree(fences);
|
|
||||||
} else {
|
|
||||||
struct dma_fence_array *array;
|
|
||||||
|
|
||||||
array = dma_fence_array_create(count, fences,
|
|
||||||
dma_fence_context_alloc(1), 0,
|
|
||||||
false);
|
|
||||||
if (!array)
|
|
||||||
goto err_fences_put;
|
|
||||||
|
|
||||||
reservation_object_add_excl_fence(obj, &array->base);
|
|
||||||
dma_fence_put(&array->base);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
err_fences_put:
|
|
||||||
while (count--)
|
|
||||||
dma_fence_put(fences[count]);
|
|
||||||
kfree(fences);
|
|
||||||
return -ENOMEM;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* amdgpu_gem_map_attach - &dma_buf_ops.attach implementation
|
|
||||||
* @dma_buf: Shared DMA buffer
|
|
||||||
* @attach: DMA-buf attachment
|
|
||||||
*
|
|
||||||
* Makes sure that the shared DMA buffer can be accessed by the target device.
|
|
||||||
* For now, simply pins it to the GTT domain, where it should be accessible by
|
|
||||||
* all DMA devices.
|
|
||||||
*
|
|
||||||
* Returns:
|
|
||||||
* 0 on success or a negative error code on failure.
|
|
||||||
*/
|
|
||||||
static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
|
|
||||||
struct dma_buf_attachment *attach)
|
|
||||||
{
|
|
||||||
struct drm_gem_object *obj = dma_buf->priv;
|
|
||||||
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
|
|
||||||
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
|
|
||||||
long r;
|
|
||||||
|
|
||||||
r = drm_gem_map_attach(dma_buf, attach);
|
|
||||||
if (r)
|
|
||||||
return r;
|
|
||||||
|
|
||||||
r = amdgpu_bo_reserve(bo, false);
|
|
||||||
if (unlikely(r != 0))
|
|
||||||
goto error_detach;
|
|
||||||
|
|
||||||
|
|
||||||
if (attach->dev->driver != adev->dev->driver) {
|
|
||||||
/*
|
|
||||||
* We only create shared fences for internal use, but importers
|
|
||||||
* of the dmabuf rely on exclusive fences for implicitly
|
|
||||||
* tracking write hazards. As any of the current fences may
|
|
||||||
* correspond to a write, we need to convert all existing
|
|
||||||
* fences on the reservation object into a single exclusive
|
|
||||||
* fence.
|
|
||||||
*/
|
|
||||||
r = __reservation_object_make_exclusive(bo->tbo.resv);
|
|
||||||
if (r)
|
|
||||||
goto error_unreserve;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* pin buffer into GTT */
|
|
||||||
r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
|
|
||||||
if (r)
|
|
||||||
goto error_unreserve;
|
|
||||||
|
|
||||||
if (attach->dev->driver != adev->dev->driver)
|
|
||||||
bo->prime_shared_count++;
|
|
||||||
|
|
||||||
error_unreserve:
|
|
||||||
amdgpu_bo_unreserve(bo);
|
|
||||||
|
|
||||||
error_detach:
|
|
||||||
if (r)
|
|
||||||
drm_gem_map_detach(dma_buf, attach);
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* amdgpu_gem_map_detach - &dma_buf_ops.detach implementation
|
|
||||||
* @dma_buf: Shared DMA buffer
|
|
||||||
* @attach: DMA-buf attachment
|
|
||||||
*
|
|
||||||
* This is called when a shared DMA buffer no longer needs to be accessible by
|
|
||||||
* another device. For now, simply unpins the buffer from GTT.
|
|
||||||
*/
|
|
||||||
static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
|
|
||||||
struct dma_buf_attachment *attach)
|
|
||||||
{
|
|
||||||
struct drm_gem_object *obj = dma_buf->priv;
|
|
||||||
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
|
|
||||||
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
|
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
ret = amdgpu_bo_reserve(bo, true);
|
|
||||||
if (unlikely(ret != 0))
|
|
||||||
goto error;
|
|
||||||
|
|
||||||
amdgpu_bo_unpin(bo);
|
|
||||||
if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
|
|
||||||
bo->prime_shared_count--;
|
|
||||||
amdgpu_bo_unreserve(bo);
|
|
||||||
|
|
||||||
error:
|
|
||||||
drm_gem_map_detach(dma_buf, attach);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation
|
|
||||||
* @obj: GEM BO
|
|
||||||
*
|
|
||||||
* Returns:
|
|
||||||
* The BO's reservation object.
|
|
||||||
*/
|
|
||||||
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
|
|
||||||
{
|
|
||||||
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
|
|
||||||
|
|
||||||
return bo->tbo.resv;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
|
|
||||||
* @dma_buf: Shared DMA buffer
|
|
||||||
* @direction: Direction of DMA transfer
|
|
||||||
*
|
|
||||||
* This is called before CPU access to the shared DMA buffer's memory. If it's
|
|
||||||
* a read access, the buffer is moved to the GTT domain if possible, for optimal
|
|
||||||
* CPU read performance.
|
|
||||||
*
|
|
||||||
* Returns:
|
|
||||||
* 0 on success or a negative error code on failure.
|
|
||||||
*/
|
|
||||||
static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
|
|
||||||
enum dma_data_direction direction)
|
|
||||||
{
|
|
||||||
struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
|
|
||||||
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
|
|
||||||
struct ttm_operation_ctx ctx = { true, false };
|
|
||||||
u32 domain = amdgpu_display_supported_domains(adev);
|
|
||||||
int ret;
|
|
||||||
bool reads = (direction == DMA_BIDIRECTIONAL ||
|
|
||||||
direction == DMA_FROM_DEVICE);
|
|
||||||
|
|
||||||
if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
/* move to gtt */
|
|
||||||
ret = amdgpu_bo_reserve(bo, false);
|
|
||||||
if (unlikely(ret != 0))
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
|
|
||||||
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
|
|
||||||
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
amdgpu_bo_unreserve(bo);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
const struct dma_buf_ops amdgpu_dmabuf_ops = {
|
|
||||||
.attach = amdgpu_gem_map_attach,
|
|
||||||
.detach = amdgpu_gem_map_detach,
|
|
||||||
.map_dma_buf = drm_gem_map_dma_buf,
|
|
||||||
.unmap_dma_buf = drm_gem_unmap_dma_buf,
|
|
||||||
.release = drm_gem_dmabuf_release,
|
|
||||||
.begin_cpu_access = amdgpu_gem_begin_cpu_access,
|
|
||||||
.mmap = drm_gem_dmabuf_mmap,
|
|
||||||
.vmap = drm_gem_dmabuf_vmap,
|
|
||||||
.vunmap = drm_gem_dmabuf_vunmap,
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
|
|
||||||
* @dev: DRM device
|
|
||||||
* @gobj: GEM BO
|
|
||||||
* @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
|
|
||||||
*
|
|
||||||
* The main work is done by the &drm_gem_prime_export helper, which in turn
|
|
||||||
* uses &amdgpu_gem_prime_res_obj.
|
|
||||||
*
|
|
||||||
* Returns:
|
|
||||||
* Shared DMA buffer representing the GEM BO from the given device.
|
|
||||||
*/
|
|
||||||
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
|
|
||||||
struct drm_gem_object *gobj,
|
|
||||||
int flags)
|
|
||||||
{
|
|
||||||
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
|
|
||||||
struct dma_buf *buf;
|
|
||||||
|
|
||||||
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
|
|
||||||
bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
|
|
||||||
return ERR_PTR(-EPERM);
|
|
||||||
|
|
||||||
buf = drm_gem_prime_export(dev, gobj, flags);
|
|
||||||
if (!IS_ERR(buf)) {
|
|
||||||
buf->file->f_mapping = dev->anon_inode->i_mapping;
|
|
||||||
buf->ops = &amdgpu_dmabuf_ops;
|
|
||||||
}
|
|
||||||
|
|
||||||
return buf;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
|
* amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
|
||||||
* @dev: DRM device
|
* @dev: DRM device
|
|
@ -0,0 +1,46 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2019 Advanced Micro Devices, Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||||
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||||
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#ifndef __AMDGPU_DMA_BUF_H__
|
||||||
|
#define __AMDGPU_DMA_BUF_H__
|
||||||
|
|
||||||
|
#include <drm/drm_gem.h>
|
||||||
|
|
||||||
|
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
|
||||||
|
struct drm_gem_object *
|
||||||
|
amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
|
||||||
|
struct dma_buf_attachment *attach,
|
||||||
|
struct sg_table *sg);
|
||||||
|
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
|
||||||
|
struct drm_gem_object *gobj,
|
||||||
|
int flags);
|
||||||
|
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
|
||||||
|
struct dma_buf *dma_buf);
|
||||||
|
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
|
||||||
|
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
|
||||||
|
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
|
||||||
|
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
|
||||||
|
struct vm_area_struct *vma);
|
||||||
|
|
||||||
|
extern const struct dma_buf_ops amdgpu_dmabuf_ops;
|
||||||
|
|
||||||
|
#endif
|
|
@ -75,6 +75,20 @@ struct amdgpu_dpm_thermal {
|
||||||
int min_temp;
|
int min_temp;
|
||||||
/* high temperature threshold */
|
/* high temperature threshold */
|
||||||
int max_temp;
|
int max_temp;
|
||||||
|
/* edge max emergency(shutdown) temp */
|
||||||
|
int max_edge_emergency_temp;
|
||||||
|
/* hotspot low temperature threshold */
|
||||||
|
int min_hotspot_temp;
|
||||||
|
/* hotspot high temperature critical threshold */
|
||||||
|
int max_hotspot_crit_temp;
|
||||||
|
/* hotspot max emergency(shutdown) temp */
|
||||||
|
int max_hotspot_emergency_temp;
|
||||||
|
/* memory low temperature threshold */
|
||||||
|
int min_mem_temp;
|
||||||
|
/* memory high temperature critical threshold */
|
||||||
|
int max_mem_crit_temp;
|
||||||
|
/* memory max emergency(shutdown) temp */
|
||||||
|
int max_mem_emergency_temp;
|
||||||
/* was last interrupt low to high or high to low */
|
/* was last interrupt low to high or high to low */
|
||||||
bool high_to_low;
|
bool high_to_low;
|
||||||
/* interrupt source */
|
/* interrupt source */
|
||||||
|
|
|
@ -36,7 +36,7 @@
|
||||||
|
|
||||||
#include "amdgpu.h"
|
#include "amdgpu.h"
|
||||||
#include "amdgpu_irq.h"
|
#include "amdgpu_irq.h"
|
||||||
#include "amdgpu_gem.h"
|
#include "amdgpu_dma_buf.h"
|
||||||
|
|
||||||
#include "amdgpu_amdkfd.h"
|
#include "amdgpu_amdkfd.h"
|
||||||
|
|
||||||
|
@ -81,6 +81,8 @@
|
||||||
#define KMS_DRIVER_MINOR 32
|
#define KMS_DRIVER_MINOR 32
|
||||||
#define KMS_DRIVER_PATCHLEVEL 0
|
#define KMS_DRIVER_PATCHLEVEL 0
|
||||||
|
|
||||||
|
#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256
|
||||||
|
|
||||||
int amdgpu_vram_limit = 0;
|
int amdgpu_vram_limit = 0;
|
||||||
int amdgpu_vis_vram_limit = 0;
|
int amdgpu_vis_vram_limit = 0;
|
||||||
int amdgpu_gart_size = -1; /* auto */
|
int amdgpu_gart_size = -1; /* auto */
|
||||||
|
@ -93,7 +95,7 @@ int amdgpu_disp_priority = 0;
|
||||||
int amdgpu_hw_i2c = 0;
|
int amdgpu_hw_i2c = 0;
|
||||||
int amdgpu_pcie_gen2 = -1;
|
int amdgpu_pcie_gen2 = -1;
|
||||||
int amdgpu_msi = -1;
|
int amdgpu_msi = -1;
|
||||||
int amdgpu_lockup_timeout = 10000;
|
char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH];
|
||||||
int amdgpu_dpm = -1;
|
int amdgpu_dpm = -1;
|
||||||
int amdgpu_fw_load_type = -1;
|
int amdgpu_fw_load_type = -1;
|
||||||
int amdgpu_aspm = -1;
|
int amdgpu_aspm = -1;
|
||||||
|
@ -227,12 +229,21 @@ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
|
||||||
module_param_named(msi, amdgpu_msi, int, 0444);
|
module_param_named(msi, amdgpu_msi, int, 0444);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* DOC: lockup_timeout (int)
|
* DOC: lockup_timeout (string)
|
||||||
* Set GPU scheduler timeout value in ms. Value 0 is invalidated, will be adjusted to 10000.
|
* Set GPU scheduler timeout value in ms.
|
||||||
* Negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET). The default is 10000.
|
*
|
||||||
|
* The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or
|
||||||
|
* multiple values specified. 0 and negative values are invalidated. They will be adjusted
|
||||||
|
* to default timeout.
|
||||||
|
* - With one value specified, the setting will apply to all non-compute jobs.
|
||||||
|
* - With multiple values specified, the first one will be for GFX. The second one is for Compute.
|
||||||
|
* And the third and fourth ones are for SDMA and Video.
|
||||||
|
* By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
|
||||||
|
* jobs is 10000. And there is no timeout enforced on compute jobs.
|
||||||
*/
|
*/
|
||||||
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)");
|
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for non-compute jobs and no timeout for compute jobs), "
|
||||||
module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444);
|
"format is [Non-Compute] or [GFX,Compute,SDMA,Video]");
|
||||||
|
module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* DOC: dpm (int)
|
* DOC: dpm (int)
|
||||||
|
@ -655,6 +666,16 @@ MODULE_PARM_DESC(noretry,
|
||||||
int halt_if_hws_hang;
|
int halt_if_hws_hang;
|
||||||
module_param(halt_if_hws_hang, int, 0644);
|
module_param(halt_if_hws_hang, int, 0644);
|
||||||
MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
|
MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DOC: hws_gws_support(bool)
|
||||||
|
* Whether HWS support gws barriers. Default value: false (not supported)
|
||||||
|
* This will be replaced with a MEC firmware version check once firmware
|
||||||
|
* is ready
|
||||||
|
*/
|
||||||
|
bool hws_gws_support;
|
||||||
|
module_param(hws_gws_support, bool, 0444);
|
||||||
|
MODULE_PARM_DESC(hws_gws_support, "MEC FW support gws barriers (false = not supported (Default), true = supported)");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1216,6 +1237,62 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
char *input = amdgpu_lockup_timeout;
|
||||||
|
char *timeout_setting = NULL;
|
||||||
|
int index = 0;
|
||||||
|
long timeout;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* By default timeout for non compute jobs is 10000.
|
||||||
|
* And there is no timeout enforced on compute jobs.
|
||||||
|
*/
|
||||||
|
adev->gfx_timeout = adev->sdma_timeout = adev->video_timeout = 10000;
|
||||||
|
adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
|
||||||
|
|
||||||
|
if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
|
||||||
|
while ((timeout_setting = strsep(&input, ",")) &&
|
||||||
|
strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
|
||||||
|
ret = kstrtol(timeout_setting, 0, &timeout);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
/* Invalidate 0 and negative values */
|
||||||
|
if (timeout <= 0) {
|
||||||
|
index++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (index++) {
|
||||||
|
case 0:
|
||||||
|
adev->gfx_timeout = timeout;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
adev->compute_timeout = timeout;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
adev->sdma_timeout = timeout;
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
adev->video_timeout = timeout;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* There is only one value specified and
|
||||||
|
* it should apply to all non-compute jobs.
|
||||||
|
*/
|
||||||
|
if (index == 1)
|
||||||
|
adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
|
amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
|
||||||
bool in_vblank_irq, int *vpos, int *hpos,
|
bool in_vblank_irq, int *vpos, int *hpos,
|
||||||
|
@ -1230,7 +1307,8 @@ static struct drm_driver kms_driver = {
|
||||||
.driver_features =
|
.driver_features =
|
||||||
DRIVER_USE_AGP | DRIVER_ATOMIC |
|
DRIVER_USE_AGP | DRIVER_ATOMIC |
|
||||||
DRIVER_GEM |
|
DRIVER_GEM |
|
||||||
DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
|
DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ |
|
||||||
|
DRIVER_SYNCOBJ_TIMELINE,
|
||||||
.load = amdgpu_driver_load_kms,
|
.load = amdgpu_driver_load_kms,
|
||||||
.open = amdgpu_driver_open_kms,
|
.open = amdgpu_driver_open_kms,
|
||||||
.postclose = amdgpu_driver_postclose_kms,
|
.postclose = amdgpu_driver_postclose_kms,
|
||||||
|
|
|
@ -427,9 +427,13 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
|
||||||
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
|
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
|
||||||
unsigned num_hw_submission)
|
unsigned num_hw_submission)
|
||||||
{
|
{
|
||||||
|
struct amdgpu_device *adev = ring->adev;
|
||||||
long timeout;
|
long timeout;
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
|
if (!adev)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
/* Check that num_hw_submission is a power of two */
|
/* Check that num_hw_submission is a power of two */
|
||||||
if ((num_hw_submission & (num_hw_submission - 1)) != 0)
|
if ((num_hw_submission & (num_hw_submission - 1)) != 0)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
@ -451,12 +455,31 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
|
||||||
|
|
||||||
/* No need to setup the GPU scheduler for KIQ ring */
|
/* No need to setup the GPU scheduler for KIQ ring */
|
||||||
if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
|
if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
|
||||||
/* for non-sriov case, no timeout enforce on compute ring */
|
switch (ring->funcs->type) {
|
||||||
if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
|
case AMDGPU_RING_TYPE_GFX:
|
||||||
&& !amdgpu_sriov_vf(ring->adev))
|
timeout = adev->gfx_timeout;
|
||||||
timeout = MAX_SCHEDULE_TIMEOUT;
|
break;
|
||||||
else
|
case AMDGPU_RING_TYPE_COMPUTE:
|
||||||
timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
|
/*
|
||||||
|
* For non-sriov case, no timeout enforce
|
||||||
|
* on compute ring by default. Unless user
|
||||||
|
* specifies a timeout for compute ring.
|
||||||
|
*
|
||||||
|
* For sriov case, always use the timeout
|
||||||
|
* as gfx ring
|
||||||
|
*/
|
||||||
|
if (!amdgpu_sriov_vf(ring->adev))
|
||||||
|
timeout = adev->compute_timeout;
|
||||||
|
else
|
||||||
|
timeout = adev->gfx_timeout;
|
||||||
|
break;
|
||||||
|
case AMDGPU_RING_TYPE_SDMA:
|
||||||
|
timeout = adev->sdma_timeout;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
timeout = adev->video_timeout;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
|
r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
|
||||||
num_hw_submission, amdgpu_job_hang_limit,
|
num_hw_submission, amdgpu_job_hang_limit,
|
||||||
|
|
|
@ -27,26 +27,11 @@
|
||||||
struct amdgpu_ring;
|
struct amdgpu_ring;
|
||||||
struct amdgpu_bo;
|
struct amdgpu_bo;
|
||||||
|
|
||||||
struct amdgpu_gds_asic_info {
|
|
||||||
uint32_t total_size;
|
|
||||||
uint32_t gfx_partition_size;
|
|
||||||
uint32_t cs_partition_size;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct amdgpu_gds {
|
struct amdgpu_gds {
|
||||||
struct amdgpu_gds_asic_info mem;
|
uint32_t gds_size;
|
||||||
struct amdgpu_gds_asic_info gws;
|
uint32_t gws_size;
|
||||||
struct amdgpu_gds_asic_info oa;
|
uint32_t oa_size;
|
||||||
uint32_t gds_compute_max_wave_id;
|
uint32_t gds_compute_max_wave_id;
|
||||||
|
|
||||||
/* At present, GDS, GWS and OA resources for gfx (graphics)
|
|
||||||
* is always pre-allocated and available for graphics operation.
|
|
||||||
* Such resource is shared between all gfx clients.
|
|
||||||
* TODO: move this operation to user space
|
|
||||||
* */
|
|
||||||
struct amdgpu_bo* gds_gfx_bo;
|
|
||||||
struct amdgpu_bo* gws_gfx_bo;
|
|
||||||
struct amdgpu_bo* oa_gfx_bo;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct amdgpu_gds_reg_offset {
|
struct amdgpu_gds_reg_offset {
|
||||||
|
|
|
@ -330,26 +330,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
|
||||||
|
|
||||||
r = amdgpu_bo_reserve(bo, true);
|
r = amdgpu_bo_reserve(bo, true);
|
||||||
if (r)
|
if (r)
|
||||||
goto free_pages;
|
goto user_pages_done;
|
||||||
|
|
||||||
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
|
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
|
||||||
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||||
amdgpu_bo_unreserve(bo);
|
amdgpu_bo_unreserve(bo);
|
||||||
if (r)
|
if (r)
|
||||||
goto free_pages;
|
goto user_pages_done;
|
||||||
}
|
}
|
||||||
|
|
||||||
r = drm_gem_handle_create(filp, gobj, &handle);
|
r = drm_gem_handle_create(filp, gobj, &handle);
|
||||||
/* drop reference from allocate - handle holds it now */
|
|
||||||
drm_gem_object_put_unlocked(gobj);
|
|
||||||
if (r)
|
if (r)
|
||||||
return r;
|
goto user_pages_done;
|
||||||
|
|
||||||
args->handle = handle;
|
args->handle = handle;
|
||||||
return 0;
|
|
||||||
|
|
||||||
free_pages:
|
user_pages_done:
|
||||||
release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
|
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
|
||||||
|
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
|
||||||
|
|
||||||
release_object:
|
release_object:
|
||||||
drm_gem_object_put_unlocked(gobj);
|
drm_gem_object_put_unlocked(gobj);
|
||||||
|
|
|
@ -39,22 +39,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
|
||||||
void amdgpu_gem_object_close(struct drm_gem_object *obj,
|
void amdgpu_gem_object_close(struct drm_gem_object *obj,
|
||||||
struct drm_file *file_priv);
|
struct drm_file *file_priv);
|
||||||
unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
|
unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
|
||||||
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
|
|
||||||
struct drm_gem_object *
|
|
||||||
amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
|
|
||||||
struct dma_buf_attachment *attach,
|
|
||||||
struct sg_table *sg);
|
|
||||||
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
|
|
||||||
struct drm_gem_object *gobj,
|
|
||||||
int flags);
|
|
||||||
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
|
|
||||||
struct dma_buf *dma_buf);
|
|
||||||
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
|
|
||||||
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
|
|
||||||
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
|
|
||||||
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
|
|
||||||
|
|
||||||
extern const struct dma_buf_ops amdgpu_dmabuf_ops;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* GEM objects.
|
* GEM objects.
|
||||||
|
|
|
@ -51,6 +51,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
|
||||||
|
|
||||||
if (amdgpu_device_should_recover_gpu(ring->adev))
|
if (amdgpu_device_should_recover_gpu(ring->adev))
|
||||||
amdgpu_device_gpu_recover(ring->adev, job);
|
amdgpu_device_gpu_recover(ring->adev, job);
|
||||||
|
else
|
||||||
|
drm_sched_suspend_timeout(&ring->sched);
|
||||||
}
|
}
|
||||||
|
|
||||||
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
|
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
|
||||||
|
|
|
@ -590,13 +590,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
|
||||||
struct drm_amdgpu_info_gds gds_info;
|
struct drm_amdgpu_info_gds gds_info;
|
||||||
|
|
||||||
memset(&gds_info, 0, sizeof(gds_info));
|
memset(&gds_info, 0, sizeof(gds_info));
|
||||||
gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size;
|
gds_info.compute_partition_size = adev->gds.gds_size;
|
||||||
gds_info.compute_partition_size = adev->gds.mem.cs_partition_size;
|
gds_info.gds_total_size = adev->gds.gds_size;
|
||||||
gds_info.gds_total_size = adev->gds.mem.total_size;
|
gds_info.gws_per_compute_partition = adev->gds.gws_size;
|
||||||
gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size;
|
gds_info.oa_per_compute_partition = adev->gds.oa_size;
|
||||||
gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size;
|
|
||||||
gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size;
|
|
||||||
gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size;
|
|
||||||
return copy_to_user(out, &gds_info,
|
return copy_to_user(out, &gds_info,
|
||||||
min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0;
|
min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,7 +45,7 @@
|
||||||
|
|
||||||
#include <linux/firmware.h>
|
#include <linux/firmware.h>
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
#include <linux/mmu_notifier.h>
|
#include <linux/hmm.h>
|
||||||
#include <linux/interval_tree.h>
|
#include <linux/interval_tree.h>
|
||||||
#include <drm/drmP.h>
|
#include <drm/drmP.h>
|
||||||
#include <drm/drm.h>
|
#include <drm/drm.h>
|
||||||
|
@ -58,14 +58,12 @@
|
||||||
*
|
*
|
||||||
* @adev: amdgpu device pointer
|
* @adev: amdgpu device pointer
|
||||||
* @mm: process address space
|
* @mm: process address space
|
||||||
* @mn: MMU notifier structure
|
|
||||||
* @type: type of MMU notifier
|
* @type: type of MMU notifier
|
||||||
* @work: destruction work item
|
* @work: destruction work item
|
||||||
* @node: hash table node to find structure by adev and mn
|
* @node: hash table node to find structure by adev and mn
|
||||||
* @lock: rw semaphore protecting the notifier nodes
|
* @lock: rw semaphore protecting the notifier nodes
|
||||||
* @objects: interval tree containing amdgpu_mn_nodes
|
* @objects: interval tree containing amdgpu_mn_nodes
|
||||||
* @read_lock: mutex for recursive locking of @lock
|
* @mirror: HMM mirror function support
|
||||||
* @recursion: depth of recursion
|
|
||||||
*
|
*
|
||||||
* Data for each amdgpu device and process address space.
|
* Data for each amdgpu device and process address space.
|
||||||
*/
|
*/
|
||||||
|
@ -73,7 +71,6 @@ struct amdgpu_mn {
|
||||||
/* constant after initialisation */
|
/* constant after initialisation */
|
||||||
struct amdgpu_device *adev;
|
struct amdgpu_device *adev;
|
||||||
struct mm_struct *mm;
|
struct mm_struct *mm;
|
||||||
struct mmu_notifier mn;
|
|
||||||
enum amdgpu_mn_type type;
|
enum amdgpu_mn_type type;
|
||||||
|
|
||||||
/* only used on destruction */
|
/* only used on destruction */
|
||||||
|
@ -85,8 +82,9 @@ struct amdgpu_mn {
|
||||||
/* objects protected by lock */
|
/* objects protected by lock */
|
||||||
struct rw_semaphore lock;
|
struct rw_semaphore lock;
|
||||||
struct rb_root_cached objects;
|
struct rb_root_cached objects;
|
||||||
struct mutex read_lock;
|
|
||||||
atomic_t recursion;
|
/* HMM mirror */
|
||||||
|
struct hmm_mirror mirror;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -103,7 +101,7 @@ struct amdgpu_mn_node {
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* amdgpu_mn_destroy - destroy the MMU notifier
|
* amdgpu_mn_destroy - destroy the HMM mirror
|
||||||
*
|
*
|
||||||
 * @work: previously scheduled work item
|
 * @work: previously scheduled work item
|
||||||
*
|
*
|
||||||
|
@ -129,28 +127,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
|
||||||
}
|
}
|
||||||
up_write(&amn->lock);
|
up_write(&amn->lock);
|
||||||
mutex_unlock(&adev->mn_lock);
|
mutex_unlock(&adev->mn_lock);
|
||||||
mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
|
|
||||||
|
hmm_mirror_unregister(&amn->mirror);
|
||||||
kfree(amn);
|
kfree(amn);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* amdgpu_mn_release - callback to notify about mm destruction
|
* amdgpu_hmm_mirror_release - callback to notify about mm destruction
|
||||||
*
|
*
|
||||||
* @mn: our notifier
|
* @mirror: the HMM mirror (mm) this callback is about
|
||||||
* @mm: the mm this callback is about
|
|
||||||
*
|
*
|
||||||
 * Schedule a work item to lazily destroy our notifier.
|
 * Schedule a work item to lazily destroy the HMM mirror.
|
||||||
*/
|
*/
|
||||||
static void amdgpu_mn_release(struct mmu_notifier *mn,
|
static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
|
||||||
struct mm_struct *mm)
|
|
||||||
{
|
{
|
||||||
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
|
struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
|
||||||
|
|
||||||
INIT_WORK(&amn->work, amdgpu_mn_destroy);
|
INIT_WORK(&amn->work, amdgpu_mn_destroy);
|
||||||
schedule_work(&amn->work);
|
schedule_work(&amn->work);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* amdgpu_mn_lock - take the write side lock for this notifier
|
* amdgpu_mn_lock - take the write side lock for this notifier
|
||||||
*
|
*
|
||||||
|
@ -181,14 +177,10 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn)
|
||||||
static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
|
static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
|
||||||
{
|
{
|
||||||
if (blockable)
|
if (blockable)
|
||||||
mutex_lock(&amn->read_lock);
|
down_read(&amn->lock);
|
||||||
else if (!mutex_trylock(&amn->read_lock))
|
else if (!down_read_trylock(&amn->lock))
|
||||||
return -EAGAIN;
|
return -EAGAIN;
|
||||||
|
|
||||||
if (atomic_inc_return(&amn->recursion) == 1)
|
|
||||||
down_read_non_owner(&amn->lock);
|
|
||||||
mutex_unlock(&amn->read_lock);
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -199,8 +191,7 @@ static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
|
||||||
*/
|
*/
|
||||||
static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
|
static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
|
||||||
{
|
{
|
||||||
if (atomic_dec_return(&amn->recursion) == 0)
|
up_read(&amn->lock);
|
||||||
up_read_non_owner(&amn->lock);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -229,149 +220,132 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
|
||||||
true, false, MAX_SCHEDULE_TIMEOUT);
|
true, false, MAX_SCHEDULE_TIMEOUT);
|
||||||
if (r <= 0)
|
if (r <= 0)
|
||||||
DRM_ERROR("(%ld) failed to wait for user bo\n", r);
|
DRM_ERROR("(%ld) failed to wait for user bo\n", r);
|
||||||
|
|
||||||
amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
|
* amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change
|
||||||
*
|
*
|
||||||
* @mn: our notifier
|
* @mirror: the hmm_mirror (mm) is about to update
|
||||||
* @range: mmu notifier context
|
* @update: the update start, end address
|
||||||
*
|
*
|
||||||
* Block for operations on BOs to finish and mark pages as accessed and
|
* Block for operations on BOs to finish and mark pages as accessed and
|
||||||
* potentially dirty.
|
* potentially dirty.
|
||||||
*/
|
*/
|
||||||
static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
|
static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
|
||||||
const struct mmu_notifier_range *range)
|
const struct hmm_update *update)
|
||||||
{
|
{
|
||||||
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
|
struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
|
||||||
|
unsigned long start = update->start;
|
||||||
|
unsigned long end = update->end;
|
||||||
|
bool blockable = update->blockable;
|
||||||
struct interval_tree_node *it;
|
struct interval_tree_node *it;
|
||||||
unsigned long end;
|
|
||||||
|
|
||||||
/* notification is exclusive, but interval is inclusive */
|
/* notification is exclusive, but interval is inclusive */
|
||||||
end = range->end - 1;
|
end -= 1;
|
||||||
|
|
||||||
/* TODO we should be able to split locking for interval tree and
|
/* TODO we should be able to split locking for interval tree and
|
||||||
* amdgpu_mn_invalidate_node
|
* amdgpu_mn_invalidate_node
|
||||||
*/
|
*/
|
||||||
if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range)))
|
if (amdgpu_mn_read_lock(amn, blockable))
|
||||||
return -EAGAIN;
|
return -EAGAIN;
|
||||||
|
|
||||||
it = interval_tree_iter_first(&amn->objects, range->start, end);
|
it = interval_tree_iter_first(&amn->objects, start, end);
|
||||||
while (it) {
|
while (it) {
|
||||||
struct amdgpu_mn_node *node;
|
struct amdgpu_mn_node *node;
|
||||||
|
|
||||||
if (!mmu_notifier_range_blockable(range)) {
|
if (!blockable) {
|
||||||
amdgpu_mn_read_unlock(amn);
|
amdgpu_mn_read_unlock(amn);
|
||||||
return -EAGAIN;
|
return -EAGAIN;
|
||||||
}
|
}
|
||||||
|
|
||||||
node = container_of(it, struct amdgpu_mn_node, it);
|
node = container_of(it, struct amdgpu_mn_node, it);
|
||||||
it = interval_tree_iter_next(it, range->start, end);
|
it = interval_tree_iter_next(it, start, end);
|
||||||
|
|
||||||
amdgpu_mn_invalidate_node(node, range->start, end);
|
amdgpu_mn_invalidate_node(node, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
amdgpu_mn_read_unlock(amn);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
|
* amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change
|
||||||
*
|
*
|
||||||
* @mn: our notifier
|
* @mirror: the hmm_mirror (mm) is about to update
|
||||||
* @mm: the mm this callback is about
|
* @update: the update start, end address
|
||||||
* @start: start of updated range
|
|
||||||
* @end: end of updated range
|
|
||||||
*
|
*
|
||||||
* We temporarily evict all BOs between start and end. This
|
* We temporarily evict all BOs between start and end. This
|
||||||
* necessitates evicting all user-mode queues of the process. The BOs
|
* necessitates evicting all user-mode queues of the process. The BOs
|
||||||
 * are restored in amdgpu_mn_invalidate_range_end_hsa.
|
 * are restored in amdgpu_mn_invalidate_range_end_hsa.
|
||||||
*/
|
*/
|
||||||
static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
|
static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
|
||||||
const struct mmu_notifier_range *range)
|
const struct hmm_update *update)
|
||||||
{
|
{
|
||||||
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
|
struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
|
||||||
|
unsigned long start = update->start;
|
||||||
|
unsigned long end = update->end;
|
||||||
|
bool blockable = update->blockable;
|
||||||
struct interval_tree_node *it;
|
struct interval_tree_node *it;
|
||||||
unsigned long end;
|
|
||||||
|
|
||||||
/* notification is exclusive, but interval is inclusive */
|
/* notification is exclusive, but interval is inclusive */
|
||||||
end = range->end - 1;
|
end -= 1;
|
||||||
|
|
||||||
if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range)))
|
if (amdgpu_mn_read_lock(amn, blockable))
|
||||||
return -EAGAIN;
|
return -EAGAIN;
|
||||||
|
|
||||||
it = interval_tree_iter_first(&amn->objects, range->start, end);
|
it = interval_tree_iter_first(&amn->objects, start, end);
|
||||||
while (it) {
|
while (it) {
|
||||||
struct amdgpu_mn_node *node;
|
struct amdgpu_mn_node *node;
|
||||||
struct amdgpu_bo *bo;
|
struct amdgpu_bo *bo;
|
||||||
|
|
||||||
if (!mmu_notifier_range_blockable(range)) {
|
if (!blockable) {
|
||||||
amdgpu_mn_read_unlock(amn);
|
amdgpu_mn_read_unlock(amn);
|
||||||
return -EAGAIN;
|
return -EAGAIN;
|
||||||
}
|
}
|
||||||
|
|
||||||
node = container_of(it, struct amdgpu_mn_node, it);
|
node = container_of(it, struct amdgpu_mn_node, it);
|
||||||
it = interval_tree_iter_next(it, range->start, end);
|
it = interval_tree_iter_next(it, start, end);
|
||||||
|
|
||||||
list_for_each_entry(bo, &node->bos, mn_list) {
|
list_for_each_entry(bo, &node->bos, mn_list) {
|
||||||
struct kgd_mem *mem = bo->kfd_bo;
|
struct kgd_mem *mem = bo->kfd_bo;
|
||||||
|
|
||||||
if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
|
if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
|
||||||
range->start,
|
start, end))
|
||||||
end))
|
amdgpu_amdkfd_evict_userptr(mem, amn->mm);
|
||||||
amdgpu_amdkfd_evict_userptr(mem, range->mm);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
amdgpu_mn_read_unlock(amn);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* amdgpu_mn_invalidate_range_end - callback to notify about mm change
|
|
||||||
*
|
|
||||||
* @mn: our notifier
|
|
||||||
* @mm: the mm this callback is about
|
|
||||||
* @start: start of updated range
|
|
||||||
* @end: end of updated range
|
|
||||||
*
|
|
||||||
* Release the lock again to allow new command submissions.
|
|
||||||
*/
|
|
||||||
static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
|
|
||||||
const struct mmu_notifier_range *range)
|
|
||||||
{
|
|
||||||
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
|
|
||||||
|
|
||||||
amdgpu_mn_read_unlock(amn);
|
|
||||||
}
|
|
||||||
|
|
||||||
static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
|
|
||||||
[AMDGPU_MN_TYPE_GFX] = {
|
|
||||||
.release = amdgpu_mn_release,
|
|
||||||
.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
|
|
||||||
.invalidate_range_end = amdgpu_mn_invalidate_range_end,
|
|
||||||
},
|
|
||||||
[AMDGPU_MN_TYPE_HSA] = {
|
|
||||||
.release = amdgpu_mn_release,
|
|
||||||
.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
|
|
||||||
.invalidate_range_end = amdgpu_mn_invalidate_range_end,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Low bits of any reasonable mm pointer will be unused due to struct
|
/* Low bits of any reasonable mm pointer will be unused due to struct
|
||||||
* alignment. Use these bits to make a unique key from the mm pointer
|
* alignment. Use these bits to make a unique key from the mm pointer
|
||||||
* and notifier type.
|
* and notifier type.
|
||||||
*/
|
*/
|
||||||
#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
|
#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
|
||||||
|
|
||||||
|
static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
|
||||||
|
[AMDGPU_MN_TYPE_GFX] = {
|
||||||
|
.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
|
||||||
|
.release = amdgpu_hmm_mirror_release
|
||||||
|
},
|
||||||
|
[AMDGPU_MN_TYPE_HSA] = {
|
||||||
|
.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
|
||||||
|
.release = amdgpu_hmm_mirror_release
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* amdgpu_mn_get - create notifier context
|
* amdgpu_mn_get - create HMM mirror context
|
||||||
*
|
*
|
||||||
* @adev: amdgpu device pointer
|
* @adev: amdgpu device pointer
|
||||||
* @type: type of MMU notifier context
|
* @type: type of MMU notifier context
|
||||||
*
|
*
|
||||||
* Creates a notifier context for current->mm.
|
* Creates a HMM mirror context for current->mm.
|
||||||
*/
|
*/
|
||||||
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
|
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
|
||||||
enum amdgpu_mn_type type)
|
enum amdgpu_mn_type type)
|
||||||
|
@ -401,12 +375,10 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
|
||||||
amn->mm = mm;
|
amn->mm = mm;
|
||||||
init_rwsem(&amn->lock);
|
init_rwsem(&amn->lock);
|
||||||
amn->type = type;
|
amn->type = type;
|
||||||
amn->mn.ops = &amdgpu_mn_ops[type];
|
|
||||||
amn->objects = RB_ROOT_CACHED;
|
amn->objects = RB_ROOT_CACHED;
|
||||||
mutex_init(&amn->read_lock);
|
|
||||||
atomic_set(&amn->recursion, 0);
|
|
||||||
|
|
||||||
r = __mmu_notifier_register(&amn->mn, mm);
|
amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
|
||||||
|
r = hmm_mirror_register(&amn->mirror, mm);
|
||||||
if (r)
|
if (r)
|
||||||
goto free_amn;
|
goto free_amn;
|
||||||
|
|
||||||
|
@ -432,7 +404,7 @@ free_amn:
|
||||||
* @bo: amdgpu buffer object
|
* @bo: amdgpu buffer object
|
||||||
* @addr: userptr addr we should monitor
|
* @addr: userptr addr we should monitor
|
||||||
*
|
*
|
||||||
* Registers an MMU notifier for the given BO at the specified address.
|
* Registers an HMM mirror for the given BO at the specified address.
|
||||||
* Returns 0 on success, -ERRNO if anything goes wrong.
|
* Returns 0 on success, -ERRNO if anything goes wrong.
|
||||||
*/
|
*/
|
||||||
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
|
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
|
||||||
|
@ -488,11 +460,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* amdgpu_mn_unregister - unregister a BO for notifier updates
|
* amdgpu_mn_unregister - unregister a BO for HMM mirror updates
|
||||||
*
|
*
|
||||||
* @bo: amdgpu buffer object
|
* @bo: amdgpu buffer object
|
||||||
*
|
*
|
||||||
* Remove any registration of MMU notifier updates from the buffer object.
|
* Remove any registration of HMM mirror updates from the buffer object.
|
||||||
*/
|
*/
|
||||||
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
|
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
|
||||||
{
|
{
|
||||||
|
@ -528,3 +500,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
|
||||||
mutex_unlock(&adev->mn_lock);
|
mutex_unlock(&adev->mn_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* flags used by HMM internal, not related to CPU/GPU PTE flags */
|
||||||
|
static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
|
||||||
|
(1 << 0), /* HMM_PFN_VALID */
|
||||||
|
(1 << 1), /* HMM_PFN_WRITE */
|
||||||
|
0 /* HMM_PFN_DEVICE_PRIVATE */
|
||||||
|
};
|
||||||
|
|
||||||
|
static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
|
||||||
|
0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
|
||||||
|
0, /* HMM_PFN_NONE */
|
||||||
|
0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
|
||||||
|
};
|
||||||
|
|
||||||
|
void amdgpu_hmm_init_range(struct hmm_range *range)
|
||||||
|
{
|
||||||
|
if (range) {
|
||||||
|
range->flags = hmm_range_flags;
|
||||||
|
range->values = hmm_range_values;
|
||||||
|
range->pfn_shift = PAGE_SHIFT;
|
||||||
|
range->pfns = NULL;
|
||||||
|
INIT_LIST_HEAD(&range->list);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -25,22 +25,24 @@
|
||||||
#define __AMDGPU_MN_H__
|
#define __AMDGPU_MN_H__
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* MMU Notifier
|
* HMM mirror
|
||||||
*/
|
*/
|
||||||
struct amdgpu_mn;
|
struct amdgpu_mn;
|
||||||
|
struct hmm_range;
|
||||||
|
|
||||||
enum amdgpu_mn_type {
|
enum amdgpu_mn_type {
|
||||||
AMDGPU_MN_TYPE_GFX,
|
AMDGPU_MN_TYPE_GFX,
|
||||||
AMDGPU_MN_TYPE_HSA,
|
AMDGPU_MN_TYPE_HSA,
|
||||||
};
|
};
|
||||||
|
|
||||||
#if defined(CONFIG_MMU_NOTIFIER)
|
#if defined(CONFIG_HMM_MIRROR)
|
||||||
void amdgpu_mn_lock(struct amdgpu_mn *mn);
|
void amdgpu_mn_lock(struct amdgpu_mn *mn);
|
||||||
void amdgpu_mn_unlock(struct amdgpu_mn *mn);
|
void amdgpu_mn_unlock(struct amdgpu_mn *mn);
|
||||||
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
|
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
|
||||||
enum amdgpu_mn_type type);
|
enum amdgpu_mn_type type);
|
||||||
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
|
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
|
||||||
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
|
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
|
||||||
|
void amdgpu_hmm_init_range(struct hmm_range *range);
|
||||||
#else
|
#else
|
||||||
static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
|
static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
|
||||||
static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
|
static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
|
||||||
|
@ -51,6 +53,8 @@ static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
|
||||||
}
|
}
|
||||||
static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
|
static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
|
||||||
{
|
{
|
||||||
|
DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
|
||||||
|
"add CONFIG_ZONE_DEVICE=y in config file to fix this\n");
|
||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
}
|
}
|
||||||
static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
|
static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
|
||||||
|
|
|
@ -331,8 +331,6 @@ struct amdgpu_mode_info {
|
||||||
struct drm_property *audio_property;
|
struct drm_property *audio_property;
|
||||||
/* FMT dithering */
|
/* FMT dithering */
|
||||||
struct drm_property *dither_property;
|
struct drm_property *dither_property;
|
||||||
/* maximum number of bits per channel for monitor color */
|
|
||||||
struct drm_property *max_bpc_property;
|
|
||||||
/* Adaptive Backlight Modulation (power feature) */
|
/* Adaptive Backlight Modulation (power feature) */
|
||||||
struct drm_property *abm_level_property;
|
struct drm_property *abm_level_property;
|
||||||
/* hardcoded DFP edid from BIOS */
|
/* hardcoded DFP edid from BIOS */
|
||||||
|
|
|
@ -67,6 +67,15 @@ static const struct cg_flag_name clocks[] = {
|
||||||
{0, NULL},
|
{0, NULL},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const struct hwmon_temp_label {
|
||||||
|
enum PP_HWMON_TEMP channel;
|
||||||
|
const char *label;
|
||||||
|
} temp_label[] = {
|
||||||
|
{PP_TEMP_EDGE, "edge"},
|
||||||
|
{PP_TEMP_JUNCTION, "junction"},
|
||||||
|
{PP_TEMP_MEM, "mem"},
|
||||||
|
};
|
||||||
|
|
||||||
void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
|
void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
|
||||||
{
|
{
|
||||||
if (adev->pm.dpm_enabled) {
|
if (adev->pm.dpm_enabled) {
|
||||||
|
@ -758,7 +767,11 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev,
|
||||||
|
|
||||||
pr_debug("featuremask = 0x%llx\n", featuremask);
|
pr_debug("featuremask = 0x%llx\n", featuremask);
|
||||||
|
|
||||||
if (adev->powerplay.pp_funcs->set_ppfeature_status) {
|
if (is_support_sw_smu(adev)) {
|
||||||
|
ret = smu_set_ppfeature_status(&adev->smu, featuremask);
|
||||||
|
if (ret)
|
||||||
|
return -EINVAL;
|
||||||
|
} else if (adev->powerplay.pp_funcs->set_ppfeature_status) {
|
||||||
ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask);
|
ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask);
|
||||||
if (ret)
|
if (ret)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
@ -774,7 +787,9 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev,
|
||||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||||
struct amdgpu_device *adev = ddev->dev_private;
|
struct amdgpu_device *adev = ddev->dev_private;
|
||||||
|
|
||||||
if (adev->powerplay.pp_funcs->get_ppfeature_status)
|
if (is_support_sw_smu(adev)) {
|
||||||
|
return smu_get_ppfeature_status(&adev->smu, buf);
|
||||||
|
} else if (adev->powerplay.pp_funcs->get_ppfeature_status)
|
||||||
return amdgpu_dpm_get_ppfeature_status(adev, buf);
|
return amdgpu_dpm_get_ppfeature_status(adev, buf);
|
||||||
|
|
||||||
return snprintf(buf, PAGE_SIZE, "\n");
|
return snprintf(buf, PAGE_SIZE, "\n");
|
||||||
|
@ -1302,6 +1317,32 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
|
||||||
return snprintf(buf, PAGE_SIZE, "%d\n", value);
|
return snprintf(buf, PAGE_SIZE, "%d\n", value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DOC: mem_busy_percent
|
||||||
|
*
|
||||||
|
* The amdgpu driver provides a sysfs API for reading how busy the VRAM
|
||||||
|
* is as a percentage. The file mem_busy_percent is used for this.
|
||||||
|
* The SMU firmware computes a percentage of load based on the
|
||||||
|
* aggregate activity level in the IP cores.
|
||||||
|
*/
|
||||||
|
static ssize_t amdgpu_get_memory_busy_percent(struct device *dev,
|
||||||
|
struct device_attribute *attr,
|
||||||
|
char *buf)
|
||||||
|
{
|
||||||
|
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||||
|
struct amdgpu_device *adev = ddev->dev_private;
|
||||||
|
int r, value, size = sizeof(value);
|
||||||
|
|
||||||
|
/* read the IP busy sensor */
|
||||||
|
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD,
|
||||||
|
(void *)&value, &size);
|
||||||
|
|
||||||
|
if (r)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
return snprintf(buf, PAGE_SIZE, "%d\n", value);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* DOC: pcie_bw
|
* DOC: pcie_bw
|
||||||
*
|
*
|
||||||
|
@ -1327,6 +1368,29 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
|
||||||
count0, count1, pcie_get_mps(adev->pdev));
|
count0, count1, pcie_get_mps(adev->pdev));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DOC: unique_id
|
||||||
|
*
|
||||||
|
* The amdgpu driver provides a sysfs API for providing a unique ID for the GPU.
|
||||||
|
* The file unique_id is used for this.
|
||||||
|
* This will provide a unique ID that will persist from machine to machine.
|
||||||
|
*
|
||||||
|
* NOTE: This will only work for GFX9 and newer. This file will be absent
|
||||||
|
* on unsupported ASICs (GFX8 and older)
|
||||||
|
*/
|
||||||
|
static ssize_t amdgpu_get_unique_id(struct device *dev,
|
||||||
|
struct device_attribute *attr,
|
||||||
|
char *buf)
|
||||||
|
{
|
||||||
|
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||||
|
struct amdgpu_device *adev = ddev->dev_private;
|
||||||
|
|
||||||
|
if (adev->unique_id)
|
||||||
|
return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
|
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
|
||||||
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
|
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
|
||||||
amdgpu_get_dpm_forced_performance_level,
|
amdgpu_get_dpm_forced_performance_level,
|
||||||
|
@ -1371,10 +1435,13 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
|
||||||
amdgpu_set_pp_od_clk_voltage);
|
amdgpu_set_pp_od_clk_voltage);
|
||||||
static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
|
static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
|
||||||
amdgpu_get_busy_percent, NULL);
|
amdgpu_get_busy_percent, NULL);
|
||||||
|
static DEVICE_ATTR(mem_busy_percent, S_IRUGO,
|
||||||
|
amdgpu_get_memory_busy_percent, NULL);
|
||||||
static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);
|
static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);
|
||||||
static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR,
|
static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR,
|
||||||
amdgpu_get_ppfeature_status,
|
amdgpu_get_ppfeature_status,
|
||||||
amdgpu_set_ppfeature_status);
|
amdgpu_set_ppfeature_status);
|
||||||
|
static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL);
|
||||||
|
|
||||||
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
|
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
|
||||||
struct device_attribute *attr,
|
struct device_attribute *attr,
|
||||||
|
@ -1382,6 +1449,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
|
||||||
{
|
{
|
||||||
struct amdgpu_device *adev = dev_get_drvdata(dev);
|
struct amdgpu_device *adev = dev_get_drvdata(dev);
|
||||||
struct drm_device *ddev = adev->ddev;
|
struct drm_device *ddev = adev->ddev;
|
||||||
|
int channel = to_sensor_dev_attr(attr)->index;
|
||||||
int r, temp, size = sizeof(temp);
|
int r, temp, size = sizeof(temp);
|
||||||
|
|
||||||
/* Can't get temperature when the card is off */
|
/* Can't get temperature when the card is off */
|
||||||
|
@ -1389,11 +1457,32 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
|
||||||
(ddev->switch_power_state != DRM_SWITCH_POWER_ON))
|
(ddev->switch_power_state != DRM_SWITCH_POWER_ON))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
/* get the temperature */
|
if (channel >= PP_TEMP_MAX)
|
||||||
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
|
return -EINVAL;
|
||||||
(void *)&temp, &size);
|
|
||||||
if (r)
|
switch (channel) {
|
||||||
return r;
|
case PP_TEMP_JUNCTION:
|
||||||
|
/* get current junction temperature */
|
||||||
|
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
|
||||||
|
(void *)&temp, &size);
|
||||||
|
if (r)
|
||||||
|
return r;
|
||||||
|
break;
|
||||||
|
case PP_TEMP_EDGE:
|
||||||
|
/* get current edge temperature */
|
||||||
|
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP,
|
||||||
|
(void *)&temp, &size);
|
||||||
|
if (r)
|
||||||
|
return r;
|
||||||
|
break;
|
||||||
|
case PP_TEMP_MEM:
|
||||||
|
/* get current memory temperature */
|
||||||
|
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP,
|
||||||
|
(void *)&temp, &size);
|
||||||
|
if (r)
|
||||||
|
return r;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
|
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
|
||||||
}
|
}
|
||||||
|
@ -1414,6 +1503,76 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev,
|
||||||
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
|
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static ssize_t amdgpu_hwmon_show_hotspot_temp_thresh(struct device *dev,
|
||||||
|
struct device_attribute *attr,
|
||||||
|
char *buf)
|
||||||
|
{
|
||||||
|
struct amdgpu_device *adev = dev_get_drvdata(dev);
|
||||||
|
int hyst = to_sensor_dev_attr(attr)->index;
|
||||||
|
int temp;
|
||||||
|
|
||||||
|
if (hyst)
|
||||||
|
temp = adev->pm.dpm.thermal.min_hotspot_temp;
|
||||||
|
else
|
||||||
|
temp = adev->pm.dpm.thermal.max_hotspot_crit_temp;
|
||||||
|
|
||||||
|
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev,
|
||||||
|
struct device_attribute *attr,
|
||||||
|
char *buf)
|
||||||
|
{
|
||||||
|
struct amdgpu_device *adev = dev_get_drvdata(dev);
|
||||||
|
int hyst = to_sensor_dev_attr(attr)->index;
|
||||||
|
int temp;
|
||||||
|
|
||||||
|
if (hyst)
|
||||||
|
temp = adev->pm.dpm.thermal.min_mem_temp;
|
||||||
|
else
|
||||||
|
temp = adev->pm.dpm.thermal.max_mem_crit_temp;
|
||||||
|
|
||||||
|
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev,
|
||||||
|
struct device_attribute *attr,
|
||||||
|
char *buf)
|
||||||
|
{
|
||||||
|
int channel = to_sensor_dev_attr(attr)->index;
|
||||||
|
|
||||||
|
if (channel >= PP_TEMP_MAX)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label);
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev,
|
||||||
|
struct device_attribute *attr,
|
||||||
|
char *buf)
|
||||||
|
{
|
||||||
|
struct amdgpu_device *adev = dev_get_drvdata(dev);
|
||||||
|
int channel = to_sensor_dev_attr(attr)->index;
|
||||||
|
int temp = 0;
|
||||||
|
|
||||||
|
if (channel >= PP_TEMP_MAX)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
switch (channel) {
|
||||||
|
case PP_TEMP_JUNCTION:
|
||||||
|
temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp;
|
||||||
|
break;
|
||||||
|
case PP_TEMP_EDGE:
|
||||||
|
temp = adev->pm.dpm.thermal.max_edge_emergency_temp;
|
||||||
|
break;
|
||||||
|
case PP_TEMP_MEM:
|
||||||
|
temp = adev->pm.dpm.thermal.max_mem_emergency_temp;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
|
||||||
|
}
|
||||||
|
|
||||||
static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
|
static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
|
||||||
struct device_attribute *attr,
|
struct device_attribute *attr,
|
||||||
char *buf)
|
char *buf)
|
||||||
|
@ -1983,11 +2142,20 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
|
||||||
*
|
*
|
||||||
* hwmon interfaces for GPU temperature:
|
* hwmon interfaces for GPU temperature:
|
||||||
*
|
*
|
||||||
* - temp1_input: the on die GPU temperature in millidegrees Celsius
|
* - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius
|
||||||
|
* - temp2_input and temp3_input are supported on SOC15 dGPUs only
|
||||||
*
|
*
|
||||||
* - temp1_crit: temperature critical max value in millidegrees Celsius
|
* - temp[1-3]_label: temperature channel label
|
||||||
|
* - temp2_label and temp3_label are supported on SOC15 dGPUs only
|
||||||
*
|
*
|
||||||
* - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
|
* - temp[1-3]_crit: temperature critical max value in millidegrees Celsius
|
||||||
|
* - temp2_crit and temp3_crit are supported on SOC15 dGPUs only
|
||||||
|
*
|
||||||
|
* - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
|
||||||
|
* - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only
|
||||||
|
*
|
||||||
|
* - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius
|
||||||
|
* - these are supported on SOC15 dGPUs only
|
||||||
*
|
*
|
||||||
* hwmon interfaces for GPU voltage:
|
* hwmon interfaces for GPU voltage:
|
||||||
*
|
*
|
||||||
|
@ -2035,9 +2203,21 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
|
static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE);
|
||||||
static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
|
static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
|
||||||
static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
|
static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
|
||||||
|
static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE);
|
||||||
|
static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION);
|
||||||
|
static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0);
|
||||||
|
static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1);
|
||||||
|
static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION);
|
||||||
|
static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM);
|
||||||
|
static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
|
||||||
|
static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
|
||||||
|
static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM);
|
||||||
|
static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE);
|
||||||
|
static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION);
|
||||||
|
static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM);
|
||||||
static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0);
|
static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0);
|
||||||
static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0);
|
static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0);
|
||||||
static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
|
static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
|
||||||
|
@ -2064,6 +2244,18 @@ static struct attribute *hwmon_attributes[] = {
|
||||||
&sensor_dev_attr_temp1_input.dev_attr.attr,
|
&sensor_dev_attr_temp1_input.dev_attr.attr,
|
||||||
&sensor_dev_attr_temp1_crit.dev_attr.attr,
|
&sensor_dev_attr_temp1_crit.dev_attr.attr,
|
||||||
&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
|
&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
|
||||||
|
&sensor_dev_attr_temp2_input.dev_attr.attr,
|
||||||
|
&sensor_dev_attr_temp2_crit.dev_attr.attr,
|
||||||
|
&sensor_dev_attr_temp2_crit_hyst.dev_attr.attr,
|
||||||
|
&sensor_dev_attr_temp3_input.dev_attr.attr,
|
||||||
|
&sensor_dev_attr_temp3_crit.dev_attr.attr,
|
||||||
|
&sensor_dev_attr_temp3_crit_hyst.dev_attr.attr,
|
||||||
|
&sensor_dev_attr_temp1_emergency.dev_attr.attr,
|
||||||
|
&sensor_dev_attr_temp2_emergency.dev_attr.attr,
|
||||||
|
&sensor_dev_attr_temp3_emergency.dev_attr.attr,
|
||||||
|
&sensor_dev_attr_temp1_label.dev_attr.attr,
|
||||||
|
&sensor_dev_attr_temp2_label.dev_attr.attr,
|
||||||
|
&sensor_dev_attr_temp3_label.dev_attr.attr,
|
||||||
&sensor_dev_attr_pwm1.dev_attr.attr,
|
&sensor_dev_attr_pwm1.dev_attr.attr,
|
||||||
&sensor_dev_attr_pwm1_enable.dev_attr.attr,
|
&sensor_dev_attr_pwm1_enable.dev_attr.attr,
|
||||||
&sensor_dev_attr_pwm1_min.dev_attr.attr,
|
&sensor_dev_attr_pwm1_min.dev_attr.attr,
|
||||||
|
@ -2186,6 +2378,22 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
|
||||||
attr == &sensor_dev_attr_freq2_label.dev_attr.attr))
|
attr == &sensor_dev_attr_freq2_label.dev_attr.attr))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
/* only SOC15 dGPUs support hotspot and mem temperatures */
|
||||||
|
if (((adev->flags & AMD_IS_APU) ||
|
||||||
|
adev->asic_type < CHIP_VEGA10) &&
|
||||||
|
(attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
|
||||||
|
attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
|
||||||
|
attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
|
||||||
|
attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
|
||||||
|
attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
|
||||||
|
attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
|
||||||
|
attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr ||
|
||||||
|
attr == &sensor_dev_attr_temp2_input.dev_attr.attr ||
|
||||||
|
attr == &sensor_dev_attr_temp3_input.dev_attr.attr ||
|
||||||
|
attr == &sensor_dev_attr_temp2_label.dev_attr.attr ||
|
||||||
|
attr == &sensor_dev_attr_temp3_label.dev_attr.attr))
|
||||||
|
return 0;
|
||||||
|
|
||||||
return effective_mode;
|
return effective_mode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2612,6 +2820,16 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
|
||||||
"gpu_busy_level\n");
|
"gpu_busy_level\n");
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
/* APU does not have its own dedicated memory */
|
||||||
|
if (!(adev->flags & AMD_IS_APU)) {
|
||||||
|
ret = device_create_file(adev->dev,
|
||||||
|
&dev_attr_mem_busy_percent);
|
||||||
|
if (ret) {
|
||||||
|
DRM_ERROR("failed to create device file "
|
||||||
|
"mem_busy_percent\n");
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
/* PCIe Perf counters won't work on APU nodes */
|
/* PCIe Perf counters won't work on APU nodes */
|
||||||
if (!(adev->flags & AMD_IS_APU)) {
|
if (!(adev->flags & AMD_IS_APU)) {
|
||||||
ret = device_create_file(adev->dev, &dev_attr_pcie_bw);
|
ret = device_create_file(adev->dev, &dev_attr_pcie_bw);
|
||||||
|
@ -2620,6 +2838,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (adev->unique_id)
|
||||||
|
ret = device_create_file(adev->dev, &dev_attr_unique_id);
|
||||||
|
if (ret) {
|
||||||
|
DRM_ERROR("failed to create device file unique_id\n");
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
ret = amdgpu_debugfs_pm_init(adev);
|
ret = amdgpu_debugfs_pm_init(adev);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
DRM_ERROR("Failed to register debugfs file for dpm!\n");
|
DRM_ERROR("Failed to register debugfs file for dpm!\n");
|
||||||
|
@ -2677,8 +2901,12 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
|
||||||
device_remove_file(adev->dev,
|
device_remove_file(adev->dev,
|
||||||
&dev_attr_pp_od_clk_voltage);
|
&dev_attr_pp_od_clk_voltage);
|
||||||
device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
|
device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
|
||||||
|
if (!(adev->flags & AMD_IS_APU))
|
||||||
|
device_remove_file(adev->dev, &dev_attr_mem_busy_percent);
|
||||||
if (!(adev->flags & AMD_IS_APU))
|
if (!(adev->flags & AMD_IS_APU))
|
||||||
device_remove_file(adev->dev, &dev_attr_pcie_bw);
|
device_remove_file(adev->dev, &dev_attr_pcie_bw);
|
||||||
|
if (adev->unique_id)
|
||||||
|
device_remove_file(adev->dev, &dev_attr_unique_id);
|
||||||
if ((adev->asic_type >= CHIP_VEGA10) &&
|
if ((adev->asic_type >= CHIP_VEGA10) &&
|
||||||
!(adev->flags & AMD_IS_APU))
|
!(adev->flags & AMD_IS_APU))
|
||||||
device_remove_file(adev->dev, &dev_attr_ppfeatures);
|
device_remove_file(adev->dev, &dev_attr_ppfeatures);
|
||||||
|
@ -2775,6 +3003,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
|
||||||
/* GPU Load */
|
/* GPU Load */
|
||||||
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size))
|
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size))
|
||||||
seq_printf(m, "GPU Load: %u %%\n", value);
|
seq_printf(m, "GPU Load: %u %%\n", value);
|
||||||
|
/* MEM Load */
|
||||||
|
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size))
|
||||||
|
seq_printf(m, "MEM Load: %u %%\n", value);
|
||||||
|
|
||||||
seq_printf(m, "\n");
|
seq_printf(m, "\n");
|
||||||
|
|
||||||
/* SMC feature mask */
|
/* SMC feature mask */
|
||||||
|
|
|
@ -289,6 +289,34 @@ static int psp_asd_load(struct psp_context *psp)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void psp_prep_reg_prog_cmd_buf(struct psp_gfx_cmd_resp *cmd,
|
||||||
|
uint32_t id, uint32_t value)
|
||||||
|
{
|
||||||
|
cmd->cmd_id = GFX_CMD_ID_PROG_REG;
|
||||||
|
cmd->cmd.cmd_setup_reg_prog.reg_value = value;
|
||||||
|
cmd->cmd.cmd_setup_reg_prog.reg_id = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
|
||||||
|
uint32_t value)
|
||||||
|
{
|
||||||
|
struct psp_gfx_cmd_resp *cmd = NULL;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
if (reg >= PSP_REG_LAST)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
|
||||||
|
if (!cmd)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
psp_prep_reg_prog_cmd_buf(cmd, reg, value);
|
||||||
|
ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
|
||||||
|
|
||||||
|
kfree(cmd);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
|
static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
|
||||||
uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared,
|
uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared,
|
||||||
uint32_t xgmi_ta_size, uint32_t shared_size)
|
uint32_t xgmi_ta_size, uint32_t shared_size)
|
||||||
|
|
|
@ -62,6 +62,14 @@ struct psp_ring
|
||||||
uint32_t ring_size;
|
uint32_t ring_size;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* More registers may be supported in the future */
|
||||||
|
enum psp_reg_prog_id {
|
||||||
|
PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */
|
||||||
|
PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */
|
||||||
|
PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */
|
||||||
|
PSP_REG_LAST
|
||||||
|
};
|
||||||
|
|
||||||
struct psp_funcs
|
struct psp_funcs
|
||||||
{
|
{
|
||||||
int (*init_microcode)(struct psp_context *psp);
|
int (*init_microcode)(struct psp_context *psp);
|
||||||
|
@ -95,12 +103,26 @@ struct psp_funcs
|
||||||
int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr);
|
int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
|
||||||
|
struct psp_xgmi_node_info {
|
||||||
|
uint64_t node_id;
|
||||||
|
uint8_t num_hops;
|
||||||
|
uint8_t is_sharing_enabled;
|
||||||
|
enum ta_xgmi_assigned_sdma_engine sdma_engine;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct psp_xgmi_topology_info {
|
||||||
|
uint32_t num_nodes;
|
||||||
|
struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
|
||||||
|
};
|
||||||
|
|
||||||
struct psp_xgmi_context {
|
struct psp_xgmi_context {
|
||||||
uint8_t initialized;
|
uint8_t initialized;
|
||||||
uint32_t session_id;
|
uint32_t session_id;
|
||||||
struct amdgpu_bo *xgmi_shared_bo;
|
struct amdgpu_bo *xgmi_shared_bo;
|
||||||
uint64_t xgmi_shared_mc_addr;
|
uint64_t xgmi_shared_mc_addr;
|
||||||
void *xgmi_shared_buf;
|
void *xgmi_shared_buf;
|
||||||
|
struct psp_xgmi_topology_info top_info;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct psp_ras_context {
|
struct psp_ras_context {
|
||||||
|
@ -181,18 +203,6 @@ struct amdgpu_psp_funcs {
|
||||||
enum AMDGPU_UCODE_ID);
|
enum AMDGPU_UCODE_ID);
|
||||||
};
|
};
|
||||||
|
|
||||||
#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
|
|
||||||
struct psp_xgmi_node_info {
|
|
||||||
uint64_t node_id;
|
|
||||||
uint8_t num_hops;
|
|
||||||
uint8_t is_sharing_enabled;
|
|
||||||
enum ta_xgmi_assigned_sdma_engine sdma_engine;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct psp_xgmi_topology_info {
|
|
||||||
uint32_t num_nodes;
|
|
||||||
struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
|
|
||||||
};
|
|
||||||
|
|
||||||
#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
|
#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
|
||||||
#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
|
#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
|
||||||
|
@ -250,5 +260,6 @@ int psp_ras_enable_features(struct psp_context *psp,
|
||||||
union ta_ras_cmd_input *info, bool enable);
|
union ta_ras_cmd_input *info, bool enable);
|
||||||
|
|
||||||
extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
|
extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
|
||||||
|
int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
|
||||||
|
uint32_t value);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -90,6 +90,12 @@ struct ras_manager {
|
||||||
struct ras_err_data err_data;
|
struct ras_err_data err_data;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ras_badpage {
|
||||||
|
unsigned int bp;
|
||||||
|
unsigned int size;
|
||||||
|
unsigned int flags;
|
||||||
|
};
|
||||||
|
|
||||||
const char *ras_error_string[] = {
|
const char *ras_error_string[] = {
|
||||||
"none",
|
"none",
|
||||||
"parity",
|
"parity",
|
||||||
|
@ -118,7 +124,8 @@ const char *ras_block_string[] = {
|
||||||
#define ras_err_str(i) (ras_error_string[ffs(i)])
|
#define ras_err_str(i) (ras_error_string[ffs(i)])
|
||||||
#define ras_block_str(i) (ras_block_string[i])
|
#define ras_block_str(i) (ras_block_string[i])
|
||||||
|
|
||||||
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1
|
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1
|
||||||
|
#define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2
|
||||||
#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
|
#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
|
||||||
|
|
||||||
static void amdgpu_ras_self_test(struct amdgpu_device *adev)
|
static void amdgpu_ras_self_test(struct amdgpu_device *adev)
|
||||||
|
@ -237,8 +244,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
/*
|
/**
|
||||||
* DOC: ras debugfs control interface
|
* DOC: AMDGPU RAS debugfs control interface
|
||||||
*
|
*
|
||||||
* It accepts struct ras_debug_if who has two members.
|
* It accepts struct ras_debug_if who has two members.
|
||||||
*
|
*
|
||||||
|
@ -521,6 +528,8 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
|
||||||
enable ? "enable":"disable",
|
enable ? "enable":"disable",
|
||||||
ras_block_str(head->block),
|
ras_block_str(head->block),
|
||||||
ret);
|
ret);
|
||||||
|
if (ret == TA_RAS_STATUS__RESET_NEEDED)
|
||||||
|
return -EAGAIN;
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -541,16 +550,32 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
|
if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
|
||||||
/* If ras is enabled by vbios, we set up ras object first in
|
if (enable) {
|
||||||
* both case. For enable, that is all what we need do. For
|
/* There is no harm in issuing a ras TA cmd regardless of
|
||||||
* disable, we need perform a ras TA disable cmd after that.
|
* the current ras state.
|
||||||
*/
|
* If current state == target state, it will do nothing
|
||||||
ret = __amdgpu_ras_feature_enable(adev, head, 1);
|
* But sometimes it requests driver to reset and repost
|
||||||
if (ret)
|
* with error code -EAGAIN.
|
||||||
return ret;
|
*/
|
||||||
|
ret = amdgpu_ras_feature_enable(adev, head, 1);
|
||||||
|
/* With old ras TA, we might fail to enable ras.
|
||||||
|
* Log it and just setup the object.
|
||||||
|
* TODO: remove this workaround in the future.
|
||||||
|
*/
|
||||||
|
if (ret == -EINVAL) {
|
||||||
|
ret = __amdgpu_ras_feature_enable(adev, head, 1);
|
||||||
|
if (!ret)
|
||||||
|
DRM_INFO("RAS INFO: %s setup object\n",
|
||||||
|
ras_block_str(head->block));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* setup the object then issue a ras TA disable cmd.*/
|
||||||
|
ret = __amdgpu_ras_feature_enable(adev, head, 1);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
if (!enable)
|
|
||||||
ret = amdgpu_ras_feature_enable(adev, head, 0);
|
ret = amdgpu_ras_feature_enable(adev, head, 0);
|
||||||
|
}
|
||||||
} else
|
} else
|
||||||
ret = amdgpu_ras_feature_enable(adev, head, enable);
|
ret = amdgpu_ras_feature_enable(adev, head, enable);
|
||||||
|
|
||||||
|
@ -691,6 +716,77 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
|
||||||
|
|
||||||
/* sysfs begin */
|
/* sysfs begin */
|
||||||
|
|
||||||
|
static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
|
||||||
|
struct ras_badpage **bps, unsigned int *count);
|
||||||
|
|
||||||
|
static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
|
||||||
|
{
|
||||||
|
switch (flags) {
|
||||||
|
case 0:
|
||||||
|
return "R";
|
||||||
|
case 1:
|
||||||
|
return "P";
|
||||||
|
case 2:
|
||||||
|
default:
|
||||||
|
return "F";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* DOC: ras sysfs gpu_vram_bad_pages interface
|
||||||
|
*
|
||||||
|
* It allows user to read the bad pages of vram on the gpu through
|
||||||
|
* /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
|
||||||
|
*
|
||||||
|
* It outputs multiple lines, and each line stands for one gpu page.
|
||||||
|
*
|
||||||
|
* The format of one line is below,
|
||||||
|
* gpu pfn : gpu page size : flags
|
||||||
|
*
|
||||||
|
* gpu pfn and gpu page size are printed in hex format.
|
||||||
|
* flags can be one of below character,
|
||||||
|
* R: reserved, this gpu page is reserved and cannot be used.
|
||||||
|
* P: pending for reserve, this gpu page is marked as bad, will be reserved
|
||||||
|
* in next window of page_reserve.
|
||||||
|
* F: unable to reserve. this gpu page can't be reserved due to some reasons.
|
||||||
|
*
|
||||||
|
* examples:
|
||||||
|
* 0x00000001 : 0x00001000 : R
|
||||||
|
* 0x00000002 : 0x00001000 : P
|
||||||
|
*/
|
||||||
|
|
||||||
|
static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
|
||||||
|
struct kobject *kobj, struct bin_attribute *attr,
|
||||||
|
char *buf, loff_t ppos, size_t count)
|
||||||
|
{
|
||||||
|
struct amdgpu_ras *con =
|
||||||
|
container_of(attr, struct amdgpu_ras, badpages_attr);
|
||||||
|
struct amdgpu_device *adev = con->adev;
|
||||||
|
const unsigned int element_size =
|
||||||
|
sizeof("0xabcdabcd : 0x12345678 : R\n") - 1;
|
||||||
|
unsigned int start = div64_ul(ppos + element_size - 1, element_size);
|
||||||
|
unsigned int end = div64_ul(ppos + count - 1, element_size);
|
||||||
|
ssize_t s = 0;
|
||||||
|
struct ras_badpage *bps = NULL;
|
||||||
|
unsigned int bps_count = 0;
|
||||||
|
|
||||||
|
memset(buf, 0, count);
|
||||||
|
|
||||||
|
if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
for (; start < end && start < bps_count; start++)
|
||||||
|
s += scnprintf(&buf[s], element_size + 1,
|
||||||
|
"0x%08x : 0x%08x : %1s\n",
|
||||||
|
bps[start].bp,
|
||||||
|
bps[start].size,
|
||||||
|
amdgpu_ras_badpage_flags_str(bps[start].flags));
|
||||||
|
|
||||||
|
kfree(bps);
|
||||||
|
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
|
static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
|
||||||
struct device_attribute *attr, char *buf)
|
struct device_attribute *attr, char *buf)
|
||||||
{
|
{
|
||||||
|
@ -731,9 +827,14 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
|
||||||
&con->features_attr.attr,
|
&con->features_attr.attr,
|
||||||
NULL
|
NULL
|
||||||
};
|
};
|
||||||
|
struct bin_attribute *bin_attrs[] = {
|
||||||
|
&con->badpages_attr,
|
||||||
|
NULL
|
||||||
|
};
|
||||||
struct attribute_group group = {
|
struct attribute_group group = {
|
||||||
.name = "ras",
|
.name = "ras",
|
||||||
.attrs = attrs,
|
.attrs = attrs,
|
||||||
|
.bin_attrs = bin_attrs,
|
||||||
};
|
};
|
||||||
|
|
||||||
con->features_attr = (struct device_attribute) {
|
con->features_attr = (struct device_attribute) {
|
||||||
|
@ -743,7 +844,19 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
|
||||||
},
|
},
|
||||||
.show = amdgpu_ras_sysfs_features_read,
|
.show = amdgpu_ras_sysfs_features_read,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
con->badpages_attr = (struct bin_attribute) {
|
||||||
|
.attr = {
|
||||||
|
.name = "gpu_vram_bad_pages",
|
||||||
|
.mode = S_IRUGO,
|
||||||
|
},
|
||||||
|
.size = 0,
|
||||||
|
.private = NULL,
|
||||||
|
.read = amdgpu_ras_sysfs_badpages_read,
|
||||||
|
};
|
||||||
|
|
||||||
sysfs_attr_init(attrs[0]);
|
sysfs_attr_init(attrs[0]);
|
||||||
|
sysfs_bin_attr_init(bin_attrs[0]);
|
||||||
|
|
||||||
return sysfs_create_group(&adev->dev->kobj, &group);
|
return sysfs_create_group(&adev->dev->kobj, &group);
|
||||||
}
|
}
|
||||||
|
@ -755,9 +868,14 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
|
||||||
&con->features_attr.attr,
|
&con->features_attr.attr,
|
||||||
NULL
|
NULL
|
||||||
};
|
};
|
||||||
|
struct bin_attribute *bin_attrs[] = {
|
||||||
|
&con->badpages_attr,
|
||||||
|
NULL
|
||||||
|
};
|
||||||
struct attribute_group group = {
|
struct attribute_group group = {
|
||||||
.name = "ras",
|
.name = "ras",
|
||||||
.attrs = attrs,
|
.attrs = attrs,
|
||||||
|
.bin_attrs = bin_attrs,
|
||||||
};
|
};
|
||||||
|
|
||||||
sysfs_remove_group(&adev->dev->kobj, &group);
|
sysfs_remove_group(&adev->dev->kobj, &group);
|
||||||
|
@ -1089,6 +1207,53 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
|
||||||
/* ih end */
|
/* ih end */
|
||||||
|
|
||||||
/* recovery begin */
|
/* recovery begin */
|
||||||
|
|
||||||
|
/* return 0 on success.
|
||||||
|
* caller needs to free bps.
|
||||||
|
*/
|
||||||
|
static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
|
||||||
|
struct ras_badpage **bps, unsigned int *count)
|
||||||
|
{
|
||||||
|
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||||
|
struct ras_err_handler_data *data;
|
||||||
|
int i = 0;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
if (!con || !con->eh_data || !bps || !count)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
mutex_lock(&con->recovery_lock);
|
||||||
|
data = con->eh_data;
|
||||||
|
if (!data || data->count == 0) {
|
||||||
|
*bps = NULL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
*bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
|
||||||
|
if (!*bps) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; i < data->count; i++) {
|
||||||
|
(*bps)[i] = (struct ras_badpage){
|
||||||
|
.bp = data->bps[i].bp,
|
||||||
|
.size = AMDGPU_GPU_PAGE_SIZE,
|
||||||
|
.flags = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
if (data->last_reserved <= i)
|
||||||
|
(*bps)[i].flags = 1;
|
||||||
|
else if (data->bps[i].bo == NULL)
|
||||||
|
(*bps)[i].flags = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
*count = data->count;
|
||||||
|
out:
|
||||||
|
mutex_unlock(&con->recovery_lock);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static void amdgpu_ras_do_recovery(struct work_struct *work)
|
static void amdgpu_ras_do_recovery(struct work_struct *work)
|
||||||
{
|
{
|
||||||
struct amdgpu_ras *ras =
|
struct amdgpu_ras *ras =
|
||||||
|
@ -1340,6 +1505,19 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
|
||||||
}
|
}
|
||||||
/* recovery end */
|
/* recovery end */
|
||||||
|
|
||||||
|
/* return 0 if ras will reset gpu and repost.*/
|
||||||
|
int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
|
||||||
|
unsigned int block)
|
||||||
|
{
|
||||||
|
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
|
||||||
|
|
||||||
|
if (!ras)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
ras->flags |= AMDGPU_RAS_FLAG_INIT_NEED_RESET;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* check hardware's ras ability which will be saved in hw_supported.
|
* check hardware's ras ability which will be saved in hw_supported.
|
||||||
* if hardware does not support ras, we can skip some ras initialization and
|
* if hardware does not support ras, we can skip some ras initialization and
|
||||||
|
@ -1415,8 +1593,10 @@ recovery_out:
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* do some init work after IP late init as dependence */
|
/* do some init work after IP late init as dependence.
|
||||||
void amdgpu_ras_post_init(struct amdgpu_device *adev)
|
* and it runs in resume/gpu reset/booting up cases.
|
||||||
|
*/
|
||||||
|
void amdgpu_ras_resume(struct amdgpu_device *adev)
|
||||||
{
|
{
|
||||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||||
struct ras_manager *obj, *tmp;
|
struct ras_manager *obj, *tmp;
|
||||||
|
@ -1444,6 +1624,32 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET) {
|
||||||
|
con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET;
|
||||||
|
/* setup ras obj state as disabled.
|
||||||
|
* for init_by_vbios case.
|
||||||
|
* if we want to enable ras, just enable it in a normal way.
|
||||||
|
* If we want to disable it, we need to set up the ras obj as enabled,
|
||||||
|
* then issue another TA disable cmd.
|
||||||
|
* See feature_enable_on_boot
|
||||||
|
*/
|
||||||
|
amdgpu_ras_disable_all_features(adev, 1);
|
||||||
|
amdgpu_ras_reset_gpu(adev, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void amdgpu_ras_suspend(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||||
|
|
||||||
|
if (!con)
|
||||||
|
return;
|
||||||
|
|
||||||
|
amdgpu_ras_disable_all_features(adev, 0);
|
||||||
|
/* Make sure all ras objects are disabled. */
|
||||||
|
if (con->features)
|
||||||
|
amdgpu_ras_disable_all_features(adev, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* do some fini work before IP fini as dependence */
|
/* do some fini work before IP fini as dependence */
|
||||||
|
|
|
@ -93,6 +93,7 @@ struct amdgpu_ras {
|
||||||
struct dentry *ent;
|
struct dentry *ent;
|
||||||
/* sysfs */
|
/* sysfs */
|
||||||
struct device_attribute features_attr;
|
struct device_attribute features_attr;
|
||||||
|
struct bin_attribute badpages_attr;
|
||||||
/* block array */
|
/* block array */
|
||||||
struct ras_manager *objs;
|
struct ras_manager *objs;
|
||||||
|
|
||||||
|
@ -175,6 +176,12 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
|
||||||
return ras && (ras->supported & (1 << block));
|
return ras && (ras->supported & (1 << block));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
|
||||||
|
unsigned int block);
|
||||||
|
|
||||||
|
void amdgpu_ras_resume(struct amdgpu_device *adev);
|
||||||
|
void amdgpu_ras_suspend(struct amdgpu_device *adev);
|
||||||
|
|
||||||
int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
|
int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
|
||||||
bool is_ce);
|
bool is_ce);
|
||||||
|
|
||||||
|
@ -187,13 +194,10 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev);
|
||||||
static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev,
|
static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev,
|
||||||
bool is_baco)
|
bool is_baco)
|
||||||
{
|
{
|
||||||
/* remove me when gpu reset works on vega20 A1. */
|
|
||||||
#if 0
|
|
||||||
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
|
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
|
||||||
|
|
||||||
if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
|
if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
|
||||||
schedule_work(&ras->recovery_work);
|
schedule_work(&ras->recovery_work);
|
||||||
#endif
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -255,7 +259,6 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
|
||||||
|
|
||||||
/* called in ip_init and ip_fini */
|
/* called in ip_init and ip_fini */
|
||||||
int amdgpu_ras_init(struct amdgpu_device *adev);
|
int amdgpu_ras_init(struct amdgpu_device *adev);
|
||||||
void amdgpu_ras_post_init(struct amdgpu_device *adev);
|
|
||||||
int amdgpu_ras_fini(struct amdgpu_device *adev);
|
int amdgpu_ras_fini(struct amdgpu_device *adev);
|
||||||
int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
|
int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
|
||||||
|
|
||||||
|
|
|
@ -114,6 +114,7 @@ struct amdgpu_ring_funcs {
|
||||||
uint32_t align_mask;
|
uint32_t align_mask;
|
||||||
u32 nop;
|
u32 nop;
|
||||||
bool support_64bit_ptrs;
|
bool support_64bit_ptrs;
|
||||||
|
bool no_user_fence;
|
||||||
unsigned vmhub;
|
unsigned vmhub;
|
||||||
unsigned extra_dw;
|
unsigned extra_dw;
|
||||||
|
|
||||||
|
|
|
@ -43,6 +43,7 @@
|
||||||
#include <linux/pagemap.h>
|
#include <linux/pagemap.h>
|
||||||
#include <linux/debugfs.h>
|
#include <linux/debugfs.h>
|
||||||
#include <linux/iommu.h>
|
#include <linux/iommu.h>
|
||||||
|
#include <linux/hmm.h>
|
||||||
#include "amdgpu.h"
|
#include "amdgpu.h"
|
||||||
#include "amdgpu_object.h"
|
#include "amdgpu_object.h"
|
||||||
#include "amdgpu_trace.h"
|
#include "amdgpu_trace.h"
|
||||||
|
@ -703,100 +704,177 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
|
||||||
/*
|
/*
|
||||||
* TTM backend functions.
|
* TTM backend functions.
|
||||||
*/
|
*/
|
||||||
struct amdgpu_ttm_gup_task_list {
|
|
||||||
struct list_head list;
|
|
||||||
struct task_struct *task;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct amdgpu_ttm_tt {
|
struct amdgpu_ttm_tt {
|
||||||
struct ttm_dma_tt ttm;
|
struct ttm_dma_tt ttm;
|
||||||
u64 offset;
|
u64 offset;
|
||||||
uint64_t userptr;
|
uint64_t userptr;
|
||||||
struct task_struct *usertask;
|
struct task_struct *usertask;
|
||||||
uint32_t userflags;
|
uint32_t userflags;
|
||||||
spinlock_t guptasklock;
|
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
|
||||||
struct list_head guptasks;
|
struct hmm_range *ranges;
|
||||||
atomic_t mmu_invalidations;
|
int nr_ranges;
|
||||||
uint32_t last_set_pages;
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
|
* amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
|
||||||
* pointer to memory
|
* memory and start HMM tracking CPU page table update
|
||||||
*
|
*
|
||||||
* Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
|
* Calling function must call amdgpu_ttm_tt_get_user_pages_done() once and only
|
||||||
* This provides a wrapper around the get_user_pages() call to provide
|
* once afterwards to stop HMM tracking
|
||||||
* device accessible pages that back user memory.
|
|
||||||
*/
|
*/
|
||||||
|
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
|
||||||
|
|
||||||
|
/* Support userptr pages that span at most 16 VMAs */
|
||||||
|
#define MAX_NR_VMAS (16)
|
||||||
|
|
||||||
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
|
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
|
||||||
{
|
{
|
||||||
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
||||||
struct mm_struct *mm = gtt->usertask->mm;
|
struct mm_struct *mm = gtt->usertask->mm;
|
||||||
unsigned int flags = 0;
|
unsigned long start = gtt->userptr;
|
||||||
unsigned pinned = 0;
|
unsigned long end = start + ttm->num_pages * PAGE_SIZE;
|
||||||
int r;
|
struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
|
||||||
|
struct hmm_range *ranges;
|
||||||
|
unsigned long nr_pages, i;
|
||||||
|
uint64_t *pfns, f;
|
||||||
|
int r = 0;
|
||||||
|
|
||||||
if (!mm) /* Happens during process shutdown */
|
if (!mm) /* Happens during process shutdown */
|
||||||
return -ESRCH;
|
return -ESRCH;
|
||||||
|
|
||||||
if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
|
|
||||||
flags |= FOLL_WRITE;
|
|
||||||
|
|
||||||
down_read(&mm->mmap_sem);
|
down_read(&mm->mmap_sem);
|
||||||
|
|
||||||
if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
|
/* user pages may cross multiple VMAs */
|
||||||
/*
|
gtt->nr_ranges = 0;
|
||||||
* check that we only use anonymous memory to prevent problems
|
do {
|
||||||
* with writeback
|
unsigned long vm_start;
|
||||||
*/
|
|
||||||
unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
|
|
||||||
struct vm_area_struct *vma;
|
|
||||||
|
|
||||||
vma = find_vma(mm, gtt->userptr);
|
if (gtt->nr_ranges >= MAX_NR_VMAS) {
|
||||||
if (!vma || vma->vm_file || vma->vm_end < end) {
|
DRM_ERROR("Too many VMAs in userptr range\n");
|
||||||
up_read(&mm->mmap_sem);
|
r = -EFAULT;
|
||||||
return -EPERM;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vm_start = vma ? vma->vm_end : start;
|
||||||
|
vma = find_vma(mm, vm_start);
|
||||||
|
if (unlikely(!vma || vm_start < vma->vm_start)) {
|
||||||
|
r = -EFAULT;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
vmas[gtt->nr_ranges++] = vma;
|
||||||
|
} while (end > vma->vm_end);
|
||||||
|
|
||||||
|
DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
|
||||||
|
start, gtt->nr_ranges, ttm->num_pages);
|
||||||
|
|
||||||
|
if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
|
||||||
|
vmas[0]->vm_file)) {
|
||||||
|
r = -EPERM;
|
||||||
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* loop enough times using contiguous pages of memory */
|
ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
|
||||||
do {
|
if (unlikely(!ranges)) {
|
||||||
unsigned num_pages = ttm->num_pages - pinned;
|
r = -ENOMEM;
|
||||||
uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
|
goto out;
|
||||||
struct page **p = pages + pinned;
|
}
|
||||||
struct amdgpu_ttm_gup_task_list guptask;
|
|
||||||
|
|
||||||
guptask.task = current;
|
pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
|
||||||
spin_lock(&gtt->guptasklock);
|
if (unlikely(!pfns)) {
|
||||||
list_add(&guptask.list, &gtt->guptasks);
|
r = -ENOMEM;
|
||||||
spin_unlock(&gtt->guptasklock);
|
goto out_free_ranges;
|
||||||
|
}
|
||||||
|
|
||||||
if (mm == current->mm)
|
for (i = 0; i < gtt->nr_ranges; i++)
|
||||||
r = get_user_pages(userptr, num_pages, flags, p, NULL);
|
amdgpu_hmm_init_range(&ranges[i]);
|
||||||
else
|
|
||||||
r = get_user_pages_remote(gtt->usertask,
|
|
||||||
mm, userptr, num_pages,
|
|
||||||
flags, p, NULL, NULL);
|
|
||||||
|
|
||||||
spin_lock(&gtt->guptasklock);
|
f = ranges[0].flags[HMM_PFN_VALID];
|
||||||
list_del(&guptask.list);
|
f |= amdgpu_ttm_tt_is_readonly(ttm) ?
|
||||||
spin_unlock(&gtt->guptasklock);
|
0 : ranges[0].flags[HMM_PFN_WRITE];
|
||||||
|
memset64(pfns, f, ttm->num_pages);
|
||||||
|
|
||||||
if (r < 0)
|
for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) {
|
||||||
goto release_pages;
|
ranges[i].vma = vmas[i];
|
||||||
|
ranges[i].start = max(start, vmas[i]->vm_start);
|
||||||
|
ranges[i].end = min(end, vmas[i]->vm_end);
|
||||||
|
ranges[i].pfns = pfns + nr_pages;
|
||||||
|
nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE;
|
||||||
|
|
||||||
pinned += r;
|
r = hmm_vma_fault(&ranges[i], true);
|
||||||
|
if (unlikely(r))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (unlikely(r)) {
|
||||||
|
while (i--)
|
||||||
|
hmm_vma_range_done(&ranges[i]);
|
||||||
|
|
||||||
} while (pinned < ttm->num_pages);
|
goto out_free_pfns;
|
||||||
|
}
|
||||||
|
|
||||||
up_read(&mm->mmap_sem);
|
up_read(&mm->mmap_sem);
|
||||||
|
|
||||||
|
for (i = 0; i < ttm->num_pages; i++) {
|
||||||
|
pages[i] = hmm_pfn_to_page(&ranges[0], pfns[i]);
|
||||||
|
if (!pages[i]) {
|
||||||
|
pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
|
||||||
|
i, pfns[i]);
|
||||||
|
goto out_invalid_pfn;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gtt->ranges = ranges;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
release_pages:
|
out_free_pfns:
|
||||||
release_pages(pages, pinned);
|
kvfree(pfns);
|
||||||
|
out_free_ranges:
|
||||||
|
kvfree(ranges);
|
||||||
|
out:
|
||||||
up_read(&mm->mmap_sem);
|
up_read(&mm->mmap_sem);
|
||||||
|
|
||||||
|
return r;
|
||||||
|
|
||||||
|
out_invalid_pfn:
|
||||||
|
for (i = 0; i < gtt->nr_ranges; i++)
|
||||||
|
hmm_vma_range_done(&ranges[i]);
|
||||||
|
kvfree(pfns);
|
||||||
|
kvfree(ranges);
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* amdgpu_ttm_tt_get_user_pages_done - stop HMM tracking of CPU page table changes
|
||||||
|
* Check if the pages backing this ttm range have been invalidated
|
||||||
|
*
|
||||||
|
* Returns: true if pages are still valid
|
||||||
|
*/
|
||||||
|
bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
|
||||||
|
{
|
||||||
|
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
||||||
|
bool r = false;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (!gtt || !gtt->userptr)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
DRM_DEBUG_DRIVER("user_pages_done 0x%llx nr_ranges %d pages 0x%lx\n",
|
||||||
|
gtt->userptr, gtt->nr_ranges, ttm->num_pages);
|
||||||
|
|
||||||
|
WARN_ONCE(!gtt->ranges || !gtt->ranges[0].pfns,
|
||||||
|
"No user pages to check\n");
|
||||||
|
|
||||||
|
if (gtt->ranges) {
|
||||||
|
for (i = 0; i < gtt->nr_ranges; i++)
|
||||||
|
r |= hmm_vma_range_done(&gtt->ranges[i]);
|
||||||
|
kvfree(gtt->ranges[0].pfns);
|
||||||
|
kvfree(gtt->ranges);
|
||||||
|
gtt->ranges = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
|
* amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
|
||||||
|
@ -807,39 +885,10 @@ release_pages:
|
||||||
*/
|
*/
|
||||||
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
|
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
|
||||||
{
|
{
|
||||||
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
unsigned long i;
|
||||||
unsigned i;
|
|
||||||
|
|
||||||
gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
|
|
||||||
for (i = 0; i < ttm->num_pages; ++i) {
|
|
||||||
if (ttm->pages[i])
|
|
||||||
put_page(ttm->pages[i]);
|
|
||||||
|
|
||||||
|
for (i = 0; i < ttm->num_pages; ++i)
|
||||||
ttm->pages[i] = pages ? pages[i] : NULL;
|
ttm->pages[i] = pages ? pages[i] : NULL;
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* amdgpu_ttm_tt_mark_user_page - Mark pages as dirty
|
|
||||||
*
|
|
||||||
* Called while unpinning userptr pages
|
|
||||||
*/
|
|
||||||
void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
|
|
||||||
{
|
|
||||||
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
|
||||||
unsigned i;
|
|
||||||
|
|
||||||
for (i = 0; i < ttm->num_pages; ++i) {
|
|
||||||
struct page *page = ttm->pages[i];
|
|
||||||
|
|
||||||
if (!page)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
|
|
||||||
set_page_dirty(page);
|
|
||||||
|
|
||||||
mark_page_accessed(page);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -901,10 +950,14 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
|
||||||
/* unmap the pages mapped to the device */
|
/* unmap the pages mapped to the device */
|
||||||
dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
|
dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
|
||||||
|
|
||||||
/* mark the pages as dirty */
|
|
||||||
amdgpu_ttm_tt_mark_user_pages(ttm);
|
|
||||||
|
|
||||||
sg_free_table(ttm->sg);
|
sg_free_table(ttm->sg);
|
||||||
|
|
||||||
|
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
|
||||||
|
if (gtt->ranges &&
|
||||||
|
ttm->pages[0] == hmm_pfn_to_page(&gtt->ranges[0],
|
||||||
|
gtt->ranges[0].pfns[0]))
|
||||||
|
WARN_ONCE(1, "Missing get_user_page_done\n");
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
|
int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
|
||||||
|
@ -1254,11 +1307,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
|
||||||
gtt->usertask = current->group_leader;
|
gtt->usertask = current->group_leader;
|
||||||
get_task_struct(gtt->usertask);
|
get_task_struct(gtt->usertask);
|
||||||
|
|
||||||
spin_lock_init(&gtt->guptasklock);
|
|
||||||
INIT_LIST_HEAD(&gtt->guptasks);
|
|
||||||
atomic_set(&gtt->mmu_invalidations, 0);
|
|
||||||
gtt->last_set_pages = 0;
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1287,7 +1335,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
|
||||||
unsigned long end)
|
unsigned long end)
|
||||||
{
|
{
|
||||||
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
||||||
struct amdgpu_ttm_gup_task_list *entry;
|
|
||||||
unsigned long size;
|
unsigned long size;
|
||||||
|
|
||||||
if (gtt == NULL || !gtt->userptr)
|
if (gtt == NULL || !gtt->userptr)
|
||||||
|
@ -1300,48 +1347,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
|
||||||
if (gtt->userptr > end || gtt->userptr + size <= start)
|
if (gtt->userptr > end || gtt->userptr + size <= start)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* Search the lists of tasks that hold this mapping and see
|
|
||||||
* if current is one of them. If it is return false.
|
|
||||||
*/
|
|
||||||
spin_lock(&gtt->guptasklock);
|
|
||||||
list_for_each_entry(entry, &gtt->guptasks, list) {
|
|
||||||
if (entry->task == current) {
|
|
||||||
spin_unlock(&gtt->guptasklock);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
spin_unlock(&gtt->guptasklock);
|
|
||||||
|
|
||||||
atomic_inc(&gtt->mmu_invalidations);
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
|
* amdgpu_ttm_tt_is_userptr - Are the pages backed by userptr?
|
||||||
*/
|
*/
|
||||||
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
|
bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
|
||||||
int *last_invalidated)
|
|
||||||
{
|
|
||||||
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
|
||||||
int prev_invalidated = *last_invalidated;
|
|
||||||
|
|
||||||
*last_invalidated = atomic_read(&gtt->mmu_invalidations);
|
|
||||||
return prev_invalidated != *last_invalidated;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
|
|
||||||
* been invalidated since the last time they've been set?
|
|
||||||
*/
|
|
||||||
bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
|
|
||||||
{
|
{
|
||||||
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
||||||
|
|
||||||
if (gtt == NULL || !gtt->userptr)
|
if (gtt == NULL || !gtt->userptr)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1753,44 +1772,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
|
||||||
|
|
||||||
/* Initialize various on-chip memory pools */
|
/* Initialize various on-chip memory pools */
|
||||||
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
|
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
|
||||||
adev->gds.mem.total_size);
|
adev->gds.gds_size);
|
||||||
if (r) {
|
if (r) {
|
||||||
DRM_ERROR("Failed initializing GDS heap.\n");
|
DRM_ERROR("Failed initializing GDS heap.\n");
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
|
|
||||||
4, AMDGPU_GEM_DOMAIN_GDS,
|
|
||||||
&adev->gds.gds_gfx_bo, NULL, NULL);
|
|
||||||
if (r)
|
|
||||||
return r;
|
|
||||||
|
|
||||||
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
|
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
|
||||||
adev->gds.gws.total_size);
|
adev->gds.gws_size);
|
||||||
if (r) {
|
if (r) {
|
||||||
DRM_ERROR("Failed initializing gws heap.\n");
|
DRM_ERROR("Failed initializing gws heap.\n");
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
|
|
||||||
1, AMDGPU_GEM_DOMAIN_GWS,
|
|
||||||
&adev->gds.gws_gfx_bo, NULL, NULL);
|
|
||||||
if (r)
|
|
||||||
return r;
|
|
||||||
|
|
||||||
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
|
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
|
||||||
adev->gds.oa.total_size);
|
adev->gds.oa_size);
|
||||||
if (r) {
|
if (r) {
|
||||||
DRM_ERROR("Failed initializing oa heap.\n");
|
DRM_ERROR("Failed initializing oa heap.\n");
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
|
|
||||||
1, AMDGPU_GEM_DOMAIN_OA,
|
|
||||||
&adev->gds.oa_gfx_bo, NULL, NULL);
|
|
||||||
if (r)
|
|
||||||
return r;
|
|
||||||
|
|
||||||
/* Register debugfs entries for amdgpu_ttm */
|
/* Register debugfs entries for amdgpu_ttm */
|
||||||
r = amdgpu_ttm_debugfs_init(adev);
|
r = amdgpu_ttm_debugfs_init(adev);
|
||||||
if (r) {
|
if (r) {
|
||||||
|
|
|
@ -101,9 +101,21 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
|
||||||
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
|
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
|
||||||
int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
|
int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
|
||||||
|
|
||||||
|
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
|
||||||
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
|
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
|
||||||
|
bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
|
||||||
|
#else
|
||||||
|
static inline int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
|
||||||
|
{
|
||||||
|
return -EPERM;
|
||||||
|
}
|
||||||
|
static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
|
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
|
||||||
void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
|
|
||||||
int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
|
int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
|
||||||
uint32_t flags);
|
uint32_t flags);
|
||||||
bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
|
bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
|
||||||
|
@ -112,7 +124,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
|
||||||
unsigned long end);
|
unsigned long end);
|
||||||
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
|
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
|
||||||
int *last_invalidated);
|
int *last_invalidated);
|
||||||
bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
|
bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
|
||||||
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
|
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
|
||||||
uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
|
uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
|
||||||
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
|
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
|
||||||
|
|
|
@ -313,6 +313,69 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
|
||||||
return AMDGPU_FW_LOAD_DIRECT;
|
return AMDGPU_FW_LOAD_DIRECT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define FW_VERSION_ATTR(name, mode, field) \
|
||||||
|
static ssize_t show_##name(struct device *dev, \
|
||||||
|
struct device_attribute *attr, \
|
||||||
|
char *buf) \
|
||||||
|
{ \
|
||||||
|
struct drm_device *ddev = dev_get_drvdata(dev); \
|
||||||
|
struct amdgpu_device *adev = ddev->dev_private; \
|
||||||
|
\
|
||||||
|
return snprintf(buf, PAGE_SIZE, "0x%08x\n", adev->field); \
|
||||||
|
} \
|
||||||
|
static DEVICE_ATTR(name, mode, show_##name, NULL)
|
||||||
|
|
||||||
|
FW_VERSION_ATTR(vce_fw_version, 0444, vce.fw_version);
|
||||||
|
FW_VERSION_ATTR(uvd_fw_version, 0444, uvd.fw_version);
|
||||||
|
FW_VERSION_ATTR(mc_fw_version, 0444, gmc.fw_version);
|
||||||
|
FW_VERSION_ATTR(me_fw_version, 0444, gfx.me_fw_version);
|
||||||
|
FW_VERSION_ATTR(pfp_fw_version, 0444, gfx.pfp_fw_version);
|
||||||
|
FW_VERSION_ATTR(ce_fw_version, 0444, gfx.ce_fw_version);
|
||||||
|
FW_VERSION_ATTR(rlc_fw_version, 0444, gfx.rlc_fw_version);
|
||||||
|
FW_VERSION_ATTR(rlc_srlc_fw_version, 0444, gfx.rlc_srlc_fw_version);
|
||||||
|
FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version);
|
||||||
|
FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version);
|
||||||
|
FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version);
|
||||||
|
FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version);
|
||||||
|
FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version);
|
||||||
|
FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version);
|
||||||
|
FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_fw_version);
|
||||||
|
FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_fw_version);
|
||||||
|
FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version);
|
||||||
|
FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version);
|
||||||
|
FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version);
|
||||||
|
FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version);
|
||||||
|
FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version);
|
||||||
|
|
||||||
|
static struct attribute *fw_attrs[] = {
|
||||||
|
&dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr,
|
||||||
|
&dev_attr_mc_fw_version.attr, &dev_attr_me_fw_version.attr,
|
||||||
|
&dev_attr_pfp_fw_version.attr, &dev_attr_ce_fw_version.attr,
|
||||||
|
&dev_attr_rlc_fw_version.attr, &dev_attr_rlc_srlc_fw_version.attr,
|
||||||
|
&dev_attr_rlc_srlg_fw_version.attr, &dev_attr_rlc_srls_fw_version.attr,
|
||||||
|
&dev_attr_mec_fw_version.attr, &dev_attr_mec2_fw_version.attr,
|
||||||
|
&dev_attr_sos_fw_version.attr, &dev_attr_asd_fw_version.attr,
|
||||||
|
&dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr,
|
||||||
|
&dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr,
|
||||||
|
&dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr,
|
||||||
|
&dev_attr_dmcu_fw_version.attr, NULL
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct attribute_group fw_attr_group = {
|
||||||
|
.name = "fw_version",
|
||||||
|
.attrs = fw_attrs
|
||||||
|
};
|
||||||
|
|
||||||
|
int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
return sysfs_create_group(&adev->dev->kobj, &fw_attr_group);
|
||||||
|
}
|
||||||
|
|
||||||
|
void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
sysfs_remove_group(&adev->dev->kobj, &fw_attr_group);
|
||||||
|
}
|
||||||
|
|
||||||
static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
|
static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
|
||||||
struct amdgpu_firmware_info *ucode,
|
struct amdgpu_firmware_info *ucode,
|
||||||
uint64_t mc_addr, void *kptr)
|
uint64_t mc_addr, void *kptr)
|
||||||
|
|
|
@ -291,7 +291,9 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
|
||||||
|
|
||||||
int amdgpu_ucode_init_bo(struct amdgpu_device *adev);
|
int amdgpu_ucode_init_bo(struct amdgpu_device *adev);
|
||||||
int amdgpu_ucode_create_bo(struct amdgpu_device *adev);
|
int amdgpu_ucode_create_bo(struct amdgpu_device *adev);
|
||||||
|
int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev);
|
||||||
void amdgpu_ucode_free_bo(struct amdgpu_device *adev);
|
void amdgpu_ucode_free_bo(struct amdgpu_device *adev);
|
||||||
|
void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev);
|
||||||
|
|
||||||
enum amdgpu_firmware_load_type
|
enum amdgpu_firmware_load_type
|
||||||
amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);
|
amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);
|
||||||
|
|
|
@ -212,132 +212,6 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev,
|
|
||||||
struct dpg_pause_state *new_state)
|
|
||||||
{
|
|
||||||
int ret_code;
|
|
||||||
uint32_t reg_data = 0;
|
|
||||||
uint32_t reg_data2 = 0;
|
|
||||||
struct amdgpu_ring *ring;
|
|
||||||
|
|
||||||
/* pause/unpause if state is changed */
|
|
||||||
if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
|
|
||||||
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
|
|
||||||
adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
|
|
||||||
new_state->fw_based, new_state->jpeg);
|
|
||||||
|
|
||||||
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
|
|
||||||
(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
|
|
||||||
|
|
||||||
if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
|
|
||||||
ret_code = 0;
|
|
||||||
|
|
||||||
if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
|
|
||||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
|
|
||||||
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
|
|
||||||
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
|
|
||||||
|
|
||||||
if (!ret_code) {
|
|
||||||
/* pause DPG non-jpeg */
|
|
||||||
reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
|
|
||||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
|
|
||||||
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
|
|
||||||
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
|
|
||||||
|
|
||||||
/* Restore */
|
|
||||||
ring = &adev->vcn.ring_enc[0];
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
|
|
||||||
|
|
||||||
ring = &adev->vcn.ring_enc[1];
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
|
|
||||||
|
|
||||||
ring = &adev->vcn.ring_dec;
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
|
|
||||||
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
|
|
||||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
|
|
||||||
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
|
|
||||||
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
/* unpause dpg non-jpeg, no need to wait */
|
|
||||||
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
|
|
||||||
}
|
|
||||||
adev->vcn.pause_state.fw_based = new_state->fw_based;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* pause/unpause if state is changed */
|
|
||||||
if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
|
|
||||||
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
|
|
||||||
adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
|
|
||||||
new_state->fw_based, new_state->jpeg);
|
|
||||||
|
|
||||||
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
|
|
||||||
(~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
|
|
||||||
|
|
||||||
if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
|
|
||||||
ret_code = 0;
|
|
||||||
|
|
||||||
if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
|
|
||||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
|
|
||||||
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
|
|
||||||
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
|
|
||||||
|
|
||||||
if (!ret_code) {
|
|
||||||
/* Make sure JPRG Snoop is disabled before sending the pause */
|
|
||||||
reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
|
|
||||||
reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
|
|
||||||
|
|
||||||
/* pause DPG jpeg */
|
|
||||||
reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
|
|
||||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
|
|
||||||
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
|
|
||||||
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
|
|
||||||
|
|
||||||
/* Restore */
|
|
||||||
ring = &adev->vcn.ring_jpeg;
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
|
|
||||||
UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
|
|
||||||
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
|
|
||||||
lower_32_bits(ring->gpu_addr));
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
|
|
||||||
upper_32_bits(ring->gpu_addr));
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
|
|
||||||
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
|
|
||||||
|
|
||||||
ring = &adev->vcn.ring_dec;
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
|
|
||||||
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
|
|
||||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
|
|
||||||
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
|
|
||||||
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
/* unpause dpg jpeg, no need to wait */
|
|
||||||
reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
|
|
||||||
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
|
|
||||||
}
|
|
||||||
adev->vcn.pause_state.jpeg = new_state->jpeg;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
|
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
|
||||||
{
|
{
|
||||||
struct amdgpu_device *adev =
|
struct amdgpu_device *adev =
|
||||||
|
@ -362,7 +236,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
|
||||||
else
|
else
|
||||||
new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
|
new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
|
||||||
|
|
||||||
amdgpu_vcn_pause_dpg_mode(adev, &new_state);
|
adev->vcn.pause_dpg_mode(adev, &new_state);
|
||||||
}
|
}
|
||||||
|
|
||||||
fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg);
|
fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg);
|
||||||
|
@ -417,7 +291,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
|
||||||
else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
|
else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
|
||||||
new_state.jpeg = VCN_DPG_STATE__PAUSE;
|
new_state.jpeg = VCN_DPG_STATE__PAUSE;
|
||||||
|
|
||||||
amdgpu_vcn_pause_dpg_mode(adev, &new_state);
|
adev->vcn.pause_dpg_mode(adev, &new_state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -45,6 +45,27 @@
|
||||||
#define VCN_ENC_CMD_REG_WRITE 0x0000000b
|
#define VCN_ENC_CMD_REG_WRITE 0x0000000b
|
||||||
#define VCN_ENC_CMD_REG_WAIT 0x0000000c
|
#define VCN_ENC_CMD_REG_WAIT 0x0000000c
|
||||||
|
|
||||||
|
#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \
|
||||||
|
({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
|
||||||
|
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
|
||||||
|
UVD_DPG_LMA_CTL__MASK_EN_MASK | \
|
||||||
|
((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
|
||||||
|
<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
|
||||||
|
(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
|
||||||
|
RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); \
|
||||||
|
})
|
||||||
|
|
||||||
|
#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \
|
||||||
|
do { \
|
||||||
|
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \
|
||||||
|
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
|
||||||
|
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
|
||||||
|
UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
|
||||||
|
((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
|
||||||
|
<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
|
||||||
|
(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
enum engine_status_constants {
|
enum engine_status_constants {
|
||||||
UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
|
UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
|
||||||
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
|
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
|
||||||
|
@ -81,6 +102,8 @@ struct amdgpu_vcn {
|
||||||
unsigned num_enc_rings;
|
unsigned num_enc_rings;
|
||||||
enum amd_powergating_state cur_state;
|
enum amd_powergating_state cur_state;
|
||||||
struct dpg_pause_state pause_state;
|
struct dpg_pause_state pause_state;
|
||||||
|
int (*pause_dpg_mode)(struct amdgpu_device *adev,
|
||||||
|
struct dpg_pause_state *new_state);
|
||||||
};
|
};
|
||||||
|
|
||||||
int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
|
int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
|
||||||
|
|
|
@ -426,3 +426,47 @@ uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest)
|
||||||
return clk;
|
return clk;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
struct amdgpu_virt *virt = &adev->virt;
|
||||||
|
|
||||||
|
if (virt->ops && virt->ops->init_reg_access_mode)
|
||||||
|
virt->ops->init_reg_access_mode(adev);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
bool ret = false;
|
||||||
|
struct amdgpu_virt *virt = &adev->virt;
|
||||||
|
|
||||||
|
if (amdgpu_sriov_vf(adev)
|
||||||
|
&& (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH))
|
||||||
|
ret = true;
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
bool ret = false;
|
||||||
|
struct amdgpu_virt *virt = &adev->virt;
|
||||||
|
|
||||||
|
if (amdgpu_sriov_vf(adev)
|
||||||
|
&& (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC)
|
||||||
|
&& !(amdgpu_sriov_runtime(adev)))
|
||||||
|
ret = true;
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
bool ret = false;
|
||||||
|
struct amdgpu_virt *virt = &adev->virt;
|
||||||
|
|
||||||
|
if (amdgpu_sriov_vf(adev)
|
||||||
|
&& (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING))
|
||||||
|
ret = true;
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
|
@ -48,6 +48,12 @@ struct amdgpu_vf_error_buffer {
|
||||||
uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
|
uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* According to the fw feature, some new reg access modes are supported */
|
||||||
|
#define AMDGPU_VIRT_REG_ACCESS_LEGACY (1 << 0) /* directly mmio */
|
||||||
|
#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH (1 << 1) /* by PSP */
|
||||||
|
#define AMDGPU_VIRT_REG_ACCESS_RLC (1 << 2) /* by RLC */
|
||||||
|
#define AMDGPU_VIRT_REG_SKIP_SEETING (1 << 3) /* Skip setting reg */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct amdgpu_virt_ops - amdgpu device virt operations
|
* struct amdgpu_virt_ops - amdgpu device virt operations
|
||||||
*/
|
*/
|
||||||
|
@ -59,6 +65,7 @@ struct amdgpu_virt_ops {
|
||||||
void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
|
void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
|
||||||
int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf);
|
int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf);
|
||||||
int (*force_dpm_level)(struct amdgpu_device *adev, u32 level);
|
int (*force_dpm_level)(struct amdgpu_device *adev, u32 level);
|
||||||
|
void (*init_reg_access_mode)(struct amdgpu_device *adev);
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -258,6 +265,7 @@ struct amdgpu_virt {
|
||||||
uint32_t gim_feature;
|
uint32_t gim_feature;
|
||||||
/* protect DPM events to GIM */
|
/* protect DPM events to GIM */
|
||||||
struct mutex dpm_mutex;
|
struct mutex dpm_mutex;
|
||||||
|
uint32_t reg_access_mode;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define amdgpu_sriov_enabled(adev) \
|
#define amdgpu_sriov_enabled(adev) \
|
||||||
|
@ -307,4 +315,9 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
|
||||||
uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest);
|
uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest);
|
||||||
uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest);
|
uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest);
|
||||||
|
|
||||||
|
void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev);
|
||||||
|
bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev);
|
||||||
|
bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev);
|
||||||
|
bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -40,6 +40,34 @@ void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive)
|
||||||
return &hive->device_list;
|
return &hive->device_list;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DOC: AMDGPU XGMI Support
|
||||||
|
*
|
||||||
|
* XGMI is a high speed interconnect that joins multiple GPU cards
|
||||||
|
* into a homogeneous memory space that is organized by a collective
|
||||||
|
* hive ID and individual node IDs, both of which are 64-bit numbers.
|
||||||
|
*
|
||||||
|
* The file xgmi_device_id contains the unique per GPU device ID and
|
||||||
|
* is stored in the /sys/class/drm/card${cardno}/device/ directory.
|
||||||
|
*
|
||||||
|
* Inside the device directory a sub-directory 'xgmi_hive_info' is
|
||||||
|
* created which contains the hive ID and the list of nodes.
|
||||||
|
*
|
||||||
|
* The hive ID is stored in:
|
||||||
|
* /sys/class/drm/card${cardno}/device/xgmi_hive_info/xgmi_hive_id
|
||||||
|
*
|
||||||
|
* The node information is stored in numbered directories:
|
||||||
|
* /sys/class/drm/card${cardno}/device/xgmi_hive_info/node${nodeno}/xgmi_device_id
|
||||||
|
*
|
||||||
|
* Each device has its own xgmi_hive_info directory with a mirror
|
||||||
|
* set of node sub-directories.
|
||||||
|
*
|
||||||
|
* The XGMI memory space is built by contiguously adding the power of
|
||||||
|
* two padded VRAM space from each node to each other.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
static ssize_t amdgpu_xgmi_show_hive_id(struct device *dev,
|
static ssize_t amdgpu_xgmi_show_hive_id(struct device *dev,
|
||||||
struct device_attribute *attr, char *buf)
|
struct device_attribute *attr, char *buf)
|
||||||
{
|
{
|
||||||
|
@ -238,7 +266,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
|
||||||
/* Each psp need to set the latest topology */
|
/* Each psp need to set the latest topology */
|
||||||
ret = psp_xgmi_set_topology_info(&adev->psp,
|
ret = psp_xgmi_set_topology_info(&adev->psp,
|
||||||
hive->number_devices,
|
hive->number_devices,
|
||||||
&hive->topology_info);
|
&adev->psp.xgmi_context.top_info);
|
||||||
if (ret)
|
if (ret)
|
||||||
dev_err(adev->dev,
|
dev_err(adev->dev,
|
||||||
"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
|
"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
|
||||||
|
@ -248,9 +276,22 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
|
||||||
|
struct amdgpu_device *peer_adev)
|
||||||
|
{
|
||||||
|
struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0 ; i < top->num_nodes; ++i)
|
||||||
|
if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
|
||||||
|
return top->nodes[i].num_hops;
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
|
int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
|
||||||
{
|
{
|
||||||
struct psp_xgmi_topology_info *hive_topology;
|
struct psp_xgmi_topology_info *top_info;
|
||||||
struct amdgpu_hive_info *hive;
|
struct amdgpu_hive_info *hive;
|
||||||
struct amdgpu_xgmi *entry;
|
struct amdgpu_xgmi *entry;
|
||||||
struct amdgpu_device *tmp_adev = NULL;
|
struct amdgpu_device *tmp_adev = NULL;
|
||||||
|
@ -283,35 +324,46 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
hive_topology = &hive->topology_info;
|
top_info = &adev->psp.xgmi_context.top_info;
|
||||||
|
|
||||||
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
|
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
|
||||||
list_for_each_entry(entry, &hive->device_list, head)
|
list_for_each_entry(entry, &hive->device_list, head)
|
||||||
hive_topology->nodes[count++].node_id = entry->node_id;
|
top_info->nodes[count++].node_id = entry->node_id;
|
||||||
|
top_info->num_nodes = count;
|
||||||
hive->number_devices = count;
|
hive->number_devices = count;
|
||||||
|
|
||||||
/* Each psp need to get the latest topology */
|
|
||||||
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
|
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
|
||||||
ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology);
|
/* update node list for other device in the hive */
|
||||||
|
if (tmp_adev != adev) {
|
||||||
|
top_info = &tmp_adev->psp.xgmi_context.top_info;
|
||||||
|
top_info->nodes[count - 1].node_id = adev->gmc.xgmi.node_id;
|
||||||
|
top_info->num_nodes = count;
|
||||||
|
}
|
||||||
|
ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
|
||||||
|
if (ret)
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* get latest topology info for each device from psp */
|
||||||
|
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
|
||||||
|
ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
|
||||||
|
&tmp_adev->psp.xgmi_context.top_info);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
dev_err(tmp_adev->dev,
|
dev_err(tmp_adev->dev,
|
||||||
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
|
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
|
||||||
tmp_adev->gmc.xgmi.node_id,
|
tmp_adev->gmc.xgmi.node_id,
|
||||||
tmp_adev->gmc.xgmi.hive_id, ret);
|
tmp_adev->gmc.xgmi.hive_id, ret);
|
||||||
/* To do : continue with some node failed or disable the whole hive */
|
/* To do : continue with some node failed or disable the whole hive */
|
||||||
break;
|
goto exit;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
|
|
||||||
ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
|
|
||||||
if (ret)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!ret)
|
if (!ret)
|
||||||
ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);
|
ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);
|
||||||
|
|
||||||
|
|
||||||
|
mutex_unlock(&hive->hive_lock);
|
||||||
|
exit:
|
||||||
if (!ret)
|
if (!ret)
|
||||||
dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n",
|
dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n",
|
||||||
adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id);
|
adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id);
|
||||||
|
@ -320,9 +372,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
|
||||||
adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id,
|
adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id,
|
||||||
ret);
|
ret);
|
||||||
|
|
||||||
|
|
||||||
mutex_unlock(&hive->hive_lock);
|
|
||||||
exit:
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -27,7 +27,6 @@
|
||||||
struct amdgpu_hive_info {
|
struct amdgpu_hive_info {
|
||||||
uint64_t hive_id;
|
uint64_t hive_id;
|
||||||
struct list_head device_list;
|
struct list_head device_list;
|
||||||
struct psp_xgmi_topology_info topology_info;
|
|
||||||
int number_devices;
|
int number_devices;
|
||||||
struct mutex hive_lock, reset_lock;
|
struct mutex hive_lock, reset_lock;
|
||||||
struct kobject *kobj;
|
struct kobject *kobj;
|
||||||
|
@ -41,6 +40,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
|
||||||
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
|
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
|
||||||
void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
|
void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
|
||||||
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
|
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
|
||||||
|
int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
|
||||||
|
struct amdgpu_device *peer_adev);
|
||||||
|
|
||||||
static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
|
static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
|
||||||
struct amdgpu_device *bo_adev)
|
struct amdgpu_device *bo_adev)
|
||||||
|
|
|
@ -1804,6 +1804,18 @@ static bool cik_need_reset_on_init(struct amdgpu_device *adev)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint64_t cik_get_pcie_replay_count(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
uint64_t nak_r, nak_g;
|
||||||
|
|
||||||
|
/* Get the number of NAKs received and generated */
|
||||||
|
nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
|
||||||
|
nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
|
||||||
|
|
||||||
|
/* Add the total number of NAKs, i.e., the number of replays */
|
||||||
|
return (nak_r + nak_g);
|
||||||
|
}
|
||||||
|
|
||||||
static const struct amdgpu_asic_funcs cik_asic_funcs =
|
static const struct amdgpu_asic_funcs cik_asic_funcs =
|
||||||
{
|
{
|
||||||
.read_disabled_bios = &cik_read_disabled_bios,
|
.read_disabled_bios = &cik_read_disabled_bios,
|
||||||
|
@ -1821,6 +1833,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
|
||||||
.init_doorbell_index = &legacy_doorbell_index_init,
|
.init_doorbell_index = &legacy_doorbell_index_init,
|
||||||
.get_pcie_usage = &cik_get_pcie_usage,
|
.get_pcie_usage = &cik_get_pcie_usage,
|
||||||
.need_reset_on_init = &cik_need_reset_on_init,
|
.need_reset_on_init = &cik_need_reset_on_init,
|
||||||
|
.get_pcie_replay_count = &cik_get_pcie_replay_count,
|
||||||
};
|
};
|
||||||
|
|
||||||
static int cik_common_early_init(void *handle)
|
static int cik_common_early_init(void *handle)
|
||||||
|
|
|
@ -105,6 +105,431 @@ static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
|
||||||
*flags |= AMD_CG_SUPPORT_DF_MGCG;
|
*flags |= AMD_CG_SUPPORT_DF_MGCG;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* hold counter assignment per gpu struct */
|
||||||
|
struct df_v3_6_event_mask {
|
||||||
|
struct amdgpu_device gpu;
|
||||||
|
uint64_t config_assign_mask[AMDGPU_DF_MAX_COUNTERS];
|
||||||
|
};
|
||||||
|
|
||||||
|
/* get assigned df perfmon ctr as int */
|
||||||
|
static void df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev,
|
||||||
|
uint64_t config,
|
||||||
|
int *counter)
|
||||||
|
{
|
||||||
|
struct df_v3_6_event_mask *mask;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
mask = container_of(adev, struct df_v3_6_event_mask, gpu);
|
||||||
|
|
||||||
|
for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) {
|
||||||
|
if ((config & 0x0FFFFFFUL) == mask->config_assign_mask[i]) {
|
||||||
|
*counter = i;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* get address based on counter assignment */
|
||||||
|
static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
|
||||||
|
uint64_t config,
|
||||||
|
int is_ctrl,
|
||||||
|
uint32_t *lo_base_addr,
|
||||||
|
uint32_t *hi_base_addr)
|
||||||
|
{
|
||||||
|
|
||||||
|
int target_cntr = -1;
|
||||||
|
|
||||||
|
df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
|
||||||
|
|
||||||
|
if (target_cntr < 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
switch (target_cntr) {
|
||||||
|
|
||||||
|
case 0:
|
||||||
|
*lo_base_addr = is_ctrl ? smnPerfMonCtlLo0 : smnPerfMonCtrLo0;
|
||||||
|
*hi_base_addr = is_ctrl ? smnPerfMonCtlHi0 : smnPerfMonCtrHi0;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
*lo_base_addr = is_ctrl ? smnPerfMonCtlLo1 : smnPerfMonCtrLo1;
|
||||||
|
*hi_base_addr = is_ctrl ? smnPerfMonCtlHi1 : smnPerfMonCtrHi1;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
*lo_base_addr = is_ctrl ? smnPerfMonCtlLo2 : smnPerfMonCtrLo2;
|
||||||
|
*hi_base_addr = is_ctrl ? smnPerfMonCtlHi2 : smnPerfMonCtrHi2;
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
*lo_base_addr = is_ctrl ? smnPerfMonCtlLo3 : smnPerfMonCtrLo3;
|
||||||
|
*hi_base_addr = is_ctrl ? smnPerfMonCtlHi3 : smnPerfMonCtrHi3;
|
||||||
|
break;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/* get read counter address */
|
||||||
|
static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev,
|
||||||
|
uint64_t config,
|
||||||
|
uint32_t *lo_base_addr,
|
||||||
|
uint32_t *hi_base_addr)
|
||||||
|
{
|
||||||
|
df_v3_6_pmc_get_addr(adev, config, 0, lo_base_addr, hi_base_addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* get control counter settings i.e. address and values to set */
|
||||||
|
static void df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
|
||||||
|
uint64_t config,
|
||||||
|
uint32_t *lo_base_addr,
|
||||||
|
uint32_t *hi_base_addr,
|
||||||
|
uint32_t *lo_val,
|
||||||
|
uint32_t *hi_val)
|
||||||
|
{
|
||||||
|
|
||||||
|
uint32_t eventsel, instance, unitmask;
|
||||||
|
uint32_t es_5_0, es_13_0, es_13_6, es_13_12, es_11_8, es_7_0;
|
||||||
|
|
||||||
|
df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr);
|
||||||
|
|
||||||
|
if (lo_val == NULL || hi_val == NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) {
|
||||||
|
DRM_ERROR("DF PMC addressing not retrieved! Lo: %x, Hi: %x",
|
||||||
|
*lo_base_addr, *hi_base_addr);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
eventsel = GET_EVENT(config);
|
||||||
|
instance = GET_INSTANCE(config);
|
||||||
|
unitmask = GET_UNITMASK(config);
|
||||||
|
|
||||||
|
es_5_0 = eventsel & 0x3FUL;
|
||||||
|
es_13_6 = instance;
|
||||||
|
es_13_0 = (es_13_6 << 6) + es_5_0;
|
||||||
|
es_13_12 = (es_13_0 & 0x03000UL) >> 12;
|
||||||
|
es_11_8 = (es_13_0 & 0x0F00UL) >> 8;
|
||||||
|
es_7_0 = es_13_0 & 0x0FFUL;
|
||||||
|
*lo_val = (es_7_0 & 0xFFUL) | ((unitmask & 0x0FUL) << 8);
|
||||||
|
*hi_val = (es_11_8 | ((es_13_12)<<(29)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* assign df performance counters for read */
|
||||||
|
static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev,
|
||||||
|
uint64_t config,
|
||||||
|
int *is_assigned)
|
||||||
|
{
|
||||||
|
|
||||||
|
struct df_v3_6_event_mask *mask;
|
||||||
|
int i, target_cntr;
|
||||||
|
|
||||||
|
target_cntr = -1;
|
||||||
|
|
||||||
|
*is_assigned = 0;
|
||||||
|
|
||||||
|
df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
|
||||||
|
|
||||||
|
if (target_cntr >= 0) {
|
||||||
|
*is_assigned = 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
mask = container_of(adev, struct df_v3_6_event_mask, gpu);
|
||||||
|
|
||||||
|
for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) {
|
||||||
|
if (mask->config_assign_mask[i] == 0ULL) {
|
||||||
|
mask->config_assign_mask[i] = config & 0x0FFFFFFUL;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return -ENOSPC;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* release performance counter */
|
||||||
|
static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
|
||||||
|
uint64_t config)
|
||||||
|
{
|
||||||
|
|
||||||
|
struct df_v3_6_event_mask *mask;
|
||||||
|
int target_cntr;
|
||||||
|
|
||||||
|
target_cntr = -1;
|
||||||
|
|
||||||
|
df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
|
||||||
|
|
||||||
|
mask = container_of(adev, struct df_v3_6_event_mask, gpu);
|
||||||
|
|
||||||
|
if (target_cntr >= 0)
|
||||||
|
mask->config_assign_mask[target_cntr] = 0ULL;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* get xgmi link counters via programmable data fabric (df) counters (max 4)
|
||||||
|
* using cake tx event.
|
||||||
|
*
|
||||||
|
* @adev -> amdgpu device
|
||||||
|
* @instance-> currently cake has 2 links to poll on vega20
|
||||||
|
* @count -> counters to pass
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void df_v3_6_get_xgmi_link_cntr(struct amdgpu_device *adev,
|
||||||
|
int instance,
|
||||||
|
uint64_t *count)
|
||||||
|
{
|
||||||
|
uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
|
||||||
|
uint64_t config;
|
||||||
|
|
||||||
|
config = GET_INSTANCE_CONFIG(instance);
|
||||||
|
|
||||||
|
df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
|
||||||
|
&hi_base_addr);
|
||||||
|
|
||||||
|
if ((lo_base_addr == 0) || (hi_base_addr == 0))
|
||||||
|
return;
|
||||||
|
|
||||||
|
lo_val = RREG32_PCIE(lo_base_addr);
|
||||||
|
hi_val = RREG32_PCIE(hi_base_addr);
|
||||||
|
|
||||||
|
*count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* reset xgmi link counters
|
||||||
|
*
|
||||||
|
* @adev -> amdgpu device
|
||||||
|
* @instance-> currently cake has 2 links to poll on vega20
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static void df_v3_6_reset_xgmi_link_cntr(struct amdgpu_device *adev,
|
||||||
|
int instance)
|
||||||
|
{
|
||||||
|
uint32_t lo_base_addr, hi_base_addr;
|
||||||
|
uint64_t config;
|
||||||
|
|
||||||
|
config = 0ULL | (0x7ULL) | ((0x46ULL + instance) << 8) | (0x2 << 16);
|
||||||
|
|
||||||
|
df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
|
||||||
|
&hi_base_addr);
|
||||||
|
|
||||||
|
if ((lo_base_addr == 0) || (hi_base_addr == 0))
|
||||||
|
return;
|
||||||
|
|
||||||
|
WREG32_PCIE(lo_base_addr, 0UL);
|
||||||
|
WREG32_PCIE(hi_base_addr, 0UL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* add xgmi link counters
|
||||||
|
*
|
||||||
|
* @adev -> amdgpu device
|
||||||
|
* @instance-> currently cake has 2 links to poll on vega20
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int df_v3_6_add_xgmi_link_cntr(struct amdgpu_device *adev,
|
||||||
|
int instance)
|
||||||
|
{
|
||||||
|
uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
|
||||||
|
uint64_t config;
|
||||||
|
int ret, is_assigned;
|
||||||
|
|
||||||
|
if (instance < 0 || instance > 1)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
config = GET_INSTANCE_CONFIG(instance);
|
||||||
|
|
||||||
|
ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned);
|
||||||
|
|
||||||
|
if (ret || is_assigned)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
df_v3_6_pmc_get_ctrl_settings(adev,
|
||||||
|
config,
|
||||||
|
&lo_base_addr,
|
||||||
|
&hi_base_addr,
|
||||||
|
&lo_val,
|
||||||
|
&hi_val);
|
||||||
|
|
||||||
|
WREG32_PCIE(lo_base_addr, lo_val);
|
||||||
|
WREG32_PCIE(hi_base_addr, hi_val);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* start xgmi link counters
|
||||||
|
*
|
||||||
|
* @adev -> amdgpu device
|
||||||
|
* @instance-> currently cake has 2 links to poll on vega20
|
||||||
|
* @is_enable -> either resume or assign event via df perfmon
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int df_v3_6_start_xgmi_link_cntr(struct amdgpu_device *adev,
|
||||||
|
int instance,
|
||||||
|
int is_enable)
|
||||||
|
{
|
||||||
|
uint32_t lo_base_addr, hi_base_addr, lo_val;
|
||||||
|
uint64_t config;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (instance < 0 || instance > 1)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
if (is_enable) {
|
||||||
|
|
||||||
|
ret = df_v3_6_add_xgmi_link_cntr(adev, instance);
|
||||||
|
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
config = GET_INSTANCE_CONFIG(instance);
|
||||||
|
|
||||||
|
df_v3_6_pmc_get_ctrl_settings(adev,
|
||||||
|
config,
|
||||||
|
&lo_base_addr,
|
||||||
|
&hi_base_addr,
|
||||||
|
NULL,
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
if (lo_base_addr == 0)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
lo_val = RREG32_PCIE(lo_base_addr);
|
||||||
|
|
||||||
|
WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22));
|
||||||
|
|
||||||
|
ret = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* start xgmi link counters
|
||||||
|
*
|
||||||
|
* @adev -> amdgpu device
|
||||||
|
* @instance-> currently cake has 2 links to poll on vega20
|
||||||
|
* @is_enable -> either pause or unassign event via df perfmon
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int df_v3_6_stop_xgmi_link_cntr(struct amdgpu_device *adev,
|
||||||
|
int instance,
|
||||||
|
int is_disable)
|
||||||
|
{
|
||||||
|
|
||||||
|
uint32_t lo_base_addr, hi_base_addr, lo_val;
|
||||||
|
uint64_t config;
|
||||||
|
|
||||||
|
config = GET_INSTANCE_CONFIG(instance);
|
||||||
|
|
||||||
|
if (is_disable) {
|
||||||
|
df_v3_6_reset_xgmi_link_cntr(adev, instance);
|
||||||
|
df_v3_6_pmc_release_cntr(adev, config);
|
||||||
|
} else {
|
||||||
|
|
||||||
|
df_v3_6_pmc_get_ctrl_settings(adev,
|
||||||
|
config,
|
||||||
|
&lo_base_addr,
|
||||||
|
&hi_base_addr,
|
||||||
|
NULL,
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
if ((lo_base_addr == 0) || (hi_base_addr == 0))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
lo_val = RREG32_PCIE(lo_base_addr);
|
||||||
|
|
||||||
|
WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22));
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * start a df perfmon event
 *
 * @adev -> amdgpu device
 * @config -> df perfmon event configuration
 * @is_enable -> forwarded to df_v3_6_start_xgmi_link_cntr()
 *
 * Only the two xgmi tx link events on vega20 are handled; any other asic
 * or event is a no-op that returns 0. Otherwise the result of
 * df_v3_6_start_xgmi_link_cntr() is returned.
 */
static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
			     int is_enable)
{
	int link;

	if (adev->asic_type != CHIP_VEGA20)
		return 0;

	/* map the event configuration to an xgmi tx link index */
	if (IS_DF_XGMI_0_TX(config))
		link = 0;
	else if (IS_DF_XGMI_1_TX(config))
		link = 1;
	else
		return 0;

	return df_v3_6_start_xgmi_link_cntr(adev, link, is_enable);
}
|
||||||
|
|
||||||
|
/*
 * stop a df perfmon event
 *
 * @adev -> amdgpu device
 * @config -> df perfmon event configuration
 * @is_disable -> forwarded to df_v3_6_stop_xgmi_link_cntr()
 *
 * Only the two xgmi tx link events on vega20 are handled; any other asic
 * or event is a no-op that returns 0. Otherwise the result of
 * df_v3_6_stop_xgmi_link_cntr() is returned.
 */
static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
			    int is_disable)
{
	int link;

	if (adev->asic_type != CHIP_VEGA20)
		return 0;

	/* map the event configuration to an xgmi tx link index */
	if (IS_DF_XGMI_0_TX(config))
		link = 0;
	else if (IS_DF_XGMI_1_TX(config))
		link = 1;
	else
		return 0;

	return df_v3_6_stop_xgmi_link_cntr(adev, link, is_disable);
}
|
||||||
|
|
||||||
|
static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
|
||||||
|
uint64_t config,
|
||||||
|
uint64_t *count)
|
||||||
|
{
|
||||||
|
|
||||||
|
int xgmi_tx_link;
|
||||||
|
|
||||||
|
switch (adev->asic_type) {
|
||||||
|
case CHIP_VEGA20:
|
||||||
|
xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0
|
||||||
|
: (IS_DF_XGMI_1_TX(config) ? 1 : -1);
|
||||||
|
|
||||||
|
if (xgmi_tx_link >= 0) {
|
||||||
|
df_v3_6_reset_xgmi_link_cntr(adev, xgmi_tx_link);
|
||||||
|
df_v3_6_get_xgmi_link_cntr(adev, xgmi_tx_link, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
const struct amdgpu_df_funcs df_v3_6_funcs = {
|
const struct amdgpu_df_funcs df_v3_6_funcs = {
|
||||||
.init = df_v3_6_init,
|
.init = df_v3_6_init,
|
||||||
.enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
|
.enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
|
||||||
|
@ -113,4 +538,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = {
|
||||||
.update_medium_grain_clock_gating =
|
.update_medium_grain_clock_gating =
|
||||||
df_v3_6_update_medium_grain_clock_gating,
|
df_v3_6_update_medium_grain_clock_gating,
|
||||||
.get_clockgating_state = df_v3_6_get_clockgating_state,
|
.get_clockgating_state = df_v3_6_get_clockgating_state,
|
||||||
|
.pmc_start = df_v3_6_pmc_start,
|
||||||
|
.pmc_stop = df_v3_6_pmc_stop,
|
||||||
|
.pmc_get_count = df_v3_6_pmc_get_count
|
||||||
};
|
};
|
||||||
|
|
|
@ -35,6 +35,23 @@ enum DF_V3_6_MGCG {
|
||||||
DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15
|
DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Defined in global_features.h as FTI_PERFMON_VISIBLE */
#define AMDGPU_DF_MAX_COUNTERS 4

/*
 * get flags from df perfmon config
 *
 * Layout used by these accessors: bits [7:0] event, bits [15:8] instance,
 * bits [23:16] unit mask. Arguments are parenthesized so that compound
 * expressions (e.g. "a | b") expand with the intended precedence.
 */
#define GET_EVENT(x)		((x) & 0xFFUL)
#define GET_INSTANCE(x)		(((x) >> 8) & 0xFFUL)
#define GET_UNITMASK(x)		(((x) >> 16) & 0xFFUL)

/* build the config for xgmi tx link (x): event 0x7, instance 0x46 + x, unit mask 0x2 */
#define GET_INSTANCE_CONFIG(x)	(0ULL | (0x07ULL) \
					| ((0x046ULL + (x)) << 8) \
					| (0x02 << 16))

/* df event conf macros (note: the argument is evaluated more than once) */
#define IS_DF_XGMI_0_TX(x)	(GET_EVENT(x) == 0x7 \
		&& GET_INSTANCE(x) == 0x46 && GET_UNITMASK(x) == 0x2)
#define IS_DF_XGMI_1_TX(x)	(GET_EVENT(x) == 0x7 \
		&& GET_INSTANCE(x) == 0x47 && GET_UNITMASK(x) == 0x2)
|
||||||
|
|
||||||
extern const struct amdgpu_df_funcs df_v3_6_funcs;
|
extern const struct amdgpu_df_funcs df_v3_6_funcs;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -4493,12 +4493,8 @@ static int gfx_v7_0_sw_init(void *handle)
|
||||||
|
|
||||||
static int gfx_v7_0_sw_fini(void *handle)
|
static int gfx_v7_0_sw_fini(void *handle)
|
||||||
{
|
{
|
||||||
int i;
|
|
||||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||||
|
int i;
|
||||||
amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
|
|
||||||
amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
|
|
||||||
amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
|
|
||||||
|
|
||||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
||||||
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
|
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
|
||||||
|
@ -5070,30 +5066,10 @@ static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
|
||||||
static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
|
static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
|
||||||
{
|
{
|
||||||
/* init asci gds info */
|
/* init asci gds info */
|
||||||
adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
|
adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
|
||||||
adev->gds.gws.total_size = 64;
|
adev->gds.gws_size = 64;
|
||||||
adev->gds.oa.total_size = 16;
|
adev->gds.oa_size = 16;
|
||||||
adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
|
adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
|
||||||
|
|
||||||
if (adev->gds.mem.total_size == 64 * 1024) {
|
|
||||||
adev->gds.mem.gfx_partition_size = 4096;
|
|
||||||
adev->gds.mem.cs_partition_size = 4096;
|
|
||||||
|
|
||||||
adev->gds.gws.gfx_partition_size = 4;
|
|
||||||
adev->gds.gws.cs_partition_size = 4;
|
|
||||||
|
|
||||||
adev->gds.oa.gfx_partition_size = 4;
|
|
||||||
adev->gds.oa.cs_partition_size = 1;
|
|
||||||
} else {
|
|
||||||
adev->gds.mem.gfx_partition_size = 1024;
|
|
||||||
adev->gds.mem.cs_partition_size = 1024;
|
|
||||||
|
|
||||||
adev->gds.gws.gfx_partition_size = 16;
|
|
||||||
adev->gds.gws.cs_partition_size = 16;
|
|
||||||
|
|
||||||
adev->gds.oa.gfx_partition_size = 4;
|
|
||||||
adev->gds.oa.cs_partition_size = 4;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2057,12 +2057,8 @@ static int gfx_v8_0_sw_init(void *handle)
|
||||||
|
|
||||||
static int gfx_v8_0_sw_fini(void *handle)
|
static int gfx_v8_0_sw_fini(void *handle)
|
||||||
{
|
{
|
||||||
int i;
|
|
||||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||||
|
int i;
|
||||||
amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
|
|
||||||
amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
|
|
||||||
amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
|
|
||||||
|
|
||||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
||||||
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
|
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
|
||||||
|
@ -7010,30 +7006,10 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
|
||||||
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
|
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
|
||||||
{
|
{
|
||||||
/* init asci gds info */
|
/* init asci gds info */
|
||||||
adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
|
adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
|
||||||
adev->gds.gws.total_size = 64;
|
adev->gds.gws_size = 64;
|
||||||
adev->gds.oa.total_size = 16;
|
adev->gds.oa_size = 16;
|
||||||
adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
|
adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
|
||||||
|
|
||||||
if (adev->gds.mem.total_size == 64 * 1024) {
|
|
||||||
adev->gds.mem.gfx_partition_size = 4096;
|
|
||||||
adev->gds.mem.cs_partition_size = 4096;
|
|
||||||
|
|
||||||
adev->gds.gws.gfx_partition_size = 4;
|
|
||||||
adev->gds.gws.cs_partition_size = 4;
|
|
||||||
|
|
||||||
adev->gds.oa.gfx_partition_size = 4;
|
|
||||||
adev->gds.oa.cs_partition_size = 1;
|
|
||||||
} else {
|
|
||||||
adev->gds.mem.gfx_partition_size = 1024;
|
|
||||||
adev->gds.mem.cs_partition_size = 1024;
|
|
||||||
|
|
||||||
adev->gds.gws.gfx_partition_size = 16;
|
|
||||||
adev->gds.gws.cs_partition_size = 16;
|
|
||||||
|
|
||||||
adev->gds.oa.gfx_partition_size = 4;
|
|
||||||
adev->gds.oa.cs_partition_size = 4;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
|
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
|
||||||
|
|
|
@ -34,6 +34,7 @@
|
||||||
#include "vega10_enum.h"
|
#include "vega10_enum.h"
|
||||||
#include "hdp/hdp_4_0_offset.h"
|
#include "hdp/hdp_4_0_offset.h"
|
||||||
|
|
||||||
|
#include "soc15.h"
|
||||||
#include "soc15_common.h"
|
#include "soc15_common.h"
|
||||||
#include "clearstate_gfx9.h"
|
#include "clearstate_gfx9.h"
|
||||||
#include "v9_structs.h"
|
#include "v9_structs.h"
|
||||||
|
@ -307,12 +308,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
|
||||||
{
|
{
|
||||||
switch (adev->asic_type) {
|
switch (adev->asic_type) {
|
||||||
case CHIP_VEGA10:
|
case CHIP_VEGA10:
|
||||||
soc15_program_register_sequence(adev,
|
if (!amdgpu_virt_support_skip_setting(adev)) {
|
||||||
golden_settings_gc_9_0,
|
soc15_program_register_sequence(adev,
|
||||||
ARRAY_SIZE(golden_settings_gc_9_0));
|
golden_settings_gc_9_0,
|
||||||
soc15_program_register_sequence(adev,
|
ARRAY_SIZE(golden_settings_gc_9_0));
|
||||||
golden_settings_gc_9_0_vg10,
|
soc15_program_register_sequence(adev,
|
||||||
ARRAY_SIZE(golden_settings_gc_9_0_vg10));
|
golden_settings_gc_9_0_vg10,
|
||||||
|
ARRAY_SIZE(golden_settings_gc_9_0_vg10));
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case CHIP_VEGA12:
|
case CHIP_VEGA12:
|
||||||
soc15_program_register_sequence(adev,
|
soc15_program_register_sequence(adev,
|
||||||
|
@ -1458,8 +1461,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
|
||||||
|
|
||||||
/* GDS reserve memory: 64 bytes alignment */
|
/* GDS reserve memory: 64 bytes alignment */
|
||||||
adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
|
adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
|
||||||
adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
|
adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
|
||||||
adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
|
|
||||||
adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
|
adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
|
||||||
adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
|
adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
|
||||||
|
|
||||||
|
@ -1567,7 +1569,7 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
|
||||||
|
|
||||||
gfx_v9_0_write_data_to_reg(ring, 0, false,
|
gfx_v9_0_write_data_to_reg(ring, 0, false,
|
||||||
SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
|
SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
|
||||||
(adev->gds.mem.total_size +
|
(adev->gds.gds_size +
|
||||||
adev->gfx.ngg.gds_reserve_size));
|
adev->gfx.ngg.gds_reserve_size));
|
||||||
|
|
||||||
amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
|
amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
|
||||||
|
@ -1781,10 +1783,6 @@ static int gfx_v9_0_sw_fini(void *handle)
|
||||||
kfree(ras_if);
|
kfree(ras_if);
|
||||||
}
|
}
|
||||||
|
|
||||||
amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
|
|
||||||
amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
|
|
||||||
amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
|
|
||||||
|
|
||||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
||||||
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
|
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
|
||||||
for (i = 0; i < adev->gfx.num_compute_rings; i++)
|
for (i = 0; i < adev->gfx.num_compute_rings; i++)
|
||||||
|
@ -1834,7 +1832,7 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh
|
||||||
else
|
else
|
||||||
data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
|
data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
|
||||||
|
|
||||||
WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
|
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
|
||||||
}
|
}
|
||||||
|
|
||||||
static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
|
static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
|
||||||
|
@ -1902,8 +1900,8 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
|
||||||
for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
|
for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
|
||||||
soc15_grbm_select(adev, 0, 0, 0, i);
|
soc15_grbm_select(adev, 0, 0, 0, i);
|
||||||
/* CP and shaders */
|
/* CP and shaders */
|
||||||
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
|
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
|
||||||
WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
|
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
|
||||||
}
|
}
|
||||||
soc15_grbm_select(adev, 0, 0, 0, 0);
|
soc15_grbm_select(adev, 0, 0, 0, 0);
|
||||||
mutex_unlock(&adev->srbm_mutex);
|
mutex_unlock(&adev->srbm_mutex);
|
||||||
|
@ -1914,7 +1912,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
|
||||||
u32 tmp;
|
u32 tmp;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
|
WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
|
||||||
|
|
||||||
gfx_v9_0_tiling_mode_table_init(adev);
|
gfx_v9_0_tiling_mode_table_init(adev);
|
||||||
|
|
||||||
|
@ -1957,7 +1955,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
|
||||||
*/
|
*/
|
||||||
gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
|
gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
|
||||||
|
|
||||||
WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE,
|
WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE,
|
||||||
(adev->gfx.config.sc_prim_fifo_size_frontend <<
|
(adev->gfx.config.sc_prim_fifo_size_frontend <<
|
||||||
PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
|
PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
|
||||||
(adev->gfx.config.sc_prim_fifo_size_backend <<
|
(adev->gfx.config.sc_prim_fifo_size_backend <<
|
||||||
|
@ -2024,11 +2022,11 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
|
||||||
static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
|
static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
|
||||||
{
|
{
|
||||||
/* csib */
|
/* csib */
|
||||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
|
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
|
||||||
adev->gfx.rlc.clear_state_gpu_addr >> 32);
|
adev->gfx.rlc.clear_state_gpu_addr >> 32);
|
||||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
|
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
|
||||||
adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
|
adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
|
||||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
|
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
|
||||||
adev->gfx.rlc.clear_state_size);
|
adev->gfx.rlc.clear_state_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2498,7 +2496,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
|
||||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
||||||
adev->gfx.gfx_ring[i].sched.ready = false;
|
adev->gfx.gfx_ring[i].sched.ready = false;
|
||||||
}
|
}
|
||||||
WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
|
WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
|
||||||
udelay(50);
|
udelay(50);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2696,9 +2694,9 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (enable) {
|
if (enable) {
|
||||||
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
|
WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
|
||||||
} else {
|
} else {
|
||||||
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
|
WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
|
||||||
(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
|
(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
|
||||||
for (i = 0; i < adev->gfx.num_compute_rings; i++)
|
for (i = 0; i < adev->gfx.num_compute_rings; i++)
|
||||||
adev->gfx.compute_ring[i].sched.ready = false;
|
adev->gfx.compute_ring[i].sched.ready = false;
|
||||||
|
@ -2759,9 +2757,9 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
|
||||||
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
|
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
|
||||||
tmp &= 0xffffff00;
|
tmp &= 0xffffff00;
|
||||||
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
|
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
|
||||||
WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
|
WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
|
||||||
tmp |= 0x80;
|
tmp |= 0x80;
|
||||||
WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
|
WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
|
static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
|
||||||
|
@ -2979,67 +2977,67 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
|
||||||
/* disable wptr polling */
|
/* disable wptr polling */
|
||||||
WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
|
WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
|
||||||
|
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
|
||||||
mqd->cp_hqd_eop_base_addr_lo);
|
mqd->cp_hqd_eop_base_addr_lo);
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
|
||||||
mqd->cp_hqd_eop_base_addr_hi);
|
mqd->cp_hqd_eop_base_addr_hi);
|
||||||
|
|
||||||
/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
|
/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
|
||||||
mqd->cp_hqd_eop_control);
|
mqd->cp_hqd_eop_control);
|
||||||
|
|
||||||
/* enable doorbell? */
|
/* enable doorbell? */
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
|
||||||
mqd->cp_hqd_pq_doorbell_control);
|
mqd->cp_hqd_pq_doorbell_control);
|
||||||
|
|
||||||
/* disable the queue if it's active */
|
/* disable the queue if it's active */
|
||||||
if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
|
if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
|
||||||
for (j = 0; j < adev->usec_timeout; j++) {
|
for (j = 0; j < adev->usec_timeout; j++) {
|
||||||
if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
|
if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
|
||||||
break;
|
break;
|
||||||
udelay(1);
|
udelay(1);
|
||||||
}
|
}
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
|
||||||
mqd->cp_hqd_dequeue_request);
|
mqd->cp_hqd_dequeue_request);
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
|
||||||
mqd->cp_hqd_pq_rptr);
|
mqd->cp_hqd_pq_rptr);
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
|
||||||
mqd->cp_hqd_pq_wptr_lo);
|
mqd->cp_hqd_pq_wptr_lo);
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
|
||||||
mqd->cp_hqd_pq_wptr_hi);
|
mqd->cp_hqd_pq_wptr_hi);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* set the pointer to the MQD */
|
/* set the pointer to the MQD */
|
||||||
WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
|
WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
|
||||||
mqd->cp_mqd_base_addr_lo);
|
mqd->cp_mqd_base_addr_lo);
|
||||||
WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI,
|
WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
|
||||||
mqd->cp_mqd_base_addr_hi);
|
mqd->cp_mqd_base_addr_hi);
|
||||||
|
|
||||||
/* set MQD vmid to 0 */
|
/* set MQD vmid to 0 */
|
||||||
WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL,
|
WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
|
||||||
mqd->cp_mqd_control);
|
mqd->cp_mqd_control);
|
||||||
|
|
||||||
/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
|
/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
|
||||||
mqd->cp_hqd_pq_base_lo);
|
mqd->cp_hqd_pq_base_lo);
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
|
||||||
mqd->cp_hqd_pq_base_hi);
|
mqd->cp_hqd_pq_base_hi);
|
||||||
|
|
||||||
/* set up the HQD, this is similar to CP_RB0_CNTL */
|
/* set up the HQD, this is similar to CP_RB0_CNTL */
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
|
||||||
mqd->cp_hqd_pq_control);
|
mqd->cp_hqd_pq_control);
|
||||||
|
|
||||||
/* set the wb address whether it's enabled or not */
|
/* set the wb address whether it's enabled or not */
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
|
||||||
mqd->cp_hqd_pq_rptr_report_addr_lo);
|
mqd->cp_hqd_pq_rptr_report_addr_lo);
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
|
||||||
mqd->cp_hqd_pq_rptr_report_addr_hi);
|
mqd->cp_hqd_pq_rptr_report_addr_hi);
|
||||||
|
|
||||||
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
|
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
|
||||||
mqd->cp_hqd_pq_wptr_poll_addr_lo);
|
mqd->cp_hqd_pq_wptr_poll_addr_lo);
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
|
||||||
mqd->cp_hqd_pq_wptr_poll_addr_hi);
|
mqd->cp_hqd_pq_wptr_poll_addr_hi);
|
||||||
|
|
||||||
/* enable the doorbell if requested */
|
/* enable the doorbell if requested */
|
||||||
|
@ -3054,19 +3052,19 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
|
||||||
mqd->cp_hqd_pq_doorbell_control);
|
mqd->cp_hqd_pq_doorbell_control);
|
||||||
|
|
||||||
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
|
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
|
||||||
mqd->cp_hqd_pq_wptr_lo);
|
mqd->cp_hqd_pq_wptr_lo);
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
|
||||||
mqd->cp_hqd_pq_wptr_hi);
|
mqd->cp_hqd_pq_wptr_hi);
|
||||||
|
|
||||||
/* set the vmid for the queue */
|
/* set the vmid for the queue */
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
|
||||||
|
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
|
||||||
mqd->cp_hqd_persistent_state);
|
mqd->cp_hqd_persistent_state);
|
||||||
|
|
||||||
/* activate the queue */
|
/* activate the queue */
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
|
||||||
mqd->cp_hqd_active);
|
mqd->cp_hqd_active);
|
||||||
|
|
||||||
if (ring->use_doorbell)
|
if (ring->use_doorbell)
|
||||||
|
@ -3083,7 +3081,7 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
|
||||||
/* disable the queue if it's active */
|
/* disable the queue if it's active */
|
||||||
if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
|
if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
|
||||||
|
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
|
||||||
|
|
||||||
for (j = 0; j < adev->usec_timeout; j++) {
|
for (j = 0; j < adev->usec_timeout; j++) {
|
||||||
if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
|
if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
|
||||||
|
@ -3095,21 +3093,21 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
|
||||||
DRM_DEBUG("KIQ dequeue request failed.\n");
|
DRM_DEBUG("KIQ dequeue request failed.\n");
|
||||||
|
|
||||||
/* Manual disable if dequeue request times out */
|
/* Manual disable if dequeue request times out */
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0);
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
|
||||||
0);
|
0);
|
||||||
}
|
}
|
||||||
|
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_IQ_TIMER, 0);
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL, 0);
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 0);
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -3529,6 +3527,241 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
|
||||||
(1 << (oa_size + oa_base)) - (1 << oa_base));
|
(1 << (oa_size + oa_base)) - (1 << oa_base));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const u32 vgpr_init_compute_shader[] =
|
||||||
|
{
|
||||||
|
0xb07c0000, 0xbe8000ff,
|
||||||
|
0x000000f8, 0xbf110800,
|
||||||
|
0x7e000280, 0x7e020280,
|
||||||
|
0x7e040280, 0x7e060280,
|
||||||
|
0x7e080280, 0x7e0a0280,
|
||||||
|
0x7e0c0280, 0x7e0e0280,
|
||||||
|
0x80808800, 0xbe803200,
|
||||||
|
0xbf84fff5, 0xbf9c0000,
|
||||||
|
0xd28c0001, 0x0001007f,
|
||||||
|
0xd28d0001, 0x0002027e,
|
||||||
|
0x10020288, 0xb8810904,
|
||||||
|
0xb7814000, 0xd1196a01,
|
||||||
|
0x00000301, 0xbe800087,
|
||||||
|
0xbefc00c1, 0xd89c4000,
|
||||||
|
0x00020201, 0xd89cc080,
|
||||||
|
0x00040401, 0x320202ff,
|
||||||
|
0x00000800, 0x80808100,
|
||||||
|
0xbf84fff8, 0x7e020280,
|
||||||
|
0xbf810000, 0x00000000,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const u32 sgpr_init_compute_shader[] =
|
||||||
|
{
|
||||||
|
0xb07c0000, 0xbe8000ff,
|
||||||
|
0x0000005f, 0xbee50080,
|
||||||
|
0xbe812c65, 0xbe822c65,
|
||||||
|
0xbe832c65, 0xbe842c65,
|
||||||
|
0xbe852c65, 0xb77c0005,
|
||||||
|
0x80808500, 0xbf84fff8,
|
||||||
|
0xbe800080, 0xbf810000,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct soc15_reg_entry vgpr_init_regs[] = {
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct soc15_reg_entry sgpr_init_regs[] = {
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct soc15_reg_entry sec_ded_counter_registers[] = {
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) },
|
||||||
|
{ SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) },
|
||||||
|
};
|
||||||
|
|
||||||
|
static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
|
||||||
|
struct amdgpu_ib ib;
|
||||||
|
struct dma_fence *f = NULL;
|
||||||
|
int r, i, j;
|
||||||
|
unsigned total_size, vgpr_offset, sgpr_offset;
|
||||||
|
u64 gpu_addr;
|
||||||
|
|
||||||
|
/* only support when RAS is enabled */
|
||||||
|
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* bail if the compute ring is not ready */
|
||||||
|
if (!ring->sched.ready)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
total_size =
|
||||||
|
((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
|
||||||
|
total_size +=
|
||||||
|
((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
|
||||||
|
total_size = ALIGN(total_size, 256);
|
||||||
|
vgpr_offset = total_size;
|
||||||
|
total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
|
||||||
|
sgpr_offset = total_size;
|
||||||
|
total_size += sizeof(sgpr_init_compute_shader);
|
||||||
|
|
||||||
|
/* allocate an indirect buffer to put the commands in */
|
||||||
|
memset(&ib, 0, sizeof(ib));
|
||||||
|
r = amdgpu_ib_get(adev, NULL, total_size, &ib);
|
||||||
|
if (r) {
|
||||||
|
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* load the compute shaders */
|
||||||
|
for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
|
||||||
|
ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
|
||||||
|
|
||||||
|
for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
|
||||||
|
ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
|
||||||
|
|
||||||
|
/* init the ib length to 0 */
|
||||||
|
ib.length_dw = 0;
|
||||||
|
|
||||||
|
/* VGPR */
|
||||||
|
/* write the register state for the compute dispatch */
|
||||||
|
for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
|
||||||
|
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
|
||||||
|
ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
|
||||||
|
- PACKET3_SET_SH_REG_START;
|
||||||
|
ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
|
||||||
|
}
|
||||||
|
/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
|
||||||
|
gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
|
||||||
|
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
|
||||||
|
ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
|
||||||
|
- PACKET3_SET_SH_REG_START;
|
||||||
|
ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
|
||||||
|
ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
|
||||||
|
|
||||||
|
/* write dispatch packet */
|
||||||
|
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
|
||||||
|
ib.ptr[ib.length_dw++] = 128; /* x */
|
||||||
|
ib.ptr[ib.length_dw++] = 1; /* y */
|
||||||
|
ib.ptr[ib.length_dw++] = 1; /* z */
|
||||||
|
ib.ptr[ib.length_dw++] =
|
||||||
|
REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
|
||||||
|
|
||||||
|
/* write CS partial flush packet */
|
||||||
|
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
|
||||||
|
ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
|
||||||
|
|
||||||
|
/* SGPR */
|
||||||
|
/* write the register state for the compute dispatch */
|
||||||
|
for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
|
||||||
|
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
|
||||||
|
ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
|
||||||
|
- PACKET3_SET_SH_REG_START;
|
||||||
|
ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
|
||||||
|
}
|
||||||
|
/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
|
||||||
|
gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
|
||||||
|
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
|
||||||
|
ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
|
||||||
|
- PACKET3_SET_SH_REG_START;
|
||||||
|
ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
|
||||||
|
ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
|
||||||
|
|
||||||
|
/* write dispatch packet */
|
||||||
|
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
|
||||||
|
ib.ptr[ib.length_dw++] = 128; /* x */
|
||||||
|
ib.ptr[ib.length_dw++] = 1; /* y */
|
||||||
|
ib.ptr[ib.length_dw++] = 1; /* z */
|
||||||
|
ib.ptr[ib.length_dw++] =
|
||||||
|
REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
|
||||||
|
|
||||||
|
/* write CS partial flush packet */
|
||||||
|
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
|
||||||
|
ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
|
||||||
|
|
||||||
|
/* schedule the ib on the ring */
|
||||||
|
r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
|
||||||
|
if (r) {
|
||||||
|
DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* wait for the GPU to finish processing the IB */
|
||||||
|
r = dma_fence_wait(f, false);
|
||||||
|
if (r) {
|
||||||
|
DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* read back registers to clear the counters */
|
||||||
|
mutex_lock(&adev->grbm_idx_mutex);
|
||||||
|
for (j = 0; j < 16; j++) {
|
||||||
|
gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j);
|
||||||
|
for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
|
||||||
|
RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
|
||||||
|
gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j);
|
||||||
|
for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
|
||||||
|
RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
|
||||||
|
gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j);
|
||||||
|
for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
|
||||||
|
RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
|
||||||
|
gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j);
|
||||||
|
for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
|
||||||
|
RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
|
||||||
|
}
|
||||||
|
WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
|
||||||
|
mutex_unlock(&adev->grbm_idx_mutex);
|
||||||
|
|
||||||
|
fail:
|
||||||
|
amdgpu_ib_free(adev, &ib, NULL);
|
||||||
|
dma_fence_put(f);
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
static int gfx_v9_0_early_init(void *handle)
|
static int gfx_v9_0_early_init(void *handle)
|
||||||
{
|
{
|
||||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||||
|
@ -3570,8 +3803,31 @@ static int gfx_v9_0_ecc_late_init(void *handle)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (*ras_if)
|
/* requires IBs so do in late init after IB pool is initialized */
|
||||||
|
r = gfx_v9_0_do_edc_gpr_workarounds(adev);
|
||||||
|
if (r)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
/* handle resume path. */
|
||||||
|
if (*ras_if) {
|
||||||
|
/* resend ras TA enable cmd during resume.
|
||||||
|
* prepare to handle failure.
|
||||||
|
*/
|
||||||
|
ih_info.head = **ras_if;
|
||||||
|
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
|
||||||
|
if (r) {
|
||||||
|
if (r == -EAGAIN) {
|
||||||
|
/* request a gpu reset. will run again. */
|
||||||
|
amdgpu_ras_request_reset_on_boot(adev,
|
||||||
|
AMDGPU_RAS_BLOCK__GFX);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
/* fail to enable ras, cleanup all. */
|
||||||
|
goto irq;
|
||||||
|
}
|
||||||
|
/* enable successfully. continue. */
|
||||||
goto resume;
|
goto resume;
|
||||||
|
}
|
||||||
|
|
||||||
*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
|
*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
|
||||||
if (!*ras_if)
|
if (!*ras_if)
|
||||||
|
@ -3580,8 +3836,14 @@ static int gfx_v9_0_ecc_late_init(void *handle)
|
||||||
**ras_if = ras_block;
|
**ras_if = ras_block;
|
||||||
|
|
||||||
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
|
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
|
||||||
if (r)
|
if (r) {
|
||||||
|
if (r == -EAGAIN) {
|
||||||
|
amdgpu_ras_request_reset_on_boot(adev,
|
||||||
|
AMDGPU_RAS_BLOCK__GFX);
|
||||||
|
r = 0;
|
||||||
|
}
|
||||||
goto feature;
|
goto feature;
|
||||||
|
}
|
||||||
|
|
||||||
ih_info.head = **ras_if;
|
ih_info.head = **ras_if;
|
||||||
fs_info.head = **ras_if;
|
fs_info.head = **ras_if;
|
||||||
|
@ -3614,7 +3876,7 @@ interrupt:
|
||||||
feature:
|
feature:
|
||||||
kfree(*ras_if);
|
kfree(*ras_if);
|
||||||
*ras_if = NULL;
|
*ras_if = NULL;
|
||||||
return -EINVAL;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int gfx_v9_0_late_init(void *handle)
|
static int gfx_v9_0_late_init(void *handle)
|
||||||
|
@ -4319,8 +4581,8 @@ static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
|
||||||
mutex_lock(&adev->srbm_mutex);
|
mutex_lock(&adev->srbm_mutex);
|
||||||
soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
|
soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
|
||||||
|
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
|
||||||
WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
|
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
|
||||||
|
|
||||||
soc15_grbm_select(adev, 0, 0, 0, 0);
|
soc15_grbm_select(adev, 0, 0, 0, 0);
|
||||||
mutex_unlock(&adev->srbm_mutex);
|
mutex_unlock(&adev->srbm_mutex);
|
||||||
|
@ -5056,13 +5318,13 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
|
||||||
case CHIP_VEGA10:
|
case CHIP_VEGA10:
|
||||||
case CHIP_VEGA12:
|
case CHIP_VEGA12:
|
||||||
case CHIP_VEGA20:
|
case CHIP_VEGA20:
|
||||||
adev->gds.mem.total_size = 0x10000;
|
adev->gds.gds_size = 0x10000;
|
||||||
break;
|
break;
|
||||||
case CHIP_RAVEN:
|
case CHIP_RAVEN:
|
||||||
adev->gds.mem.total_size = 0x1000;
|
adev->gds.gds_size = 0x1000;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
adev->gds.mem.total_size = 0x10000;
|
adev->gds.gds_size = 0x10000;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5086,28 +5348,8 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
adev->gds.gws.total_size = 64;
|
adev->gds.gws_size = 64;
|
||||||
adev->gds.oa.total_size = 16;
|
adev->gds.oa_size = 16;
|
||||||
|
|
||||||
if (adev->gds.mem.total_size == 64 * 1024) {
|
|
||||||
adev->gds.mem.gfx_partition_size = 4096;
|
|
||||||
adev->gds.mem.cs_partition_size = 4096;
|
|
||||||
|
|
||||||
adev->gds.gws.gfx_partition_size = 4;
|
|
||||||
adev->gds.gws.cs_partition_size = 4;
|
|
||||||
|
|
||||||
adev->gds.oa.gfx_partition_size = 4;
|
|
||||||
adev->gds.oa.cs_partition_size = 1;
|
|
||||||
} else {
|
|
||||||
adev->gds.mem.gfx_partition_size = 1024;
|
|
||||||
adev->gds.mem.cs_partition_size = 1024;
|
|
||||||
|
|
||||||
adev->gds.gws.gfx_partition_size = 16;
|
|
||||||
adev->gds.gws.cs_partition_size = 16;
|
|
||||||
|
|
||||||
adev->gds.oa.gfx_partition_size = 4;
|
|
||||||
adev->gds.oa.cs_partition_size = 4;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
|
static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
|
||||||
|
|
|
@ -71,12 +71,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
|
||||||
uint64_t value;
|
uint64_t value;
|
||||||
|
|
||||||
/* Program the AGP BAR */
|
/* Program the AGP BAR */
|
||||||
WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0);
|
WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0);
|
||||||
WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
|
WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
|
||||||
WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
|
WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
|
||||||
|
|
||||||
/* Program the system aperture low logical page number. */
|
/* Program the system aperture low logical page number. */
|
||||||
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
|
WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
|
||||||
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
|
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
|
||||||
|
|
||||||
if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
|
if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
|
||||||
|
@ -86,11 +86,11 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
|
||||||
* workaround that increase system aperture high address (add 1)
|
* workaround that increase system aperture high address (add 1)
|
||||||
* to get rid of the VM fault and hardware hang.
|
* to get rid of the VM fault and hardware hang.
|
||||||
*/
|
*/
|
||||||
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
|
WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
|
||||||
max((adev->gmc.fb_end >> 18) + 0x1,
|
max((adev->gmc.fb_end >> 18) + 0x1,
|
||||||
adev->gmc.agp_end >> 18));
|
adev->gmc.agp_end >> 18));
|
||||||
else
|
else
|
||||||
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
|
WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
|
||||||
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
|
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
|
||||||
|
|
||||||
/* Set default page address. */
|
/* Set default page address. */
|
||||||
|
@ -129,7 +129,7 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
|
||||||
MTYPE, MTYPE_UC);/* XXX for emulation. */
|
MTYPE, MTYPE_UC);/* XXX for emulation. */
|
||||||
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
|
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
|
||||||
|
|
||||||
WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
|
WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
|
static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
|
||||||
|
@ -267,9 +267,9 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
|
||||||
* VF copy registers so vbios post doesn't program them, for
|
* VF copy registers so vbios post doesn't program them, for
|
||||||
* SRIOV driver need to program them
|
* SRIOV driver need to program them
|
||||||
*/
|
*/
|
||||||
WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE,
|
WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_BASE,
|
||||||
adev->gmc.vram_start >> 24);
|
adev->gmc.vram_start >> 24);
|
||||||
WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP,
|
WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_TOP,
|
||||||
adev->gmc.vram_end >> 24);
|
adev->gmc.vram_end >> 24);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -303,7 +303,7 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)
|
||||||
MC_VM_MX_L1_TLB_CNTL,
|
MC_VM_MX_L1_TLB_CNTL,
|
||||||
ENABLE_ADVANCED_DRIVER_MODEL,
|
ENABLE_ADVANCED_DRIVER_MODEL,
|
||||||
0);
|
0);
|
||||||
WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
|
WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
|
||||||
|
|
||||||
/* Setup L2 cache */
|
/* Setup L2 cache */
|
||||||
WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
|
WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
|
||||||
|
|
|
@ -289,7 +289,7 @@ out:
|
||||||
*
|
*
|
||||||
* @adev: amdgpu_device pointer
|
* @adev: amdgpu_device pointer
|
||||||
*
|
*
|
||||||
* Load the GDDR MC ucode into the hw (CIK).
|
* Load the GDDR MC ucode into the hw (VI).
|
||||||
* Returns 0 on success, error on failure.
|
* Returns 0 on success, error on failure.
|
||||||
*/
|
*/
|
||||||
static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev)
|
static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev)
|
||||||
|
@ -443,7 +443,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
|
||||||
* @adev: amdgpu_device pointer
|
* @adev: amdgpu_device pointer
|
||||||
*
|
*
|
||||||
* Set the location of vram, gart, and AGP in the GPU's
|
* Set the location of vram, gart, and AGP in the GPU's
|
||||||
* physical address space (CIK).
|
* physical address space (VI).
|
||||||
*/
|
*/
|
||||||
static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
|
static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
|
||||||
{
|
{
|
||||||
|
@ -515,7 +515,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
|
||||||
* @adev: amdgpu_device pointer
|
* @adev: amdgpu_device pointer
|
||||||
*
|
*
|
||||||
* Look up the amount of vram, vram width, and decide how to place
|
* Look up the amount of vram, vram width, and decide how to place
|
||||||
* vram and gart within the GPU's physical address space (CIK).
|
* vram and gart within the GPU's physical address space (VI).
|
||||||
* Returns 0 for success.
|
* Returns 0 for success.
|
||||||
*/
|
*/
|
||||||
static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
|
static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
|
||||||
|
@ -630,7 +630,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
|
||||||
* @adev: amdgpu_device pointer
|
* @adev: amdgpu_device pointer
|
||||||
* @vmid: vm instance to flush
|
* @vmid: vm instance to flush
|
||||||
*
|
*
|
||||||
* Flush the TLB for the requested page table (CIK).
|
* Flush the TLB for the requested page table (VI).
|
||||||
*/
|
*/
|
||||||
static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
|
static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
|
||||||
uint32_t vmid, uint32_t flush_type)
|
uint32_t vmid, uint32_t flush_type)
|
||||||
|
@ -800,7 +800,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
|
||||||
* This sets up the TLBs, programs the page tables for VMID0,
|
* This sets up the TLBs, programs the page tables for VMID0,
|
||||||
* sets up the hw for VMIDs 1-15 which are allocated on
|
* sets up the hw for VMIDs 1-15 which are allocated on
|
||||||
* demand, and sets up the global locations for the LDS, GDS,
|
* demand, and sets up the global locations for the LDS, GDS,
|
||||||
* and GPUVM for FSA64 clients (CIK).
|
* and GPUVM for FSA64 clients (VI).
|
||||||
* Returns 0 for success, errors for failure.
|
* Returns 0 for success, errors for failure.
|
||||||
*/
|
*/
|
||||||
static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
|
static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
|
||||||
|
@ -948,7 +948,7 @@ static int gmc_v8_0_gart_init(struct amdgpu_device *adev)
|
||||||
*
|
*
|
||||||
* @adev: amdgpu_device pointer
|
* @adev: amdgpu_device pointer
|
||||||
*
|
*
|
||||||
* This disables all VM page table (CIK).
|
* This disables all VM page table (VI).
|
||||||
*/
|
*/
|
||||||
static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
|
static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
|
||||||
{
|
{
|
||||||
|
@ -978,7 +978,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
|
||||||
* @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
|
* @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
|
||||||
* @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
|
* @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
|
||||||
*
|
*
|
||||||
* Print human readable fault information (CIK).
|
* Print human readable fault information (VI).
|
||||||
*/
|
*/
|
||||||
static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
|
static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
|
||||||
u32 addr, u32 mc_client, unsigned pasid)
|
u32 addr, u32 mc_client, unsigned pasid)
|
||||||
|
|
|
@ -687,8 +687,25 @@ static int gmc_v9_0_ecc_late_init(void *handle)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
/* handle resume path. */
|
/* handle resume path. */
|
||||||
if (*ras_if)
|
if (*ras_if) {
|
||||||
|
/* resend ras TA enable cmd during resume.
|
||||||
|
* prepare to handle failure.
|
||||||
|
*/
|
||||||
|
ih_info.head = **ras_if;
|
||||||
|
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
|
||||||
|
if (r) {
|
||||||
|
if (r == -EAGAIN) {
|
||||||
|
/* request a gpu reset. will run again. */
|
||||||
|
amdgpu_ras_request_reset_on_boot(adev,
|
||||||
|
AMDGPU_RAS_BLOCK__UMC);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
/* fail to enable ras, cleanup all. */
|
||||||
|
goto irq;
|
||||||
|
}
|
||||||
|
/* enable successfully. continue. */
|
||||||
goto resume;
|
goto resume;
|
||||||
|
}
|
||||||
|
|
||||||
*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
|
*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
|
||||||
if (!*ras_if)
|
if (!*ras_if)
|
||||||
|
@ -697,8 +714,14 @@ static int gmc_v9_0_ecc_late_init(void *handle)
|
||||||
**ras_if = ras_block;
|
**ras_if = ras_block;
|
||||||
|
|
||||||
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
|
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
|
||||||
if (r)
|
if (r) {
|
||||||
|
if (r == -EAGAIN) {
|
||||||
|
amdgpu_ras_request_reset_on_boot(adev,
|
||||||
|
AMDGPU_RAS_BLOCK__UMC);
|
||||||
|
r = 0;
|
||||||
|
}
|
||||||
goto feature;
|
goto feature;
|
||||||
|
}
|
||||||
|
|
||||||
ih_info.head = **ras_if;
|
ih_info.head = **ras_if;
|
||||||
fs_info.head = **ras_if;
|
fs_info.head = **ras_if;
|
||||||
|
@ -731,7 +754,7 @@ interrupt:
|
||||||
feature:
|
feature:
|
||||||
kfree(*ras_if);
|
kfree(*ras_if);
|
||||||
*ras_if = NULL;
|
*ras_if = NULL;
|
||||||
return -EINVAL;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1100,6 +1123,9 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
|
||||||
|
|
||||||
switch (adev->asic_type) {
|
switch (adev->asic_type) {
|
||||||
case CHIP_VEGA10:
|
case CHIP_VEGA10:
|
||||||
|
if (amdgpu_virt_support_skip_setting(adev))
|
||||||
|
break;
|
||||||
|
/* fall through */
|
||||||
case CHIP_VEGA20:
|
case CHIP_VEGA20:
|
||||||
soc15_program_register_sequence(adev,
|
soc15_program_register_sequence(adev,
|
||||||
golden_settings_mmhub_1_0_0,
|
golden_settings_mmhub_1_0_0,
|
||||||
|
@ -1164,6 +1190,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
|
||||||
tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
|
tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
|
||||||
WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
|
WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
|
||||||
|
|
||||||
|
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
|
||||||
|
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
|
||||||
|
|
||||||
/* After HDP is initialized, flush HDP.*/
|
/* After HDP is initialized, flush HDP.*/
|
||||||
adev->nbio_funcs->hdp_flush(adev, NULL);
|
adev->nbio_funcs->hdp_flush(adev, NULL);
|
||||||
|
|
||||||
|
|
|
@ -111,6 +111,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
|
||||||
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
|
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
|
||||||
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
|
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
|
||||||
|
|
||||||
|
if (amdgpu_virt_support_skip_setting(adev))
|
||||||
|
return;
|
||||||
|
|
||||||
/* Set default page address. */
|
/* Set default page address. */
|
||||||
value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
|
value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
|
||||||
adev->vm_manager.vram_base_offset;
|
adev->vm_manager.vram_base_offset;
|
||||||
|
@ -156,6 +159,9 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
|
||||||
{
|
{
|
||||||
uint32_t tmp;
|
uint32_t tmp;
|
||||||
|
|
||||||
|
if (amdgpu_virt_support_skip_setting(adev))
|
||||||
|
return;
|
||||||
|
|
||||||
/* Setup L2 cache */
|
/* Setup L2 cache */
|
||||||
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
|
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
|
||||||
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
|
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
|
||||||
|
@ -202,6 +208,9 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
|
||||||
|
|
||||||
static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
|
static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
|
||||||
{
|
{
|
||||||
|
if (amdgpu_virt_support_skip_setting(adev))
|
||||||
|
return;
|
||||||
|
|
||||||
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
|
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
|
||||||
0XFFFFFFFF);
|
0XFFFFFFFF);
|
||||||
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
|
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
|
||||||
|
@ -338,11 +347,13 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
|
||||||
0);
|
0);
|
||||||
WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
|
WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
|
||||||
|
|
||||||
/* Setup L2 cache */
|
if (!amdgpu_virt_support_skip_setting(adev)) {
|
||||||
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
|
/* Setup L2 cache */
|
||||||
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
|
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
|
||||||
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
|
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
|
||||||
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
|
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
|
||||||
|
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -354,6 +365,10 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
|
||||||
void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
|
void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
|
||||||
{
|
{
|
||||||
u32 tmp;
|
u32 tmp;
|
||||||
|
|
||||||
|
if (amdgpu_virt_support_skip_setting(adev))
|
||||||
|
return;
|
||||||
|
|
||||||
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
|
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
|
||||||
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
|
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
|
||||||
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
|
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
|
||||||
|
|
|
@ -26,6 +26,7 @@
|
||||||
#include "nbio/nbio_6_1_sh_mask.h"
|
#include "nbio/nbio_6_1_sh_mask.h"
|
||||||
#include "gc/gc_9_0_offset.h"
|
#include "gc/gc_9_0_offset.h"
|
||||||
#include "gc/gc_9_0_sh_mask.h"
|
#include "gc/gc_9_0_sh_mask.h"
|
||||||
|
#include "mp/mp_9_0_offset.h"
|
||||||
#include "soc15.h"
|
#include "soc15.h"
|
||||||
#include "vega10_ih.h"
|
#include "vega10_ih.h"
|
||||||
#include "soc15_common.h"
|
#include "soc15_common.h"
|
||||||
|
@ -343,7 +344,7 @@ flr_done:
|
||||||
|
|
||||||
/* Trigger recovery for world switch failure if no TDR */
|
/* Trigger recovery for world switch failure if no TDR */
|
||||||
if (amdgpu_device_should_recover_gpu(adev)
|
if (amdgpu_device_should_recover_gpu(adev)
|
||||||
&& amdgpu_lockup_timeout == MAX_SCHEDULE_TIMEOUT)
|
&& adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)
|
||||||
amdgpu_device_gpu_recover(adev, NULL);
|
amdgpu_device_gpu_recover(adev, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -448,6 +449,23 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)
|
||||||
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
|
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void xgpu_ai_init_reg_access_mode(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
uint32_t rlc_fw_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
|
||||||
|
uint32_t sos_fw_ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
|
||||||
|
|
||||||
|
adev->virt.reg_access_mode = AMDGPU_VIRT_REG_ACCESS_LEGACY;
|
||||||
|
|
||||||
|
if (rlc_fw_ver >= 0x5d)
|
||||||
|
adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC;
|
||||||
|
|
||||||
|
if (sos_fw_ver >= 0x80455)
|
||||||
|
adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH;
|
||||||
|
|
||||||
|
if (sos_fw_ver >= 0x8045b)
|
||||||
|
adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING;
|
||||||
|
}
|
||||||
|
|
||||||
const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
|
const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
|
||||||
.req_full_gpu = xgpu_ai_request_full_gpu_access,
|
.req_full_gpu = xgpu_ai_request_full_gpu_access,
|
||||||
.rel_full_gpu = xgpu_ai_release_full_gpu_access,
|
.rel_full_gpu = xgpu_ai_release_full_gpu_access,
|
||||||
|
@ -456,4 +474,5 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
|
||||||
.trans_msg = xgpu_ai_mailbox_trans_msg,
|
.trans_msg = xgpu_ai_mailbox_trans_msg,
|
||||||
.get_pp_clk = xgpu_ai_get_pp_clk,
|
.get_pp_clk = xgpu_ai_get_pp_clk,
|
||||||
.force_dpm_level = xgpu_ai_force_dpm_level,
|
.force_dpm_level = xgpu_ai_force_dpm_level,
|
||||||
|
.init_reg_access_mode = xgpu_ai_init_reg_access_mode,
|
||||||
};
|
};
|
||||||
|
|
|
@ -29,9 +29,18 @@
|
||||||
#include "nbio/nbio_7_0_sh_mask.h"
|
#include "nbio/nbio_7_0_sh_mask.h"
|
||||||
#include "nbio/nbio_7_0_smn.h"
|
#include "nbio/nbio_7_0_smn.h"
|
||||||
#include "vega10_enum.h"
|
#include "vega10_enum.h"
|
||||||
|
#include <uapi/linux/kfd_ioctl.h>
|
||||||
|
|
||||||
#define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c
|
#define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c
|
||||||
|
|
||||||
|
static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
|
||||||
|
adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
|
||||||
|
WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
|
||||||
|
adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
|
||||||
|
}
|
||||||
|
|
||||||
static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
|
static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
|
||||||
{
|
{
|
||||||
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
|
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
|
||||||
|
@ -55,10 +64,9 @@ static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev,
|
||||||
struct amdgpu_ring *ring)
|
struct amdgpu_ring *ring)
|
||||||
{
|
{
|
||||||
if (!ring || !ring->funcs->emit_wreg)
|
if (!ring || !ring->funcs->emit_wreg)
|
||||||
WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
|
WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
|
||||||
else
|
else
|
||||||
amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
|
amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
|
||||||
NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
|
static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
|
||||||
|
@ -283,4 +291,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
|
||||||
.ih_control = nbio_v7_0_ih_control,
|
.ih_control = nbio_v7_0_ih_control,
|
||||||
.init_registers = nbio_v7_0_init_registers,
|
.init_registers = nbio_v7_0_init_registers,
|
||||||
.detect_hw_virt = nbio_v7_0_detect_hw_virt,
|
.detect_hw_virt = nbio_v7_0_detect_hw_virt,
|
||||||
|
.remap_hdp_registers = nbio_v7_0_remap_hdp_registers,
|
||||||
};
|
};
|
||||||
|
|
|
@ -27,9 +27,18 @@
|
||||||
#include "nbio/nbio_7_4_offset.h"
|
#include "nbio/nbio_7_4_offset.h"
|
||||||
#include "nbio/nbio_7_4_sh_mask.h"
|
#include "nbio/nbio_7_4_sh_mask.h"
|
||||||
#include "nbio/nbio_7_4_0_smn.h"
|
#include "nbio/nbio_7_4_0_smn.h"
|
||||||
|
#include <uapi/linux/kfd_ioctl.h>
|
||||||
|
|
||||||
#define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c
|
#define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c
|
||||||
|
|
||||||
|
static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
|
||||||
|
adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
|
||||||
|
WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
|
||||||
|
adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
|
||||||
|
}
|
||||||
|
|
||||||
static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)
|
static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)
|
||||||
{
|
{
|
||||||
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
|
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
|
||||||
|
@ -53,10 +62,9 @@ static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev,
|
||||||
struct amdgpu_ring *ring)
|
struct amdgpu_ring *ring)
|
||||||
{
|
{
|
||||||
if (!ring || !ring->funcs->emit_wreg)
|
if (!ring || !ring->funcs->emit_wreg)
|
||||||
WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
|
WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
|
||||||
else
|
else
|
||||||
amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
|
amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
|
||||||
NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev)
|
static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev)
|
||||||
|
@ -262,4 +270,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
|
||||||
.ih_control = nbio_v7_4_ih_control,
|
.ih_control = nbio_v7_4_ih_control,
|
||||||
.init_registers = nbio_v7_4_init_registers,
|
.init_registers = nbio_v7_4_init_registers,
|
||||||
.detect_hw_virt = nbio_v7_4_detect_hw_virt,
|
.detect_hw_virt = nbio_v7_4_detect_hw_virt,
|
||||||
|
.remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
|
||||||
};
|
};
|
||||||
|
|
|
@ -94,6 +94,7 @@ enum psp_gfx_cmd_id
|
||||||
GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */
|
GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */
|
||||||
GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */
|
GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */
|
||||||
GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */
|
GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */
|
||||||
|
GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -217,6 +218,12 @@ struct psp_gfx_cmd_save_restore_ip_fw
|
||||||
enum psp_gfx_fw_type fw_type; /* FW type */
|
enum psp_gfx_fw_type fw_type; /* FW type */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Command to set up register programming */
|
||||||
|
struct psp_gfx_cmd_reg_prog {
|
||||||
|
uint32_t reg_value;
|
||||||
|
uint32_t reg_id;
|
||||||
|
};
|
||||||
|
|
||||||
/* All GFX ring buffer commands. */
|
/* All GFX ring buffer commands. */
|
||||||
union psp_gfx_commands
|
union psp_gfx_commands
|
||||||
{
|
{
|
||||||
|
@ -226,6 +233,7 @@ union psp_gfx_commands
|
||||||
struct psp_gfx_cmd_setup_tmr cmd_setup_tmr;
|
struct psp_gfx_cmd_setup_tmr cmd_setup_tmr;
|
||||||
struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw;
|
struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw;
|
||||||
struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw;
|
struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw;
|
||||||
|
struct psp_gfx_cmd_reg_prog cmd_setup_reg_prog;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -50,6 +50,10 @@ MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
|
||||||
|
|
||||||
static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554};
|
static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554};
|
||||||
|
|
||||||
|
static bool psp_v3_1_support_vmr_ring(struct psp_context *psp);
|
||||||
|
static int psp_v3_1_ring_stop(struct psp_context *psp,
|
||||||
|
enum psp_ring_type ring_type);
|
||||||
|
|
||||||
static int psp_v3_1_init_microcode(struct psp_context *psp)
|
static int psp_v3_1_init_microcode(struct psp_context *psp)
|
||||||
{
|
{
|
||||||
struct amdgpu_device *adev = psp->adev;
|
struct amdgpu_device *adev = psp->adev;
|
||||||
|
@ -296,27 +300,57 @@ static int psp_v3_1_ring_create(struct psp_context *psp,
|
||||||
|
|
||||||
psp_v3_1_reroute_ih(psp);
|
psp_v3_1_reroute_ih(psp);
|
||||||
|
|
||||||
/* Write low address of the ring to C2PMSG_69 */
|
if (psp_v3_1_support_vmr_ring(psp)) {
|
||||||
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
|
ret = psp_v3_1_ring_stop(psp, ring_type);
|
||||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
|
if (ret) {
|
||||||
/* Write high address of the ring to C2PMSG_70 */
|
DRM_ERROR("psp_v3_1_ring_stop_sriov failed!\n");
|
||||||
psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
|
return ret;
|
||||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
|
}
|
||||||
/* Write size of ring to C2PMSG_71 */
|
|
||||||
psp_ring_reg = ring->ring_size;
|
|
||||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
|
|
||||||
/* Write the ring initialization command to C2PMSG_64 */
|
|
||||||
psp_ring_reg = ring_type;
|
|
||||||
psp_ring_reg = psp_ring_reg << 16;
|
|
||||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
|
|
||||||
|
|
||||||
/* there might be handshake issue with hardware which needs delay */
|
/* Write low address of the ring to C2PMSG_102 */
|
||||||
mdelay(20);
|
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
|
||||||
|
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
|
||||||
|
/* Write high address of the ring to C2PMSG_103 */
|
||||||
|
psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
|
||||||
|
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
|
||||||
|
/* No size initialization for sriov */
|
||||||
|
/* Write the ring initialization command to C2PMSG_101 */
|
||||||
|
psp_ring_reg = ring_type;
|
||||||
|
psp_ring_reg = psp_ring_reg << 16;
|
||||||
|
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg);
|
||||||
|
|
||||||
/* Wait for response flag (bit 31) in C2PMSG_64 */
|
/* there might be a hardware handshake issue which needs a delay */
|
||||||
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
|
mdelay(20);
|
||||||
0x80000000, 0x8000FFFF, false);
|
|
||||||
|
|
||||||
|
/* Wait for response flag (bit 31) in C2PMSG_101 */
|
||||||
|
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
|
||||||
|
mmMP0_SMN_C2PMSG_101), 0x80000000,
|
||||||
|
0x8000FFFF, false);
|
||||||
|
} else {
|
||||||
|
|
||||||
|
/* Write low address of the ring to C2PMSG_69 */
|
||||||
|
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
|
||||||
|
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
|
||||||
|
/* Write high address of the ring to C2PMSG_70 */
|
||||||
|
psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
|
||||||
|
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
|
||||||
|
/* Write size of ring to C2PMSG_71 */
|
||||||
|
psp_ring_reg = ring->ring_size;
|
||||||
|
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
|
||||||
|
/* Write the ring initialization command to C2PMSG_64 */
|
||||||
|
psp_ring_reg = ring_type;
|
||||||
|
psp_ring_reg = psp_ring_reg << 16;
|
||||||
|
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
|
||||||
|
|
||||||
|
/* there might be a hardware handshake issue which needs a delay */
|
||||||
|
mdelay(20);
|
||||||
|
|
||||||
|
/* Wait for response flag (bit 31) in C2PMSG_64 */
|
||||||
|
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
|
||||||
|
mmMP0_SMN_C2PMSG_64), 0x80000000,
|
||||||
|
0x8000FFFF, false);
|
||||||
|
|
||||||
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -327,16 +361,31 @@ static int psp_v3_1_ring_stop(struct psp_context *psp,
|
||||||
unsigned int psp_ring_reg = 0;
|
unsigned int psp_ring_reg = 0;
|
||||||
struct amdgpu_device *adev = psp->adev;
|
struct amdgpu_device *adev = psp->adev;
|
||||||
|
|
||||||
/* Write the ring destroy command to C2PMSG_64 */
|
if (psp_v3_1_support_vmr_ring(psp)) {
|
||||||
psp_ring_reg = 3 << 16;
|
/* Write the Destroy GPCOM ring command to C2PMSG_101 */
|
||||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
|
psp_ring_reg = GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING;
|
||||||
|
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg);
|
||||||
|
|
||||||
/* there might be handshake issue with hardware which needs delay */
|
/* there might be a handshake issue which needs a delay */
|
||||||
mdelay(20);
|
mdelay(20);
|
||||||
|
|
||||||
/* Wait for response flag (bit 31) in C2PMSG_64 */
|
/* Wait for response flag (bit 31) in C2PMSG_101 */
|
||||||
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
|
ret = psp_wait_for(psp,
|
||||||
0x80000000, 0x80000000, false);
|
SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
|
||||||
|
0x80000000, 0x80000000, false);
|
||||||
|
} else {
|
||||||
|
/* Write the ring destroy command to C2PMSG_64 */
|
||||||
|
psp_ring_reg = 3 << 16;
|
||||||
|
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
|
||||||
|
|
||||||
|
/* there might be a handshake issue which needs a delay */
|
||||||
|
mdelay(20);
|
||||||
|
|
||||||
|
/* Wait for response flag (bit 31) in C2PMSG_64 */
|
||||||
|
ret = psp_wait_for(psp,
|
||||||
|
SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
|
||||||
|
0x80000000, 0x80000000, false);
|
||||||
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -375,7 +424,10 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp,
|
||||||
uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
|
uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
|
||||||
|
|
||||||
/* KM (GPCOM) prepare write pointer */
|
/* KM (GPCOM) prepare write pointer */
|
||||||
psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
|
if (psp_v3_1_support_vmr_ring(psp))
|
||||||
|
psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
|
||||||
|
else
|
||||||
|
psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
|
||||||
|
|
||||||
/* Update KM RB frame pointer to new frame */
|
/* Update KM RB frame pointer to new frame */
|
||||||
/* write_frame ptr increments by size of rb_frame in bytes */
|
/* write_frame ptr increments by size of rb_frame in bytes */
|
||||||
|
@ -404,7 +456,13 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp,
|
||||||
|
|
||||||
/* Update the write Pointer in DWORDs */
|
/* Update the write Pointer in DWORDs */
|
||||||
psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
|
psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
|
||||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
|
if (psp_v3_1_support_vmr_ring(psp)) {
|
||||||
|
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg);
|
||||||
|
/* send interrupt to PSP for SRIOV ring write pointer update */
|
||||||
|
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
|
||||||
|
GFX_CTRL_CMD_ID_CONSUME_CMD);
|
||||||
|
} else
|
||||||
|
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -574,6 +632,14 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool psp_v3_1_support_vmr_ring(struct psp_context *psp)
|
||||||
|
{
|
||||||
|
if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static const struct psp_funcs psp_v3_1_funcs = {
|
static const struct psp_funcs psp_v3_1_funcs = {
|
||||||
.init_microcode = psp_v3_1_init_microcode,
|
.init_microcode = psp_v3_1_init_microcode,
|
||||||
.bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,
|
.bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,
|
||||||
|
@ -586,6 +652,7 @@ static const struct psp_funcs psp_v3_1_funcs = {
|
||||||
.compare_sram_data = psp_v3_1_compare_sram_data,
|
.compare_sram_data = psp_v3_1_compare_sram_data,
|
||||||
.smu_reload_quirk = psp_v3_1_smu_reload_quirk,
|
.smu_reload_quirk = psp_v3_1_smu_reload_quirk,
|
||||||
.mode1_reset = psp_v3_1_mode1_reset,
|
.mode1_reset = psp_v3_1_mode1_reset,
|
||||||
|
.support_vmr_ring = psp_v3_1_support_vmr_ring,
|
||||||
};
|
};
|
||||||
|
|
||||||
void psp_v3_1_set_psp_funcs(struct psp_context *psp)
|
void psp_v3_1_set_psp_funcs(struct psp_context *psp)
|
||||||
|
|
|
@ -210,12 +210,14 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
|
||||||
{
|
{
|
||||||
switch (adev->asic_type) {
|
switch (adev->asic_type) {
|
||||||
case CHIP_VEGA10:
|
case CHIP_VEGA10:
|
||||||
soc15_program_register_sequence(adev,
|
if (!amdgpu_virt_support_skip_setting(adev)) {
|
||||||
golden_settings_sdma_4,
|
soc15_program_register_sequence(adev,
|
||||||
ARRAY_SIZE(golden_settings_sdma_4));
|
golden_settings_sdma_4,
|
||||||
soc15_program_register_sequence(adev,
|
ARRAY_SIZE(golden_settings_sdma_4));
|
||||||
golden_settings_sdma_vg10,
|
soc15_program_register_sequence(adev,
|
||||||
ARRAY_SIZE(golden_settings_sdma_vg10));
|
golden_settings_sdma_vg10,
|
||||||
|
ARRAY_SIZE(golden_settings_sdma_vg10));
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case CHIP_VEGA12:
|
case CHIP_VEGA12:
|
||||||
soc15_program_register_sequence(adev,
|
soc15_program_register_sequence(adev,
|
||||||
|
@ -1521,8 +1523,25 @@ static int sdma_v4_0_late_init(void *handle)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* handle resume path. */
|
/* handle resume path. */
|
||||||
if (*ras_if)
|
if (*ras_if) {
|
||||||
|
/* resend ras TA enable cmd during resume.
|
||||||
|
* prepare to handle failure.
|
||||||
|
*/
|
||||||
|
ih_info.head = **ras_if;
|
||||||
|
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
|
||||||
|
if (r) {
|
||||||
|
if (r == -EAGAIN) {
|
||||||
|
/* request a gpu reset. will run again. */
|
||||||
|
amdgpu_ras_request_reset_on_boot(adev,
|
||||||
|
AMDGPU_RAS_BLOCK__SDMA);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
/* fail to enable ras, cleanup all. */
|
||||||
|
goto irq;
|
||||||
|
}
|
||||||
|
/* enable successfully. continue. */
|
||||||
goto resume;
|
goto resume;
|
||||||
|
}
|
||||||
|
|
||||||
*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
|
*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
|
||||||
if (!*ras_if)
|
if (!*ras_if)
|
||||||
|
@ -1531,8 +1550,14 @@ static int sdma_v4_0_late_init(void *handle)
|
||||||
**ras_if = ras_block;
|
**ras_if = ras_block;
|
||||||
|
|
||||||
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
|
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
|
||||||
if (r)
|
if (r) {
|
||||||
|
if (r == -EAGAIN) {
|
||||||
|
amdgpu_ras_request_reset_on_boot(adev,
|
||||||
|
AMDGPU_RAS_BLOCK__SDMA);
|
||||||
|
r = 0;
|
||||||
|
}
|
||||||
goto feature;
|
goto feature;
|
||||||
|
}
|
||||||
|
|
||||||
ih_info.head = **ras_if;
|
ih_info.head = **ras_if;
|
||||||
fs_info.head = **ras_if;
|
fs_info.head = **ras_if;
|
||||||
|
@ -1571,7 +1596,7 @@ interrupt:
|
||||||
feature:
|
feature:
|
||||||
kfree(*ras_if);
|
kfree(*ras_if);
|
||||||
*ras_if = NULL;
|
*ras_if = NULL;
|
||||||
return -EINVAL;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int sdma_v4_0_sw_init(void *handle)
|
static int sdma_v4_0_sw_init(void *handle)
|
||||||
|
|
|
@ -1375,6 +1375,18 @@ static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
|
||||||
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
|
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint64_t si_get_pcie_replay_count(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
uint64_t nak_r, nak_g;
|
||||||
|
|
||||||
|
/* Get the number of NAKs received and generated */
|
||||||
|
nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
|
||||||
|
nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
|
||||||
|
|
||||||
|
/* Add the total number of NAKs, i.e. the number of replays */
|
||||||
|
return (nak_r + nak_g);
|
||||||
|
}
|
||||||
|
|
||||||
static const struct amdgpu_asic_funcs si_asic_funcs =
|
static const struct amdgpu_asic_funcs si_asic_funcs =
|
||||||
{
|
{
|
||||||
.read_disabled_bios = &si_read_disabled_bios,
|
.read_disabled_bios = &si_read_disabled_bios,
|
||||||
|
@ -1393,6 +1405,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
|
||||||
.need_full_reset = &si_need_full_reset,
|
.need_full_reset = &si_need_full_reset,
|
||||||
.get_pcie_usage = &si_get_pcie_usage,
|
.get_pcie_usage = &si_get_pcie_usage,
|
||||||
.need_reset_on_init = &si_need_reset_on_init,
|
.need_reset_on_init = &si_need_reset_on_init,
|
||||||
|
.get_pcie_replay_count = &si_get_pcie_replay_count,
|
||||||
};
|
};
|
||||||
|
|
||||||
static uint32_t si_get_rev_id(struct amdgpu_device *adev)
|
static uint32_t si_get_rev_id(struct amdgpu_device *adev)
|
||||||
|
|
|
@ -44,6 +44,7 @@
|
||||||
#include "smuio/smuio_9_0_offset.h"
|
#include "smuio/smuio_9_0_offset.h"
|
||||||
#include "smuio/smuio_9_0_sh_mask.h"
|
#include "smuio/smuio_9_0_sh_mask.h"
|
||||||
#include "nbio/nbio_7_0_default.h"
|
#include "nbio/nbio_7_0_default.h"
|
||||||
|
#include "nbio/nbio_7_0_offset.h"
|
||||||
#include "nbio/nbio_7_0_sh_mask.h"
|
#include "nbio/nbio_7_0_sh_mask.h"
|
||||||
#include "nbio/nbio_7_0_smn.h"
|
#include "nbio/nbio_7_0_smn.h"
|
||||||
#include "mp/mp_9_0_offset.h"
|
#include "mp/mp_9_0_offset.h"
|
||||||
|
@ -64,6 +65,9 @@
|
||||||
#include "dce_virtual.h"
|
#include "dce_virtual.h"
|
||||||
#include "mxgpu_ai.h"
|
#include "mxgpu_ai.h"
|
||||||
#include "amdgpu_smu.h"
|
#include "amdgpu_smu.h"
|
||||||
|
#include "amdgpu_ras.h"
|
||||||
|
#include "amdgpu_xgmi.h"
|
||||||
|
#include <uapi/linux/kfd_ioctl.h>
|
||||||
|
|
||||||
#define mmMP0_MISC_CGTT_CTRL0 0x01b9
|
#define mmMP0_MISC_CGTT_CTRL0 0x01b9
|
||||||
#define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0
|
#define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0
|
||||||
|
@ -230,7 +234,7 @@ void soc15_grbm_select(struct amdgpu_device *adev,
|
||||||
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
|
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
|
||||||
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
|
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
|
||||||
|
|
||||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL), grbm_gfx_cntl);
|
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void soc15_vga_set_state(struct amdgpu_device *adev, bool state)
|
static void soc15_vga_set_state(struct amdgpu_device *adev, bool state)
|
||||||
|
@ -385,7 +389,15 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,
|
||||||
tmp &= ~(entry->and_mask);
|
tmp &= ~(entry->and_mask);
|
||||||
tmp |= entry->or_mask;
|
tmp |= entry->or_mask;
|
||||||
}
|
}
|
||||||
WREG32(reg, tmp);
|
|
||||||
|
if (reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3) ||
|
||||||
|
reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE) ||
|
||||||
|
reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1) ||
|
||||||
|
reg == SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG))
|
||||||
|
WREG32_RLC(reg, tmp);
|
||||||
|
else
|
||||||
|
WREG32(reg, tmp);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -475,6 +487,13 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
|
||||||
soc15_asic_get_baco_capability(adev, &baco_reset);
|
soc15_asic_get_baco_capability(adev, &baco_reset);
|
||||||
else
|
else
|
||||||
baco_reset = false;
|
baco_reset = false;
|
||||||
|
if (baco_reset) {
|
||||||
|
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
|
||||||
|
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
|
||||||
|
|
||||||
|
if (hive || (ras && ras->supported))
|
||||||
|
baco_reset = false;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
baco_reset = false;
|
baco_reset = false;
|
||||||
|
@ -606,12 +625,24 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
|
||||||
case CHIP_VEGA20:
|
case CHIP_VEGA20:
|
||||||
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
|
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
|
||||||
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
|
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
|
||||||
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
|
|
||||||
if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
|
/* For Vega10 SR-IOV, PSP needs to be initialized before IH */
|
||||||
if (adev->asic_type == CHIP_VEGA20)
|
if (amdgpu_sriov_vf(adev)) {
|
||||||
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
|
if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
|
||||||
else
|
if (adev->asic_type == CHIP_VEGA20)
|
||||||
amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
|
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
|
||||||
|
else
|
||||||
|
amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
|
||||||
|
}
|
||||||
|
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
|
||||||
|
} else {
|
||||||
|
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
|
||||||
|
if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
|
||||||
|
if (adev->asic_type == CHIP_VEGA20)
|
||||||
|
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
|
||||||
|
else
|
||||||
|
amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
|
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
|
||||||
amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
|
amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
|
||||||
|
@ -733,7 +764,8 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
|
||||||
/* Just return false for soc15 GPUs. Reset does not seem to
|
/* Just return false for soc15 GPUs. Reset does not seem to
|
||||||
* be necessary.
|
* be necessary.
|
||||||
*/
|
*/
|
||||||
return false;
|
if (!amdgpu_passthrough(adev))
|
||||||
|
return false;
|
||||||
|
|
||||||
if (adev->flags & AMD_IS_APU)
|
if (adev->flags & AMD_IS_APU)
|
||||||
return false;
|
return false;
|
||||||
|
@ -748,6 +780,18 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint64_t soc15_get_pcie_replay_count(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
uint64_t nak_r, nak_g;
|
||||||
|
|
||||||
|
/* Get the number of NAKs received and generated */
|
||||||
|
nak_r = RREG32_PCIE(smnPCIE_RX_NUM_NAK);
|
||||||
|
nak_g = RREG32_PCIE(smnPCIE_RX_NUM_NAK_GENERATED);
|
||||||
|
|
||||||
|
/* Add the total number of NAKs, i.e. the number of replays */
|
||||||
|
return (nak_r + nak_g);
|
||||||
|
}
|
||||||
|
|
||||||
static const struct amdgpu_asic_funcs soc15_asic_funcs =
|
static const struct amdgpu_asic_funcs soc15_asic_funcs =
|
||||||
{
|
{
|
||||||
.read_disabled_bios = &soc15_read_disabled_bios,
|
.read_disabled_bios = &soc15_read_disabled_bios,
|
||||||
|
@ -765,6 +809,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
|
||||||
.init_doorbell_index = &vega10_doorbell_index_init,
|
.init_doorbell_index = &vega10_doorbell_index_init,
|
||||||
.get_pcie_usage = &soc15_get_pcie_usage,
|
.get_pcie_usage = &soc15_get_pcie_usage,
|
||||||
.need_reset_on_init = &soc15_need_reset_on_init,
|
.need_reset_on_init = &soc15_need_reset_on_init,
|
||||||
|
.get_pcie_replay_count = &soc15_get_pcie_replay_count,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct amdgpu_asic_funcs vega20_asic_funcs =
|
static const struct amdgpu_asic_funcs vega20_asic_funcs =
|
||||||
|
@ -784,12 +829,16 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
|
||||||
.init_doorbell_index = &vega20_doorbell_index_init,
|
.init_doorbell_index = &vega20_doorbell_index_init,
|
||||||
.get_pcie_usage = &soc15_get_pcie_usage,
|
.get_pcie_usage = &soc15_get_pcie_usage,
|
||||||
.need_reset_on_init = &soc15_need_reset_on_init,
|
.need_reset_on_init = &soc15_need_reset_on_init,
|
||||||
|
.get_pcie_replay_count = &soc15_get_pcie_replay_count,
|
||||||
};
|
};
|
||||||
|
|
||||||
static int soc15_common_early_init(void *handle)
|
static int soc15_common_early_init(void *handle)
|
||||||
{
|
{
|
||||||
|
#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
|
||||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||||
|
|
||||||
|
adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
|
||||||
|
adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
|
||||||
adev->smc_rreg = NULL;
|
adev->smc_rreg = NULL;
|
||||||
adev->smc_wreg = NULL;
|
adev->smc_wreg = NULL;
|
||||||
adev->pcie_rreg = &soc15_pcie_rreg;
|
adev->pcie_rreg = &soc15_pcie_rreg;
|
||||||
|
@ -998,11 +1047,17 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev)
|
||||||
int i;
|
int i;
|
||||||
struct amdgpu_ring *ring;
|
struct amdgpu_ring *ring;
|
||||||
|
|
||||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
/* Two reasons to skip
|
||||||
ring = &adev->sdma.instance[i].ring;
|
* 1, Host driver already programmed them
|
||||||
adev->nbio_funcs->sdma_doorbell_range(adev, i,
|
* 2, To avoid register programming violations in SR-IOV
|
||||||
ring->use_doorbell, ring->doorbell_index,
|
*/
|
||||||
adev->doorbell_index.sdma_doorbell_range);
|
if (!amdgpu_virt_support_skip_setting(adev)) {
|
||||||
|
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||||
|
ring = &adev->sdma.instance[i].ring;
|
||||||
|
adev->nbio_funcs->sdma_doorbell_range(adev, i,
|
||||||
|
ring->use_doorbell, ring->doorbell_index,
|
||||||
|
adev->doorbell_index.sdma_doorbell_range);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
|
adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
|
||||||
|
@ -1019,6 +1074,12 @@ static int soc15_common_hw_init(void *handle)
|
||||||
soc15_program_aspm(adev);
|
soc15_program_aspm(adev);
|
||||||
/* setup nbio registers */
|
/* setup nbio registers */
|
||||||
adev->nbio_funcs->init_registers(adev);
|
adev->nbio_funcs->init_registers(adev);
|
||||||
|
/* remap HDP registers to a hole in mmio space,
|
||||||
|
* for the purpose of exposing those registers
|
||||||
|
* to process space
|
||||||
|
*/
|
||||||
|
if (adev->nbio_funcs->remap_hdp_registers)
|
||||||
|
adev->nbio_funcs->remap_hdp_registers(adev);
|
||||||
/* enable the doorbell aperture */
|
/* enable the doorbell aperture */
|
||||||
soc15_enable_doorbell_aperture(adev, true);
|
soc15_enable_doorbell_aperture(adev, true);
|
||||||
/* HW doorbell routing policy: doorbell writing not
|
/* HW doorbell routing policy: doorbell writing not
|
||||||
|
|
|
@ -42,8 +42,18 @@ struct soc15_reg_golden {
|
||||||
u32 or_mask;
|
u32 or_mask;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct soc15_reg_entry {
|
||||||
|
uint32_t hwip;
|
||||||
|
uint32_t inst;
|
||||||
|
uint32_t seg;
|
||||||
|
uint32_t reg_offset;
|
||||||
|
uint32_t reg_value;
|
||||||
|
};
|
||||||
|
|
||||||
#define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg
|
#define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg
|
||||||
|
|
||||||
|
#define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset)
|
||||||
|
|
||||||
#define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \
|
#define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \
|
||||||
{ ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask }
|
{ ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask }
|
||||||
|
|
||||||
|
|
|
@ -69,26 +69,60 @@
|
||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \
|
#define WREG32_RLC(reg, value) \
|
||||||
({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
|
|
||||||
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
|
|
||||||
UVD_DPG_LMA_CTL__MASK_EN_MASK | \
|
|
||||||
((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
|
|
||||||
<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
|
|
||||||
(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
|
|
||||||
RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); })
|
|
||||||
|
|
||||||
#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \
|
|
||||||
do { \
|
do { \
|
||||||
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \
|
if (amdgpu_virt_support_rlc_prg_reg(adev)) { \
|
||||||
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
|
uint32_t i = 0; \
|
||||||
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
|
uint32_t retries = 50000; \
|
||||||
UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
|
uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \
|
||||||
((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
|
uint32_t r1 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1; \
|
||||||
<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
|
uint32_t spare_int = adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT; \
|
||||||
(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
|
WREG32(r0, value); \
|
||||||
|
WREG32(r1, (reg | 0x80000000)); \
|
||||||
|
WREG32(spare_int, 0x1); \
|
||||||
|
for (i = 0; i < retries; i++) { \
|
||||||
|
u32 tmp = RREG32(r1); \
|
||||||
|
if (!(tmp & 0x80000000)) \
|
||||||
|
break; \
|
||||||
|
udelay(10); \
|
||||||
|
} \
|
||||||
|
if (i >= retries) \
|
||||||
|
pr_err("timeout: rlcg program reg:0x%05x failed !\n", reg); \
|
||||||
|
} else { \
|
||||||
|
WREG32(reg, value); \
|
||||||
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \
|
||||||
|
do { \
|
||||||
|
uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
|
||||||
|
if (amdgpu_virt_support_rlc_prg_reg(adev)) { \
|
||||||
|
uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \
|
||||||
|
uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \
|
||||||
|
uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \
|
||||||
|
uint32_t grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; \
|
||||||
|
if (target_reg == grbm_cntl) \
|
||||||
|
WREG32(r2, value); \
|
||||||
|
else if (target_reg == grbm_idx) \
|
||||||
|
WREG32(r3, value); \
|
||||||
|
WREG32(target_reg, value); \
|
||||||
|
} else { \
|
||||||
|
WREG32(target_reg, value); \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define WREG32_SOC15_RLC(ip, inst, reg, value) \
|
||||||
|
do { \
|
||||||
|
uint32_t target_reg = adev->reg_offset[GC_HWIP][0][reg##_BASE_IDX] + reg;\
|
||||||
|
WREG32_RLC(target_reg, value); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define WREG32_FIELD15_RLC(ip, idx, reg, field, val) \
|
||||||
|
WREG32_RLC((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \
|
||||||
|
(RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \
|
||||||
|
& ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
|
||||||
|
|
||||||
|
#define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \
|
||||||
|
WREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset), value)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -741,6 +741,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
|
||||||
.type = AMDGPU_RING_TYPE_UVD,
|
.type = AMDGPU_RING_TYPE_UVD,
|
||||||
.align_mask = 0xf,
|
.align_mask = 0xf,
|
||||||
.support_64bit_ptrs = false,
|
.support_64bit_ptrs = false,
|
||||||
|
.no_user_fence = true,
|
||||||
.get_rptr = uvd_v4_2_ring_get_rptr,
|
.get_rptr = uvd_v4_2_ring_get_rptr,
|
||||||
.get_wptr = uvd_v4_2_ring_get_wptr,
|
.get_wptr = uvd_v4_2_ring_get_wptr,
|
||||||
.set_wptr = uvd_v4_2_ring_set_wptr,
|
.set_wptr = uvd_v4_2_ring_set_wptr,
|
||||||
|
|
|
@ -849,6 +849,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
|
||||||
.type = AMDGPU_RING_TYPE_UVD,
|
.type = AMDGPU_RING_TYPE_UVD,
|
||||||
.align_mask = 0xf,
|
.align_mask = 0xf,
|
||||||
.support_64bit_ptrs = false,
|
.support_64bit_ptrs = false,
|
||||||
|
.no_user_fence = true,
|
||||||
.get_rptr = uvd_v5_0_ring_get_rptr,
|
.get_rptr = uvd_v5_0_ring_get_rptr,
|
||||||
.get_wptr = uvd_v5_0_ring_get_wptr,
|
.get_wptr = uvd_v5_0_ring_get_wptr,
|
||||||
.set_wptr = uvd_v5_0_ring_set_wptr,
|
.set_wptr = uvd_v5_0_ring_set_wptr,
|
||||||
|
|
|
@ -1502,6 +1502,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
|
||||||
.type = AMDGPU_RING_TYPE_UVD,
|
.type = AMDGPU_RING_TYPE_UVD,
|
||||||
.align_mask = 0xf,
|
.align_mask = 0xf,
|
||||||
.support_64bit_ptrs = false,
|
.support_64bit_ptrs = false,
|
||||||
|
.no_user_fence = true,
|
||||||
.get_rptr = uvd_v6_0_ring_get_rptr,
|
.get_rptr = uvd_v6_0_ring_get_rptr,
|
||||||
.get_wptr = uvd_v6_0_ring_get_wptr,
|
.get_wptr = uvd_v6_0_ring_get_wptr,
|
||||||
.set_wptr = uvd_v6_0_ring_set_wptr,
|
.set_wptr = uvd_v6_0_ring_set_wptr,
|
||||||
|
@ -1527,6 +1528,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
|
||||||
.type = AMDGPU_RING_TYPE_UVD,
|
.type = AMDGPU_RING_TYPE_UVD,
|
||||||
.align_mask = 0xf,
|
.align_mask = 0xf,
|
||||||
.support_64bit_ptrs = false,
|
.support_64bit_ptrs = false,
|
||||||
|
.no_user_fence = true,
|
||||||
.get_rptr = uvd_v6_0_ring_get_rptr,
|
.get_rptr = uvd_v6_0_ring_get_rptr,
|
||||||
.get_wptr = uvd_v6_0_ring_get_wptr,
|
.get_wptr = uvd_v6_0_ring_get_wptr,
|
||||||
.set_wptr = uvd_v6_0_ring_set_wptr,
|
.set_wptr = uvd_v6_0_ring_set_wptr,
|
||||||
|
@ -1555,6 +1557,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = {
|
||||||
.align_mask = 0x3f,
|
.align_mask = 0x3f,
|
||||||
.nop = HEVC_ENC_CMD_NO_OP,
|
.nop = HEVC_ENC_CMD_NO_OP,
|
||||||
.support_64bit_ptrs = false,
|
.support_64bit_ptrs = false,
|
||||||
|
.no_user_fence = true,
|
||||||
.get_rptr = uvd_v6_0_enc_ring_get_rptr,
|
.get_rptr = uvd_v6_0_enc_ring_get_rptr,
|
||||||
.get_wptr = uvd_v6_0_enc_ring_get_wptr,
|
.get_wptr = uvd_v6_0_enc_ring_get_wptr,
|
||||||
.set_wptr = uvd_v6_0_enc_ring_set_wptr,
|
.set_wptr = uvd_v6_0_enc_ring_set_wptr,
|
||||||
|
|
|
@ -1759,6 +1759,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
|
||||||
.type = AMDGPU_RING_TYPE_UVD,
|
.type = AMDGPU_RING_TYPE_UVD,
|
||||||
.align_mask = 0xf,
|
.align_mask = 0xf,
|
||||||
.support_64bit_ptrs = false,
|
.support_64bit_ptrs = false,
|
||||||
|
.no_user_fence = true,
|
||||||
.vmhub = AMDGPU_MMHUB,
|
.vmhub = AMDGPU_MMHUB,
|
||||||
.get_rptr = uvd_v7_0_ring_get_rptr,
|
.get_rptr = uvd_v7_0_ring_get_rptr,
|
||||||
.get_wptr = uvd_v7_0_ring_get_wptr,
|
.get_wptr = uvd_v7_0_ring_get_wptr,
|
||||||
|
@ -1791,6 +1792,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
|
||||||
.align_mask = 0x3f,
|
.align_mask = 0x3f,
|
||||||
.nop = HEVC_ENC_CMD_NO_OP,
|
.nop = HEVC_ENC_CMD_NO_OP,
|
||||||
.support_64bit_ptrs = false,
|
.support_64bit_ptrs = false,
|
||||||
|
.no_user_fence = true,
|
||||||
.vmhub = AMDGPU_MMHUB,
|
.vmhub = AMDGPU_MMHUB,
|
||||||
.get_rptr = uvd_v7_0_enc_ring_get_rptr,
|
.get_rptr = uvd_v7_0_enc_ring_get_rptr,
|
||||||
.get_wptr = uvd_v7_0_enc_ring_get_wptr,
|
.get_wptr = uvd_v7_0_enc_ring_get_wptr,
|
||||||
|
|
|
@ -605,6 +605,7 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {
|
||||||
.align_mask = 0xf,
|
.align_mask = 0xf,
|
||||||
.nop = VCE_CMD_NO_OP,
|
.nop = VCE_CMD_NO_OP,
|
||||||
.support_64bit_ptrs = false,
|
.support_64bit_ptrs = false,
|
||||||
|
.no_user_fence = true,
|
||||||
.get_rptr = vce_v2_0_ring_get_rptr,
|
.get_rptr = vce_v2_0_ring_get_rptr,
|
||||||
.get_wptr = vce_v2_0_ring_get_wptr,
|
.get_wptr = vce_v2_0_ring_get_wptr,
|
||||||
.set_wptr = vce_v2_0_ring_set_wptr,
|
.set_wptr = vce_v2_0_ring_set_wptr,
|
||||||
|
|
|
@ -894,6 +894,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
|
||||||
.align_mask = 0xf,
|
.align_mask = 0xf,
|
||||||
.nop = VCE_CMD_NO_OP,
|
.nop = VCE_CMD_NO_OP,
|
||||||
.support_64bit_ptrs = false,
|
.support_64bit_ptrs = false,
|
||||||
|
.no_user_fence = true,
|
||||||
.get_rptr = vce_v3_0_ring_get_rptr,
|
.get_rptr = vce_v3_0_ring_get_rptr,
|
||||||
.get_wptr = vce_v3_0_ring_get_wptr,
|
.get_wptr = vce_v3_0_ring_get_wptr,
|
||||||
.set_wptr = vce_v3_0_ring_set_wptr,
|
.set_wptr = vce_v3_0_ring_set_wptr,
|
||||||
|
@ -917,6 +918,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
|
||||||
.align_mask = 0xf,
|
.align_mask = 0xf,
|
||||||
.nop = VCE_CMD_NO_OP,
|
.nop = VCE_CMD_NO_OP,
|
||||||
.support_64bit_ptrs = false,
|
.support_64bit_ptrs = false,
|
||||||
|
.no_user_fence = true,
|
||||||
.get_rptr = vce_v3_0_ring_get_rptr,
|
.get_rptr = vce_v3_0_ring_get_rptr,
|
||||||
.get_wptr = vce_v3_0_ring_get_wptr,
|
.get_wptr = vce_v3_0_ring_get_wptr,
|
||||||
.set_wptr = vce_v3_0_ring_set_wptr,
|
.set_wptr = vce_v3_0_ring_set_wptr,
|
||||||
|
|
|
@ -1069,6 +1069,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
|
||||||
.align_mask = 0x3f,
|
.align_mask = 0x3f,
|
||||||
.nop = VCE_CMD_NO_OP,
|
.nop = VCE_CMD_NO_OP,
|
||||||
.support_64bit_ptrs = false,
|
.support_64bit_ptrs = false,
|
||||||
|
.no_user_fence = true,
|
||||||
.vmhub = AMDGPU_MMHUB,
|
.vmhub = AMDGPU_MMHUB,
|
||||||
.get_rptr = vce_v4_0_ring_get_rptr,
|
.get_rptr = vce_v4_0_ring_get_rptr,
|
||||||
.get_wptr = vce_v4_0_ring_get_wptr,
|
.get_wptr = vce_v4_0_ring_get_wptr,
|
||||||
|
|
|
@ -49,6 +49,8 @@ static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
|
||||||
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
|
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
|
||||||
static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr);
|
static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr);
|
||||||
static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
|
static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
|
||||||
|
static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
|
||||||
|
struct dpg_pause_state *new_state);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* vcn_v1_0_early_init - set function pointers
|
* vcn_v1_0_early_init - set function pointers
|
||||||
|
@ -140,7 +142,9 @@ static int vcn_v1_0_sw_init(void *handle)
|
||||||
if (r)
|
if (r)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
return r;
|
adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1204,6 +1208,132 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev)
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
|
||||||
|
struct dpg_pause_state *new_state)
|
||||||
|
{
|
||||||
|
int ret_code;
|
||||||
|
uint32_t reg_data = 0;
|
||||||
|
uint32_t reg_data2 = 0;
|
||||||
|
struct amdgpu_ring *ring;
|
||||||
|
|
||||||
|
/* pause/unpause if state is changed */
|
||||||
|
if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
|
||||||
|
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
|
||||||
|
adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
|
||||||
|
new_state->fw_based, new_state->jpeg);
|
||||||
|
|
||||||
|
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
|
||||||
|
(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
|
||||||
|
|
||||||
|
if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
|
||||||
|
ret_code = 0;
|
||||||
|
|
||||||
|
if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
|
||||||
|
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
|
||||||
|
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
|
||||||
|
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
|
||||||
|
|
||||||
|
if (!ret_code) {
|
||||||
|
/* pause DPG non-jpeg */
|
||||||
|
reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
|
||||||
|
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
|
||||||
|
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
|
||||||
|
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
|
||||||
|
|
||||||
|
/* Restore */
|
||||||
|
ring = &adev->vcn.ring_enc[0];
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
|
||||||
|
|
||||||
|
ring = &adev->vcn.ring_enc[1];
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
|
||||||
|
|
||||||
|
ring = &adev->vcn.ring_dec;
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
|
||||||
|
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
|
||||||
|
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
|
||||||
|
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
|
||||||
|
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* unpause dpg non-jpeg, no need to wait */
|
||||||
|
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
|
||||||
|
}
|
||||||
|
adev->vcn.pause_state.fw_based = new_state->fw_based;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* pause/unpause if state is changed */
|
||||||
|
if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
|
||||||
|
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
|
||||||
|
adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
|
||||||
|
new_state->fw_based, new_state->jpeg);
|
||||||
|
|
||||||
|
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
|
||||||
|
(~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
|
||||||
|
|
||||||
|
if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
|
||||||
|
ret_code = 0;
|
||||||
|
|
||||||
|
if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
|
||||||
|
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
|
||||||
|
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
|
||||||
|
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
|
||||||
|
|
||||||
|
if (!ret_code) {
|
||||||
|
/* Make sure JPRG Snoop is disabled before sending the pause */
|
||||||
|
reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
|
||||||
|
reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
|
||||||
|
|
||||||
|
/* pause DPG jpeg */
|
||||||
|
reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
|
||||||
|
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
|
||||||
|
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
|
||||||
|
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
|
||||||
|
|
||||||
|
/* Restore */
|
||||||
|
ring = &adev->vcn.ring_jpeg;
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
|
||||||
|
UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
|
||||||
|
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
|
||||||
|
lower_32_bits(ring->gpu_addr));
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
|
||||||
|
upper_32_bits(ring->gpu_addr));
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
|
||||||
|
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
|
||||||
|
|
||||||
|
ring = &adev->vcn.ring_dec;
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
|
||||||
|
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
|
||||||
|
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
|
||||||
|
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
|
||||||
|
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* unpause dpg jpeg, no need to wait */
|
||||||
|
reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
|
||||||
|
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
|
||||||
|
}
|
||||||
|
adev->vcn.pause_state.jpeg = new_state->jpeg;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static bool vcn_v1_0_is_idle(void *handle)
|
static bool vcn_v1_0_is_idle(void *handle)
|
||||||
{
|
{
|
||||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||||
|
@ -2054,6 +2184,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
|
||||||
.type = AMDGPU_RING_TYPE_VCN_DEC,
|
.type = AMDGPU_RING_TYPE_VCN_DEC,
|
||||||
.align_mask = 0xf,
|
.align_mask = 0xf,
|
||||||
.support_64bit_ptrs = false,
|
.support_64bit_ptrs = false,
|
||||||
|
.no_user_fence = true,
|
||||||
.vmhub = AMDGPU_MMHUB,
|
.vmhub = AMDGPU_MMHUB,
|
||||||
.get_rptr = vcn_v1_0_dec_ring_get_rptr,
|
.get_rptr = vcn_v1_0_dec_ring_get_rptr,
|
||||||
.get_wptr = vcn_v1_0_dec_ring_get_wptr,
|
.get_wptr = vcn_v1_0_dec_ring_get_wptr,
|
||||||
|
@ -2087,6 +2218,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
|
||||||
.align_mask = 0x3f,
|
.align_mask = 0x3f,
|
||||||
.nop = VCN_ENC_CMD_NO_OP,
|
.nop = VCN_ENC_CMD_NO_OP,
|
||||||
.support_64bit_ptrs = false,
|
.support_64bit_ptrs = false,
|
||||||
|
.no_user_fence = true,
|
||||||
.vmhub = AMDGPU_MMHUB,
|
.vmhub = AMDGPU_MMHUB,
|
||||||
.get_rptr = vcn_v1_0_enc_ring_get_rptr,
|
.get_rptr = vcn_v1_0_enc_ring_get_rptr,
|
||||||
.get_wptr = vcn_v1_0_enc_ring_get_wptr,
|
.get_wptr = vcn_v1_0_enc_ring_get_wptr,
|
||||||
|
@ -2118,6 +2250,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = {
|
||||||
.align_mask = 0xf,
|
.align_mask = 0xf,
|
||||||
.nop = PACKET0(0x81ff, 0),
|
.nop = PACKET0(0x81ff, 0),
|
||||||
.support_64bit_ptrs = false,
|
.support_64bit_ptrs = false,
|
||||||
|
.no_user_fence = true,
|
||||||
.vmhub = AMDGPU_MMHUB,
|
.vmhub = AMDGPU_MMHUB,
|
||||||
.extra_dw = 64,
|
.extra_dw = 64,
|
||||||
.get_rptr = vcn_v1_0_jpeg_ring_get_rptr,
|
.get_rptr = vcn_v1_0_jpeg_ring_get_rptr,
|
||||||
|
|
|
@ -48,14 +48,29 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
|
||||||
|
|
||||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
|
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
|
||||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1);
|
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1);
|
||||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
|
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||||
|
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
|
||||||
|
DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
|
||||||
|
}
|
||||||
adev->irq.ih.enabled = true;
|
adev->irq.ih.enabled = true;
|
||||||
|
|
||||||
if (adev->irq.ih1.ring_size) {
|
if (adev->irq.ih1.ring_size) {
|
||||||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
|
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
|
||||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
|
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
|
||||||
RB_ENABLE, 1);
|
RB_ENABLE, 1);
|
||||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
|
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||||
|
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
|
||||||
|
ih_rb_cntl)) {
|
||||||
|
DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
|
||||||
|
}
|
||||||
adev->irq.ih1.enabled = true;
|
adev->irq.ih1.enabled = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,7 +78,15 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
|
||||||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
|
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
|
||||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
|
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
|
||||||
RB_ENABLE, 1);
|
RB_ENABLE, 1);
|
||||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
|
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||||
|
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
|
||||||
|
ih_rb_cntl)) {
|
||||||
|
DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
|
||||||
|
}
|
||||||
adev->irq.ih2.enabled = true;
|
adev->irq.ih2.enabled = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -81,7 +104,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
|
||||||
|
|
||||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0);
|
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0);
|
||||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0);
|
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0);
|
||||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
|
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||||
|
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
|
||||||
|
DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
|
||||||
|
}
|
||||||
|
|
||||||
/* set rptr, wptr to 0 */
|
/* set rptr, wptr to 0 */
|
||||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0);
|
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0);
|
||||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0);
|
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0);
|
||||||
|
@ -92,7 +123,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
|
||||||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
|
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
|
||||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
|
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
|
||||||
RB_ENABLE, 0);
|
RB_ENABLE, 0);
|
||||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
|
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||||
|
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
|
||||||
|
ih_rb_cntl)) {
|
||||||
|
DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
|
||||||
|
}
|
||||||
/* set rptr, wptr to 0 */
|
/* set rptr, wptr to 0 */
|
||||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0);
|
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0);
|
||||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
|
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
|
||||||
|
@ -104,7 +143,16 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
|
||||||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
|
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
|
||||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
|
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
|
||||||
RB_ENABLE, 0);
|
RB_ENABLE, 0);
|
||||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
|
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||||
|
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
|
||||||
|
ih_rb_cntl)) {
|
||||||
|
DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
|
||||||
|
}
|
||||||
|
|
||||||
/* set rptr, wptr to 0 */
|
/* set rptr, wptr to 0 */
|
||||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0);
|
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0);
|
||||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
|
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
|
||||||
|
@ -187,7 +235,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
|
||||||
ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
|
ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
|
||||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM,
|
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM,
|
||||||
!!adev->irq.msi_enabled);
|
!!adev->irq.msi_enabled);
|
||||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
|
|
||||||
|
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||||
|
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
|
||||||
|
DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
|
||||||
|
return -ETIMEDOUT;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
|
||||||
|
}
|
||||||
|
|
||||||
/* set the writeback address whether it's enabled or not */
|
/* set the writeback address whether it's enabled or not */
|
||||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO,
|
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO,
|
||||||
|
@ -214,7 +270,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
|
||||||
WPTR_OVERFLOW_ENABLE, 0);
|
WPTR_OVERFLOW_ENABLE, 0);
|
||||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
|
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
|
||||||
RB_FULL_DRAIN_ENABLE, 1);
|
RB_FULL_DRAIN_ENABLE, 1);
|
||||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
|
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||||
|
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
|
||||||
|
ih_rb_cntl)) {
|
||||||
|
DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
|
||||||
|
return -ETIMEDOUT;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
|
||||||
|
}
|
||||||
|
|
||||||
/* set rptr, wptr to 0 */
|
/* set rptr, wptr to 0 */
|
||||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
|
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
|
||||||
|
@ -232,7 +296,16 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
|
||||||
|
|
||||||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
|
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
|
||||||
ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
|
ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
|
||||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
|
|
||||||
|
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||||
|
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
|
||||||
|
ih_rb_cntl)) {
|
||||||
|
DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
|
||||||
|
return -ETIMEDOUT;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
|
||||||
|
}
|
||||||
|
|
||||||
/* set rptr, wptr to 0 */
|
/* set rptr, wptr to 0 */
|
||||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
|
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
|
||||||
|
|
|
@ -987,6 +987,18 @@ static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
|
||||||
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
|
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint64_t vi_get_pcie_replay_count(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
uint64_t nak_r, nak_g;
|
||||||
|
|
||||||
|
/* Get the number of NAKs received and generated */
|
||||||
|
nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
|
||||||
|
nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
|
||||||
|
|
||||||
|
/* Add the total number of NAKs, i.e the number of replays */
|
||||||
|
return (nak_r + nak_g);
|
||||||
|
}
|
||||||
|
|
||||||
static bool vi_need_reset_on_init(struct amdgpu_device *adev)
|
static bool vi_need_reset_on_init(struct amdgpu_device *adev)
|
||||||
{
|
{
|
||||||
u32 clock_cntl, pc;
|
u32 clock_cntl, pc;
|
||||||
|
@ -1021,6 +1033,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
|
||||||
.init_doorbell_index = &legacy_doorbell_index_init,
|
.init_doorbell_index = &legacy_doorbell_index_init,
|
||||||
.get_pcie_usage = &vi_get_pcie_usage,
|
.get_pcie_usage = &vi_get_pcie_usage,
|
||||||
.need_reset_on_init = &vi_need_reset_on_init,
|
.need_reset_on_init = &vi_need_reset_on_init,
|
||||||
|
.get_pcie_replay_count = &vi_get_pcie_replay_count,
|
||||||
};
|
};
|
||||||
|
|
||||||
#define CZ_REV_BRISTOL(rev) \
|
#define CZ_REV_BRISTOL(rev) \
|
||||||
|
|
|
@ -21,7 +21,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static const uint32_t cwsr_trap_gfx8_hex[] = {
|
static const uint32_t cwsr_trap_gfx8_hex[] = {
|
||||||
0xbf820001, 0xbf82012b,
|
0xbf820001, 0xbf820121,
|
||||||
0xb8f4f802, 0x89748674,
|
0xb8f4f802, 0x89748674,
|
||||||
0xb8f5f803, 0x8675ff75,
|
0xb8f5f803, 0x8675ff75,
|
||||||
0x00000400, 0xbf850017,
|
0x00000400, 0xbf850017,
|
||||||
|
@ -36,12 +36,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
|
||||||
0x8671ff71, 0x0000ffff,
|
0x8671ff71, 0x0000ffff,
|
||||||
0x8f728374, 0xb972e0c2,
|
0x8f728374, 0xb972e0c2,
|
||||||
0xbf800002, 0xb9740002,
|
0xbf800002, 0xb9740002,
|
||||||
0xbe801f70, 0xb8f5f803,
|
0xbe801f70, 0xbefa0080,
|
||||||
0x8675ff75, 0x00000100,
|
|
||||||
0xbf840006, 0xbefa0080,
|
|
||||||
0xb97a0203, 0x8671ff71,
|
|
||||||
0x0000ffff, 0x80f08870,
|
|
||||||
0x82f18071, 0xbefa0080,
|
|
||||||
0xb97a0283, 0xbef60068,
|
0xb97a0283, 0xbef60068,
|
||||||
0xbef70069, 0xb8fa1c07,
|
0xbef70069, 0xb8fa1c07,
|
||||||
0x8e7a9c7a, 0x87717a71,
|
0x8e7a9c7a, 0x87717a71,
|
||||||
|
@ -279,15 +274,17 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
|
||||||
|
|
||||||
|
|
||||||
static const uint32_t cwsr_trap_gfx9_hex[] = {
|
static const uint32_t cwsr_trap_gfx9_hex[] = {
|
||||||
0xbf820001, 0xbf82015d,
|
0xbf820001, 0xbf82015e,
|
||||||
0xb8f8f802, 0x89788678,
|
0xb8f8f802, 0x89788678,
|
||||||
0xb8f1f803, 0x866eff71,
|
0xb8fbf803, 0x866eff7b,
|
||||||
0x00000400, 0xbf850037,
|
0x00000400, 0xbf85003b,
|
||||||
0x866eff71, 0x00000800,
|
0x866eff7b, 0x00000800,
|
||||||
0xbf850003, 0x866eff71,
|
0xbf850003, 0x866eff7b,
|
||||||
0x00000100, 0xbf840008,
|
0x00000100, 0xbf84000c,
|
||||||
0x866eff78, 0x00002000,
|
0x866eff78, 0x00002000,
|
||||||
0xbf840001, 0xbf810000,
|
0xbf840005, 0xbf8e0010,
|
||||||
|
0xb8eef803, 0x866eff6e,
|
||||||
|
0x00000400, 0xbf84fffb,
|
||||||
0x8778ff78, 0x00002000,
|
0x8778ff78, 0x00002000,
|
||||||
0x80ec886c, 0x82ed806d,
|
0x80ec886c, 0x82ed806d,
|
||||||
0xb8eef807, 0x866fff6e,
|
0xb8eef807, 0x866fff6e,
|
||||||
|
@ -295,13 +292,13 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
|
||||||
0x8977ff77, 0xfc000000,
|
0x8977ff77, 0xfc000000,
|
||||||
0x87776f77, 0x896eff6e,
|
0x87776f77, 0x896eff6e,
|
||||||
0x001f8000, 0xb96ef807,
|
0x001f8000, 0xb96ef807,
|
||||||
0xb8f0f812, 0xb8f1f813,
|
0xb8faf812, 0xb8fbf813,
|
||||||
0x8ef08870, 0xc0071bb8,
|
0x8efa887a, 0xc0071bbd,
|
||||||
0x00000000, 0xbf8cc07f,
|
0x00000000, 0xbf8cc07f,
|
||||||
0xc0071c38, 0x00000008,
|
0xc0071ebd, 0x00000008,
|
||||||
0xbf8cc07f, 0x86ee6e6e,
|
0xbf8cc07f, 0x86ee6e6e,
|
||||||
0xbf840001, 0xbe801d6e,
|
0xbf840001, 0xbe801d6e,
|
||||||
0xb8f1f803, 0x8671ff71,
|
0xb8fbf803, 0x867bff7b,
|
||||||
0x000001ff, 0xbf850002,
|
0x000001ff, 0xbf850002,
|
||||||
0x806c846c, 0x826d806d,
|
0x806c846c, 0x826d806d,
|
||||||
0x866dff6d, 0x0000ffff,
|
0x866dff6d, 0x0000ffff,
|
||||||
|
@ -311,258 +308,256 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
|
||||||
0x8f6e8378, 0xb96ee0c2,
|
0x8f6e8378, 0xb96ee0c2,
|
||||||
0xbf800002, 0xb9780002,
|
0xbf800002, 0xb9780002,
|
||||||
0xbe801f6c, 0x866dff6d,
|
0xbe801f6c, 0x866dff6d,
|
||||||
0x0000ffff, 0xbef00080,
|
0x0000ffff, 0xbefa0080,
|
||||||
0xb9700283, 0xb8f02407,
|
0xb97a0283, 0xb8fa2407,
|
||||||
0x8e709c70, 0x876d706d,
|
0x8e7a9b7a, 0x876d7a6d,
|
||||||
0xb8f003c7, 0x8e709b70,
|
0xb8fa03c7, 0x8e7a9a7a,
|
||||||
0x876d706d, 0xb8f0f807,
|
0x876d7a6d, 0xb8faf807,
|
||||||
0x8670ff70, 0x00007fff,
|
0x867aff7a, 0x00007fff,
|
||||||
0xb970f807, 0xbeee007e,
|
0xb97af807, 0xbeee007e,
|
||||||
0xbeef007f, 0xbefe0180,
|
0xbeef007f, 0xbefe0180,
|
||||||
0xbf900004, 0x87708478,
|
0xbf900004, 0x877a8478,
|
||||||
0xb970f802, 0xbf8e0002,
|
0xb97af802, 0xbf8e0002,
|
||||||
0xbf88fffe, 0xb8f02a05,
|
0xbf88fffe, 0xb8fa2a05,
|
||||||
|
0x807a817a, 0x8e7a8a7a,
|
||||||
|
0xb8fb1605, 0x807b817b,
|
||||||
|
0x8e7b867b, 0x807a7b7a,
|
||||||
|
0x807a7e7a, 0x827b807f,
|
||||||
|
0x867bff7b, 0x0000ffff,
|
||||||
|
0xc04b1c3d, 0x00000050,
|
||||||
|
0xbf8cc07f, 0xc04b1d3d,
|
||||||
|
0x00000060, 0xbf8cc07f,
|
||||||
|
0xc0431e7d, 0x00000074,
|
||||||
|
0xbf8cc07f, 0xbef4007e,
|
||||||
|
0x8675ff7f, 0x0000ffff,
|
||||||
|
0x8775ff75, 0x00040000,
|
||||||
|
0xbef60080, 0xbef700ff,
|
||||||
|
0x00807fac, 0x867aff7f,
|
||||||
|
0x08000000, 0x8f7a837a,
|
||||||
|
0x87777a77, 0x867aff7f,
|
||||||
|
0x70000000, 0x8f7a817a,
|
||||||
|
0x87777a77, 0xbef1007c,
|
||||||
|
0xbef00080, 0xb8f02a05,
|
||||||
0x80708170, 0x8e708a70,
|
0x80708170, 0x8e708a70,
|
||||||
0xb8f11605, 0x80718171,
|
0xb8fa1605, 0x807a817a,
|
||||||
0x8e718671, 0x80707170,
|
0x8e7a867a, 0x80707a70,
|
||||||
0x80707e70, 0x8271807f,
|
0xbef60084, 0xbef600ff,
|
||||||
0x8671ff71, 0x0000ffff,
|
0x01000000, 0xbefe007c,
|
||||||
0xc0471cb8, 0x00000040,
|
0xbefc0070, 0xc0611c7a,
|
||||||
0xbf8cc07f, 0xc04b1d38,
|
0x0000007c, 0xbf8cc07f,
|
||||||
0x00000048, 0xbf8cc07f,
|
0x80708470, 0xbefc007e,
|
||||||
0xc0431e78, 0x00000058,
|
0xbefe007c, 0xbefc0070,
|
||||||
0xbf8cc07f, 0xc0471eb8,
|
0xc0611b3a, 0x0000007c,
|
||||||
0x0000005c, 0xbf8cc07f,
|
0xbf8cc07f, 0x80708470,
|
||||||
|
0xbefc007e, 0xbefe007c,
|
||||||
|
0xbefc0070, 0xc0611b7a,
|
||||||
|
0x0000007c, 0xbf8cc07f,
|
||||||
|
0x80708470, 0xbefc007e,
|
||||||
|
0xbefe007c, 0xbefc0070,
|
||||||
|
0xc0611bba, 0x0000007c,
|
||||||
|
0xbf8cc07f, 0x80708470,
|
||||||
|
0xbefc007e, 0xbefe007c,
|
||||||
|
0xbefc0070, 0xc0611bfa,
|
||||||
|
0x0000007c, 0xbf8cc07f,
|
||||||
|
0x80708470, 0xbefc007e,
|
||||||
|
0xbefe007c, 0xbefc0070,
|
||||||
|
0xc0611e3a, 0x0000007c,
|
||||||
|
0xbf8cc07f, 0x80708470,
|
||||||
|
0xbefc007e, 0xb8fbf803,
|
||||||
|
0xbefe007c, 0xbefc0070,
|
||||||
|
0xc0611efa, 0x0000007c,
|
||||||
|
0xbf8cc07f, 0x80708470,
|
||||||
|
0xbefc007e, 0xbefe007c,
|
||||||
|
0xbefc0070, 0xc0611a3a,
|
||||||
|
0x0000007c, 0xbf8cc07f,
|
||||||
|
0x80708470, 0xbefc007e,
|
||||||
|
0xbefe007c, 0xbefc0070,
|
||||||
|
0xc0611a7a, 0x0000007c,
|
||||||
|
0xbf8cc07f, 0x80708470,
|
||||||
|
0xbefc007e, 0xb8f1f801,
|
||||||
|
0xbefe007c, 0xbefc0070,
|
||||||
|
0xc0611c7a, 0x0000007c,
|
||||||
|
0xbf8cc07f, 0x80708470,
|
||||||
|
0xbefc007e, 0x867aff7f,
|
||||||
|
0x04000000, 0xbeef0080,
|
||||||
|
0x876f6f7a, 0xb8f02a05,
|
||||||
|
0x80708170, 0x8e708a70,
|
||||||
|
0xb8fb1605, 0x807b817b,
|
||||||
|
0x8e7b847b, 0x8e76827b,
|
||||||
|
0xbef600ff, 0x01000000,
|
||||||
|
0xbef20174, 0x80747074,
|
||||||
|
0x82758075, 0xbefc0080,
|
||||||
|
0xbf800000, 0xbe802b00,
|
||||||
|
0xbe822b02, 0xbe842b04,
|
||||||
|
0xbe862b06, 0xbe882b08,
|
||||||
|
0xbe8a2b0a, 0xbe8c2b0c,
|
||||||
|
0xbe8e2b0e, 0xc06b003a,
|
||||||
|
0x00000000, 0xbf8cc07f,
|
||||||
|
0xc06b013a, 0x00000010,
|
||||||
|
0xbf8cc07f, 0xc06b023a,
|
||||||
|
0x00000020, 0xbf8cc07f,
|
||||||
|
0xc06b033a, 0x00000030,
|
||||||
|
0xbf8cc07f, 0x8074c074,
|
||||||
|
0x82758075, 0x807c907c,
|
||||||
|
0xbf0a7b7c, 0xbf85ffe7,
|
||||||
|
0xbef40172, 0xbef00080,
|
||||||
|
0xbefe00c1, 0xbeff00c1,
|
||||||
|
0xbee80080, 0xbee90080,
|
||||||
|
0xbef600ff, 0x01000000,
|
||||||
|
0xe0724000, 0x701d0000,
|
||||||
|
0xe0724100, 0x701d0100,
|
||||||
|
0xe0724200, 0x701d0200,
|
||||||
|
0xe0724300, 0x701d0300,
|
||||||
|
0xbefe00c1, 0xbeff00c1,
|
||||||
|
0xb8fb4306, 0x867bc17b,
|
||||||
|
0xbf84002c, 0xbf8a0000,
|
||||||
|
0x867aff6f, 0x04000000,
|
||||||
|
0xbf840028, 0x8e7b867b,
|
||||||
|
0x8e7b827b, 0xbef6007b,
|
||||||
|
0xb8f02a05, 0x80708170,
|
||||||
|
0x8e708a70, 0xb8fa1605,
|
||||||
|
0x807a817a, 0x8e7a867a,
|
||||||
|
0x80707a70, 0x8070ff70,
|
||||||
|
0x00000080, 0xbef600ff,
|
||||||
|
0x01000000, 0xbefc0080,
|
||||||
|
0xd28c0002, 0x000100c1,
|
||||||
|
0xd28d0003, 0x000204c1,
|
||||||
|
0xd1060002, 0x00011103,
|
||||||
|
0x7e0602ff, 0x00000200,
|
||||||
|
0xbefc00ff, 0x00010000,
|
||||||
|
0xbe800077, 0x8677ff77,
|
||||||
|
0xff7fffff, 0x8777ff77,
|
||||||
|
0x00058000, 0xd8ec0000,
|
||||||
|
0x00000002, 0xbf8cc07f,
|
||||||
|
0xe0765000, 0x701d0002,
|
||||||
|
0x68040702, 0xd0c9006a,
|
||||||
|
0x0000f702, 0xbf87fff7,
|
||||||
|
0xbef70000, 0xbef000ff,
|
||||||
|
0x00000400, 0xbefe00c1,
|
||||||
|
0xbeff00c1, 0xb8fb2a05,
|
||||||
|
0x807b817b, 0x8e7b827b,
|
||||||
|
0x8e76887b, 0xbef600ff,
|
||||||
|
0x01000000, 0xbefc0084,
|
||||||
|
0xbf0a7b7c, 0xbf840015,
|
||||||
|
0xbf11017c, 0x807bff7b,
|
||||||
|
0x00001000, 0x7e000300,
|
||||||
|
0x7e020301, 0x7e040302,
|
||||||
|
0x7e060303, 0xe0724000,
|
||||||
|
0x701d0000, 0xe0724100,
|
||||||
|
0x701d0100, 0xe0724200,
|
||||||
|
0x701d0200, 0xe0724300,
|
||||||
|
0x701d0300, 0x807c847c,
|
||||||
|
0x8070ff70, 0x00000400,
|
||||||
|
0xbf0a7b7c, 0xbf85ffef,
|
||||||
|
0xbf9c0000, 0xbf8200da,
|
||||||
0xbef4007e, 0x8675ff7f,
|
0xbef4007e, 0x8675ff7f,
|
||||||
0x0000ffff, 0x8775ff75,
|
0x0000ffff, 0x8775ff75,
|
||||||
0x00040000, 0xbef60080,
|
0x00040000, 0xbef60080,
|
||||||
0xbef700ff, 0x00807fac,
|
0xbef700ff, 0x00807fac,
|
||||||
0x8670ff7f, 0x08000000,
|
0x866eff7f, 0x08000000,
|
||||||
0x8f708370, 0x87777077,
|
0x8f6e836e, 0x87776e77,
|
||||||
0x8670ff7f, 0x70000000,
|
0x866eff7f, 0x70000000,
|
||||||
0x8f708170, 0x87777077,
|
0x8f6e816e, 0x87776e77,
|
||||||
0xbefb007c, 0xbefa0080,
|
0x866eff7f, 0x04000000,
|
||||||
0xb8fa2a05, 0x807a817a,
|
0xbf84001e, 0xbefe00c1,
|
||||||
0x8e7a8a7a, 0xb8f01605,
|
0xbeff00c1, 0xb8ef4306,
|
||||||
0x80708170, 0x8e708670,
|
0x866fc16f, 0xbf840019,
|
||||||
0x807a707a, 0xbef60084,
|
0x8e6f866f, 0x8e6f826f,
|
||||||
0xbef600ff, 0x01000000,
|
0xbef6006f, 0xb8f82a05,
|
||||||
0xbefe007c, 0xbefc007a,
|
|
||||||
0xc0611efa, 0x0000007c,
|
|
||||||
0xbf8cc07f, 0x807a847a,
|
|
||||||
0xbefc007e, 0xbefe007c,
|
|
||||||
0xbefc007a, 0xc0611b3a,
|
|
||||||
0x0000007c, 0xbf8cc07f,
|
|
||||||
0x807a847a, 0xbefc007e,
|
|
||||||
0xbefe007c, 0xbefc007a,
|
|
||||||
0xc0611b7a, 0x0000007c,
|
|
||||||
0xbf8cc07f, 0x807a847a,
|
|
||||||
0xbefc007e, 0xbefe007c,
|
|
||||||
0xbefc007a, 0xc0611bba,
|
|
||||||
0x0000007c, 0xbf8cc07f,
|
|
||||||
0x807a847a, 0xbefc007e,
|
|
||||||
0xbefe007c, 0xbefc007a,
|
|
||||||
0xc0611bfa, 0x0000007c,
|
|
||||||
0xbf8cc07f, 0x807a847a,
|
|
||||||
0xbefc007e, 0xbefe007c,
|
|
||||||
0xbefc007a, 0xc0611e3a,
|
|
||||||
0x0000007c, 0xbf8cc07f,
|
|
||||||
0x807a847a, 0xbefc007e,
|
|
||||||
0xb8f1f803, 0xbefe007c,
|
|
||||||
0xbefc007a, 0xc0611c7a,
|
|
||||||
0x0000007c, 0xbf8cc07f,
|
|
||||||
0x807a847a, 0xbefc007e,
|
|
||||||
0xbefe007c, 0xbefc007a,
|
|
||||||
0xc0611a3a, 0x0000007c,
|
|
||||||
0xbf8cc07f, 0x807a847a,
|
|
||||||
0xbefc007e, 0xbefe007c,
|
|
||||||
0xbefc007a, 0xc0611a7a,
|
|
||||||
0x0000007c, 0xbf8cc07f,
|
|
||||||
0x807a847a, 0xbefc007e,
|
|
||||||
0xb8fbf801, 0xbefe007c,
|
|
||||||
0xbefc007a, 0xc0611efa,
|
|
||||||
0x0000007c, 0xbf8cc07f,
|
|
||||||
0x807a847a, 0xbefc007e,
|
|
||||||
0x8670ff7f, 0x04000000,
|
|
||||||
0xbeef0080, 0x876f6f70,
|
|
||||||
0xb8fa2a05, 0x807a817a,
|
|
||||||
0x8e7a8a7a, 0xb8f11605,
|
|
||||||
0x80718171, 0x8e718471,
|
|
||||||
0x8e768271, 0xbef600ff,
|
|
||||||
0x01000000, 0xbef20174,
|
|
||||||
0x80747a74, 0x82758075,
|
|
||||||
0xbefc0080, 0xbf800000,
|
|
||||||
0xbe802b00, 0xbe822b02,
|
|
||||||
0xbe842b04, 0xbe862b06,
|
|
||||||
0xbe882b08, 0xbe8a2b0a,
|
|
||||||
0xbe8c2b0c, 0xbe8e2b0e,
|
|
||||||
0xc06b003a, 0x00000000,
|
|
||||||
0xbf8cc07f, 0xc06b013a,
|
|
||||||
0x00000010, 0xbf8cc07f,
|
|
||||||
0xc06b023a, 0x00000020,
|
|
||||||
0xbf8cc07f, 0xc06b033a,
|
|
||||||
0x00000030, 0xbf8cc07f,
|
|
||||||
0x8074c074, 0x82758075,
|
|
||||||
0x807c907c, 0xbf0a717c,
|
|
||||||
0xbf85ffe7, 0xbef40172,
|
|
||||||
0xbefa0080, 0xbefe00c1,
|
|
||||||
0xbeff00c1, 0xbee80080,
|
|
||||||
0xbee90080, 0xbef600ff,
|
|
||||||
0x01000000, 0xe0724000,
|
|
||||||
0x7a1d0000, 0xe0724100,
|
|
||||||
0x7a1d0100, 0xe0724200,
|
|
||||||
0x7a1d0200, 0xe0724300,
|
|
||||||
0x7a1d0300, 0xbefe00c1,
|
|
||||||
0xbeff00c1, 0xb8f14306,
|
|
||||||
0x8671c171, 0xbf84002c,
|
|
||||||
0xbf8a0000, 0x8670ff6f,
|
|
||||||
0x04000000, 0xbf840028,
|
|
||||||
0x8e718671, 0x8e718271,
|
|
||||||
0xbef60071, 0xb8fa2a05,
|
|
||||||
0x807a817a, 0x8e7a8a7a,
|
|
||||||
0xb8f01605, 0x80708170,
|
|
||||||
0x8e708670, 0x807a707a,
|
|
||||||
0x807aff7a, 0x00000080,
|
|
||||||
0xbef600ff, 0x01000000,
|
|
||||||
0xbefc0080, 0xd28c0002,
|
|
||||||
0x000100c1, 0xd28d0003,
|
|
||||||
0x000204c1, 0xd1060002,
|
|
||||||
0x00011103, 0x7e0602ff,
|
|
||||||
0x00000200, 0xbefc00ff,
|
|
||||||
0x00010000, 0xbe800077,
|
|
||||||
0x8677ff77, 0xff7fffff,
|
|
||||||
0x8777ff77, 0x00058000,
|
|
||||||
0xd8ec0000, 0x00000002,
|
|
||||||
0xbf8cc07f, 0xe0765000,
|
|
||||||
0x7a1d0002, 0x68040702,
|
|
||||||
0xd0c9006a, 0x0000e302,
|
|
||||||
0xbf87fff7, 0xbef70000,
|
|
||||||
0xbefa00ff, 0x00000400,
|
|
||||||
0xbefe00c1, 0xbeff00c1,
|
|
||||||
0xb8f12a05, 0x80718171,
|
|
||||||
0x8e718271, 0x8e768871,
|
|
||||||
0xbef600ff, 0x01000000,
|
|
||||||
0xbefc0084, 0xbf0a717c,
|
|
||||||
0xbf840015, 0xbf11017c,
|
|
||||||
0x8071ff71, 0x00001000,
|
|
||||||
0x7e000300, 0x7e020301,
|
|
||||||
0x7e040302, 0x7e060303,
|
|
||||||
0xe0724000, 0x7a1d0000,
|
|
||||||
0xe0724100, 0x7a1d0100,
|
|
||||||
0xe0724200, 0x7a1d0200,
|
|
||||||
0xe0724300, 0x7a1d0300,
|
|
||||||
0x807c847c, 0x807aff7a,
|
|
||||||
0x00000400, 0xbf0a717c,
|
|
||||||
0xbf85ffef, 0xbf9c0000,
|
|
||||||
0xbf8200dc, 0xbef4007e,
|
|
||||||
0x8675ff7f, 0x0000ffff,
|
|
||||||
0x8775ff75, 0x00040000,
|
|
||||||
0xbef60080, 0xbef700ff,
|
|
||||||
0x00807fac, 0x866eff7f,
|
|
||||||
0x08000000, 0x8f6e836e,
|
|
||||||
0x87776e77, 0x866eff7f,
|
|
||||||
0x70000000, 0x8f6e816e,
|
|
||||||
0x87776e77, 0x866eff7f,
|
|
||||||
0x04000000, 0xbf84001e,
|
|
||||||
0xbefe00c1, 0xbeff00c1,
|
|
||||||
0xb8ef4306, 0x866fc16f,
|
|
||||||
0xbf840019, 0x8e6f866f,
|
|
||||||
0x8e6f826f, 0xbef6006f,
|
|
||||||
0xb8f82a05, 0x80788178,
|
|
||||||
0x8e788a78, 0xb8ee1605,
|
|
||||||
0x806e816e, 0x8e6e866e,
|
|
||||||
0x80786e78, 0x8078ff78,
|
|
||||||
0x00000080, 0xbef600ff,
|
|
||||||
0x01000000, 0xbefc0080,
|
|
||||||
0xe0510000, 0x781d0000,
|
|
||||||
0xe0510100, 0x781d0000,
|
|
||||||
0x807cff7c, 0x00000200,
|
|
||||||
0x8078ff78, 0x00000200,
|
|
||||||
0xbf0a6f7c, 0xbf85fff6,
|
|
||||||
0xbef80080, 0xbefe00c1,
|
|
||||||
0xbeff00c1, 0xb8ef2a05,
|
|
||||||
0x806f816f, 0x8e6f826f,
|
|
||||||
0x8e76886f, 0xbef600ff,
|
|
||||||
0x01000000, 0xbeee0078,
|
|
||||||
0x8078ff78, 0x00000400,
|
|
||||||
0xbefc0084, 0xbf11087c,
|
|
||||||
0x806fff6f, 0x00008000,
|
|
||||||
0xe0524000, 0x781d0000,
|
|
||||||
0xe0524100, 0x781d0100,
|
|
||||||
0xe0524200, 0x781d0200,
|
|
||||||
0xe0524300, 0x781d0300,
|
|
||||||
0xbf8c0f70, 0x7e000300,
|
|
||||||
0x7e020301, 0x7e040302,
|
|
||||||
0x7e060303, 0x807c847c,
|
|
||||||
0x8078ff78, 0x00000400,
|
|
||||||
0xbf0a6f7c, 0xbf85ffee,
|
|
||||||
0xbf9c0000, 0xe0524000,
|
|
||||||
0x6e1d0000, 0xe0524100,
|
|
||||||
0x6e1d0100, 0xe0524200,
|
|
||||||
0x6e1d0200, 0xe0524300,
|
|
||||||
0x6e1d0300, 0xb8f82a05,
|
|
||||||
0x80788178, 0x8e788a78,
|
0x80788178, 0x8e788a78,
|
||||||
0xb8ee1605, 0x806e816e,
|
0xb8ee1605, 0x806e816e,
|
||||||
0x8e6e866e, 0x80786e78,
|
0x8e6e866e, 0x80786e78,
|
||||||
0x80f8c078, 0xb8ef1605,
|
0x8078ff78, 0x00000080,
|
||||||
0x806f816f, 0x8e6f846f,
|
0xbef600ff, 0x01000000,
|
||||||
0x8e76826f, 0xbef600ff,
|
0xbefc0080, 0xe0510000,
|
||||||
0x01000000, 0xbefc006f,
|
0x781d0000, 0xe0510100,
|
||||||
0xc031003a, 0x00000078,
|
0x781d0000, 0x807cff7c,
|
||||||
0x80f8c078, 0xbf8cc07f,
|
0x00000200, 0x8078ff78,
|
||||||
0x80fc907c, 0xbf800000,
|
0x00000200, 0xbf0a6f7c,
|
||||||
0xbe802d00, 0xbe822d02,
|
0xbf85fff6, 0xbef80080,
|
||||||
0xbe842d04, 0xbe862d06,
|
0xbefe00c1, 0xbeff00c1,
|
||||||
0xbe882d08, 0xbe8a2d0a,
|
0xb8ef2a05, 0x806f816f,
|
||||||
0xbe8c2d0c, 0xbe8e2d0e,
|
0x8e6f826f, 0x8e76886f,
|
||||||
0xbf06807c, 0xbf84fff0,
|
0xbef600ff, 0x01000000,
|
||||||
|
0xbeee0078, 0x8078ff78,
|
||||||
|
0x00000400, 0xbefc0084,
|
||||||
|
0xbf11087c, 0x806fff6f,
|
||||||
|
0x00008000, 0xe0524000,
|
||||||
|
0x781d0000, 0xe0524100,
|
||||||
|
0x781d0100, 0xe0524200,
|
||||||
|
0x781d0200, 0xe0524300,
|
||||||
|
0x781d0300, 0xbf8c0f70,
|
||||||
|
0x7e000300, 0x7e020301,
|
||||||
|
0x7e040302, 0x7e060303,
|
||||||
|
0x807c847c, 0x8078ff78,
|
||||||
|
0x00000400, 0xbf0a6f7c,
|
||||||
|
0xbf85ffee, 0xbf9c0000,
|
||||||
|
0xe0524000, 0x6e1d0000,
|
||||||
|
0xe0524100, 0x6e1d0100,
|
||||||
|
0xe0524200, 0x6e1d0200,
|
||||||
|
0xe0524300, 0x6e1d0300,
|
||||||
0xb8f82a05, 0x80788178,
|
0xb8f82a05, 0x80788178,
|
||||||
0x8e788a78, 0xb8ee1605,
|
0x8e788a78, 0xb8ee1605,
|
||||||
0x806e816e, 0x8e6e866e,
|
0x806e816e, 0x8e6e866e,
|
||||||
0x80786e78, 0xbef60084,
|
0x80786e78, 0x80f8c078,
|
||||||
|
0xb8ef1605, 0x806f816f,
|
||||||
|
0x8e6f846f, 0x8e76826f,
|
||||||
0xbef600ff, 0x01000000,
|
0xbef600ff, 0x01000000,
|
||||||
0xc0211bfa, 0x00000078,
|
0xbefc006f, 0xc031003a,
|
||||||
0x80788478, 0xc0211b3a,
|
0x00000078, 0x80f8c078,
|
||||||
|
0xbf8cc07f, 0x80fc907c,
|
||||||
|
0xbf800000, 0xbe802d00,
|
||||||
|
0xbe822d02, 0xbe842d04,
|
||||||
|
0xbe862d06, 0xbe882d08,
|
||||||
|
0xbe8a2d0a, 0xbe8c2d0c,
|
||||||
|
0xbe8e2d0e, 0xbf06807c,
|
||||||
|
0xbf84fff0, 0xb8f82a05,
|
||||||
|
0x80788178, 0x8e788a78,
|
||||||
|
0xb8ee1605, 0x806e816e,
|
||||||
|
0x8e6e866e, 0x80786e78,
|
||||||
|
0xbef60084, 0xbef600ff,
|
||||||
|
0x01000000, 0xc0211bfa,
|
||||||
0x00000078, 0x80788478,
|
0x00000078, 0x80788478,
|
||||||
0xc0211b7a, 0x00000078,
|
0xc0211b3a, 0x00000078,
|
||||||
0x80788478, 0xc0211eba,
|
0x80788478, 0xc0211b7a,
|
||||||
0x00000078, 0x80788478,
|
0x00000078, 0x80788478,
|
||||||
0xc0211efa, 0x00000078,
|
0xc0211c3a, 0x00000078,
|
||||||
0x80788478, 0xc0211c3a,
|
0x80788478, 0xc0211c7a,
|
||||||
0x00000078, 0x80788478,
|
0x00000078, 0x80788478,
|
||||||
0xc0211c7a, 0x00000078,
|
0xc0211eba, 0x00000078,
|
||||||
0x80788478, 0xc0211a3a,
|
0x80788478, 0xc0211efa,
|
||||||
0x00000078, 0x80788478,
|
0x00000078, 0x80788478,
|
||||||
0xc0211a7a, 0x00000078,
|
0xc0211a3a, 0x00000078,
|
||||||
0x80788478, 0xc0211cfa,
|
0x80788478, 0xc0211a7a,
|
||||||
0x00000078, 0x80788478,
|
0x00000078, 0x80788478,
|
||||||
0xbf8cc07f, 0xbefc006f,
|
0xc0211cfa, 0x00000078,
|
||||||
0xbefe007a, 0xbeff007b,
|
0x80788478, 0xbf8cc07f,
|
||||||
0x866f71ff, 0x000003ff,
|
0xbefc006f, 0xbefe0070,
|
||||||
0xb96f4803, 0x866f71ff,
|
0xbeff0071, 0x866f7bff,
|
||||||
0xfffff800, 0x8f6f8b6f,
|
0x000003ff, 0xb96f4803,
|
||||||
0xb96fa2c3, 0xb973f801,
|
0x866f7bff, 0xfffff800,
|
||||||
0xb8ee2a05, 0x806e816e,
|
0x8f6f8b6f, 0xb96fa2c3,
|
||||||
0x8e6e8a6e, 0xb8ef1605,
|
0xb973f801, 0xb8ee2a05,
|
||||||
0x806f816f, 0x8e6f866f,
|
0x806e816e, 0x8e6e8a6e,
|
||||||
0x806e6f6e, 0x806e746e,
|
0xb8ef1605, 0x806f816f,
|
||||||
0x826f8075, 0x866fff6f,
|
0x8e6f866f, 0x806e6f6e,
|
||||||
0x0000ffff, 0xc0071cb7,
|
0x806e746e, 0x826f8075,
|
||||||
0x00000040, 0xc00b1d37,
|
0x866fff6f, 0x0000ffff,
|
||||||
0x00000048, 0xc0031e77,
|
0xc00b1c37, 0x00000050,
|
||||||
0x00000058, 0xc0071eb7,
|
0xc00b1d37, 0x00000060,
|
||||||
0x0000005c, 0xbf8cc07f,
|
0xc0031e77, 0x00000074,
|
||||||
0x866fff6d, 0xf0000000,
|
0xbf8cc07f, 0x866fff6d,
|
||||||
0x8f6f9c6f, 0x8e6f906f,
|
0xf8000000, 0x8f6f9b6f,
|
||||||
0xbeee0080, 0x876e6f6e,
|
0x8e6f906f, 0xbeee0080,
|
||||||
0x866fff6d, 0x08000000,
|
0x876e6f6e, 0x866fff6d,
|
||||||
0x8f6f9b6f, 0x8e6f8f6f,
|
0x04000000, 0x8f6f9a6f,
|
||||||
0x876e6f6e, 0x866fff70,
|
0x8e6f8f6f, 0x876e6f6e,
|
||||||
0x00800000, 0x8f6f976f,
|
0x866fff7a, 0x00800000,
|
||||||
0xb96ef807, 0x866dff6d,
|
0x8f6f976f, 0xb96ef807,
|
||||||
0x0000ffff, 0x86fe7e7e,
|
0x866dff6d, 0x0000ffff,
|
||||||
0x86ea6a6a, 0x8f6e8370,
|
0x86fe7e7e, 0x86ea6a6a,
|
||||||
0xb96ee0c2, 0xbf800002,
|
0x8f6e837a, 0xb96ee0c2,
|
||||||
0xb9700002, 0xbf8a0000,
|
0xbf800002, 0xb97a0002,
|
||||||
0x95806f6c, 0xbf810000,
|
0xbf8a0000, 0x95806f6c,
|
||||||
|
0xbf810000, 0x00000000,
|
||||||
};
|
};
|
||||||
|
|
|
@ -282,19 +282,6 @@ if G8SR_DEBUG_TIMESTAMP
|
||||||
s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
|
s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
|
||||||
end
|
end
|
||||||
|
|
||||||
//check whether there is mem_viol
|
|
||||||
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
|
|
||||||
s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
|
|
||||||
s_cbranch_scc0 L_NO_PC_REWIND
|
|
||||||
|
|
||||||
//if so, need rewind PC assuming GDS operation gets NACKed
|
|
||||||
s_mov_b32 s_save_tmp, 0 //clear mem_viol bit
|
|
||||||
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit
|
|
||||||
s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
|
|
||||||
s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8
|
|
||||||
s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0 // -scc
|
|
||||||
|
|
||||||
L_NO_PC_REWIND:
|
|
||||||
s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
|
s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
|
||||||
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
|
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
|
||||||
|
|
||||||
|
|
|
@ -150,10 +150,10 @@ var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
|
||||||
var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
|
var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
|
||||||
var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
|
var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
|
||||||
|
|
||||||
var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
|
var S_SAVE_PC_HI_RCNT_SHIFT = 27 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
|
||||||
var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME
|
var S_SAVE_PC_HI_RCNT_MASK = 0xF8000000 //FIXME
|
||||||
var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME
|
var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 26 //FIXME
|
||||||
var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME
|
var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x04000000 //FIXME
|
||||||
|
|
||||||
var s_save_spi_init_lo = exec_lo
|
var s_save_spi_init_lo = exec_lo
|
||||||
var s_save_spi_init_hi = exec_hi
|
var s_save_spi_init_hi = exec_hi
|
||||||
|
@ -162,8 +162,8 @@ var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],tra
|
||||||
var s_save_pc_hi = ttmp1
|
var s_save_pc_hi = ttmp1
|
||||||
var s_save_exec_lo = ttmp2
|
var s_save_exec_lo = ttmp2
|
||||||
var s_save_exec_hi = ttmp3
|
var s_save_exec_hi = ttmp3
|
||||||
var s_save_tmp = ttmp4
|
var s_save_tmp = ttmp14
|
||||||
var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine
|
var s_save_trapsts = ttmp15 //not really used until the end of the SAVE routine
|
||||||
var s_save_xnack_mask_lo = ttmp6
|
var s_save_xnack_mask_lo = ttmp6
|
||||||
var s_save_xnack_mask_hi = ttmp7
|
var s_save_xnack_mask_hi = ttmp7
|
||||||
var s_save_buf_rsrc0 = ttmp8
|
var s_save_buf_rsrc0 = ttmp8
|
||||||
|
@ -171,9 +171,9 @@ var s_save_buf_rsrc1 = ttmp9
|
||||||
var s_save_buf_rsrc2 = ttmp10
|
var s_save_buf_rsrc2 = ttmp10
|
||||||
var s_save_buf_rsrc3 = ttmp11
|
var s_save_buf_rsrc3 = ttmp11
|
||||||
var s_save_status = ttmp12
|
var s_save_status = ttmp12
|
||||||
var s_save_mem_offset = ttmp14
|
var s_save_mem_offset = ttmp4
|
||||||
var s_save_alloc_size = s_save_trapsts //conflict
|
var s_save_alloc_size = s_save_trapsts //conflict
|
||||||
var s_save_m0 = ttmp15
|
var s_save_m0 = ttmp5
|
||||||
var s_save_ttmps_lo = s_save_tmp //no conflict
|
var s_save_ttmps_lo = s_save_tmp //no conflict
|
||||||
var s_save_ttmps_hi = s_save_trapsts //no conflict
|
var s_save_ttmps_hi = s_save_trapsts //no conflict
|
||||||
|
|
||||||
|
@ -207,10 +207,10 @@ var s_restore_mode = ttmp7
|
||||||
|
|
||||||
var s_restore_pc_lo = ttmp0
|
var s_restore_pc_lo = ttmp0
|
||||||
var s_restore_pc_hi = ttmp1
|
var s_restore_pc_hi = ttmp1
|
||||||
var s_restore_exec_lo = ttmp14
|
var s_restore_exec_lo = ttmp4
|
||||||
var s_restore_exec_hi = ttmp15
|
var s_restore_exec_hi = ttmp5
|
||||||
var s_restore_status = ttmp4
|
var s_restore_status = ttmp14
|
||||||
var s_restore_trapsts = ttmp5
|
var s_restore_trapsts = ttmp15
|
||||||
var s_restore_xnack_mask_lo = xnack_mask_lo
|
var s_restore_xnack_mask_lo = xnack_mask_lo
|
||||||
var s_restore_xnack_mask_hi = xnack_mask_hi
|
var s_restore_xnack_mask_hi = xnack_mask_hi
|
||||||
var s_restore_buf_rsrc0 = ttmp8
|
var s_restore_buf_rsrc0 = ttmp8
|
||||||
|
@ -266,10 +266,16 @@ if (!EMU_RUN_HACK)
|
||||||
|
|
||||||
L_HALT_WAVE:
|
L_HALT_WAVE:
|
||||||
// If STATUS.HALT is set then this fault must come from SQC instruction fetch.
|
// If STATUS.HALT is set then this fault must come from SQC instruction fetch.
|
||||||
// We cannot prevent further faults so just terminate the wavefront.
|
// We cannot prevent further faults. Spin wait until context saved.
|
||||||
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
|
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
|
||||||
s_cbranch_scc0 L_NOT_ALREADY_HALTED
|
s_cbranch_scc0 L_NOT_ALREADY_HALTED
|
||||||
s_endpgm
|
|
||||||
|
L_WAIT_CTX_SAVE:
|
||||||
|
s_sleep 0x10
|
||||||
|
s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS)
|
||||||
|
s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_SAVECTX_MASK
|
||||||
|
s_cbranch_scc0 L_WAIT_CTX_SAVE
|
||||||
|
|
||||||
L_NOT_ALREADY_HALTED:
|
L_NOT_ALREADY_HALTED:
|
||||||
s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
|
s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
|
||||||
|
|
||||||
|
@ -293,12 +299,12 @@ L_FETCH_2ND_TRAP:
|
||||||
// Read second-level TBA/TMA from first-level TMA and jump if available.
|
// Read second-level TBA/TMA from first-level TMA and jump if available.
|
||||||
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
|
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
|
||||||
// ttmp12 holds SQ_WAVE_STATUS
|
// ttmp12 holds SQ_WAVE_STATUS
|
||||||
s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO)
|
s_getreg_b32 ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO)
|
||||||
s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI)
|
s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
|
||||||
s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8
|
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
|
||||||
s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA
|
s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
|
||||||
s_waitcnt lgkmcnt(0)
|
s_waitcnt lgkmcnt(0)
|
||||||
s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA
|
s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
|
||||||
s_waitcnt lgkmcnt(0)
|
s_waitcnt lgkmcnt(0)
|
||||||
s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
|
s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
|
||||||
s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler has not been set
|
s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler has not been set
|
||||||
|
@ -405,7 +411,7 @@ end
|
||||||
else
|
else
|
||||||
end
|
end
|
||||||
|
|
||||||
// Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
|
// Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
|
||||||
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
|
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
|
||||||
get_vgpr_size_bytes(s_save_ttmps_lo)
|
get_vgpr_size_bytes(s_save_ttmps_lo)
|
||||||
get_sgpr_size_bytes(s_save_ttmps_hi)
|
get_sgpr_size_bytes(s_save_ttmps_hi)
|
||||||
|
@ -413,13 +419,11 @@ end
|
||||||
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
|
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
|
||||||
s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0
|
s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0
|
||||||
s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF
|
s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF
|
||||||
s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1
|
s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
|
||||||
ack_sqc_store_workaround()
|
ack_sqc_store_workaround()
|
||||||
s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1
|
s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
|
||||||
ack_sqc_store_workaround()
|
ack_sqc_store_workaround()
|
||||||
s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1
|
s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
|
||||||
ack_sqc_store_workaround()
|
|
||||||
s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1
|
|
||||||
ack_sqc_store_workaround()
|
ack_sqc_store_workaround()
|
||||||
|
|
||||||
/* setup Resource Constants */
|
/* setup Resource Constants */
|
||||||
|
@ -1093,7 +1097,7 @@ end
|
||||||
//s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore
|
//s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore
|
||||||
s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
|
s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
|
||||||
|
|
||||||
// Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
|
// Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
|
||||||
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
|
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
|
||||||
get_vgpr_size_bytes(s_restore_ttmps_lo)
|
get_vgpr_size_bytes(s_restore_ttmps_lo)
|
||||||
get_sgpr_size_bytes(s_restore_ttmps_hi)
|
get_sgpr_size_bytes(s_restore_ttmps_hi)
|
||||||
|
@ -1101,10 +1105,9 @@ end
|
||||||
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
|
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
|
||||||
s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
|
s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
|
||||||
s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
|
s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
|
||||||
s_load_dwordx2 [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1
|
s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1
|
||||||
s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1
|
s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1
|
||||||
s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1
|
s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
|
||||||
s_load_dwordx2 [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1
|
|
||||||
s_waitcnt lgkmcnt(0)
|
s_waitcnt lgkmcnt(0)
|
||||||
|
|
||||||
//reuse s_restore_m0 as a temp register
|
//reuse s_restore_m0 as a temp register
|
||||||
|
|
|
@ -213,6 +213,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
|
||||||
q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
|
q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
|
||||||
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
|
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
|
||||||
q_properties->type = KFD_QUEUE_TYPE_SDMA;
|
q_properties->type = KFD_QUEUE_TYPE_SDMA;
|
||||||
|
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
|
||||||
|
q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
|
||||||
else
|
else
|
||||||
return -ENOTSUPP;
|
return -ENOTSUPP;
|
||||||
|
|
||||||
|
@ -522,7 +524,7 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
|
||||||
struct kfd_process_device *pdd;
|
struct kfd_process_device *pdd;
|
||||||
|
|
||||||
dev = kfd_device_by_id(args->gpu_id);
|
dev = kfd_device_by_id(args->gpu_id);
|
||||||
if (dev == NULL)
|
if (!dev)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
mutex_lock(&p->mutex);
|
mutex_lock(&p->mutex);
|
||||||
|
@ -1272,6 +1274,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
|
||||||
if (args->size != kfd_doorbell_process_slice(dev))
|
if (args->size != kfd_doorbell_process_slice(dev))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
offset = kfd_get_process_doorbells(dev, p);
|
offset = kfd_get_process_doorbells(dev, p);
|
||||||
|
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
|
||||||
|
if (args->size != PAGE_SIZE)
|
||||||
|
return -EINVAL;
|
||||||
|
offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
|
||||||
|
if (!offset)
|
||||||
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
|
|
||||||
mutex_lock(&p->mutex);
|
mutex_lock(&p->mutex);
|
||||||
|
@ -1301,6 +1309,14 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
|
||||||
args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
|
args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
|
||||||
args->mmap_offset = offset;
|
args->mmap_offset = offset;
|
||||||
|
|
||||||
|
/* MMIO is mapped through kfd device
|
||||||
|
* Generate a kfd mmap offset
|
||||||
|
*/
|
||||||
|
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
|
||||||
|
args->mmap_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(args->gpu_id);
|
||||||
|
args->mmap_offset <<= PAGE_SHIFT;
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
err_free:
|
err_free:
|
||||||
|
@ -1551,6 +1567,32 @@ copy_from_user_failed:
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * kfd_ioctl_alloc_queue_gws - ioctl handler for AMDKFD_IOC_ALLOC_QUEUE_GWS.
 *
 * @filep: file the ioctl was issued on (not used in this function)
 * @p:     calling process; its mutex is held around the queue-manager call
 * @data:  struct kfd_ioctl_alloc_queue_gws_args (gpu_id, queue_id and num_gws
 *         are read, first_gws is written)
 *
 * Returns -EINVAL when hws_gws_support is off, when args->gpu_id does not
 * resolve to a device, or when the device queue manager uses
 * KFD_SCHED_POLICY_NO_HWS. Otherwise returns whatever pqm_set_gws() returns.
 * Once pqm_set_gws() has been called, args->first_gws is set to 0 regardless
 * of its result.
 */
static int kfd_ioctl_alloc_queue_gws(struct file *filep,
|
||||||
|
struct kfd_process *p, void *data)
|
||||||
|
{
|
||||||
|
int retval;
|
||||||
|
struct kfd_ioctl_alloc_queue_gws_args *args = data;
|
||||||
|
struct kfd_dev *dev;
|
||||||
|
|
||||||
|
if (!hws_gws_support)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
dev = kfd_device_by_id(args->gpu_id);
|
||||||
|
if (!dev) {
|
||||||
|
pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
mutex_lock(&p->mutex);
|
||||||
|
/*
 * A non-zero num_gws passes the device's gws object, zero passes NULL.
 * NOTE(review): presumably NULL detaches GWS from the queue - confirm
 * against pqm_set_gws().
 */
retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
|
||||||
|
mutex_unlock(&p->mutex);
|
||||||
|
|
||||||
|
args->first_gws = 0; /* written even when pqm_set_gws() failed */
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
|
||||||
static int kfd_ioctl_get_dmabuf_info(struct file *filep,
|
static int kfd_ioctl_get_dmabuf_info(struct file *filep,
|
||||||
struct kfd_process *p, void *data)
|
struct kfd_process *p, void *data)
|
||||||
{
|
{
|
||||||
|
@ -1753,6 +1795,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
|
||||||
AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
|
AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
|
||||||
kfd_ioctl_import_dmabuf, 0),
|
kfd_ioctl_import_dmabuf, 0),
|
||||||
|
|
||||||
|
AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
|
||||||
|
kfd_ioctl_alloc_queue_gws, 0),
|
||||||
};
|
};
|
||||||
|
|
||||||
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
|
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
|
||||||
|
@ -1845,6 +1889,39 @@ err_i1:
|
||||||
return retcode;
|
return retcode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
|
||||||
|
struct vm_area_struct *vma)
|
||||||
|
{
|
||||||
|
phys_addr_t address;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
|
||||||
|
|
||||||
|
vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
|
||||||
|
VM_DONTDUMP | VM_PFNMAP;
|
||||||
|
|
||||||
|
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
|
||||||
|
|
||||||
|
pr_debug("Process %d mapping mmio page\n"
|
||||||
|
" target user address == 0x%08llX\n"
|
||||||
|
" physical address == 0x%08llX\n"
|
||||||
|
" vm_flags == 0x%04lX\n"
|
||||||
|
" size == 0x%04lX\n",
|
||||||
|
process->pasid, (unsigned long long) vma->vm_start,
|
||||||
|
address, vma->vm_flags, PAGE_SIZE);
|
||||||
|
|
||||||
|
ret = io_remap_pfn_range(vma,
|
||||||
|
vma->vm_start,
|
||||||
|
address >> PAGE_SHIFT,
|
||||||
|
PAGE_SIZE,
|
||||||
|
vma->vm_page_prot);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
|
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||||
{
|
{
|
||||||
struct kfd_process *process;
|
struct kfd_process *process;
|
||||||
|
@ -1875,6 +1952,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||||
if (!dev)
|
if (!dev)
|
||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
return kfd_reserved_mem_mmap(dev, process, vma);
|
return kfd_reserved_mem_mmap(dev, process, vma);
|
||||||
|
case KFD_MMAP_TYPE_MMIO:
|
||||||
|
if (!dev)
|
||||||
|
return -ENODEV;
|
||||||
|
return kfd_mmio_mmap(dev, process, vma);
|
||||||
}
|
}
|
||||||
|
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
|
|
|
@ -134,6 +134,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
|
||||||
#define polaris10_cache_info carrizo_cache_info
|
#define polaris10_cache_info carrizo_cache_info
|
||||||
#define polaris11_cache_info carrizo_cache_info
|
#define polaris11_cache_info carrizo_cache_info
|
||||||
#define polaris12_cache_info carrizo_cache_info
|
#define polaris12_cache_info carrizo_cache_info
|
||||||
|
#define vegam_cache_info carrizo_cache_info
|
||||||
/* TODO - check & update Vega10 cache details */
|
/* TODO - check & update Vega10 cache details */
|
||||||
#define vega10_cache_info carrizo_cache_info
|
#define vega10_cache_info carrizo_cache_info
|
||||||
#define raven_cache_info carrizo_cache_info
|
#define raven_cache_info carrizo_cache_info
|
||||||
|
@ -372,7 +373,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
|
||||||
if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
|
if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
|
||||||
props->weight = 20;
|
props->weight = 20;
|
||||||
else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
|
else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
|
||||||
props->weight = 15;
|
props->weight = 15 * iolink->num_hops_xgmi;
|
||||||
else
|
else
|
||||||
props->weight = node_distance(id_from, id_to);
|
props->weight = node_distance(id_from, id_to);
|
||||||
|
|
||||||
|
@ -652,6 +653,10 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
|
||||||
pcache_info = polaris12_cache_info;
|
pcache_info = polaris12_cache_info;
|
||||||
num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
|
num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
|
||||||
break;
|
break;
|
||||||
|
case CHIP_VEGAM:
|
||||||
|
pcache_info = vegam_cache_info;
|
||||||
|
num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
|
||||||
|
break;
|
||||||
case CHIP_VEGA10:
|
case CHIP_VEGA10:
|
||||||
case CHIP_VEGA12:
|
case CHIP_VEGA12:
|
||||||
case CHIP_VEGA20:
|
case CHIP_VEGA20:
|
||||||
|
@ -1092,6 +1097,7 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
|
||||||
|
|
||||||
static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
|
static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
|
||||||
struct kfd_dev *kdev,
|
struct kfd_dev *kdev,
|
||||||
|
struct kfd_dev *peer_kdev,
|
||||||
struct crat_subtype_iolink *sub_type_hdr,
|
struct crat_subtype_iolink *sub_type_hdr,
|
||||||
uint32_t proximity_domain_from,
|
uint32_t proximity_domain_from,
|
||||||
uint32_t proximity_domain_to)
|
uint32_t proximity_domain_to)
|
||||||
|
@ -1110,6 +1116,8 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
|
||||||
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
|
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
|
||||||
sub_type_hdr->proximity_domain_from = proximity_domain_from;
|
sub_type_hdr->proximity_domain_from = proximity_domain_from;
|
||||||
sub_type_hdr->proximity_domain_to = proximity_domain_to;
|
sub_type_hdr->proximity_domain_to = proximity_domain_to;
|
||||||
|
sub_type_hdr->num_hops_xgmi =
|
||||||
|
amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1287,7 +1295,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
|
||||||
(char *)sub_type_hdr +
|
(char *)sub_type_hdr +
|
||||||
sizeof(struct crat_subtype_iolink));
|
sizeof(struct crat_subtype_iolink));
|
||||||
ret = kfd_fill_gpu_xgmi_link_to_gpu(
|
ret = kfd_fill_gpu_xgmi_link_to_gpu(
|
||||||
&avail_size, kdev,
|
&avail_size, kdev, peer_dev->gpu,
|
||||||
(struct crat_subtype_iolink *)sub_type_hdr,
|
(struct crat_subtype_iolink *)sub_type_hdr,
|
||||||
proximity_domain, nid);
|
proximity_domain, nid);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
|
|
|
@ -274,7 +274,8 @@ struct crat_subtype_iolink {
|
||||||
uint32_t minimum_bandwidth_mbs;
|
uint32_t minimum_bandwidth_mbs;
|
||||||
uint32_t maximum_bandwidth_mbs;
|
uint32_t maximum_bandwidth_mbs;
|
||||||
uint32_t recommended_transfer_size;
|
uint32_t recommended_transfer_size;
|
||||||
uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH];
|
uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH - 1];
|
||||||
|
uint8_t num_hops_xgmi;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -54,6 +54,7 @@ static const struct kfd_device_info kaveri_device_info = {
|
||||||
.needs_iommu_device = true,
|
.needs_iommu_device = true,
|
||||||
.needs_pci_atomics = false,
|
.needs_pci_atomics = false,
|
||||||
.num_sdma_engines = 2,
|
.num_sdma_engines = 2,
|
||||||
|
.num_xgmi_sdma_engines = 0,
|
||||||
.num_sdma_queues_per_engine = 2,
|
.num_sdma_queues_per_engine = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -71,6 +72,7 @@ static const struct kfd_device_info carrizo_device_info = {
|
||||||
.needs_iommu_device = true,
|
.needs_iommu_device = true,
|
||||||
.needs_pci_atomics = false,
|
.needs_pci_atomics = false,
|
||||||
.num_sdma_engines = 2,
|
.num_sdma_engines = 2,
|
||||||
|
.num_xgmi_sdma_engines = 0,
|
||||||
.num_sdma_queues_per_engine = 2,
|
.num_sdma_queues_per_engine = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -87,6 +89,7 @@ static const struct kfd_device_info raven_device_info = {
|
||||||
.needs_iommu_device = true,
|
.needs_iommu_device = true,
|
||||||
.needs_pci_atomics = true,
|
.needs_pci_atomics = true,
|
||||||
.num_sdma_engines = 1,
|
.num_sdma_engines = 1,
|
||||||
|
.num_xgmi_sdma_engines = 0,
|
||||||
.num_sdma_queues_per_engine = 2,
|
.num_sdma_queues_per_engine = 2,
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
@ -105,6 +108,7 @@ static const struct kfd_device_info hawaii_device_info = {
|
||||||
.needs_iommu_device = false,
|
.needs_iommu_device = false,
|
||||||
.needs_pci_atomics = false,
|
.needs_pci_atomics = false,
|
||||||
.num_sdma_engines = 2,
|
.num_sdma_engines = 2,
|
||||||
|
.num_xgmi_sdma_engines = 0,
|
||||||
.num_sdma_queues_per_engine = 2,
|
.num_sdma_queues_per_engine = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -121,6 +125,7 @@ static const struct kfd_device_info tonga_device_info = {
|
||||||
.needs_iommu_device = false,
|
.needs_iommu_device = false,
|
||||||
.needs_pci_atomics = true,
|
.needs_pci_atomics = true,
|
||||||
.num_sdma_engines = 2,
|
.num_sdma_engines = 2,
|
||||||
|
.num_xgmi_sdma_engines = 0,
|
||||||
.num_sdma_queues_per_engine = 2,
|
.num_sdma_queues_per_engine = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -137,6 +142,7 @@ static const struct kfd_device_info fiji_device_info = {
|
||||||
.needs_iommu_device = false,
|
.needs_iommu_device = false,
|
||||||
.needs_pci_atomics = true,
|
.needs_pci_atomics = true,
|
||||||
.num_sdma_engines = 2,
|
.num_sdma_engines = 2,
|
||||||
|
.num_xgmi_sdma_engines = 0,
|
||||||
.num_sdma_queues_per_engine = 2,
|
.num_sdma_queues_per_engine = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -153,6 +159,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
|
||||||
.needs_iommu_device = false,
|
.needs_iommu_device = false,
|
||||||
.needs_pci_atomics = false,
|
.needs_pci_atomics = false,
|
||||||
.num_sdma_engines = 2,
|
.num_sdma_engines = 2,
|
||||||
|
.num_xgmi_sdma_engines = 0,
|
||||||
.num_sdma_queues_per_engine = 2,
|
.num_sdma_queues_per_engine = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -170,6 +177,7 @@ static const struct kfd_device_info polaris10_device_info = {
|
||||||
.needs_iommu_device = false,
|
.needs_iommu_device = false,
|
||||||
.needs_pci_atomics = true,
|
.needs_pci_atomics = true,
|
||||||
.num_sdma_engines = 2,
|
.num_sdma_engines = 2,
|
||||||
|
.num_xgmi_sdma_engines = 0,
|
||||||
.num_sdma_queues_per_engine = 2,
|
.num_sdma_queues_per_engine = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -186,6 +194,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
|
||||||
.needs_iommu_device = false,
|
.needs_iommu_device = false,
|
||||||
.needs_pci_atomics = false,
|
.needs_pci_atomics = false,
|
||||||
.num_sdma_engines = 2,
|
.num_sdma_engines = 2,
|
||||||
|
.num_xgmi_sdma_engines = 0,
|
||||||
.num_sdma_queues_per_engine = 2,
|
.num_sdma_queues_per_engine = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -202,6 +211,7 @@ static const struct kfd_device_info polaris11_device_info = {
|
||||||
.needs_iommu_device = false,
|
.needs_iommu_device = false,
|
||||||
.needs_pci_atomics = true,
|
.needs_pci_atomics = true,
|
||||||
.num_sdma_engines = 2,
|
.num_sdma_engines = 2,
|
||||||
|
.num_xgmi_sdma_engines = 0,
|
||||||
.num_sdma_queues_per_engine = 2,
|
.num_sdma_queues_per_engine = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -218,6 +228,24 @@ static const struct kfd_device_info polaris12_device_info = {
|
||||||
.needs_iommu_device = false,
|
.needs_iommu_device = false,
|
||||||
.needs_pci_atomics = true,
|
.needs_pci_atomics = true,
|
||||||
.num_sdma_engines = 2,
|
.num_sdma_engines = 2,
|
||||||
|
.num_xgmi_sdma_engines = 0,
|
||||||
|
.num_sdma_queues_per_engine = 2,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct kfd_device_info vegam_device_info = {
|
||||||
|
.asic_family = CHIP_VEGAM,
|
||||||
|
.max_pasid_bits = 16,
|
||||||
|
.max_no_of_hqd = 24,
|
||||||
|
.doorbell_size = 4,
|
||||||
|
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||||
|
.event_interrupt_class = &event_interrupt_class_cik,
|
||||||
|
.num_of_watch_points = 4,
|
||||||
|
.mqd_size_aligned = MQD_SIZE_ALIGNED,
|
||||||
|
.supports_cwsr = true,
|
||||||
|
.needs_iommu_device = false,
|
||||||
|
.needs_pci_atomics = true,
|
||||||
|
.num_sdma_engines = 2,
|
||||||
|
.num_xgmi_sdma_engines = 0,
|
||||||
.num_sdma_queues_per_engine = 2,
|
.num_sdma_queues_per_engine = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -234,6 +262,7 @@ static const struct kfd_device_info vega10_device_info = {
|
||||||
.needs_iommu_device = false,
|
.needs_iommu_device = false,
|
||||||
.needs_pci_atomics = false,
|
.needs_pci_atomics = false,
|
||||||
.num_sdma_engines = 2,
|
.num_sdma_engines = 2,
|
||||||
|
.num_xgmi_sdma_engines = 0,
|
||||||
.num_sdma_queues_per_engine = 2,
|
.num_sdma_queues_per_engine = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -250,6 +279,7 @@ static const struct kfd_device_info vega10_vf_device_info = {
|
||||||
.needs_iommu_device = false,
|
.needs_iommu_device = false,
|
||||||
.needs_pci_atomics = false,
|
.needs_pci_atomics = false,
|
||||||
.num_sdma_engines = 2,
|
.num_sdma_engines = 2,
|
||||||
|
.num_xgmi_sdma_engines = 0,
|
||||||
.num_sdma_queues_per_engine = 2,
|
.num_sdma_queues_per_engine = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -266,6 +296,7 @@ static const struct kfd_device_info vega12_device_info = {
|
||||||
.needs_iommu_device = false,
|
.needs_iommu_device = false,
|
||||||
.needs_pci_atomics = false,
|
.needs_pci_atomics = false,
|
||||||
.num_sdma_engines = 2,
|
.num_sdma_engines = 2,
|
||||||
|
.num_xgmi_sdma_engines = 0,
|
||||||
.num_sdma_queues_per_engine = 2,
|
.num_sdma_queues_per_engine = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -282,6 +313,7 @@ static const struct kfd_device_info vega20_device_info = {
|
||||||
.needs_iommu_device = false,
|
.needs_iommu_device = false,
|
||||||
.needs_pci_atomics = false,
|
.needs_pci_atomics = false,
|
||||||
.num_sdma_engines = 2,
|
.num_sdma_engines = 2,
|
||||||
|
.num_xgmi_sdma_engines = 0,
|
||||||
.num_sdma_queues_per_engine = 8,
|
.num_sdma_queues_per_engine = 8,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -373,6 +405,9 @@ static const struct kfd_deviceid supported_devices[] = {
|
||||||
{ 0x6995, &polaris12_device_info }, /* Polaris12 */
|
{ 0x6995, &polaris12_device_info }, /* Polaris12 */
|
||||||
{ 0x6997, &polaris12_device_info }, /* Polaris12 */
|
{ 0x6997, &polaris12_device_info }, /* Polaris12 */
|
||||||
{ 0x699F, &polaris12_device_info }, /* Polaris12 */
|
{ 0x699F, &polaris12_device_info }, /* Polaris12 */
|
||||||
|
{ 0x694C, &vegam_device_info }, /* VegaM */
|
||||||
|
{ 0x694E, &vegam_device_info }, /* VegaM */
|
||||||
|
{ 0x694F, &vegam_device_info }, /* VegaM */
|
||||||
{ 0x6860, &vega10_device_info }, /* Vega10 */
|
{ 0x6860, &vega10_device_info }, /* Vega10 */
|
||||||
{ 0x6861, &vega10_device_info }, /* Vega10 */
|
{ 0x6861, &vega10_device_info }, /* Vega10 */
|
||||||
{ 0x6862, &vega10_device_info }, /* Vega10 */
|
{ 0x6862, &vega10_device_info }, /* Vega10 */
|
||||||
|
@ -518,6 +553,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
|
||||||
} else
|
} else
|
||||||
kfd->max_proc_per_quantum = hws_max_conc_proc;
|
kfd->max_proc_per_quantum = hws_max_conc_proc;
|
||||||
|
|
||||||
|
/* Allocate global GWS that is shared by all KFD processes */
|
||||||
|
if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd,
|
||||||
|
amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) {
|
||||||
|
dev_err(kfd_device, "Could not allocate %d gws\n",
|
||||||
|
amdgpu_amdkfd_get_num_gws(kfd->kgd));
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
/* calculate max size of mqds needed for queues */
|
/* calculate max size of mqds needed for queues */
|
||||||
size = max_num_of_queues_per_device *
|
size = max_num_of_queues_per_device *
|
||||||
kfd->device_info->mqd_size_aligned;
|
kfd->device_info->mqd_size_aligned;
|
||||||
|
@ -541,7 +583,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
|
||||||
&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
|
&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
|
||||||
false)) {
|
false)) {
|
||||||
dev_err(kfd_device, "Could not allocate %d bytes\n", size);
|
dev_err(kfd_device, "Could not allocate %d bytes\n", size);
|
||||||
goto out;
|
goto alloc_gtt_mem_failure;
|
||||||
}
|
}
|
||||||
|
|
||||||
dev_info(kfd_device, "Allocated %d bytes on gart\n", size);
|
dev_info(kfd_device, "Allocated %d bytes on gart\n", size);
|
||||||
|
@ -611,6 +653,9 @@ kfd_doorbell_error:
|
||||||
kfd_gtt_sa_fini(kfd);
|
kfd_gtt_sa_fini(kfd);
|
||||||
kfd_gtt_sa_init_error:
|
kfd_gtt_sa_init_error:
|
||||||
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
|
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
|
||||||
|
alloc_gtt_mem_failure:
|
||||||
|
if (hws_gws_support)
|
||||||
|
amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
|
||||||
dev_err(kfd_device,
|
dev_err(kfd_device,
|
||||||
"device %x:%x NOT added due to errors\n",
|
"device %x:%x NOT added due to errors\n",
|
||||||
kfd->pdev->vendor, kfd->pdev->device);
|
kfd->pdev->vendor, kfd->pdev->device);
|
||||||
|
@ -628,6 +673,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
|
||||||
kfd_doorbell_fini(kfd);
|
kfd_doorbell_fini(kfd);
|
||||||
kfd_gtt_sa_fini(kfd);
|
kfd_gtt_sa_fini(kfd);
|
||||||
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
|
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
|
||||||
|
if (hws_gws_support)
|
||||||
|
amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
|
||||||
}
|
}
|
||||||
|
|
||||||
kfree(kfd);
|
kfree(kfd);
|
||||||
|
|
|
@ -60,14 +60,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
|
||||||
struct qcm_process_device *qpd);
|
struct qcm_process_device *qpd);
|
||||||
|
|
||||||
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
|
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
|
||||||
unsigned int sdma_queue_id);
|
struct queue *q);
|
||||||
|
|
||||||
static void kfd_process_hw_exception(struct work_struct *work);
|
static void kfd_process_hw_exception(struct work_struct *work);
|
||||||
|
|
||||||
static inline
|
static inline
|
||||||
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
|
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
|
||||||
{
|
{
|
||||||
if (type == KFD_QUEUE_TYPE_SDMA)
|
if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
|
||||||
return KFD_MQD_TYPE_SDMA;
|
return KFD_MQD_TYPE_SDMA;
|
||||||
return KFD_MQD_TYPE_CP;
|
return KFD_MQD_TYPE_CP;
|
||||||
}
|
}
|
||||||
|
@ -107,12 +107,23 @@ static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
|
||||||
return dqm->dev->device_info->num_sdma_engines;
|
return dqm->dev->device_info->num_sdma_engines;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
|
||||||
|
{
|
||||||
|
return dqm->dev->device_info->num_xgmi_sdma_engines;
|
||||||
|
}
|
||||||
|
|
||||||
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
|
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
|
||||||
{
|
{
|
||||||
return dqm->dev->device_info->num_sdma_engines
|
return dqm->dev->device_info->num_sdma_engines
|
||||||
* dqm->dev->device_info->num_sdma_queues_per_engine;
|
* dqm->dev->device_info->num_sdma_queues_per_engine;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
|
||||||
|
{
|
||||||
|
return dqm->dev->device_info->num_xgmi_sdma_engines
|
||||||
|
* dqm->dev->device_info->num_sdma_queues_per_engine;
|
||||||
|
}
|
||||||
|
|
||||||
void program_sh_mem_settings(struct device_queue_manager *dqm,
|
void program_sh_mem_settings(struct device_queue_manager *dqm,
|
||||||
struct qcm_process_device *qpd)
|
struct qcm_process_device *qpd)
|
||||||
{
|
{
|
||||||
|
@ -133,7 +144,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
|
||||||
* preserve the user mode ABI.
|
* preserve the user mode ABI.
|
||||||
*/
|
*/
|
||||||
q->doorbell_id = q->properties.queue_id;
|
q->doorbell_id = q->properties.queue_id;
|
||||||
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||||
|
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||||
/* For SDMA queues on SOC15 with 8-byte doorbell, use static
|
/* For SDMA queues on SOC15 with 8-byte doorbell, use static
|
||||||
* doorbell assignments based on the engine and queue id.
|
* doorbell assignments based on the engine and queue id.
|
||||||
* The doobell index distance between RLC (2*i) and (2*i+1)
|
* The doobell index distance between RLC (2*i) and (2*i+1)
|
||||||
|
@ -174,7 +186,8 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
|
||||||
struct kfd_dev *dev = qpd->dqm->dev;
|
struct kfd_dev *dev = qpd->dqm->dev;
|
||||||
|
|
||||||
if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
|
if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
|
||||||
q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||||
|
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
|
old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
|
||||||
|
@ -289,7 +302,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
|
||||||
|
|
||||||
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
|
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
|
||||||
retval = create_compute_queue_nocpsch(dqm, q, qpd);
|
retval = create_compute_queue_nocpsch(dqm, q, qpd);
|
||||||
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||||
|
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
|
||||||
retval = create_sdma_queue_nocpsch(dqm, q, qpd);
|
retval = create_sdma_queue_nocpsch(dqm, q, qpd);
|
||||||
else
|
else
|
||||||
retval = -EINVAL;
|
retval = -EINVAL;
|
||||||
|
@ -307,6 +321,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
|
||||||
|
|
||||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
||||||
dqm->sdma_queue_count++;
|
dqm->sdma_queue_count++;
|
||||||
|
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
|
||||||
|
dqm->xgmi_sdma_queue_count++;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Unconditionally increment this counter, regardless of the queue's
|
* Unconditionally increment this counter, regardless of the queue's
|
||||||
|
@ -368,9 +384,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
|
||||||
struct mqd_manager *mqd_mgr;
|
struct mqd_manager *mqd_mgr;
|
||||||
int retval;
|
int retval;
|
||||||
|
|
||||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
|
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
|
||||||
if (!mqd_mgr)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
retval = allocate_hqd(dqm, q);
|
retval = allocate_hqd(dqm, q);
|
||||||
if (retval)
|
if (retval)
|
||||||
|
@ -425,16 +439,17 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
|
||||||
int retval;
|
int retval;
|
||||||
struct mqd_manager *mqd_mgr;
|
struct mqd_manager *mqd_mgr;
|
||||||
|
|
||||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||||
get_mqd_type_from_queue_type(q->properties.type));
|
q->properties.type)];
|
||||||
if (!mqd_mgr)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
|
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
|
||||||
deallocate_hqd(dqm, q);
|
deallocate_hqd(dqm, q);
|
||||||
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||||
dqm->sdma_queue_count--;
|
dqm->sdma_queue_count--;
|
||||||
deallocate_sdma_queue(dqm, q->sdma_id);
|
deallocate_sdma_queue(dqm, q);
|
||||||
|
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||||
|
dqm->xgmi_sdma_queue_count--;
|
||||||
|
deallocate_sdma_queue(dqm, q);
|
||||||
} else {
|
} else {
|
||||||
pr_debug("q->properties.type %d is invalid\n",
|
pr_debug("q->properties.type %d is invalid\n",
|
||||||
q->properties.type);
|
q->properties.type);
|
||||||
|
@ -501,12 +516,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
|
||||||
retval = -ENODEV;
|
retval = -ENODEV;
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
}
|
}
|
||||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||||
get_mqd_type_from_queue_type(q->properties.type));
|
q->properties.type)];
|
||||||
if (!mqd_mgr) {
|
|
||||||
retval = -ENOMEM;
|
|
||||||
goto out_unlock;
|
|
||||||
}
|
|
||||||
/*
|
/*
|
||||||
* Eviction state logic: we only mark active queues as evicted
|
* Eviction state logic: we only mark active queues as evicted
|
||||||
* to avoid the overhead of restoring inactive queues later
|
* to avoid the overhead of restoring inactive queues later
|
||||||
|
@ -529,7 +540,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
|
||||||
}
|
}
|
||||||
} else if (prev_active &&
|
} else if (prev_active &&
|
||||||
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
|
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
|
||||||
q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
|
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||||
|
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
|
||||||
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
|
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
|
||||||
KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
|
KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
|
||||||
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
|
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
|
||||||
|
@ -556,7 +568,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
|
||||||
retval = map_queues_cpsch(dqm);
|
retval = map_queues_cpsch(dqm);
|
||||||
else if (q->properties.is_active &&
|
else if (q->properties.is_active &&
|
||||||
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
|
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
|
||||||
q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
|
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||||
|
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
|
||||||
if (WARN(q->process->mm != current->mm,
|
if (WARN(q->process->mm != current->mm,
|
||||||
"should only run in user thread"))
|
"should only run in user thread"))
|
||||||
retval = -EFAULT;
|
retval = -EFAULT;
|
||||||
|
@ -571,27 +584,6 @@ out_unlock:
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct mqd_manager *get_mqd_manager(
|
|
||||||
struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
|
|
||||||
{
|
|
||||||
struct mqd_manager *mqd_mgr;
|
|
||||||
|
|
||||||
if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
pr_debug("mqd type %d\n", type);
|
|
||||||
|
|
||||||
mqd_mgr = dqm->mqd_mgrs[type];
|
|
||||||
if (!mqd_mgr) {
|
|
||||||
mqd_mgr = mqd_manager_init(type, dqm->dev);
|
|
||||||
if (!mqd_mgr)
|
|
||||||
pr_err("mqd manager is NULL");
|
|
||||||
dqm->mqd_mgrs[type] = mqd_mgr;
|
|
||||||
}
|
|
||||||
|
|
||||||
return mqd_mgr;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
|
static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||||
struct qcm_process_device *qpd)
|
struct qcm_process_device *qpd)
|
||||||
{
|
{
|
||||||
|
@ -612,13 +604,8 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||||
list_for_each_entry(q, &qpd->queues_list, list) {
|
list_for_each_entry(q, &qpd->queues_list, list) {
|
||||||
if (!q->properties.is_active)
|
if (!q->properties.is_active)
|
||||||
continue;
|
continue;
|
||||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||||
get_mqd_type_from_queue_type(q->properties.type));
|
q->properties.type)];
|
||||||
if (!mqd_mgr) { /* should not be here */
|
|
||||||
pr_err("Cannot evict queue, mqd mgr is NULL\n");
|
|
||||||
retval = -ENOMEM;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
q->properties.is_evicted = true;
|
q->properties.is_evicted = true;
|
||||||
q->properties.is_active = false;
|
q->properties.is_active = false;
|
||||||
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
|
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
|
||||||
|
@ -717,13 +704,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||||
list_for_each_entry(q, &qpd->queues_list, list) {
|
list_for_each_entry(q, &qpd->queues_list, list) {
|
||||||
if (!q->properties.is_evicted)
|
if (!q->properties.is_evicted)
|
||||||
continue;
|
continue;
|
||||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||||
get_mqd_type_from_queue_type(q->properties.type));
|
q->properties.type)];
|
||||||
if (!mqd_mgr) { /* should not be here */
|
|
||||||
pr_err("Cannot restore queue, mqd mgr is NULL\n");
|
|
||||||
retval = -ENOMEM;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
q->properties.is_evicted = false;
|
q->properties.is_evicted = false;
|
||||||
q->properties.is_active = true;
|
q->properties.is_active = true;
|
||||||
retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
|
retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
|
||||||
|
@ -812,10 +794,14 @@ static int register_process(struct device_queue_manager *dqm,
|
||||||
retval = dqm->asic_ops.update_qpd(dqm, qpd);
|
retval = dqm->asic_ops.update_qpd(dqm, qpd);
|
||||||
|
|
||||||
dqm->processes_count++;
|
dqm->processes_count++;
|
||||||
kfd_inc_compute_active(dqm->dev);
|
|
||||||
|
|
||||||
dqm_unlock(dqm);
|
dqm_unlock(dqm);
|
||||||
|
|
||||||
|
/* Outside the DQM lock because under the DQM lock we can't do
|
||||||
|
* reclaim or take other locks that others hold while reclaiming.
|
||||||
|
*/
|
||||||
|
kfd_inc_compute_active(dqm->dev);
|
||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -836,7 +822,6 @@ static int unregister_process(struct device_queue_manager *dqm,
|
||||||
list_del(&cur->list);
|
list_del(&cur->list);
|
||||||
kfree(cur);
|
kfree(cur);
|
||||||
dqm->processes_count--;
|
dqm->processes_count--;
|
||||||
kfd_dec_compute_active(dqm->dev);
|
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -844,6 +829,13 @@ static int unregister_process(struct device_queue_manager *dqm,
|
||||||
retval = 1;
|
retval = 1;
|
||||||
out:
|
out:
|
||||||
dqm_unlock(dqm);
|
dqm_unlock(dqm);
|
||||||
|
|
||||||
|
/* Outside the DQM lock because under the DQM lock we can't do
|
||||||
|
* reclaim or take other locks that others hold while reclaiming.
|
||||||
|
*/
|
||||||
|
if (!retval)
|
||||||
|
kfd_dec_compute_active(dqm->dev);
|
||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -879,6 +871,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
|
||||||
INIT_LIST_HEAD(&dqm->queues);
|
INIT_LIST_HEAD(&dqm->queues);
|
||||||
dqm->queue_count = dqm->next_pipe_to_allocate = 0;
|
dqm->queue_count = dqm->next_pipe_to_allocate = 0;
|
||||||
dqm->sdma_queue_count = 0;
|
dqm->sdma_queue_count = 0;
|
||||||
|
dqm->xgmi_sdma_queue_count = 0;
|
||||||
|
|
||||||
for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
|
for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
|
||||||
int pipe_offset = pipe * get_queues_per_pipe(dqm);
|
int pipe_offset = pipe * get_queues_per_pipe(dqm);
|
||||||
|
@ -890,7 +883,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
|
||||||
}
|
}
|
||||||
|
|
||||||
dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
|
dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
|
||||||
dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
|
dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
|
||||||
|
dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -921,26 +915,56 @@ static int stop_nocpsch(struct device_queue_manager *dqm)
|
||||||
}
|
}
|
||||||
|
|
||||||
static int allocate_sdma_queue(struct device_queue_manager *dqm,
|
static int allocate_sdma_queue(struct device_queue_manager *dqm,
|
||||||
unsigned int *sdma_queue_id)
|
struct queue *q)
|
||||||
{
|
{
|
||||||
int bit;
|
int bit;
|
||||||
|
|
||||||
if (dqm->sdma_bitmap == 0)
|
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||||
return -ENOMEM;
|
if (dqm->sdma_bitmap == 0)
|
||||||
|
return -ENOMEM;
|
||||||
|
bit = __ffs64(dqm->sdma_bitmap);
|
||||||
|
dqm->sdma_bitmap &= ~(1ULL << bit);
|
||||||
|
q->sdma_id = bit;
|
||||||
|
q->properties.sdma_engine_id = q->sdma_id %
|
||||||
|
get_num_sdma_engines(dqm);
|
||||||
|
q->properties.sdma_queue_id = q->sdma_id /
|
||||||
|
get_num_sdma_engines(dqm);
|
||||||
|
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||||
|
if (dqm->xgmi_sdma_bitmap == 0)
|
||||||
|
return -ENOMEM;
|
||||||
|
bit = __ffs64(dqm->xgmi_sdma_bitmap);
|
||||||
|
dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
|
||||||
|
q->sdma_id = bit;
|
||||||
|
/* sdma_engine_id is sdma id including
|
||||||
|
* both PCIe-optimized SDMAs and XGMI-
|
||||||
|
* optimized SDMAs. The calculation below
|
||||||
|
* assumes the first N engines are always
|
||||||
|
* PCIe-optimized ones
|
||||||
|
*/
|
||||||
|
q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
|
||||||
|
q->sdma_id % get_num_xgmi_sdma_engines(dqm);
|
||||||
|
q->properties.sdma_queue_id = q->sdma_id /
|
||||||
|
get_num_xgmi_sdma_engines(dqm);
|
||||||
|
}
|
||||||
|
|
||||||
bit = ffs(dqm->sdma_bitmap) - 1;
|
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
|
||||||
dqm->sdma_bitmap &= ~(1 << bit);
|
pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
|
||||||
*sdma_queue_id = bit;
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
|
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
|
||||||
unsigned int sdma_queue_id)
|
struct queue *q)
|
||||||
{
|
{
|
||||||
if (sdma_queue_id >= get_num_sdma_queues(dqm))
|
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||||
return;
|
if (q->sdma_id >= get_num_sdma_queues(dqm))
|
||||||
dqm->sdma_bitmap |= (1 << sdma_queue_id);
|
return;
|
||||||
|
dqm->sdma_bitmap |= (1ULL << q->sdma_id);
|
||||||
|
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||||
|
if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
|
||||||
|
return;
|
||||||
|
dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
|
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
|
||||||
|
@ -950,25 +974,16 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
|
||||||
struct mqd_manager *mqd_mgr;
|
struct mqd_manager *mqd_mgr;
|
||||||
int retval;
|
int retval;
|
||||||
|
|
||||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
|
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA];
|
||||||
if (!mqd_mgr)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
retval = allocate_sdma_queue(dqm, &q->sdma_id);
|
retval = allocate_sdma_queue(dqm, q);
|
||||||
if (retval)
|
if (retval)
|
||||||
return retval;
|
return retval;
|
||||||
|
|
||||||
q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
|
|
||||||
q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);
|
|
||||||
|
|
||||||
retval = allocate_doorbell(qpd, q);
|
retval = allocate_doorbell(qpd, q);
|
||||||
if (retval)
|
if (retval)
|
||||||
goto out_deallocate_sdma_queue;
|
goto out_deallocate_sdma_queue;
|
||||||
|
|
||||||
pr_debug("SDMA id is: %d\n", q->sdma_id);
|
|
||||||
pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
|
|
||||||
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
|
|
||||||
|
|
||||||
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
|
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
|
||||||
retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
|
retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
|
||||||
&q->gart_mqd_addr, &q->properties);
|
&q->gart_mqd_addr, &q->properties);
|
||||||
|
@ -987,7 +1002,7 @@ out_uninit_mqd:
|
||||||
out_deallocate_doorbell:
|
out_deallocate_doorbell:
|
||||||
deallocate_doorbell(qpd, q);
|
deallocate_doorbell(qpd, q);
|
||||||
out_deallocate_sdma_queue:
|
out_deallocate_sdma_queue:
|
||||||
deallocate_sdma_queue(dqm, q->sdma_id);
|
deallocate_sdma_queue(dqm, q);
|
||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
@ -1045,8 +1060,10 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
|
||||||
INIT_LIST_HEAD(&dqm->queues);
|
INIT_LIST_HEAD(&dqm->queues);
|
||||||
dqm->queue_count = dqm->processes_count = 0;
|
dqm->queue_count = dqm->processes_count = 0;
|
||||||
dqm->sdma_queue_count = 0;
|
dqm->sdma_queue_count = 0;
|
||||||
|
dqm->xgmi_sdma_queue_count = 0;
|
||||||
dqm->active_runlist = false;
|
dqm->active_runlist = false;
|
||||||
dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
|
dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
|
||||||
|
dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
|
||||||
|
|
||||||
INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
|
INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
|
||||||
|
|
||||||
|
@ -1161,38 +1178,26 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||||
int retval;
|
int retval;
|
||||||
struct mqd_manager *mqd_mgr;
|
struct mqd_manager *mqd_mgr;
|
||||||
|
|
||||||
retval = 0;
|
|
||||||
|
|
||||||
dqm_lock(dqm);
|
|
||||||
|
|
||||||
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
|
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
|
||||||
pr_warn("Can't create new usermode queue because %d queues were already created\n",
|
pr_warn("Can't create new usermode queue because %d queues were already created\n",
|
||||||
dqm->total_queue_count);
|
dqm->total_queue_count);
|
||||||
retval = -EPERM;
|
retval = -EPERM;
|
||||||
goto out_unlock;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||||
retval = allocate_sdma_queue(dqm, &q->sdma_id);
|
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||||
|
retval = allocate_sdma_queue(dqm, q);
|
||||||
if (retval)
|
if (retval)
|
||||||
goto out_unlock;
|
goto out;
|
||||||
q->properties.sdma_queue_id =
|
|
||||||
q->sdma_id / get_num_sdma_engines(dqm);
|
|
||||||
q->properties.sdma_engine_id =
|
|
||||||
q->sdma_id % get_num_sdma_engines(dqm);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
retval = allocate_doorbell(qpd, q);
|
retval = allocate_doorbell(qpd, q);
|
||||||
if (retval)
|
if (retval)
|
||||||
goto out_deallocate_sdma_queue;
|
goto out_deallocate_sdma_queue;
|
||||||
|
|
||||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||||
get_mqd_type_from_queue_type(q->properties.type));
|
q->properties.type)];
|
||||||
|
|
||||||
if (!mqd_mgr) {
|
|
||||||
retval = -ENOMEM;
|
|
||||||
goto out_deallocate_doorbell;
|
|
||||||
}
|
|
||||||
/*
|
/*
|
||||||
* Eviction state logic: we only mark active queues as evicted
|
* Eviction state logic: we only mark active queues as evicted
|
||||||
* to avoid the overhead of restoring inactive queues later
|
* to avoid the overhead of restoring inactive queues later
|
||||||
|
@ -1201,9 +1206,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||||
q->properties.is_evicted = (q->properties.queue_size > 0 &&
|
q->properties.is_evicted = (q->properties.queue_size > 0 &&
|
||||||
q->properties.queue_percent > 0 &&
|
q->properties.queue_percent > 0 &&
|
||||||
q->properties.queue_address != 0);
|
q->properties.queue_address != 0);
|
||||||
|
|
||||||
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
|
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
|
||||||
|
|
||||||
q->properties.tba_addr = qpd->tba_addr;
|
q->properties.tba_addr = qpd->tba_addr;
|
||||||
q->properties.tma_addr = qpd->tma_addr;
|
q->properties.tma_addr = qpd->tma_addr;
|
||||||
retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
|
retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
|
||||||
|
@ -1211,6 +1214,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||||
if (retval)
|
if (retval)
|
||||||
goto out_deallocate_doorbell;
|
goto out_deallocate_doorbell;
|
||||||
|
|
||||||
|
dqm_lock(dqm);
|
||||||
|
|
||||||
list_add(&q->list, &qpd->queues_list);
|
list_add(&q->list, &qpd->queues_list);
|
||||||
qpd->queue_count++;
|
qpd->queue_count++;
|
||||||
if (q->properties.is_active) {
|
if (q->properties.is_active) {
|
||||||
|
@ -1221,6 +1226,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||||
|
|
||||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
||||||
dqm->sdma_queue_count++;
|
dqm->sdma_queue_count++;
|
||||||
|
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
|
||||||
|
dqm->xgmi_sdma_queue_count++;
|
||||||
/*
|
/*
|
||||||
* Unconditionally increment this counter, regardless of the queue's
|
* Unconditionally increment this counter, regardless of the queue's
|
||||||
* type or whether the queue is active.
|
* type or whether the queue is active.
|
||||||
|
@ -1236,11 +1243,10 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||||
out_deallocate_doorbell:
|
out_deallocate_doorbell:
|
||||||
deallocate_doorbell(qpd, q);
|
deallocate_doorbell(qpd, q);
|
||||||
out_deallocate_sdma_queue:
|
out_deallocate_sdma_queue:
|
||||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||||
deallocate_sdma_queue(dqm, q->sdma_id);
|
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
|
||||||
out_unlock:
|
deallocate_sdma_queue(dqm, q);
|
||||||
dqm_unlock(dqm);
|
out:
|
||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1268,12 +1274,18 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int unmap_sdma_queues(struct device_queue_manager *dqm,
|
static int unmap_sdma_queues(struct device_queue_manager *dqm)
|
||||||
unsigned int sdma_engine)
|
|
||||||
{
|
{
|
||||||
return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
|
int i, retval = 0;
|
||||||
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
|
|
||||||
sdma_engine);
|
for (i = 0; i < dqm->dev->device_info->num_sdma_engines +
|
||||||
|
dqm->dev->device_info->num_xgmi_sdma_engines; i++) {
|
||||||
|
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
|
||||||
|
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i);
|
||||||
|
if (retval)
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* dqm->lock mutex has to be locked before calling this function */
|
/* dqm->lock mutex has to be locked before calling this function */
|
||||||
|
@ -1309,13 +1321,11 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
|
||||||
if (!dqm->active_runlist)
|
if (!dqm->active_runlist)
|
||||||
return retval;
|
return retval;
|
||||||
|
|
||||||
pr_debug("Before destroying queues, sdma queue count is : %u\n",
|
pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n",
|
||||||
dqm->sdma_queue_count);
|
dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count);
|
||||||
|
|
||||||
if (dqm->sdma_queue_count > 0) {
|
if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count)
|
||||||
unmap_sdma_queues(dqm, 0);
|
unmap_sdma_queues(dqm);
|
||||||
unmap_sdma_queues(dqm, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
|
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
|
||||||
filter, filter_param, false, 0);
|
filter, filter_param, false, 0);
|
||||||
|
@ -1379,18 +1389,17 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||||
get_mqd_type_from_queue_type(q->properties.type));
|
q->properties.type)];
|
||||||
if (!mqd_mgr) {
|
|
||||||
retval = -ENOMEM;
|
|
||||||
goto failed;
|
|
||||||
}
|
|
||||||
|
|
||||||
deallocate_doorbell(qpd, q);
|
deallocate_doorbell(qpd, q);
|
||||||
|
|
||||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||||
dqm->sdma_queue_count--;
|
dqm->sdma_queue_count--;
|
||||||
deallocate_sdma_queue(dqm, q->sdma_id);
|
deallocate_sdma_queue(dqm, q);
|
||||||
|
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||||
|
dqm->xgmi_sdma_queue_count--;
|
||||||
|
deallocate_sdma_queue(dqm, q);
|
||||||
}
|
}
|
||||||
|
|
||||||
list_del(&q->list);
|
list_del(&q->list);
|
||||||
|
@ -1403,8 +1412,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
|
||||||
qpd->reset_wavefronts = true;
|
qpd->reset_wavefronts = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Unconditionally decrement this counter, regardless of the queue's
|
* Unconditionally decrement this counter, regardless of the queue's
|
||||||
* type
|
* type
|
||||||
|
@ -1415,9 +1422,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
|
||||||
|
|
||||||
dqm_unlock(dqm);
|
dqm_unlock(dqm);
|
||||||
|
|
||||||
|
/* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
|
||||||
|
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
|
||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
|
|
||||||
failed:
|
|
||||||
failed_try_destroy_debugged_queue:
|
failed_try_destroy_debugged_queue:
|
||||||
|
|
||||||
dqm_unlock(dqm);
|
dqm_unlock(dqm);
|
||||||
|
@ -1520,6 +1529,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
|
||||||
struct queue *q, *next;
|
struct queue *q, *next;
|
||||||
struct device_process_node *cur, *next_dpn;
|
struct device_process_node *cur, *next_dpn;
|
||||||
int retval = 0;
|
int retval = 0;
|
||||||
|
bool found = false;
|
||||||
|
|
||||||
dqm_lock(dqm);
|
dqm_lock(dqm);
|
||||||
|
|
||||||
|
@ -1538,12 +1548,19 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
|
||||||
list_del(&cur->list);
|
list_del(&cur->list);
|
||||||
kfree(cur);
|
kfree(cur);
|
||||||
dqm->processes_count--;
|
dqm->processes_count--;
|
||||||
kfd_dec_compute_active(dqm->dev);
|
found = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dqm_unlock(dqm);
|
dqm_unlock(dqm);
|
||||||
|
|
||||||
|
/* Outside the DQM lock because under the DQM lock we can't do
|
||||||
|
* reclaim or take other locks that others hold while reclaiming.
|
||||||
|
*/
|
||||||
|
if (found)
|
||||||
|
kfd_dec_compute_active(dqm->dev);
|
||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1564,11 +1581,7 @@ static int get_wave_state(struct device_queue_manager *dqm,
|
||||||
goto dqm_unlock;
|
goto dqm_unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
|
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
|
||||||
if (!mqd_mgr) {
|
|
||||||
r = -ENOMEM;
|
|
||||||
goto dqm_unlock;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!mqd_mgr->get_wave_state) {
|
if (!mqd_mgr->get_wave_state) {
|
||||||
r = -EINVAL;
|
r = -EINVAL;
|
||||||
|
@ -1593,6 +1606,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
|
||||||
struct device_process_node *cur, *next_dpn;
|
struct device_process_node *cur, *next_dpn;
|
||||||
enum kfd_unmap_queues_filter filter =
|
enum kfd_unmap_queues_filter filter =
|
||||||
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
|
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
|
||||||
|
bool found = false;
|
||||||
|
|
||||||
retval = 0;
|
retval = 0;
|
||||||
|
|
||||||
|
@ -1611,7 +1625,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
|
||||||
list_for_each_entry(q, &qpd->queues_list, list) {
|
list_for_each_entry(q, &qpd->queues_list, list) {
|
||||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||||
dqm->sdma_queue_count--;
|
dqm->sdma_queue_count--;
|
||||||
deallocate_sdma_queue(dqm, q->sdma_id);
|
deallocate_sdma_queue(dqm, q);
|
||||||
|
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||||
|
dqm->xgmi_sdma_queue_count--;
|
||||||
|
deallocate_sdma_queue(dqm, q);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (q->properties.is_active)
|
if (q->properties.is_active)
|
||||||
|
@ -1626,7 +1643,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
|
||||||
list_del(&cur->list);
|
list_del(&cur->list);
|
||||||
kfree(cur);
|
kfree(cur);
|
||||||
dqm->processes_count--;
|
dqm->processes_count--;
|
||||||
kfd_dec_compute_active(dqm->dev);
|
found = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1638,21 +1655,68 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
|
||||||
qpd->reset_wavefronts = false;
|
qpd->reset_wavefronts = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* lastly, free mqd resources */
|
dqm_unlock(dqm);
|
||||||
|
|
||||||
|
/* Outside the DQM lock because under the DQM lock we can't do
|
||||||
|
* reclaim or take other locks that others hold while reclaiming.
|
||||||
|
*/
|
||||||
|
if (found)
|
||||||
|
kfd_dec_compute_active(dqm->dev);
|
||||||
|
|
||||||
|
/* Lastly, free mqd resources.
|
||||||
|
* Do uninit_mqd() after dqm_unlock to avoid circular locking.
|
||||||
|
*/
|
||||||
list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
|
list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
|
||||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||||
get_mqd_type_from_queue_type(q->properties.type));
|
q->properties.type)];
|
||||||
if (!mqd_mgr) {
|
|
||||||
retval = -ENOMEM;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
list_del(&q->list);
|
list_del(&q->list);
|
||||||
qpd->queue_count--;
|
qpd->queue_count--;
|
||||||
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
|
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
|
||||||
}
|
}
|
||||||
|
|
||||||
out:
|
return retval;
|
||||||
dqm_unlock(dqm);
|
}
|
||||||
|
|
||||||
|
static int init_mqd_managers(struct device_queue_manager *dqm)
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
struct mqd_manager *mqd_mgr;
|
||||||
|
|
||||||
|
for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
|
||||||
|
mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
|
||||||
|
if (!mqd_mgr) {
|
||||||
|
pr_err("mqd manager [%d] initialization failed\n", i);
|
||||||
|
goto out_free;
|
||||||
|
}
|
||||||
|
dqm->mqd_mgrs[i] = mqd_mgr;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
out_free:
|
||||||
|
for (j = 0; j < i; j++) {
|
||||||
|
kfree(dqm->mqd_mgrs[j]);
|
||||||
|
dqm->mqd_mgrs[j] = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/
|
||||||
|
static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
|
||||||
|
{
|
||||||
|
int retval;
|
||||||
|
struct kfd_dev *dev = dqm->dev;
|
||||||
|
struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
|
||||||
|
uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
|
||||||
|
dev->device_info->num_sdma_engines *
|
||||||
|
dev->device_info->num_sdma_queues_per_engine +
|
||||||
|
dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
|
||||||
|
|
||||||
|
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
|
||||||
|
&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
|
||||||
|
(void *)&(mem_obj->cpu_ptr), true);
|
||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1693,7 +1757,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
||||||
dqm->ops.stop = stop_cpsch;
|
dqm->ops.stop = stop_cpsch;
|
||||||
dqm->ops.destroy_queue = destroy_queue_cpsch;
|
dqm->ops.destroy_queue = destroy_queue_cpsch;
|
||||||
dqm->ops.update_queue = update_queue;
|
dqm->ops.update_queue = update_queue;
|
||||||
dqm->ops.get_mqd_manager = get_mqd_manager;
|
|
||||||
dqm->ops.register_process = register_process;
|
dqm->ops.register_process = register_process;
|
||||||
dqm->ops.unregister_process = unregister_process;
|
dqm->ops.unregister_process = unregister_process;
|
||||||
dqm->ops.uninitialize = uninitialize;
|
dqm->ops.uninitialize = uninitialize;
|
||||||
|
@ -1713,7 +1776,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
||||||
dqm->ops.create_queue = create_queue_nocpsch;
|
dqm->ops.create_queue = create_queue_nocpsch;
|
||||||
dqm->ops.destroy_queue = destroy_queue_nocpsch;
|
dqm->ops.destroy_queue = destroy_queue_nocpsch;
|
||||||
dqm->ops.update_queue = update_queue;
|
dqm->ops.update_queue = update_queue;
|
||||||
dqm->ops.get_mqd_manager = get_mqd_manager;
|
|
||||||
dqm->ops.register_process = register_process;
|
dqm->ops.register_process = register_process;
|
||||||
dqm->ops.unregister_process = unregister_process;
|
dqm->ops.unregister_process = unregister_process;
|
||||||
dqm->ops.initialize = initialize_nocpsch;
|
dqm->ops.initialize = initialize_nocpsch;
|
||||||
|
@ -1749,6 +1811,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
||||||
case CHIP_POLARIS10:
|
case CHIP_POLARIS10:
|
||||||
case CHIP_POLARIS11:
|
case CHIP_POLARIS11:
|
||||||
case CHIP_POLARIS12:
|
case CHIP_POLARIS12:
|
||||||
|
case CHIP_VEGAM:
|
||||||
device_queue_manager_init_vi_tonga(&dqm->asic_ops);
|
device_queue_manager_init_vi_tonga(&dqm->asic_ops);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -1764,6 +1827,14 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
||||||
goto out_free;
|
goto out_free;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (init_mqd_managers(dqm))
|
||||||
|
goto out_free;
|
||||||
|
|
||||||
|
if (allocate_hiq_sdma_mqd(dqm)) {
|
||||||
|
pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
|
||||||
|
goto out_free;
|
||||||
|
}
|
||||||
|
|
||||||
if (!dqm->ops.initialize(dqm))
|
if (!dqm->ops.initialize(dqm))
|
||||||
return dqm;
|
return dqm;
|
||||||
|
|
||||||
|
@ -1772,9 +1843,17 @@ out_free:
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, struct kfd_mem_obj *mqd)
|
||||||
|
{
|
||||||
|
WARN(!mqd, "No hiq sdma mqd trunk to free");
|
||||||
|
|
||||||
|
amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem);
|
||||||
|
}
|
||||||
|
|
||||||
void device_queue_manager_uninit(struct device_queue_manager *dqm)
|
void device_queue_manager_uninit(struct device_queue_manager *dqm)
|
||||||
{
|
{
|
||||||
dqm->ops.uninitialize(dqm);
|
dqm->ops.uninitialize(dqm);
|
||||||
|
deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
|
||||||
kfree(dqm);
|
kfree(dqm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -48,8 +48,6 @@ struct device_process_node {
|
||||||
*
|
*
|
||||||
* @update_queue: Queue update routine.
|
* @update_queue: Queue update routine.
|
||||||
*
|
*
|
||||||
* @get_mqd_manager: Returns the mqd manager according to the mqd type.
|
|
||||||
*
|
|
||||||
* @exeute_queues: Dispatches the queues list to the H/W.
|
* @exeute_queues: Dispatches the queues list to the H/W.
|
||||||
*
|
*
|
||||||
* @register_process: This routine associates a specific process with device.
|
* @register_process: This routine associates a specific process with device.
|
||||||
|
@ -97,10 +95,6 @@ struct device_queue_manager_ops {
|
||||||
int (*update_queue)(struct device_queue_manager *dqm,
|
int (*update_queue)(struct device_queue_manager *dqm,
|
||||||
struct queue *q);
|
struct queue *q);
|
||||||
|
|
||||||
struct mqd_manager * (*get_mqd_manager)
|
|
||||||
(struct device_queue_manager *dqm,
|
|
||||||
enum KFD_MQD_TYPE type);
|
|
||||||
|
|
||||||
int (*register_process)(struct device_queue_manager *dqm,
|
int (*register_process)(struct device_queue_manager *dqm,
|
||||||
struct qcm_process_device *qpd);
|
struct qcm_process_device *qpd);
|
||||||
|
|
||||||
|
@ -158,6 +152,8 @@ struct device_queue_manager_asic_ops {
|
||||||
void (*init_sdma_vm)(struct device_queue_manager *dqm,
|
void (*init_sdma_vm)(struct device_queue_manager *dqm,
|
||||||
struct queue *q,
|
struct queue *q,
|
||||||
struct qcm_process_device *qpd);
|
struct qcm_process_device *qpd);
|
||||||
|
struct mqd_manager * (*mqd_manager_init)(enum KFD_MQD_TYPE type,
|
||||||
|
struct kfd_dev *dev);
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -185,10 +181,12 @@ struct device_queue_manager {
|
||||||
unsigned int processes_count;
|
unsigned int processes_count;
|
||||||
unsigned int queue_count;
|
unsigned int queue_count;
|
||||||
unsigned int sdma_queue_count;
|
unsigned int sdma_queue_count;
|
||||||
|
unsigned int xgmi_sdma_queue_count;
|
||||||
unsigned int total_queue_count;
|
unsigned int total_queue_count;
|
||||||
unsigned int next_pipe_to_allocate;
|
unsigned int next_pipe_to_allocate;
|
||||||
unsigned int *allocated_queues;
|
unsigned int *allocated_queues;
|
||||||
unsigned int sdma_bitmap;
|
uint64_t sdma_bitmap;
|
||||||
|
uint64_t xgmi_sdma_bitmap;
|
||||||
unsigned int vmid_bitmap;
|
unsigned int vmid_bitmap;
|
||||||
uint64_t pipelines_addr;
|
uint64_t pipelines_addr;
|
||||||
struct kfd_mem_obj *pipeline_mem;
|
struct kfd_mem_obj *pipeline_mem;
|
||||||
|
@ -201,6 +199,7 @@ struct device_queue_manager {
|
||||||
/* hw exception */
|
/* hw exception */
|
||||||
bool is_hws_hang;
|
bool is_hws_hang;
|
||||||
struct work_struct hw_exception_work;
|
struct work_struct hw_exception_work;
|
||||||
|
struct kfd_mem_obj hiq_sdma_mqd;
|
||||||
};
|
};
|
||||||
|
|
||||||
void device_queue_manager_init_cik(
|
void device_queue_manager_init_cik(
|
||||||
|
@ -219,6 +218,7 @@ unsigned int get_queues_num(struct device_queue_manager *dqm);
|
||||||
unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
|
unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
|
||||||
unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
|
unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
|
||||||
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm);
|
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm);
|
||||||
|
unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm);
|
||||||
|
|
||||||
static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
|
static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
|
||||||
{
|
{
|
||||||
|
|
|
@ -48,6 +48,7 @@ void device_queue_manager_init_cik(
|
||||||
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
|
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
|
||||||
asic_ops->update_qpd = update_qpd_cik;
|
asic_ops->update_qpd = update_qpd_cik;
|
||||||
asic_ops->init_sdma_vm = init_sdma_vm;
|
asic_ops->init_sdma_vm = init_sdma_vm;
|
||||||
|
asic_ops->mqd_manager_init = mqd_manager_init_cik;
|
||||||
}
|
}
|
||||||
|
|
||||||
void device_queue_manager_init_cik_hawaii(
|
void device_queue_manager_init_cik_hawaii(
|
||||||
|
@ -56,6 +57,7 @@ void device_queue_manager_init_cik_hawaii(
|
||||||
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
|
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
|
||||||
asic_ops->update_qpd = update_qpd_cik_hawaii;
|
asic_ops->update_qpd = update_qpd_cik_hawaii;
|
||||||
asic_ops->init_sdma_vm = init_sdma_vm_hawaii;
|
asic_ops->init_sdma_vm = init_sdma_vm_hawaii;
|
||||||
|
asic_ops->mqd_manager_init = mqd_manager_init_cik_hawaii;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
|
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
|
||||||
|
|
|
@ -37,6 +37,7 @@ void device_queue_manager_init_v9(
|
||||||
{
|
{
|
||||||
asic_ops->update_qpd = update_qpd_v9;
|
asic_ops->update_qpd = update_qpd_v9;
|
||||||
asic_ops->init_sdma_vm = init_sdma_vm_v9;
|
asic_ops->init_sdma_vm = init_sdma_vm_v9;
|
||||||
|
asic_ops->mqd_manager_init = mqd_manager_init_v9;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
|
static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
|
||||||
|
|
|
@ -54,6 +54,7 @@ void device_queue_manager_init_vi(
|
||||||
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
|
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
|
||||||
asic_ops->update_qpd = update_qpd_vi;
|
asic_ops->update_qpd = update_qpd_vi;
|
||||||
asic_ops->init_sdma_vm = init_sdma_vm;
|
asic_ops->init_sdma_vm = init_sdma_vm;
|
||||||
|
asic_ops->mqd_manager_init = mqd_manager_init_vi;
|
||||||
}
|
}
|
||||||
|
|
||||||
void device_queue_manager_init_vi_tonga(
|
void device_queue_manager_init_vi_tonga(
|
||||||
|
@ -62,6 +63,7 @@ void device_queue_manager_init_vi_tonga(
|
||||||
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;
|
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;
|
||||||
asic_ops->update_qpd = update_qpd_vi_tonga;
|
asic_ops->update_qpd = update_qpd_vi_tonga;
|
||||||
asic_ops->init_sdma_vm = init_sdma_vm_tonga;
|
asic_ops->init_sdma_vm = init_sdma_vm_tonga;
|
||||||
|
asic_ops->mqd_manager_init = mqd_manager_init_vi_tonga;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
|
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
|
||||||
|
|
|
@ -983,7 +983,7 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
|
||||||
return; /* Presumably process exited. */
|
return; /* Presumably process exited. */
|
||||||
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
|
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
|
||||||
memory_exception_data.gpu_id = dev->id;
|
memory_exception_data.gpu_id = dev->id;
|
||||||
memory_exception_data.failure.imprecise = 1;
|
memory_exception_data.failure.imprecise = true;
|
||||||
/* Set failure reason */
|
/* Set failure reason */
|
||||||
if (info) {
|
if (info) {
|
||||||
memory_exception_data.va = (info->page_addr) << PAGE_SHIFT;
|
memory_exception_data.va = (info->page_addr) << PAGE_SHIFT;
|
||||||
|
|
|
@ -398,6 +398,7 @@ int kfd_init_apertures(struct kfd_process *process)
|
||||||
case CHIP_POLARIS10:
|
case CHIP_POLARIS10:
|
||||||
case CHIP_POLARIS11:
|
case CHIP_POLARIS11:
|
||||||
case CHIP_POLARIS12:
|
case CHIP_POLARIS12:
|
||||||
|
case CHIP_VEGAM:
|
||||||
kfd_init_apertures_vi(pdd, id);
|
kfd_init_apertures_vi(pdd, id);
|
||||||
break;
|
break;
|
||||||
case CHIP_VEGA10:
|
case CHIP_VEGA10:
|
||||||
|
@ -435,5 +436,3 @@ int kfd_init_apertures(struct kfd_process *process)
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -58,9 +58,10 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||||
kq->nop_packet = nop.u32all;
|
kq->nop_packet = nop.u32all;
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case KFD_QUEUE_TYPE_DIQ:
|
case KFD_QUEUE_TYPE_DIQ:
|
||||||
|
kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_DIQ];
|
||||||
|
break;
|
||||||
case KFD_QUEUE_TYPE_HIQ:
|
case KFD_QUEUE_TYPE_HIQ:
|
||||||
kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm,
|
kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
|
||||||
KFD_MQD_TYPE_HIQ);
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
pr_err("Invalid queue type %d\n", type);
|
pr_err("Invalid queue type %d\n", type);
|
||||||
|
@ -314,6 +315,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
|
||||||
case CHIP_POLARIS10:
|
case CHIP_POLARIS10:
|
||||||
case CHIP_POLARIS11:
|
case CHIP_POLARIS11:
|
||||||
case CHIP_POLARIS12:
|
case CHIP_POLARIS12:
|
||||||
|
case CHIP_VEGAM:
|
||||||
kernel_queue_init_vi(&kq->ops_asic_specific);
|
kernel_queue_init_vi(&kq->ops_asic_specific);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
|
@ -153,14 +153,13 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
|
||||||
|
|
||||||
packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
|
packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
|
||||||
sizeof(struct pm4_mes_map_queues));
|
sizeof(struct pm4_mes_map_queues));
|
||||||
packet->bitfields2.alloc_format =
|
|
||||||
alloc_format__mes_map_queues__one_per_pipe_vi;
|
|
||||||
packet->bitfields2.num_queues = 1;
|
packet->bitfields2.num_queues = 1;
|
||||||
packet->bitfields2.queue_sel =
|
packet->bitfields2.queue_sel =
|
||||||
queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
|
queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
|
||||||
|
|
||||||
packet->bitfields2.engine_sel =
|
packet->bitfields2.engine_sel =
|
||||||
engine_sel__mes_map_queues__compute_vi;
|
engine_sel__mes_map_queues__compute_vi;
|
||||||
|
packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
|
||||||
packet->bitfields2.queue_type =
|
packet->bitfields2.queue_type =
|
||||||
queue_type__mes_map_queues__normal_compute_vi;
|
queue_type__mes_map_queues__normal_compute_vi;
|
||||||
|
|
||||||
|
@ -175,6 +174,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
|
||||||
queue_type__mes_map_queues__debug_interface_queue_vi;
|
queue_type__mes_map_queues__debug_interface_queue_vi;
|
||||||
break;
|
break;
|
||||||
case KFD_QUEUE_TYPE_SDMA:
|
case KFD_QUEUE_TYPE_SDMA:
|
||||||
|
case KFD_QUEUE_TYPE_SDMA_XGMI:
|
||||||
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
|
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
|
||||||
engine_sel__mes_map_queues__sdma0_vi;
|
engine_sel__mes_map_queues__sdma0_vi;
|
||||||
use_static = false; /* no static queues under SDMA */
|
use_static = false; /* no static queues under SDMA */
|
||||||
|
@ -221,6 +221,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
|
||||||
engine_sel__mes_unmap_queues__compute;
|
engine_sel__mes_unmap_queues__compute;
|
||||||
break;
|
break;
|
||||||
case KFD_QUEUE_TYPE_SDMA:
|
case KFD_QUEUE_TYPE_SDMA:
|
||||||
|
case KFD_QUEUE_TYPE_SDMA_XGMI:
|
||||||
packet->bitfields2.engine_sel =
|
packet->bitfields2.engine_sel =
|
||||||
engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
|
engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -190,8 +190,6 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
|
||||||
|
|
||||||
packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
|
packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
|
||||||
sizeof(struct pm4_mes_map_queues));
|
sizeof(struct pm4_mes_map_queues));
|
||||||
packet->bitfields2.alloc_format =
|
|
||||||
alloc_format__mes_map_queues__one_per_pipe_vi;
|
|
||||||
packet->bitfields2.num_queues = 1;
|
packet->bitfields2.num_queues = 1;
|
||||||
packet->bitfields2.queue_sel =
|
packet->bitfields2.queue_sel =
|
||||||
queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
|
queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
|
||||||
|
@ -212,6 +210,7 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
|
||||||
queue_type__mes_map_queues__debug_interface_queue_vi;
|
queue_type__mes_map_queues__debug_interface_queue_vi;
|
||||||
break;
|
break;
|
||||||
case KFD_QUEUE_TYPE_SDMA:
|
case KFD_QUEUE_TYPE_SDMA:
|
||||||
|
case KFD_QUEUE_TYPE_SDMA_XGMI:
|
||||||
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
|
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
|
||||||
engine_sel__mes_map_queues__sdma0_vi;
|
engine_sel__mes_map_queues__sdma0_vi;
|
||||||
use_static = false; /* no static queues under SDMA */
|
use_static = false; /* no static queues under SDMA */
|
||||||
|
@ -258,6 +257,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
|
||||||
engine_sel__mes_unmap_queues__compute;
|
engine_sel__mes_unmap_queues__compute;
|
||||||
break;
|
break;
|
||||||
case KFD_QUEUE_TYPE_SDMA:
|
case KFD_QUEUE_TYPE_SDMA:
|
||||||
|
case KFD_QUEUE_TYPE_SDMA_XGMI:
|
||||||
packet->bitfields2.engine_sel =
|
packet->bitfields2.engine_sel =
|
||||||
engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
|
engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -23,34 +23,54 @@
|
||||||
|
|
||||||
#include "kfd_mqd_manager.h"
|
#include "kfd_mqd_manager.h"
|
||||||
#include "amdgpu_amdkfd.h"
|
#include "amdgpu_amdkfd.h"
|
||||||
|
#include "kfd_device_queue_manager.h"
|
||||||
|
|
||||||
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
|
struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev)
|
||||||
struct kfd_dev *dev)
|
|
||||||
{
|
{
|
||||||
switch (dev->device_info->asic_family) {
|
struct kfd_mem_obj *mqd_mem_obj = NULL;
|
||||||
case CHIP_KAVERI:
|
|
||||||
return mqd_manager_init_cik(type, dev);
|
|
||||||
case CHIP_HAWAII:
|
|
||||||
return mqd_manager_init_cik_hawaii(type, dev);
|
|
||||||
case CHIP_CARRIZO:
|
|
||||||
return mqd_manager_init_vi(type, dev);
|
|
||||||
case CHIP_TONGA:
|
|
||||||
case CHIP_FIJI:
|
|
||||||
case CHIP_POLARIS10:
|
|
||||||
case CHIP_POLARIS11:
|
|
||||||
case CHIP_POLARIS12:
|
|
||||||
return mqd_manager_init_vi_tonga(type, dev);
|
|
||||||
case CHIP_VEGA10:
|
|
||||||
case CHIP_VEGA12:
|
|
||||||
case CHIP_VEGA20:
|
|
||||||
case CHIP_RAVEN:
|
|
||||||
return mqd_manager_init_v9(type, dev);
|
|
||||||
default:
|
|
||||||
WARN(1, "Unexpected ASIC family %u",
|
|
||||||
dev->device_info->asic_family);
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
|
||||||
|
if (!mqd_mem_obj)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
mqd_mem_obj->gtt_mem = dev->dqm->hiq_sdma_mqd.gtt_mem;
|
||||||
|
mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr;
|
||||||
|
mqd_mem_obj->cpu_ptr = dev->dqm->hiq_sdma_mqd.cpu_ptr;
|
||||||
|
|
||||||
|
return mqd_mem_obj;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
|
||||||
|
struct queue_properties *q)
|
||||||
|
{
|
||||||
|
struct kfd_mem_obj *mqd_mem_obj = NULL;
|
||||||
|
uint64_t offset;
|
||||||
|
|
||||||
|
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
|
||||||
|
if (!mqd_mem_obj)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
offset = (q->sdma_engine_id *
|
||||||
|
dev->device_info->num_sdma_queues_per_engine +
|
||||||
|
q->sdma_queue_id) *
|
||||||
|
dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
|
||||||
|
|
||||||
|
offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
|
||||||
|
|
||||||
|
mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem
|
||||||
|
+ offset);
|
||||||
|
mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset;
|
||||||
|
mqd_mem_obj->cpu_ptr = (uint32_t *)((uint64_t)
|
||||||
|
dev->dqm->hiq_sdma_mqd.cpu_ptr + offset);
|
||||||
|
|
||||||
|
return mqd_mem_obj;
|
||||||
|
}
|
||||||
|
|
||||||
|
void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
|
||||||
|
struct kfd_mem_obj *mqd_mem_obj)
|
||||||
|
{
|
||||||
|
WARN_ON(!mqd_mem_obj->gtt_mem);
|
||||||
|
kfree(mqd_mem_obj);
|
||||||
}
|
}
|
||||||
|
|
||||||
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
|
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
|
||||||
|
|
|
@ -99,8 +99,16 @@ struct mqd_manager {
|
||||||
|
|
||||||
struct mutex mqd_mutex;
|
struct mutex mqd_mutex;
|
||||||
struct kfd_dev *dev;
|
struct kfd_dev *dev;
|
||||||
|
uint32_t mqd_size;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev);
|
||||||
|
|
||||||
|
struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
|
||||||
|
struct queue_properties *q);
|
||||||
|
void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
|
||||||
|
struct kfd_mem_obj *mqd_mem_obj);
|
||||||
|
|
||||||
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
|
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
|
||||||
const uint32_t *cu_mask, uint32_t cu_mask_count,
|
const uint32_t *cu_mask, uint32_t cu_mask_count,
|
||||||
uint32_t *se_mask);
|
uint32_t *se_mask);
|
||||||
|
|
|
@ -66,6 +66,22 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
|
||||||
m->compute_static_thread_mgmt_se3);
|
m->compute_static_thread_mgmt_se3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
|
||||||
|
struct queue_properties *q)
|
||||||
|
{
|
||||||
|
struct kfd_mem_obj *mqd_mem_obj;
|
||||||
|
|
||||||
|
if (q->type == KFD_QUEUE_TYPE_HIQ)
|
||||||
|
return allocate_hiq_mqd(kfd);
|
||||||
|
|
||||||
|
if (kfd_gtt_sa_allocate(kfd, sizeof(struct cik_mqd),
|
||||||
|
&mqd_mem_obj))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
return mqd_mem_obj;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static int init_mqd(struct mqd_manager *mm, void **mqd,
|
static int init_mqd(struct mqd_manager *mm, void **mqd,
|
||||||
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
|
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
|
||||||
struct queue_properties *q)
|
struct queue_properties *q)
|
||||||
|
@ -73,11 +89,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
|
||||||
uint64_t addr;
|
uint64_t addr;
|
||||||
struct cik_mqd *m;
|
struct cik_mqd *m;
|
||||||
int retval;
|
int retval;
|
||||||
|
struct kfd_dev *kfd = mm->dev;
|
||||||
|
|
||||||
retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
|
*mqd_mem_obj = allocate_mqd(kfd, q);
|
||||||
mqd_mem_obj);
|
if (!*mqd_mem_obj)
|
||||||
|
|
||||||
if (retval != 0)
|
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
||||||
|
@ -136,12 +151,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
||||||
{
|
{
|
||||||
int retval;
|
int retval;
|
||||||
struct cik_sdma_rlc_registers *m;
|
struct cik_sdma_rlc_registers *m;
|
||||||
|
struct kfd_dev *dev = mm->dev;
|
||||||
|
|
||||||
retval = kfd_gtt_sa_allocate(mm->dev,
|
*mqd_mem_obj = allocate_sdma_mqd(dev, q);
|
||||||
sizeof(struct cik_sdma_rlc_registers),
|
if (!*mqd_mem_obj)
|
||||||
mqd_mem_obj);
|
|
||||||
|
|
||||||
if (retval != 0)
|
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr;
|
m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr;
|
||||||
|
@ -163,11 +176,6 @@ static void uninit_mqd(struct mqd_manager *mm, void *mqd,
|
||||||
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
|
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
|
||||||
struct kfd_mem_obj *mqd_mem_obj)
|
|
||||||
{
|
|
||||||
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
|
static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
|
||||||
uint32_t queue_id, struct queue_properties *p,
|
uint32_t queue_id, struct queue_properties *p,
|
||||||
|
@ -400,28 +408,43 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
|
||||||
mqd->update_mqd = update_mqd;
|
mqd->update_mqd = update_mqd;
|
||||||
mqd->destroy_mqd = destroy_mqd;
|
mqd->destroy_mqd = destroy_mqd;
|
||||||
mqd->is_occupied = is_occupied;
|
mqd->is_occupied = is_occupied;
|
||||||
|
mqd->mqd_size = sizeof(struct cik_mqd);
|
||||||
#if defined(CONFIG_DEBUG_FS)
|
#if defined(CONFIG_DEBUG_FS)
|
||||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
case KFD_MQD_TYPE_HIQ:
|
case KFD_MQD_TYPE_HIQ:
|
||||||
|
mqd->init_mqd = init_mqd_hiq;
|
||||||
|
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
|
||||||
|
mqd->load_mqd = load_mqd;
|
||||||
|
mqd->update_mqd = update_mqd_hiq;
|
||||||
|
mqd->destroy_mqd = destroy_mqd;
|
||||||
|
mqd->is_occupied = is_occupied;
|
||||||
|
mqd->mqd_size = sizeof(struct cik_mqd);
|
||||||
|
#if defined(CONFIG_DEBUG_FS)
|
||||||
|
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case KFD_MQD_TYPE_DIQ:
|
||||||
mqd->init_mqd = init_mqd_hiq;
|
mqd->init_mqd = init_mqd_hiq;
|
||||||
mqd->uninit_mqd = uninit_mqd;
|
mqd->uninit_mqd = uninit_mqd;
|
||||||
mqd->load_mqd = load_mqd;
|
mqd->load_mqd = load_mqd;
|
||||||
mqd->update_mqd = update_mqd_hiq;
|
mqd->update_mqd = update_mqd_hiq;
|
||||||
mqd->destroy_mqd = destroy_mqd;
|
mqd->destroy_mqd = destroy_mqd;
|
||||||
mqd->is_occupied = is_occupied;
|
mqd->is_occupied = is_occupied;
|
||||||
|
mqd->mqd_size = sizeof(struct cik_mqd);
|
||||||
#if defined(CONFIG_DEBUG_FS)
|
#if defined(CONFIG_DEBUG_FS)
|
||||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
case KFD_MQD_TYPE_SDMA:
|
case KFD_MQD_TYPE_SDMA:
|
||||||
mqd->init_mqd = init_mqd_sdma;
|
mqd->init_mqd = init_mqd_sdma;
|
||||||
mqd->uninit_mqd = uninit_mqd_sdma;
|
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
|
||||||
mqd->load_mqd = load_mqd_sdma;
|
mqd->load_mqd = load_mqd_sdma;
|
||||||
mqd->update_mqd = update_mqd_sdma;
|
mqd->update_mqd = update_mqd_sdma;
|
||||||
mqd->destroy_mqd = destroy_mqd_sdma;
|
mqd->destroy_mqd = destroy_mqd_sdma;
|
||||||
mqd->is_occupied = is_occupied_sdma;
|
mqd->is_occupied = is_occupied_sdma;
|
||||||
|
mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers);
|
||||||
#if defined(CONFIG_DEBUG_FS)
|
#if defined(CONFIG_DEBUG_FS)
|
||||||
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
|
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -67,6 +67,43 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
|
||||||
m->compute_static_thread_mgmt_se3);
|
m->compute_static_thread_mgmt_se3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
|
||||||
|
struct queue_properties *q)
|
||||||
|
{
|
||||||
|
int retval;
|
||||||
|
struct kfd_mem_obj *mqd_mem_obj = NULL;
|
||||||
|
|
||||||
|
if (q->type == KFD_QUEUE_TYPE_HIQ)
|
||||||
|
return allocate_hiq_mqd(kfd);
|
||||||
|
|
||||||
|
/* From V9, for CWSR, the control stack is located on the next page
|
||||||
|
* boundary after the mqd, we will use the gtt allocation function
|
||||||
|
* instead of sub-allocation function.
|
||||||
|
*/
|
||||||
|
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
|
||||||
|
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
|
||||||
|
if (!mqd_mem_obj)
|
||||||
|
return NULL;
|
||||||
|
retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
|
||||||
|
ALIGN(q->ctl_stack_size, PAGE_SIZE) +
|
||||||
|
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
|
||||||
|
&(mqd_mem_obj->gtt_mem),
|
||||||
|
&(mqd_mem_obj->gpu_addr),
|
||||||
|
(void *)&(mqd_mem_obj->cpu_ptr), true);
|
||||||
|
} else {
|
||||||
|
retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd),
|
||||||
|
&mqd_mem_obj);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (retval) {
|
||||||
|
kfree(mqd_mem_obj);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return mqd_mem_obj;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
static int init_mqd(struct mqd_manager *mm, void **mqd,
|
static int init_mqd(struct mqd_manager *mm, void **mqd,
|
||||||
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
|
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
|
||||||
struct queue_properties *q)
|
struct queue_properties *q)
|
||||||
|
@ -76,24 +113,8 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
|
||||||
struct v9_mqd *m;
|
struct v9_mqd *m;
|
||||||
struct kfd_dev *kfd = mm->dev;
|
struct kfd_dev *kfd = mm->dev;
|
||||||
|
|
||||||
/* From V9, for CWSR, the control stack is located on the next page
|
*mqd_mem_obj = allocate_mqd(kfd, q);
|
||||||
* boundary after the mqd, we will use the gtt allocation function
|
if (!*mqd_mem_obj)
|
||||||
* instead of sub-allocation function.
|
|
||||||
*/
|
|
||||||
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
|
|
||||||
*mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
|
|
||||||
if (!*mqd_mem_obj)
|
|
||||||
return -ENOMEM;
|
|
||||||
retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
|
|
||||||
ALIGN(q->ctl_stack_size, PAGE_SIZE) +
|
|
||||||
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
|
|
||||||
&((*mqd_mem_obj)->gtt_mem),
|
|
||||||
&((*mqd_mem_obj)->gpu_addr),
|
|
||||||
(void *)&((*mqd_mem_obj)->cpu_ptr), true);
|
|
||||||
} else
|
|
||||||
retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd),
|
|
||||||
mqd_mem_obj);
|
|
||||||
if (retval != 0)
|
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
||||||
|
@ -328,13 +349,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
||||||
{
|
{
|
||||||
int retval;
|
int retval;
|
||||||
struct v9_sdma_mqd *m;
|
struct v9_sdma_mqd *m;
|
||||||
|
struct kfd_dev *dev = mm->dev;
|
||||||
|
|
||||||
|
*mqd_mem_obj = allocate_sdma_mqd(dev, q);
|
||||||
retval = kfd_gtt_sa_allocate(mm->dev,
|
if (!*mqd_mem_obj)
|
||||||
sizeof(struct v9_sdma_mqd),
|
|
||||||
mqd_mem_obj);
|
|
||||||
|
|
||||||
if (retval != 0)
|
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
||||||
|
@ -350,12 +368,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
|
||||||
struct kfd_mem_obj *mqd_mem_obj)
|
|
||||||
{
|
|
||||||
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
||||||
uint32_t pipe_id, uint32_t queue_id,
|
uint32_t pipe_id, uint32_t queue_id,
|
||||||
struct queue_properties *p, struct mm_struct *mms)
|
struct queue_properties *p, struct mm_struct *mms)
|
||||||
|
@ -459,28 +471,43 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
|
||||||
mqd->destroy_mqd = destroy_mqd;
|
mqd->destroy_mqd = destroy_mqd;
|
||||||
mqd->is_occupied = is_occupied;
|
mqd->is_occupied = is_occupied;
|
||||||
mqd->get_wave_state = get_wave_state;
|
mqd->get_wave_state = get_wave_state;
|
||||||
|
mqd->mqd_size = sizeof(struct v9_mqd);
|
||||||
#if defined(CONFIG_DEBUG_FS)
|
#if defined(CONFIG_DEBUG_FS)
|
||||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
case KFD_MQD_TYPE_HIQ:
|
case KFD_MQD_TYPE_HIQ:
|
||||||
|
mqd->init_mqd = init_mqd_hiq;
|
||||||
|
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
|
||||||
|
mqd->load_mqd = load_mqd;
|
||||||
|
mqd->update_mqd = update_mqd_hiq;
|
||||||
|
mqd->destroy_mqd = destroy_mqd;
|
||||||
|
mqd->is_occupied = is_occupied;
|
||||||
|
mqd->mqd_size = sizeof(struct v9_mqd);
|
||||||
|
#if defined(CONFIG_DEBUG_FS)
|
||||||
|
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case KFD_MQD_TYPE_DIQ:
|
||||||
mqd->init_mqd = init_mqd_hiq;
|
mqd->init_mqd = init_mqd_hiq;
|
||||||
mqd->uninit_mqd = uninit_mqd;
|
mqd->uninit_mqd = uninit_mqd;
|
||||||
mqd->load_mqd = load_mqd;
|
mqd->load_mqd = load_mqd;
|
||||||
mqd->update_mqd = update_mqd_hiq;
|
mqd->update_mqd = update_mqd_hiq;
|
||||||
mqd->destroy_mqd = destroy_mqd;
|
mqd->destroy_mqd = destroy_mqd;
|
||||||
mqd->is_occupied = is_occupied;
|
mqd->is_occupied = is_occupied;
|
||||||
|
mqd->mqd_size = sizeof(struct v9_mqd);
|
||||||
#if defined(CONFIG_DEBUG_FS)
|
#if defined(CONFIG_DEBUG_FS)
|
||||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
case KFD_MQD_TYPE_SDMA:
|
case KFD_MQD_TYPE_SDMA:
|
||||||
mqd->init_mqd = init_mqd_sdma;
|
mqd->init_mqd = init_mqd_sdma;
|
||||||
mqd->uninit_mqd = uninit_mqd_sdma;
|
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
|
||||||
mqd->load_mqd = load_mqd_sdma;
|
mqd->load_mqd = load_mqd_sdma;
|
||||||
mqd->update_mqd = update_mqd_sdma;
|
mqd->update_mqd = update_mqd_sdma;
|
||||||
mqd->destroy_mqd = destroy_mqd_sdma;
|
mqd->destroy_mqd = destroy_mqd_sdma;
|
||||||
mqd->is_occupied = is_occupied_sdma;
|
mqd->is_occupied = is_occupied_sdma;
|
||||||
|
mqd->mqd_size = sizeof(struct v9_sdma_mqd);
|
||||||
#if defined(CONFIG_DEBUG_FS)
|
#if defined(CONFIG_DEBUG_FS)
|
||||||
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
|
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -68,6 +68,21 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
|
||||||
m->compute_static_thread_mgmt_se3);
|
m->compute_static_thread_mgmt_se3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
|
||||||
|
struct queue_properties *q)
|
||||||
|
{
|
||||||
|
struct kfd_mem_obj *mqd_mem_obj;
|
||||||
|
|
||||||
|
if (q->type == KFD_QUEUE_TYPE_HIQ)
|
||||||
|
return allocate_hiq_mqd(kfd);
|
||||||
|
|
||||||
|
if (kfd_gtt_sa_allocate(kfd, sizeof(struct vi_mqd),
|
||||||
|
&mqd_mem_obj))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
return mqd_mem_obj;
|
||||||
|
}
|
||||||
|
|
||||||
static int init_mqd(struct mqd_manager *mm, void **mqd,
|
static int init_mqd(struct mqd_manager *mm, void **mqd,
|
||||||
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
|
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
|
||||||
struct queue_properties *q)
|
struct queue_properties *q)
|
||||||
|
@ -75,10 +90,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
|
||||||
int retval;
|
int retval;
|
||||||
uint64_t addr;
|
uint64_t addr;
|
||||||
struct vi_mqd *m;
|
struct vi_mqd *m;
|
||||||
|
struct kfd_dev *kfd = mm->dev;
|
||||||
|
|
||||||
retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct vi_mqd),
|
*mqd_mem_obj = allocate_mqd(kfd, q);
|
||||||
mqd_mem_obj);
|
if (!*mqd_mem_obj)
|
||||||
if (retval != 0)
|
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
||||||
|
@ -329,13 +344,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
||||||
{
|
{
|
||||||
int retval;
|
int retval;
|
||||||
struct vi_sdma_mqd *m;
|
struct vi_sdma_mqd *m;
|
||||||
|
struct kfd_dev *dev = mm->dev;
|
||||||
|
|
||||||
|
*mqd_mem_obj = allocate_sdma_mqd(dev, q);
|
||||||
retval = kfd_gtt_sa_allocate(mm->dev,
|
if (!*mqd_mem_obj)
|
||||||
sizeof(struct vi_sdma_mqd),
|
|
||||||
mqd_mem_obj);
|
|
||||||
|
|
||||||
if (retval != 0)
|
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
||||||
|
@ -343,7 +355,7 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
||||||
memset(m, 0, sizeof(struct vi_sdma_mqd));
|
memset(m, 0, sizeof(struct vi_sdma_mqd));
|
||||||
|
|
||||||
*mqd = m;
|
*mqd = m;
|
||||||
if (gart_addr != NULL)
|
if (gart_addr)
|
||||||
*gart_addr = (*mqd_mem_obj)->gpu_addr;
|
*gart_addr = (*mqd_mem_obj)->gpu_addr;
|
||||||
|
|
||||||
retval = mm->update_mqd(mm, m, q);
|
retval = mm->update_mqd(mm, m, q);
|
||||||
|
@ -351,12 +363,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
|
||||||
struct kfd_mem_obj *mqd_mem_obj)
|
|
||||||
{
|
|
||||||
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
||||||
uint32_t pipe_id, uint32_t queue_id,
|
uint32_t pipe_id, uint32_t queue_id,
|
||||||
struct queue_properties *p, struct mm_struct *mms)
|
struct queue_properties *p, struct mm_struct *mms)
|
||||||
|
@ -459,28 +465,43 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
|
||||||
mqd->destroy_mqd = destroy_mqd;
|
mqd->destroy_mqd = destroy_mqd;
|
||||||
mqd->is_occupied = is_occupied;
|
mqd->is_occupied = is_occupied;
|
||||||
mqd->get_wave_state = get_wave_state;
|
mqd->get_wave_state = get_wave_state;
|
||||||
|
mqd->mqd_size = sizeof(struct vi_mqd);
|
||||||
#if defined(CONFIG_DEBUG_FS)
|
#if defined(CONFIG_DEBUG_FS)
|
||||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
case KFD_MQD_TYPE_HIQ:
|
case KFD_MQD_TYPE_HIQ:
|
||||||
|
mqd->init_mqd = init_mqd_hiq;
|
||||||
|
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
|
||||||
|
mqd->load_mqd = load_mqd;
|
||||||
|
mqd->update_mqd = update_mqd_hiq;
|
||||||
|
mqd->destroy_mqd = destroy_mqd;
|
||||||
|
mqd->is_occupied = is_occupied;
|
||||||
|
mqd->mqd_size = sizeof(struct vi_mqd);
|
||||||
|
#if defined(CONFIG_DEBUG_FS)
|
||||||
|
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case KFD_MQD_TYPE_DIQ:
|
||||||
mqd->init_mqd = init_mqd_hiq;
|
mqd->init_mqd = init_mqd_hiq;
|
||||||
mqd->uninit_mqd = uninit_mqd;
|
mqd->uninit_mqd = uninit_mqd;
|
||||||
mqd->load_mqd = load_mqd;
|
mqd->load_mqd = load_mqd;
|
||||||
mqd->update_mqd = update_mqd_hiq;
|
mqd->update_mqd = update_mqd_hiq;
|
||||||
mqd->destroy_mqd = destroy_mqd;
|
mqd->destroy_mqd = destroy_mqd;
|
||||||
mqd->is_occupied = is_occupied;
|
mqd->is_occupied = is_occupied;
|
||||||
|
mqd->mqd_size = sizeof(struct vi_mqd);
|
||||||
#if defined(CONFIG_DEBUG_FS)
|
#if defined(CONFIG_DEBUG_FS)
|
||||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
case KFD_MQD_TYPE_SDMA:
|
case KFD_MQD_TYPE_SDMA:
|
||||||
mqd->init_mqd = init_mqd_sdma;
|
mqd->init_mqd = init_mqd_sdma;
|
||||||
mqd->uninit_mqd = uninit_mqd_sdma;
|
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
|
||||||
mqd->load_mqd = load_mqd_sdma;
|
mqd->load_mqd = load_mqd_sdma;
|
||||||
mqd->update_mqd = update_mqd_sdma;
|
mqd->update_mqd = update_mqd_sdma;
|
||||||
mqd->destroy_mqd = destroy_mqd_sdma;
|
mqd->destroy_mqd = destroy_mqd_sdma;
|
||||||
mqd->is_occupied = is_occupied_sdma;
|
mqd->is_occupied = is_occupied_sdma;
|
||||||
|
mqd->mqd_size = sizeof(struct vi_sdma_mqd);
|
||||||
#if defined(CONFIG_DEBUG_FS)
|
#if defined(CONFIG_DEBUG_FS)
|
||||||
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
|
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -48,7 +48,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
|
||||||
|
|
||||||
process_count = pm->dqm->processes_count;
|
process_count = pm->dqm->processes_count;
|
||||||
queue_count = pm->dqm->queue_count;
|
queue_count = pm->dqm->queue_count;
|
||||||
compute_queue_count = queue_count - pm->dqm->sdma_queue_count;
|
compute_queue_count = queue_count - pm->dqm->sdma_queue_count -
|
||||||
|
pm->dqm->xgmi_sdma_queue_count;
|
||||||
|
|
||||||
/* check if there is over subscription
|
/* check if there is over subscription
|
||||||
* Note: the arbitration between the number of VMIDs and
|
* Note: the arbitration between the number of VMIDs and
|
||||||
|
@ -227,6 +228,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
|
||||||
case CHIP_POLARIS10:
|
case CHIP_POLARIS10:
|
||||||
case CHIP_POLARIS11:
|
case CHIP_POLARIS11:
|
||||||
case CHIP_POLARIS12:
|
case CHIP_POLARIS12:
|
||||||
|
case CHIP_VEGAM:
|
||||||
pm->pmf = &kfd_vi_pm_funcs;
|
pm->pmf = &kfd_vi_pm_funcs;
|
||||||
break;
|
break;
|
||||||
case CHIP_VEGA10:
|
case CHIP_VEGA10:
|
||||||
|
|
|
@ -176,8 +176,7 @@ struct pm4_mes_map_process {
|
||||||
|
|
||||||
union {
|
union {
|
||||||
struct {
|
struct {
|
||||||
uint32_t num_gws:6;
|
uint32_t num_gws:7;
|
||||||
uint32_t reserved7:1;
|
|
||||||
uint32_t sdma_enable:1;
|
uint32_t sdma_enable:1;
|
||||||
uint32_t num_oac:4;
|
uint32_t num_oac:4;
|
||||||
uint32_t reserved8:4;
|
uint32_t reserved8:4;
|
||||||
|
@ -255,11 +254,6 @@ enum mes_map_queues_queue_type_enum {
|
||||||
queue_type__mes_map_queues__low_latency_static_queue_vi = 3
|
queue_type__mes_map_queues__low_latency_static_queue_vi = 3
|
||||||
};
|
};
|
||||||
|
|
||||||
enum mes_map_queues_alloc_format_enum {
|
|
||||||
alloc_format__mes_map_queues__one_per_pipe_vi = 0,
|
|
||||||
alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
|
|
||||||
};
|
|
||||||
|
|
||||||
enum mes_map_queues_engine_sel_enum {
|
enum mes_map_queues_engine_sel_enum {
|
||||||
engine_sel__mes_map_queues__compute_vi = 0,
|
engine_sel__mes_map_queues__compute_vi = 0,
|
||||||
engine_sel__mes_map_queues__sdma0_vi = 2,
|
engine_sel__mes_map_queues__sdma0_vi = 2,
|
||||||
|
@ -277,9 +271,11 @@ struct pm4_mes_map_queues {
|
||||||
struct {
|
struct {
|
||||||
uint32_t reserved1:4;
|
uint32_t reserved1:4;
|
||||||
enum mes_map_queues_queue_sel_enum queue_sel:2;
|
enum mes_map_queues_queue_sel_enum queue_sel:2;
|
||||||
uint32_t reserved2:15;
|
uint32_t reserved5:6;
|
||||||
|
uint32_t gws_control_queue:1;
|
||||||
|
uint32_t reserved2:8;
|
||||||
enum mes_map_queues_queue_type_enum queue_type:3;
|
enum mes_map_queues_queue_type_enum queue_type:3;
|
||||||
enum mes_map_queues_alloc_format_enum alloc_format:2;
|
uint32_t reserved3:2;
|
||||||
enum mes_map_queues_engine_sel_enum engine_sel:3;
|
enum mes_map_queues_engine_sel_enum engine_sel:3;
|
||||||
uint32_t num_queues:3;
|
uint32_t num_queues:3;
|
||||||
} bitfields2;
|
} bitfields2;
|
||||||
|
|
|
@ -216,11 +216,6 @@ enum mes_map_queues_queue_type_vi_enum {
|
||||||
queue_type__mes_map_queues__low_latency_static_queue_vi = 3
|
queue_type__mes_map_queues__low_latency_static_queue_vi = 3
|
||||||
};
|
};
|
||||||
|
|
||||||
enum mes_map_queues_alloc_format_vi_enum {
|
|
||||||
alloc_format__mes_map_queues__one_per_pipe_vi = 0,
|
|
||||||
alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
|
|
||||||
};
|
|
||||||
|
|
||||||
enum mes_map_queues_engine_sel_vi_enum {
|
enum mes_map_queues_engine_sel_vi_enum {
|
||||||
engine_sel__mes_map_queues__compute_vi = 0,
|
engine_sel__mes_map_queues__compute_vi = 0,
|
||||||
engine_sel__mes_map_queues__sdma0_vi = 2,
|
engine_sel__mes_map_queues__sdma0_vi = 2,
|
||||||
|
@ -240,7 +235,7 @@ struct pm4_mes_map_queues {
|
||||||
enum mes_map_queues_queue_sel_vi_enum queue_sel:2;
|
enum mes_map_queues_queue_sel_vi_enum queue_sel:2;
|
||||||
uint32_t reserved2:15;
|
uint32_t reserved2:15;
|
||||||
enum mes_map_queues_queue_type_vi_enum queue_type:3;
|
enum mes_map_queues_queue_type_vi_enum queue_type:3;
|
||||||
enum mes_map_queues_alloc_format_vi_enum alloc_format:2;
|
uint32_t reserved3:2;
|
||||||
enum mes_map_queues_engine_sel_vi_enum engine_sel:3;
|
enum mes_map_queues_engine_sel_vi_enum engine_sel:3;
|
||||||
uint32_t num_queues:3;
|
uint32_t num_queues:3;
|
||||||
} bitfields2;
|
} bitfields2;
|
||||||
|
|
|
@ -59,6 +59,7 @@
|
||||||
#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
|
#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
|
||||||
#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT)
|
#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT)
|
||||||
#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
|
#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
|
||||||
|
#define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT)
|
||||||
|
|
||||||
#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
|
#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
|
||||||
#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
|
#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
|
||||||
|
@ -160,6 +161,11 @@ extern int noretry;
|
||||||
*/
|
*/
|
||||||
extern int halt_if_hws_hang;
|
extern int halt_if_hws_hang;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Whether MEC FW support GWS barriers
|
||||||
|
*/
|
||||||
|
extern bool hws_gws_support;
|
||||||
|
|
||||||
enum cache_policy {
|
enum cache_policy {
|
||||||
cache_policy_coherent,
|
cache_policy_coherent,
|
||||||
cache_policy_noncoherent
|
cache_policy_noncoherent
|
||||||
|
@ -188,6 +194,7 @@ struct kfd_device_info {
|
||||||
bool needs_iommu_device;
|
bool needs_iommu_device;
|
||||||
bool needs_pci_atomics;
|
bool needs_pci_atomics;
|
||||||
unsigned int num_sdma_engines;
|
unsigned int num_sdma_engines;
|
||||||
|
unsigned int num_xgmi_sdma_engines;
|
||||||
unsigned int num_sdma_queues_per_engine;
|
unsigned int num_sdma_queues_per_engine;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -258,7 +265,7 @@ struct kfd_dev {
|
||||||
bool interrupts_active;
|
bool interrupts_active;
|
||||||
|
|
||||||
/* Debug manager */
|
/* Debug manager */
|
||||||
struct kfd_dbgmgr *dbgmgr;
|
struct kfd_dbgmgr *dbgmgr;
|
||||||
|
|
||||||
/* Firmware versions */
|
/* Firmware versions */
|
||||||
uint16_t mec_fw_version;
|
uint16_t mec_fw_version;
|
||||||
|
@ -282,6 +289,9 @@ struct kfd_dev {
|
||||||
|
|
||||||
/* Compute Profile ref. count */
|
/* Compute Profile ref. count */
|
||||||
atomic_t compute_profile;
|
atomic_t compute_profile;
|
||||||
|
|
||||||
|
/* Global GWS resource shared b/t processes*/
|
||||||
|
void *gws;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum kfd_mempool {
|
enum kfd_mempool {
|
||||||
|
@ -329,7 +339,8 @@ enum kfd_queue_type {
|
||||||
KFD_QUEUE_TYPE_COMPUTE,
|
KFD_QUEUE_TYPE_COMPUTE,
|
||||||
KFD_QUEUE_TYPE_SDMA,
|
KFD_QUEUE_TYPE_SDMA,
|
||||||
KFD_QUEUE_TYPE_HIQ,
|
KFD_QUEUE_TYPE_HIQ,
|
||||||
KFD_QUEUE_TYPE_DIQ
|
KFD_QUEUE_TYPE_DIQ,
|
||||||
|
KFD_QUEUE_TYPE_SDMA_XGMI
|
||||||
};
|
};
|
||||||
|
|
||||||
enum kfd_queue_format {
|
enum kfd_queue_format {
|
||||||
|
@ -444,6 +455,9 @@ struct queue_properties {
|
||||||
*
|
*
|
||||||
* @device: The kfd device that created this queue.
|
* @device: The kfd device that created this queue.
|
||||||
*
|
*
|
||||||
|
* @gws: Pointing to gws kgd_mem if this is a gws control queue; NULL
|
||||||
|
* otherwise.
|
||||||
|
*
|
||||||
* This structure represents user mode compute queues.
|
* This structure represents user mode compute queues.
|
||||||
* It contains all the necessary data to handle such queues.
|
* It contains all the necessary data to handle such queues.
|
||||||
*
|
*
|
||||||
|
@ -465,6 +479,7 @@ struct queue {
|
||||||
|
|
||||||
struct kfd_process *process;
|
struct kfd_process *process;
|
||||||
struct kfd_dev *device;
|
struct kfd_dev *device;
|
||||||
|
void *gws;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -475,6 +490,7 @@ enum KFD_MQD_TYPE {
|
||||||
KFD_MQD_TYPE_HIQ, /* for hiq */
|
KFD_MQD_TYPE_HIQ, /* for hiq */
|
||||||
KFD_MQD_TYPE_CP, /* for cp queues and diq */
|
KFD_MQD_TYPE_CP, /* for cp queues and diq */
|
||||||
KFD_MQD_TYPE_SDMA, /* for sdma queues */
|
KFD_MQD_TYPE_SDMA, /* for sdma queues */
|
||||||
|
KFD_MQD_TYPE_DIQ, /* for diq */
|
||||||
KFD_MQD_TYPE_MAX
|
KFD_MQD_TYPE_MAX
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -819,8 +835,6 @@ void uninit_queue(struct queue *q);
|
||||||
void print_queue_properties(struct queue_properties *q);
|
void print_queue_properties(struct queue_properties *q);
|
||||||
void print_queue(struct queue *q);
|
void print_queue(struct queue *q);
|
||||||
|
|
||||||
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
|
|
||||||
struct kfd_dev *dev);
|
|
||||||
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
|
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
|
||||||
struct kfd_dev *dev);
|
struct kfd_dev *dev);
|
||||||
struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
|
struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
|
||||||
|
@ -859,6 +873,8 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
|
||||||
struct queue_properties *p);
|
struct queue_properties *p);
|
||||||
int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
|
int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
|
||||||
struct queue_properties *p);
|
struct queue_properties *p);
|
||||||
|
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
|
||||||
|
void *gws);
|
||||||
struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
|
struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
|
||||||
unsigned int qid);
|
unsigned int qid);
|
||||||
int pqm_get_wave_state(struct process_queue_manager *pqm,
|
int pqm_get_wave_state(struct process_queue_manager *pqm,
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue