
Merge branch 'drm-next-4.21' of git://people.freedesktop.org/~agd5f/linux into drm-next

New features for 4.21:
amdgpu:
- Support for SDMA paging queue on vega
- Put compute EOP buffers into vram for better performance
- Share more code with amdkfd
- Support for scanout with DCC on gfx9
- Initial kerneldoc for DC
- Updated SMU firmware support for gfx8 chips
- Rework CSA handling for eventual support for preemption
- XGMI PSP support
- Clean up RLC handling
- Enable GPU reset by default on VI, SOC15 dGPUs
- Ring and IB test cleanups

amdkfd:
- Share more code with amdgpu

ttm:
- Move global init out of the drivers

scheduler:
- Track if schedulers are ready for work
- Timeout/fault handling changes to facilitate GPU recovery

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181114165113.3751-1-alexander.deucher@amd.com
Dave Airlie 2018-11-19 11:07:52 +10:00
commit 9235dd441a
207 changed files with 5633 additions and 5465 deletions


@ -0,0 +1,68 @@
===================================
drm/amd/display - Display Core (DC)
===================================
*placeholder - general description of supported platforms, what dc is, etc.*
Because it is partially shared with other operating systems, the Display Core
Driver is divided in two pieces.
1. **Display Core (DC)** contains the OS-agnostic components. Things like
hardware programming and resource management are handled here.
2. **Display Manager (DM)** contains the OS-dependent components. Hooks to the
amdgpu base driver and DRM are implemented here.
It doesn't help that the entire package is frequently referred to as DC. But
with the context in mind, it should be clear.
When CONFIG_DRM_AMD_DC is enabled, DC will be initialized by default for
supported ASICs. To force disable, set `amdgpu.dc=0` on kernel command line.
Likewise, to force enable on unsupported ASICs, set `amdgpu.dc=1`.
To determine if DC is loaded, search dmesg for the following entry:
``Display Core initialized with <version number here>``
AMDgpu Display Manager
======================
.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
:doc: overview
.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
:internal:
Lifecycle
---------
.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
:doc: DM Lifecycle
.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
:functions: dm_hw_init dm_hw_fini
Interrupts
----------
.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
:doc: overview
.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
:internal:
.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
:functions: register_hpd_handlers dm_crtc_high_irq dm_pflip_high_irq
Atomic Implementation
---------------------
.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
:doc: atomic
.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
:functions: amdgpu_dm_atomic_check amdgpu_dm_atomic_commit_tail
Display Core
============
**WIP**


@ -5,6 +5,7 @@ GPU Driver Documentation
.. toctree::
amdgpu
amdgpu-dc
i915
meson
pl111


@ -72,8 +72,8 @@ object TTM to provide a pool for buffer object allocation by clients and
the kernel itself. The type of this object should be
TTM_GLOBAL_TTM_BO, and its size should be sizeof(struct
ttm_bo_global). Again, driver-specific init and release functions may
be provided, likely eventually calling ttm_bo_global_init() and
ttm_bo_global_release(), respectively. Also, like the previous
be provided, likely eventually calling ttm_bo_global_ref_init() and
ttm_bo_global_ref_release(), respectively. Also, like the previous
object, ttm_global_item_ref() is used to create an initial reference
count for the TTM, which will call your initialization function.


@ -11,7 +11,7 @@ drm-y := drm_auth.o drm_bufs.o drm_cache.o \
drm_sysfs.o drm_hashtab.o drm_mm.o \
drm_crtc.o drm_fourcc.o drm_modes.o drm_edid.o \
drm_info.o drm_encoder_slave.o \
drm_trace_points.o drm_global.o drm_prime.o \
drm_trace_points.o drm_prime.o \
drm_rect.o drm_vma_manager.o drm_flip_work.o \
drm_modeset_lock.o drm_atomic.o drm_bridge.o \
drm_framebuffer.o drm_connector.o drm_blend.o \


@ -53,7 +53,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
amdgpu_gmc.o amdgpu_xgmi.o
amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o
# add asic specific block
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
@ -105,6 +105,7 @@ amdgpu-y += \
# add GFX block
amdgpu-y += \
amdgpu_gfx.o \
amdgpu_rlc.o \
gfx_v8_0.o \
gfx_v9_0.o


@ -75,6 +75,7 @@
#include "amdgpu_sdma.h"
#include "amdgpu_dm.h"
#include "amdgpu_virt.h"
#include "amdgpu_csa.h"
#include "amdgpu_gart.h"
#include "amdgpu_debugfs.h"
#include "amdgpu_job.h"
@ -433,7 +434,7 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
* default non-graphics QWORD index is 0xe0 - 0xFF inclusive
*/
/* sDMA engines reserved from 0xe0 -oxef */
/* sDMA engines reserved from 0xe0 -0xef */
AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xE0,
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xE1,
AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xE8,
@ -831,7 +832,6 @@ struct amdgpu_device {
bool need_dma32;
bool need_swiotlb;
bool accel_working;
struct work_struct reset_work;
struct notifier_block acpi_nb;
struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS];


@ -144,7 +144,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
KGD_MAX_QUEUES);
/* remove the KIQ bit as well */
if (adev->gfx.kiq.ring.ready)
if (adev->gfx.kiq.ring.sched.ready)
clear_bit(amdgpu_gfx_queue_to_bit(adev,
adev->gfx.kiq.ring.me - 1,
adev->gfx.kiq.ring.pipe,
@ -268,9 +268,9 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
amdgpu_device_gpu_recover(adev, NULL);
}
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool mqd_gfx9)
int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool mqd_gfx9)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_bo *bo = NULL;
@ -340,7 +340,7 @@ allocate_mem_reserve_bo_failed:
return r;
}
void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
{
struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
@ -351,8 +351,8 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
amdgpu_bo_unref(&(bo));
}
void get_local_mem_info(struct kgd_dev *kgd,
struct kfd_local_mem_info *mem_info)
void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
struct kfd_local_mem_info *mem_info)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask :
@ -383,7 +383,7 @@ void get_local_mem_info(struct kgd_dev *kgd,
mem_info->mem_clk_max = 100;
}
uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
@ -392,7 +392,7 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
return 0;
}
uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
@ -405,7 +405,7 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
return 100;
}
void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_cu_info acu_info = adev->gfx.cu_info;


@ -134,16 +134,16 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd);
/* Shared API */
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool mqd_gfx9);
void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
void get_local_mem_info(struct kgd_dev *kgd,
struct kfd_local_mem_info *mem_info);
uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool mqd_gfx9);
void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
struct kfd_local_mem_info *mem_info);
uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd);
uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);


@ -173,13 +173,6 @@ static int get_tile_config(struct kgd_dev *kgd,
}
static const struct kfd2kgd_calls kfd2kgd = {
.init_gtt_mem_allocation = alloc_gtt_mem,
.free_gtt_mem = free_gtt_mem,
.get_local_mem_info = get_local_mem_info,
.get_gpu_clock_counter = get_gpu_clock_counter,
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
.alloc_pasid = amdgpu_pasid_alloc,
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
.init_interrupts = kgd_init_interrupts,
@ -200,28 +193,10 @@ static const struct kfd2kgd_calls kfd2kgd = {
.get_fw_version = get_fw_version,
.set_scratch_backing_va = set_scratch_backing_va,
.get_tile_config = get_tile_config,
.get_cu_info = get_cu_info,
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
.gpu_recover = amdgpu_amdkfd_gpu_reset,
.set_compute_idle = amdgpu_amdkfd_set_compute_idle
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)


@ -128,13 +128,6 @@ static int get_tile_config(struct kgd_dev *kgd,
}
static const struct kfd2kgd_calls kfd2kgd = {
.init_gtt_mem_allocation = alloc_gtt_mem,
.free_gtt_mem = free_gtt_mem,
.get_local_mem_info = get_local_mem_info,
.get_gpu_clock_counter = get_gpu_clock_counter,
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
.alloc_pasid = amdgpu_pasid_alloc,
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
.init_interrupts = kgd_init_interrupts,
@ -157,27 +150,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
.get_fw_version = get_fw_version,
.set_scratch_backing_va = set_scratch_backing_va,
.get_tile_config = get_tile_config,
.get_cu_info = get_cu_info,
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
.gpu_recover = amdgpu_amdkfd_gpu_reset,
.set_compute_idle = amdgpu_amdkfd_set_compute_idle
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)


@ -46,38 +46,9 @@
#include "v9_structs.h"
#include "soc15.h"
#include "soc15d.h"
#include "mmhub_v1_0.h"
#include "gfxhub_v1_0.h"
/* HACK: MMHUB and GC both have VM-related register with the same
* names but different offsets. Define the MMHUB register we need here
* with a prefix. A proper solution would be to move the functions
* programming these registers into gfx_v9_0.c and mmhub_v1_0.c
* respectively.
*/
#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3
#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0
#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705
#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0
#define V9_PIPE_PER_MEC (4)
#define V9_QUEUES_PER_PIPE_MEC (8)
@ -167,13 +138,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
}
static const struct kfd2kgd_calls kfd2kgd = {
.init_gtt_mem_allocation = alloc_gtt_mem,
.free_gtt_mem = free_gtt_mem,
.get_local_mem_info = get_local_mem_info,
.get_gpu_clock_counter = get_gpu_clock_counter,
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
.alloc_pasid = amdgpu_pasid_alloc,
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
.init_interrupts = kgd_init_interrupts,
@ -196,26 +160,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
.get_fw_version = get_fw_version,
.set_scratch_backing_va = set_scratch_backing_va,
.get_tile_config = amdgpu_amdkfd_get_tile_config,
.get_cu_info = get_cu_info,
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
.gpu_recover = amdgpu_amdkfd_gpu_reset,
.set_compute_idle = amdgpu_amdkfd_set_compute_idle,
.get_hive_id = amdgpu_amdkfd_get_hive_id,
};
@ -785,15 +732,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
uint32_t req = (1 << vmid) |
(0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK |
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK |
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK |
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK |
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK;
mutex_lock(&adev->srbm_mutex);
/* Use legacy mode tlb invalidation.
*
@ -810,34 +748,7 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
* TODO 2: support range-based invalidation, requires kfg2kgd
* interface change
*/
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
0xffffffff);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
0x0000001f);
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
0xffffffff);
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
0x0000001f);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ),
req);
while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) &
(1 << vmid)))
cpu_relax();
while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0,
mmMMHUB_VM_INVALIDATE_ENG16_ACK)) &
(1 << vmid)))
cpu_relax();
mutex_unlock(&adev->srbm_mutex);
amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
}
static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
@ -876,7 +787,7 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
if (adev->in_gpu_reset)
return -EIO;
if (ring->ready)
if (ring->sched.ready)
return invalidate_tlbs_with_kiq(adev, pasid);
for (vmid = 0; vmid < 16; vmid++) {
@ -1016,7 +927,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint64_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint64_t base = page_table_base | AMDGPU_PTE_VALID;
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID %u\n",
@ -1028,25 +938,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
* now, all processes share the same address space size, like
* on GFX8 and older.
*/
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
upper_32_bits(adev->vm_manager.max_pfn - 1));
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
upper_32_bits(adev->vm_manager.max_pfn - 1));
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
}


@ -330,7 +330,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
case CHIP_TOPAZ:
if (((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x81)) ||
((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x83)) ||
((adev->pdev->device == 0x6907) && (adev->pdev->revision == 0x87))) {
((adev->pdev->device == 0x6907) && (adev->pdev->revision == 0x87)) ||
((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD1)) ||
((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD3))) {
info->is_kicker = true;
strcpy(fw_name, "amdgpu/topaz_k_smc.bin");
} else
@ -351,7 +353,6 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
if (type == CGS_UCODE_ID_SMU) {
if (((adev->pdev->device == 0x67ef) &&
((adev->pdev->revision == 0xe0) ||
(adev->pdev->revision == 0xe2) ||
(adev->pdev->revision == 0xe5))) ||
((adev->pdev->device == 0x67ff) &&
((adev->pdev->revision == 0xcf) ||
@ -359,8 +360,13 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
(adev->pdev->revision == 0xff)))) {
info->is_kicker = true;
strcpy(fw_name, "amdgpu/polaris11_k_smc.bin");
} else
} else if ((adev->pdev->device == 0x67ef) &&
(adev->pdev->revision == 0xe2)) {
info->is_kicker = true;
strcpy(fw_name, "amdgpu/polaris11_k2_smc.bin");
} else {
strcpy(fw_name, "amdgpu/polaris11_smc.bin");
}
} else if (type == CGS_UCODE_ID_SMU_SK) {
strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin");
}
@ -378,14 +384,31 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
(adev->pdev->revision == 0xef))) {
info->is_kicker = true;
strcpy(fw_name, "amdgpu/polaris10_k_smc.bin");
} else
} else if ((adev->pdev->device == 0x67df) &&
((adev->pdev->revision == 0xe1) ||
(adev->pdev->revision == 0xf7))) {
info->is_kicker = true;
strcpy(fw_name, "amdgpu/polaris10_k2_smc.bin");
} else {
strcpy(fw_name, "amdgpu/polaris10_smc.bin");
}
} else if (type == CGS_UCODE_ID_SMU_SK) {
strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin");
}
break;
case CHIP_POLARIS12:
strcpy(fw_name, "amdgpu/polaris12_smc.bin");
if (((adev->pdev->device == 0x6987) &&
((adev->pdev->revision == 0xc0) ||
(adev->pdev->revision == 0xc3))) ||
((adev->pdev->device == 0x6981) &&
((adev->pdev->revision == 0x00) ||
(adev->pdev->revision == 0x01) ||
(adev->pdev->revision == 0x10)))) {
info->is_kicker = true;
strcpy(fw_name, "amdgpu/polaris12_k_smc.bin");
} else {
strcpy(fw_name, "amdgpu/polaris12_smc.bin");
}
break;
case CHIP_VEGAM:
strcpy(fw_name, "amdgpu/vegam_smc.bin");


@ -1260,8 +1260,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
return 0;
error_abort:
dma_fence_put(&job->base.s_fence->finished);
job->base.s_fence = NULL;
drm_sched_job_cleanup(&job->base);
amdgpu_mn_unlock(p->mn);
error_unlock:
@ -1285,7 +1284,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
r = amdgpu_cs_parser_init(&parser, data);
if (r) {
DRM_ERROR("Failed to initialize parser !\n");
DRM_ERROR("Failed to initialize parser %d!\n", r);
goto out;
}


@ -0,0 +1,117 @@
/*
* Copyright 2016 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
* * Author: Monk.liu@amd.com
*/
#include "amdgpu.h"
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
{
uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
addr -= AMDGPU_VA_RESERVED_SIZE;
addr = amdgpu_gmc_sign_extend(addr);
return addr;
}
int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo,
u32 domain, uint32_t size)
{
int r;
void *ptr;
r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
domain, bo,
NULL, &ptr);
if (!bo)
return -ENOMEM;
memset(ptr, 0, size);
return 0;
}
void amdgpu_free_static_csa(struct amdgpu_bo **bo)
{
amdgpu_bo_free_kernel(bo, NULL, NULL);
}
/*
* amdgpu_map_static_csa should be called during amdgpu_vm_init
* it maps virtual address amdgpu_csa_vaddr() to this VM, and each command
* submission of GFX should use this virtual address within META_DATA init
* package to support SRIOV gfx preemption.
*/
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
uint64_t csa_addr, uint32_t size)
{
struct ww_acquire_ctx ticket;
struct list_head list;
struct amdgpu_bo_list_entry pd;
struct ttm_validate_buffer csa_tv;
int r;
INIT_LIST_HEAD(&list);
INIT_LIST_HEAD(&csa_tv.head);
csa_tv.bo = &bo->tbo;
csa_tv.shared = true;
list_add(&csa_tv.head, &list);
amdgpu_vm_get_pd_bo(vm, &list, &pd);
r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
if (r) {
DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
return r;
}
*bo_va = amdgpu_vm_bo_add(adev, vm, bo);
if (!*bo_va) {
ttm_eu_backoff_reservation(&ticket, &list);
DRM_ERROR("failed to create bo_va for static CSA\n");
return -ENOMEM;
}
r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
size);
if (r) {
DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
amdgpu_vm_bo_rmv(adev, *bo_va);
ttm_eu_backoff_reservation(&ticket, &list);
return r;
}
r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
AMDGPU_PTE_EXECUTABLE);
if (r) {
DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
amdgpu_vm_bo_rmv(adev, *bo_va);
ttm_eu_backoff_reservation(&ticket, &list);
return r;
}
ttm_eu_backoff_reservation(&ticket, &list);
return 0;
}
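
For orientation, the two halves of the reworked CSA interface above are wired up by later hunks in this patch: amdgpu_device_ip_init() backs the static CSA with a VRAM BO, and amdgpu_driver_open_kms() maps it at the reserved address in each process VM. A condensed sketch follows (error handling trimmed; both steps folded into one hypothetical helper purely for illustration):

	/* Sketch only: combines the device-init and per-open call sites
	 * shown in the amdgpu_device_ip_init() and amdgpu_driver_open_kms()
	 * hunks of this patch.
	 */
	static int example_static_csa_setup(struct amdgpu_device *adev,
					    struct amdgpu_fpriv *fpriv)
	{
		uint64_t csa_addr;
		int r;

		/* device init (SR-IOV): allocate the CSA backing BO in VRAM */
		r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
					       AMDGPU_GEM_DOMAIN_VRAM,
					       AMDGPU_CSA_SIZE);
		if (r)
			return r;

		/* per open: map the BO at the reserved VA of this process VM */
		csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
		r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
					  &fpriv->csa_va, csa_addr,
					  AMDGPU_CSA_SIZE);
		if (r)
			amdgpu_free_static_csa(&adev->virt.csa_obj);
		return r;
	}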


@ -0,0 +1,39 @@
/*
* Copyright 2016 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Author: Monk.liu@amd.com
*/
#ifndef AMDGPU_CSA_MANAGER_H
#define AMDGPU_CSA_MANAGER_H
#define AMDGPU_CSA_SIZE (128 * 1024)
uint32_t amdgpu_get_total_csa_size(struct amdgpu_device *adev);
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev);
int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo,
u32 domain, uint32_t size);
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
uint64_t csa_addr, uint32_t size);
void amdgpu_free_static_csa(struct amdgpu_bo **bo);
#endif


@ -1656,7 +1656,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
/* right after GMC hw init, we create CSA */
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_allocate_static_csa(adev);
r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_CSA_SIZE);
if (r) {
DRM_ERROR("allocate CSA failed %d\n", r);
return r;
@ -1681,7 +1683,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (r)
return r;
amdgpu_xgmi_add_device(adev);
if (adev->gmc.xgmi.num_physical_nodes > 1)
amdgpu_xgmi_add_device(adev);
amdgpu_amdkfd_device_init(adev);
if (amdgpu_sriov_vf(adev))
@ -1890,7 +1893,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
amdgpu_ucode_free_bo(adev);
amdgpu_free_static_csa(adev);
amdgpu_free_static_csa(&adev->virt.csa_obj);
amdgpu_device_wb_fini(adev);
amdgpu_device_vram_scratch_fini(adev);
}
@ -3295,13 +3298,35 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
return false;
}
if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1 &&
!amdgpu_sriov_vf(adev))) {
DRM_INFO("GPU recovery disabled.\n");
return false;
if (amdgpu_gpu_recovery == 0)
goto disabled;
if (amdgpu_sriov_vf(adev))
return true;
if (amdgpu_gpu_recovery == -1) {
switch (adev->asic_type) {
case CHIP_TOPAZ:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
case CHIP_VEGA20:
case CHIP_VEGA10:
case CHIP_VEGA12:
break;
default:
goto disabled;
}
}
return true;
disabled:
DRM_INFO("GPU recovery disabled.\n");
return false;
}
/**


@ -398,9 +398,9 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
ring->fence_drv.irq_type = irq_type;
ring->fence_drv.initialized = true;
dev_dbg(adev->dev, "fence driver on ring %d use gpu addr 0x%016llx, "
"cpu addr 0x%p\n", ring->idx,
ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr);
DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr "
"0x%016llx, cpu addr 0x%p\n", ring->name,
ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr);
return 0;
}


@ -248,7 +248,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
}
mb();
amdgpu_asic_flush_hdp(adev, NULL);
amdgpu_gmc_flush_gpu_tlb(adev, 0);
amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
return 0;
}
@ -259,6 +259,8 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
* @offset: offset into the GPU's gart aperture
* @pages: number of pages to bind
* @dma_addr: DMA addresses of pages
* @flags: page table entry flags
* @dst: CPU address of the gart table
*
* Map the dma_addresses into GART entries (all asics).
* Returns 0 for success, -EINVAL for failure.
@ -331,7 +333,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
mb();
amdgpu_asic_flush_hdp(adev, NULL);
amdgpu_gmc_flush_gpu_tlb(adev, 0);
amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
return 0;
}


@ -41,6 +41,7 @@ struct amdgpu_bo;
struct amdgpu_gart {
struct amdgpu_bo *bo;
/* CPU kmapped address of gart table */
void *ptr;
unsigned num_gpu_pages;
unsigned num_cpu_pages;


@ -25,6 +25,7 @@
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)


@ -29,6 +29,7 @@
*/
#include "clearstate_defs.h"
#include "amdgpu_ring.h"
#include "amdgpu_rlc.h"
/* GFX current status */
#define AMDGPU_GFX_NORMAL_MODE 0x00000000L
@ -37,59 +38,6 @@
#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L
#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
struct amdgpu_rlc_funcs {
void (*enter_safe_mode)(struct amdgpu_device *adev);
void (*exit_safe_mode)(struct amdgpu_device *adev);
};
struct amdgpu_rlc {
/* for power gating */
struct amdgpu_bo *save_restore_obj;
uint64_t save_restore_gpu_addr;
volatile uint32_t *sr_ptr;
const u32 *reg_list;
u32 reg_list_size;
/* for clear state */
struct amdgpu_bo *clear_state_obj;
uint64_t clear_state_gpu_addr;
volatile uint32_t *cs_ptr;
const struct cs_section_def *cs_data;
u32 clear_state_size;
/* for cp tables */
struct amdgpu_bo *cp_table_obj;
uint64_t cp_table_gpu_addr;
volatile uint32_t *cp_table_ptr;
u32 cp_table_size;
/* safe mode for updating CG/PG state */
bool in_safe_mode;
const struct amdgpu_rlc_funcs *funcs;
/* for firmware data */
u32 save_and_restore_offset;
u32 clear_state_descriptor_offset;
u32 avail_scratch_ram_locations;
u32 reg_restore_list_size;
u32 reg_list_format_start;
u32 reg_list_format_separate_start;
u32 starting_offsets_start;
u32 reg_list_format_size_bytes;
u32 reg_list_size_bytes;
u32 reg_list_format_direct_reg_list_length;
u32 save_restore_list_cntl_size_bytes;
u32 save_restore_list_gpm_size_bytes;
u32 save_restore_list_srm_size_bytes;
u32 *register_list_format;
u32 *register_restore;
u8 *save_restore_list_cntl;
u8 *save_restore_list_gpm;
u8 *save_restore_list_srm;
bool is_rlc_v2_1;
};
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
struct amdgpu_mec {


@ -64,7 +64,7 @@ struct amdgpu_vmhub {
struct amdgpu_gmc_funcs {
/* flush the vm tlb via mmio */
void (*flush_gpu_tlb)(struct amdgpu_device *adev,
uint32_t vmid);
uint32_t vmid, uint32_t flush_type);
/* flush the vm tlb via ring */
uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
uint64_t pd_addr);
@ -89,7 +89,7 @@ struct amdgpu_gmc_funcs {
struct amdgpu_xgmi {
/* from psp */
u64 device_id;
u64 node_id;
u64 hive_id;
/* fixed per family */
u64 node_segment_size;
@ -151,7 +151,7 @@ struct amdgpu_gmc {
struct amdgpu_xgmi xgmi;
};
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))


@ -146,7 +146,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
fence_ctx = 0;
}
if (!ring->ready) {
if (!ring->sched.ready) {
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
return -EINVAL;
}
@ -221,8 +221,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
continue;
amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0,
need_ctx_switch);
amdgpu_ring_emit_ib(ring, job, ib, need_ctx_switch);
need_ctx_switch = false;
}
@ -347,19 +346,14 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
}
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
for (i = 0; i < adev->num_rings; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
long tmo;
if (!ring || !ring->ready)
continue;
/* skip IB tests for KIQ in general for the below reasons:
* 1. We never submit IBs to the KIQ
* 2. KIQ doesn't use the EOP interrupts,
* we use some other CP interrupt.
/* KIQ rings don't have an IB test because we never submit IBs
* to them and they have no interrupt support.
*/
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
if (!ring->sched.ready || !ring->funcs->test_ib)
continue;
/* MM engine need more time */
@ -374,20 +368,23 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
tmo = tmo_gfx;
r = amdgpu_ring_test_ib(ring, tmo);
if (r) {
ring->ready = false;
if (!r) {
DRM_DEV_DEBUG(adev->dev, "ib test on %s succeeded\n",
ring->name);
continue;
}
if (ring == &adev->gfx.gfx_ring[0]) {
/* oh, oh, that's really bad */
DRM_ERROR("amdgpu: failed testing IB on GFX ring (%d).\n", r);
adev->accel_working = false;
return r;
ring->sched.ready = false;
DRM_DEV_ERROR(adev->dev, "IB test failed on %s (%d).\n",
ring->name, r);
} else {
/* still not good, but we can live with it */
DRM_ERROR("amdgpu: failed testing IB on ring %d (%d).\n", i, r);
ret = r;
}
if (ring == &adev->gfx.gfx_ring[0]) {
/* oh, oh, that's really bad */
adev->accel_working = false;
return r;
} else {
ret = r;
}
}
return ret;


@ -93,23 +93,6 @@ static void amdgpu_hotplug_work_func(struct work_struct *work)
drm_helper_hpd_irq_event(dev);
}
/**
* amdgpu_irq_reset_work_func - execute GPU reset
*
* @work: work struct pointer
*
* Execute scheduled GPU reset (Cayman+).
* This function is called when the IRQ handler thinks we need a GPU reset.
*/
static void amdgpu_irq_reset_work_func(struct work_struct *work)
{
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
reset_work);
if (!amdgpu_sriov_vf(adev) && amdgpu_device_should_recover_gpu(adev))
amdgpu_device_gpu_recover(adev, NULL);
}
/**
* amdgpu_irq_disable_all - disable *all* interrupts
*
@ -262,15 +245,12 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
amdgpu_hotplug_work_func);
}
INIT_WORK(&adev->reset_work, amdgpu_irq_reset_work_func);
adev->irq.installed = true;
r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq);
if (r) {
adev->irq.installed = false;
if (!amdgpu_device_has_dc_support(adev))
flush_work(&adev->hotplug_work);
cancel_work_sync(&adev->reset_work);
return r;
}
adev->ddev->max_vblank_count = 0x00ffffff;
@ -299,7 +279,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
pci_disable_msi(adev->pdev);
if (!amdgpu_device_has_dc_support(adev))
flush_work(&adev->hotplug_work);
cancel_work_sync(&adev->reset_work);
}
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {


@ -112,6 +112,8 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
struct amdgpu_job *job = to_amdgpu_job(s_job);
drm_sched_job_cleanup(s_job);
amdgpu_ring_priority_put(ring, s_job->s_priority);
dma_fence_put(job->fence);
amdgpu_sync_free(&job->sync);


@ -33,6 +33,8 @@
#define to_amdgpu_job(sched_job) \
container_of((sched_job), struct amdgpu_job, base)
#define AMDGPU_JOB_GET_VMID(job) ((job) ? (job)->vmid : 0)
struct amdgpu_fence;
struct amdgpu_job {


@ -336,7 +336,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_GFX:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
if (adev->gfx.gfx_ring[i].ready)
if (adev->gfx.gfx_ring[i].sched.ready)
++num_rings;
ib_start_alignment = 32;
ib_size_alignment = 32;
@ -344,7 +344,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_COMPUTE:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_compute_rings; i++)
if (adev->gfx.compute_ring[i].ready)
if (adev->gfx.compute_ring[i].sched.ready)
++num_rings;
ib_start_alignment = 32;
ib_size_alignment = 32;
@ -352,7 +352,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_DMA:
type = AMD_IP_BLOCK_TYPE_SDMA;
for (i = 0; i < adev->sdma.num_instances; i++)
if (adev->sdma.instance[i].ring.ready)
if (adev->sdma.instance[i].ring.sched.ready)
++num_rings;
ib_start_alignment = 256;
ib_size_alignment = 4;
@ -363,7 +363,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->uvd.harvest_config & (1 << i))
continue;
if (adev->uvd.inst[i].ring.ready)
if (adev->uvd.inst[i].ring.sched.ready)
++num_rings;
}
ib_start_alignment = 64;
@ -372,7 +372,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_VCE:
type = AMD_IP_BLOCK_TYPE_VCE;
for (i = 0; i < adev->vce.num_rings; i++)
if (adev->vce.ring[i].ready)
if (adev->vce.ring[i].sched.ready)
++num_rings;
ib_start_alignment = 4;
ib_size_alignment = 1;
@ -384,7 +384,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
continue;
for (j = 0; j < adev->uvd.num_enc_rings; j++)
if (adev->uvd.inst[i].ring_enc[j].ready)
if (adev->uvd.inst[i].ring_enc[j].sched.ready)
++num_rings;
}
ib_start_alignment = 64;
@ -392,7 +392,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
break;
case AMDGPU_HW_IP_VCN_DEC:
type = AMD_IP_BLOCK_TYPE_VCN;
if (adev->vcn.ring_dec.ready)
if (adev->vcn.ring_dec.sched.ready)
++num_rings;
ib_start_alignment = 16;
ib_size_alignment = 16;
@ -400,14 +400,14 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_VCN_ENC:
type = AMD_IP_BLOCK_TYPE_VCN;
for (i = 0; i < adev->vcn.num_enc_rings; i++)
if (adev->vcn.ring_enc[i].ready)
if (adev->vcn.ring_enc[i].sched.ready)
++num_rings;
ib_start_alignment = 64;
ib_size_alignment = 1;
break;
case AMDGPU_HW_IP_VCN_JPEG:
type = AMD_IP_BLOCK_TYPE_VCN;
if (adev->vcn.ring_jpeg.ready)
if (adev->vcn.ring_jpeg.sched.ready)
++num_rings;
ib_start_alignment = 16;
ib_size_alignment = 16;
@ -978,7 +978,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
}
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va);
uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
&fpriv->csa_va, csa_addr, AMDGPU_CSA_SIZE);
if (r)
goto error_vm;
}
@ -1048,8 +1051,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
pasid = fpriv->vm.pasid;
pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
amdgpu_vm_fini(adev, &fpriv->vm);
amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
amdgpu_vm_fini(adev, &fpriv->vm);
if (pasid)
amdgpu_pasid_free_delayed(pd->tbo.resv, pasid);


@ -57,7 +57,6 @@ struct amdgpu_hpd;
#define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base)
#define to_amdgpu_encoder(x) container_of(x, struct amdgpu_encoder, base)
#define to_amdgpu_framebuffer(x) container_of(x, struct amdgpu_framebuffer, base)
#define to_amdgpu_plane(x) container_of(x, struct amdgpu_plane, base)
#define to_dm_plane_state(x) container_of(x, struct dm_plane_state, base);
@ -325,7 +324,7 @@ struct amdgpu_mode_info {
struct card_info *atom_card_info;
bool mode_config_initialized;
struct amdgpu_crtc *crtcs[AMDGPU_MAX_CRTCS];
struct amdgpu_plane *planes[AMDGPU_MAX_PLANES];
struct drm_plane *planes[AMDGPU_MAX_PLANES];
struct amdgpu_afmt *afmt[AMDGPU_MAX_AFMT_BLOCKS];
/* DVI-I properties */
struct drm_property *coherent_mode_property;
@ -434,11 +433,6 @@ struct amdgpu_crtc {
struct drm_pending_vblank_event *event;
};
struct amdgpu_plane {
struct drm_plane base;
enum drm_plane_type plane_type;
};
struct amdgpu_encoder_atom_dig {
bool linkb;
/* atom dig */


@ -2129,7 +2129,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
if (ring && ring->ready)
if (ring && ring->sched.ready)
amdgpu_fence_wait_empty(ring);
}


@ -90,6 +90,8 @@ static int psp_sw_fini(void *handle)
adev->psp.sos_fw = NULL;
release_firmware(adev->psp.asd_fw);
adev->psp.asd_fw = NULL;
release_firmware(adev->psp.ta_fw);
adev->psp.ta_fw = NULL;
return 0;
}
@ -118,22 +120,26 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
static int
psp_cmd_submit_buf(struct psp_context *psp,
struct amdgpu_firmware_info *ucode,
struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr,
int index)
struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr)
{
int ret;
int index;
memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE);
memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp));
index = atomic_inc_return(&psp->fence_value);
ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr,
fence_mc_addr, index);
while (*((unsigned int *)psp->fence_buf) != index) {
msleep(1);
if (ret) {
atomic_dec(&psp->fence_value);
return ret;
}
while (*((unsigned int *)psp->fence_buf) != index)
msleep(1);
/* the status field must be 0 after FW is loaded */
if (ucode && psp->cmd_buf_mem->resp.status) {
DRM_ERROR("failed loading with status (%d) and ucode id (%d)\n",
@ -191,7 +197,7 @@ static int psp_tmr_load(struct psp_context *psp)
PSP_TMR_SIZE, psp->tmr_mc_addr);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr, 1);
psp->fence_buf_mc_addr);
if (ret)
goto failed;
@ -258,13 +264,194 @@ static int psp_asd_load(struct psp_context *psp)
psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr, 2);
psp->fence_buf_mc_addr);
kfree(cmd);
return ret;
}
static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared,
uint32_t xgmi_ta_size, uint32_t shared_size)
{
cmd->cmd_id = GFX_CMD_ID_LOAD_TA;
cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(xgmi_ta_mc);
cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(xgmi_ta_mc);
cmd->cmd.cmd_load_ta.app_len = xgmi_ta_size;
cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(xgmi_mc_shared);
cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(xgmi_mc_shared);
cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size;
}
static int psp_xgmi_init_shared_buf(struct psp_context *psp)
{
int ret;
/*
* Allocate 16k memory aligned to 4k from Frame Buffer (local
* physical) for xgmi ta <-> Driver
*/
ret = amdgpu_bo_create_kernel(psp->adev, PSP_XGMI_SHARED_MEM_SIZE,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
&psp->xgmi_context.xgmi_shared_bo,
&psp->xgmi_context.xgmi_shared_mc_addr,
&psp->xgmi_context.xgmi_shared_buf);
return ret;
}
static int psp_xgmi_load(struct psp_context *psp)
{
int ret;
struct psp_gfx_cmd_resp *cmd;
/*
* TODO: bypass the loading in sriov for now
*/
if (amdgpu_sriov_vf(psp->adev))
return 0;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
memset(psp->fw_pri_buf, 0, PSP_1_MEG);
memcpy(psp->fw_pri_buf, psp->ta_xgmi_start_addr, psp->ta_xgmi_ucode_size);
psp_prep_xgmi_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr,
psp->xgmi_context.xgmi_shared_mc_addr,
psp->ta_xgmi_ucode_size, PSP_XGMI_SHARED_MEM_SIZE);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
if (!ret) {
psp->xgmi_context.initialized = 1;
psp->xgmi_context.session_id = cmd->resp.session_id;
}
kfree(cmd);
return ret;
}
static void psp_prep_xgmi_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd,
uint32_t xgmi_session_id)
{
cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA;
cmd->cmd.cmd_unload_ta.session_id = xgmi_session_id;
}
static int psp_xgmi_unload(struct psp_context *psp)
{
int ret;
struct psp_gfx_cmd_resp *cmd;
/*
* TODO: bypass the unloading in sriov for now
*/
if (amdgpu_sriov_vf(psp->adev))
return 0;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
psp_prep_xgmi_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
kfree(cmd);
return ret;
}
static void psp_prep_xgmi_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
uint32_t ta_cmd_id,
uint32_t xgmi_session_id)
{
cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD;
cmd->cmd.cmd_invoke_cmd.session_id = xgmi_session_id;
cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
/* Note: cmd_invoke_cmd.buf is not used for now */
}
int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
{
int ret;
struct psp_gfx_cmd_resp *cmd;
/*
* TODO: bypass the loading in sriov for now
*/
if (amdgpu_sriov_vf(psp->adev))
return 0;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
psp_prep_xgmi_ta_invoke_cmd_buf(cmd, ta_cmd_id,
psp->xgmi_context.session_id);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
kfree(cmd);
return ret;
}
static int psp_xgmi_terminate(struct psp_context *psp)
{
int ret;
if (!psp->xgmi_context.initialized)
return 0;
ret = psp_xgmi_unload(psp);
if (ret)
return ret;
psp->xgmi_context.initialized = 0;
/* free xgmi shared memory */
amdgpu_bo_free_kernel(&psp->xgmi_context.xgmi_shared_bo,
&psp->xgmi_context.xgmi_shared_mc_addr,
&psp->xgmi_context.xgmi_shared_buf);
return 0;
}
static int psp_xgmi_initialize(struct psp_context *psp)
{
struct ta_xgmi_shared_memory *xgmi_cmd;
int ret;
if (!psp->xgmi_context.initialized) {
ret = psp_xgmi_init_shared_buf(psp);
if (ret)
return ret;
}
/* Load XGMI TA */
ret = psp_xgmi_load(psp);
if (ret)
return ret;
/* Initialize XGMI session */
xgmi_cmd = (struct ta_xgmi_shared_memory *)(psp->xgmi_context.xgmi_shared_buf);
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE;
ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
return ret;
}
static int psp_hw_start(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@ -292,6 +479,15 @@ static int psp_hw_start(struct psp_context *psp)
if (ret)
return ret;
if (adev->gmc.xgmi.num_physical_nodes > 1) {
ret = psp_xgmi_initialize(psp);
/* Warning the XGMI seesion initialize failure
* Instead of stop driver initialization
*/
if (ret)
dev_err(psp->adev->dev,
"XGMI: Failed to initialize XGMI session\n");
}
return 0;
}
@ -321,7 +517,7 @@ static int psp_np_fw_load(struct psp_context *psp)
return ret;
ret = psp_cmd_submit_buf(psp, ucode, psp->cmd,
psp->fence_buf_mc_addr, i + 3);
psp->fence_buf_mc_addr);
if (ret)
return ret;
@ -452,6 +648,10 @@ static int psp_hw_fini(void *handle)
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
return 0;
if (adev->gmc.xgmi.num_physical_nodes > 1 &&
psp->xgmi_context.initialized == 1)
psp_xgmi_terminate(psp);
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
@ -479,6 +679,15 @@ static int psp_suspend(void *handle)
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
return 0;
if (adev->gmc.xgmi.num_physical_nodes > 1 &&
psp->xgmi_context.initialized == 1) {
ret = psp_xgmi_terminate(psp);
if (ret) {
DRM_ERROR("Failed to terminate xgmi ta\n");
return ret;
}
}
ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
if (ret) {
DRM_ERROR("PSP ring stop failed\n");


@ -27,14 +27,17 @@
#include "amdgpu.h"
#include "psp_gfx_if.h"
#include "ta_xgmi_if.h"
#define PSP_FENCE_BUFFER_SIZE 0x1000
#define PSP_CMD_BUFFER_SIZE 0x1000
#define PSP_ASD_SHARED_MEM_SIZE 0x4000
#define PSP_ASD_SHARED_MEM_SIZE 0x4000
#define PSP_XGMI_SHARED_MEM_SIZE 0x4000
#define PSP_1_MEG 0x100000
#define PSP_TMR_SIZE 0x400000
struct psp_context;
struct psp_xgmi_node_info;
struct psp_xgmi_topology_info;
enum psp_ring_type
@ -80,12 +83,20 @@ struct psp_funcs
enum AMDGPU_UCODE_ID ucode_type);
bool (*smu_reload_quirk)(struct psp_context *psp);
int (*mode1_reset)(struct psp_context *psp);
uint64_t (*xgmi_get_device_id)(struct psp_context *psp);
uint64_t (*xgmi_get_node_id)(struct psp_context *psp);
uint64_t (*xgmi_get_hive_id)(struct psp_context *psp);
int (*xgmi_get_topology_info)(struct psp_context *psp, int number_devices,
struct psp_xgmi_topology_info *topology);
struct psp_xgmi_topology_info *topology);
int (*xgmi_set_topology_info)(struct psp_context *psp, int number_devices,
struct psp_xgmi_topology_info *topology);
struct psp_xgmi_topology_info *topology);
};
struct psp_xgmi_context {
uint8_t initialized;
uint32_t session_id;
struct amdgpu_bo *xgmi_shared_bo;
uint64_t xgmi_shared_mc_addr;
void *xgmi_shared_buf;
};
struct psp_context
@ -96,7 +107,7 @@ struct psp_context
const struct psp_funcs *funcs;
/* fence buffer */
/* firmware buffer */
struct amdgpu_bo *fw_pri_bo;
uint64_t fw_pri_mc_addr;
void *fw_pri_buf;
@ -134,6 +145,16 @@ struct psp_context
struct amdgpu_bo *cmd_buf_bo;
uint64_t cmd_buf_mc_addr;
struct psp_gfx_cmd_resp *cmd_buf_mem;
/* fence value associated with cmd buffer */
atomic_t fence_value;
/* xgmi ta firmware and buffer */
const struct firmware *ta_fw;
uint32_t ta_xgmi_ucode_version;
uint32_t ta_xgmi_ucode_size;
uint8_t *ta_xgmi_start_addr;
struct psp_xgmi_context xgmi_context;
};
struct amdgpu_psp_funcs {
@ -141,21 +162,17 @@ struct amdgpu_psp_funcs {
enum AMDGPU_UCODE_ID);
};
#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
struct psp_xgmi_node_info {
uint64_t node_id;
uint8_t num_hops;
uint8_t is_sharing_enabled;
enum ta_xgmi_assigned_sdma_engine sdma_engine;
};
struct psp_xgmi_topology_info {
/* Generated by PSP to identify the GPU instance within xgmi connection */
uint64_t device_id;
/*
* If all bits set to 0 , driver indicates it wants to retrieve the xgmi
* connection vector topology, but not access enable the connections
* if some or all bits are set to 1, driver indicates it want to retrieve the
* current xgmi topology and access enable the link to GPU[i] associated
* with the bit position in the vector.
* On return,: bits indicated which xgmi links are present/active depending
* on the value passed in. The relative bit offset for the relative GPU index
* within the hive is always marked active.
*/
uint32_t connection_mask;
uint32_t reserved; /* must be 0 */
uint32_t num_nodes;
struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
};
#define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type))
@ -177,8 +194,8 @@ struct psp_xgmi_topology_info {
((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false)
#define psp_mode1_reset(psp) \
((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false)
#define psp_xgmi_get_device_id(psp) \
((psp)->funcs->xgmi_get_device_id ? (psp)->funcs->xgmi_get_device_id((psp)) : 0)
#define psp_xgmi_get_node_id(psp) \
((psp)->funcs->xgmi_get_node_id ? (psp)->funcs->xgmi_get_node_id((psp)) : 0)
#define psp_xgmi_get_hive_id(psp) \
((psp)->funcs->xgmi_get_hive_id ? (psp)->funcs->xgmi_get_hive_id((psp)) : 0)
#define psp_xgmi_get_topology_info(psp, num_device, topology) \
@ -199,6 +216,8 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
int psp_gpu_reset(struct amdgpu_device *adev);
int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
#endif


@ -338,7 +338,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
*/
void amdgpu_ring_fini(struct amdgpu_ring *ring)
{
ring->ready = false;
ring->sched.ready = false;
/* Not to finish a ring which is not initialized */
if (!(ring->adev) || !(ring->adev->rings[ring->idx]))
@ -500,3 +500,29 @@ static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring)
debugfs_remove(ring->ent);
#endif
}
/**
* amdgpu_ring_test_helper - tests ring and set sched readiness status
*
* @ring: ring to try the recovery on
*
* Tests ring and set sched readiness status
*
* Returns 0 on success, error on failure.
*/
int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
int r;
r = amdgpu_ring_test_ring(ring);
if (r)
DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n",
ring->name, r);
else
DRM_DEV_DEBUG(adev->dev, "ring test on %s succeeded\n",
ring->name);
ring->sched.ready = !r;
return r;
}
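
This helper folds the common "test the ring, then mark the scheduler ready or not" step into one call, matching the ring->ready to ring->sched.ready conversion seen throughout this patch. A hedged sketch of a typical call site (the per-IP-block conversions themselves are not part of this excerpt):

	/* Sketch: in an IP block's ->hw_init(), after the ring has been
	 * programmed; the helper sets ring->sched.ready from the result.
	 */
	r = amdgpu_ring_test_helper(ring);
	if (r)
		return r;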


@ -129,8 +129,9 @@ struct amdgpu_ring_funcs {
unsigned emit_ib_size;
/* command emit functions */
void (*emit_ib)(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch);
bool ctx_switch);
void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
uint64_t seq, unsigned flags);
void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
@ -189,7 +190,6 @@ struct amdgpu_ring {
uint64_t gpu_addr;
uint64_t ptr_mask;
uint32_t buf_mask;
bool ready;
u32 idx;
u32 me;
u32 pipe;
@ -229,7 +229,7 @@ struct amdgpu_ring {
#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c))
#define amdgpu_ring_emit_ib(r, job, ib, c) ((r)->funcs->emit_ib((r), (job), (ib), (c)))
#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
@ -313,4 +313,6 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
ring->count_dw -= count_dw;
}
int amdgpu_ring_test_helper(struct amdgpu_ring *ring);
#endif


@ -0,0 +1,282 @@
/*
* Copyright 2014 Advanced Micro Devices, Inc.
* Copyright 2008 Red Hat Inc.
* Copyright 2009 Jerome Glisse.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
/**
* amdgpu_gfx_rlc_enter_safe_mode - Put the RLC into safe mode
*
* @adev: amdgpu_device pointer
*
* Puts the RLC into safe mode if the RLC is enabled and not already in safe mode.
*/
void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
{
if (adev->gfx.rlc.in_safe_mode)
return;
/* if RLC is not enabled, do nothing */
if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
return;
if (adev->cg_flags &
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_3D_CGCG)) {
adev->gfx.rlc.funcs->set_safe_mode(adev);
adev->gfx.rlc.in_safe_mode = true;
}
}
/**
* amdgpu_gfx_rlc_exit_safe_mode - Take the RLC out of safe mode
*
* @adev: amdgpu_device pointer
*
* Takes the RLC out of safe mode if the RLC is enabled and currently in safe mode.
*/
void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev)
{
if (!(adev->gfx.rlc.in_safe_mode))
return;
/* if RLC is not enabled, do nothing */
if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
return;
if (adev->cg_flags &
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_3D_CGCG)) {
adev->gfx.rlc.funcs->unset_safe_mode(adev);
adev->gfx.rlc.in_safe_mode = false;
}
}
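/*
 * Usage sketch (illustrative, not part of this patch): CG/PG register updates
 * are expected to be bracketed by this pair, as in the ci_dpm DIDT code
 * converted below. The function name is a placeholder.
 */
static void example_update_didt(struct amdgpu_device *adev)
{
        amdgpu_gfx_rlc_enter_safe_mode(adev);  /* no-op if the RLC is disabled or already in safe mode */

        /* ... reprogram DIDT / clockgating registers here ... */

        amdgpu_gfx_rlc_exit_safe_mode(adev);  /* leaves safe mode only if it was entered above */
}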
/**
* amdgpu_gfx_rlc_init_sr - Init the save restore block
*
* @adev: amdgpu_device pointer
* @dws: the size of the save restore block, in dwords
*
* Allocate and fill the RLC save restore block.
* Returns 0 on success or a negative error code if allocation failed.
*/
int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws)
{
const u32 *src_ptr;
volatile u32 *dst_ptr;
u32 i;
int r;
/* allocate save restore block */
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.save_restore_obj,
&adev->gfx.rlc.save_restore_gpu_addr,
(void **)&adev->gfx.rlc.sr_ptr);
if (r) {
dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r);
amdgpu_gfx_rlc_fini(adev);
return r;
}
/* write the sr buffer */
src_ptr = adev->gfx.rlc.reg_list;
dst_ptr = adev->gfx.rlc.sr_ptr;
for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
dst_ptr[i] = cpu_to_le32(src_ptr[i]);
amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
return 0;
}
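/*
 * Usage sketch (illustrative, not part of this patch): the IP block fills in
 * reg_list/reg_list_size before calling the helper, as in the gfx_v6/gfx_v7
 * conversions below. example_rlc_save_restore_register_list is a placeholder.
 */
static int example_rlc_sr_init(struct amdgpu_device *adev)
{
        u32 dws;

        adev->gfx.rlc.reg_list = example_rlc_save_restore_register_list;
        adev->gfx.rlc.reg_list_size = ARRAY_SIZE(example_rlc_save_restore_register_list);

        /* block size in dwords; real drivers may reserve extra space on top */
        dws = adev->gfx.rlc.reg_list_size;

        /* on failure the helper has already called amdgpu_gfx_rlc_fini() */
        return amdgpu_gfx_rlc_init_sr(adev, dws);
}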
/**
* amdgpu_gfx_rlc_init_csb - Init the clear state block
*
* @adev: amdgpu_device pointer
*
* Allocate and fill the RLC clear state block.
* Returns 0 on success or a negative error code if allocation failed.
*/
int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
{
volatile u32 *dst_ptr;
u32 dws;
int r;
/* allocate clear state block */
adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev);
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
if (r) {
dev_err(adev->dev, "(%d) failed to create rlc csb bo\n", r);
amdgpu_gfx_rlc_fini(adev);
return r;
}
/* set up the cs buffer */
dst_ptr = adev->gfx.rlc.cs_ptr;
adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr);
amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
return 0;
}
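/*
 * Usage sketch (illustrative, not part of this patch): cs_data and the
 * get_csb_size()/get_csb_buffer() callbacks must be wired up before calling
 * the helper, since it uses both internally.
 */
static int example_rlc_csb_init(struct amdgpu_device *adev)
{
        /* cs_data is the ASIC's clear-state table, set up earlier by the IP code */
        if (!adev->gfx.rlc.cs_data)
                return 0;

        /* sizes and fills the BO via funcs->get_csb_size()/get_csb_buffer() */
        return amdgpu_gfx_rlc_init_csb(adev);
}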
/**
* amdgpu_gfx_rlc_init_cpt - Init the CP table
*
* @adev: amdgpu_device pointer
*
* Allocate and fill the RLC CP table.
* Returns 0 on success or a negative error code if allocation failed.
*/
int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev)
{
int r;
r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
if (r) {
dev_err(adev->dev, "(%d) failed to create cp table bo\n", r);
amdgpu_gfx_rlc_fini(adev);
return r;
}
/* set up the cp table */
amdgpu_gfx_rlc_setup_cp_table(adev);
amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
return 0;
}
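/*
 * Usage sketch (illustrative, not part of this patch): cp_table_size and the
 * CE/PFP/ME/MEC(2) firmwares must be in place first, because the helper
 * immediately copies their jump tables via amdgpu_gfx_rlc_setup_cp_table().
 */
static int example_rlc_cpt_init(struct amdgpu_device *adev)
{
        if (!adev->gfx.rlc.cp_table_size)
                return 0;

        /* allocates the BO and writes the firmware jump tables into it */
        return amdgpu_gfx_rlc_init_cpt(adev);
}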
/**
* amdgpu_gfx_rlc_setup_cp_table - set up the buffer of the CP table
*
* @adev: amdgpu_device pointer
*
* Write the CP firmware jump tables into the CP table buffer.
*/
void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev)
{
const __le32 *fw_data;
volatile u32 *dst_ptr;
int me, i, max_me;
u32 bo_offset = 0;
u32 table_offset, table_size;
max_me = adev->gfx.rlc.funcs->get_cp_table_num(adev);
/* write the cp table buffer */
dst_ptr = adev->gfx.rlc.cp_table_ptr;
for (me = 0; me < max_me; me++) {
if (me == 0) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
fw_data = (const __le32 *)
(adev->gfx.ce_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 1) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
fw_data = (const __le32 *)
(adev->gfx.pfp_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 2) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
fw_data = (const __le32 *)
(adev->gfx.me_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 3) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
fw_data = (const __le32 *)
(adev->gfx.mec_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 4) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
fw_data = (const __le32 *)
(adev->gfx.mec2_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
}
for (i = 0; i < table_size; i ++) {
dst_ptr[bo_offset + i] =
cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
}
bo_offset += table_size;
}
}
/**
* amdgpu_gfx_rlc_fini - Free the BOs used by the RLC
*
* @adev: amdgpu_device pointer
*
* Free the three BOs used for the RLC save restore block, clear state block
* and CP/jump table block.
*/
void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev)
{
/* save restore block */
if (adev->gfx.rlc.save_restore_obj) {
amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj,
&adev->gfx.rlc.save_restore_gpu_addr,
(void **)&adev->gfx.rlc.sr_ptr);
}
/* clear state block */
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
/* jump table block */
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
}


@ -0,0 +1,98 @@
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_RLC_H__
#define __AMDGPU_RLC_H__
#include "clearstate_defs.h"
struct amdgpu_rlc_funcs {
bool (*is_rlc_enabled)(struct amdgpu_device *adev);
void (*set_safe_mode)(struct amdgpu_device *adev);
void (*unset_safe_mode)(struct amdgpu_device *adev);
int (*init)(struct amdgpu_device *adev);
u32 (*get_csb_size)(struct amdgpu_device *adev);
void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer);
int (*get_cp_table_num)(struct amdgpu_device *adev);
int (*resume)(struct amdgpu_device *adev);
void (*stop)(struct amdgpu_device *adev);
void (*reset)(struct amdgpu_device *adev);
void (*start)(struct amdgpu_device *adev);
};
struct amdgpu_rlc {
/* for power gating */
struct amdgpu_bo *save_restore_obj;
uint64_t save_restore_gpu_addr;
volatile uint32_t *sr_ptr;
const u32 *reg_list;
u32 reg_list_size;
/* for clear state */
struct amdgpu_bo *clear_state_obj;
uint64_t clear_state_gpu_addr;
volatile uint32_t *cs_ptr;
const struct cs_section_def *cs_data;
u32 clear_state_size;
/* for cp tables */
struct amdgpu_bo *cp_table_obj;
uint64_t cp_table_gpu_addr;
volatile uint32_t *cp_table_ptr;
u32 cp_table_size;
/* safe mode for updating CG/PG state */
bool in_safe_mode;
const struct amdgpu_rlc_funcs *funcs;
/* for firmware data */
u32 save_and_restore_offset;
u32 clear_state_descriptor_offset;
u32 avail_scratch_ram_locations;
u32 reg_restore_list_size;
u32 reg_list_format_start;
u32 reg_list_format_separate_start;
u32 starting_offsets_start;
u32 reg_list_format_size_bytes;
u32 reg_list_size_bytes;
u32 reg_list_format_direct_reg_list_length;
u32 save_restore_list_cntl_size_bytes;
u32 save_restore_list_gpm_size_bytes;
u32 save_restore_list_srm_size_bytes;
u32 *register_list_format;
u32 *register_restore;
u8 *save_restore_list_cntl;
u8 *save_restore_list_gpm;
u8 *save_restore_list_srm;
bool is_rlc_v2_1;
};
void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev);
void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev);
int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws);
int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev);
int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev);
void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev);
void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev);
#endif


@ -28,17 +28,31 @@
* GPU SDMA IP block helper functions.
*/
struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
struct amdgpu_sdma_instance *amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
if (&adev->sdma.instance[i].ring == ring)
break;
if (ring == &adev->sdma.instance[i].ring ||
ring == &adev->sdma.instance[i].page)
return &adev->sdma.instance[i];
if (i < AMDGPU_MAX_SDMA_INSTANCES)
return &adev->sdma.instance[i];
else
return NULL;
return NULL;
}
int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index)
{
struct amdgpu_device *adev = ring->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
if (ring == &adev->sdma.instance[i].ring ||
ring == &adev->sdma.instance[i].page) {
*index = i;
return 0;
}
}
return -EINVAL;
}
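/*
 * Usage sketch (illustrative, not part of this patch): resolving the owning
 * SDMA instance and its index from a ring, loosely following the cik_sdma
 * callers below. The wrapper name is a placeholder.
 */
static bool example_ring_uses_burst_nop(struct amdgpu_ring *ring)
{
        struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
        uint32_t index;

        if (!sdma)
                return false;  /* ring is neither an SDMA gfx ring nor a page ring */

        if (!amdgpu_sdma_get_index_from_ring(ring, &index))
                DRM_DEBUG("ring %s belongs to SDMA instance %u\n", ring->name, index);

        return sdma->burst_nop;
}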


@ -41,6 +41,7 @@ struct amdgpu_sdma_instance {
uint32_t feature_version;
struct amdgpu_ring ring;
struct amdgpu_ring page;
bool burst_nop;
};
@ -50,6 +51,7 @@ struct amdgpu_sdma {
struct amdgpu_irq_src illegal_inst_irq;
int num_instances;
uint32_t srbm_soft_reset;
bool has_page_queue;
};
/*
@ -92,6 +94,7 @@ struct amdgpu_buffer_funcs {
#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
struct amdgpu_sdma_instance *
amdgpu_get_sdma_instance(struct amdgpu_ring *ring);
amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring);
int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index);
#endif


@ -218,6 +218,7 @@ TRACE_EVENT(amdgpu_vm_grab_id,
TP_ARGS(vm, ring, job),
TP_STRUCT__entry(
__field(u32, pasid)
__string(ring, ring->name)
__field(u32, ring)
__field(u32, vmid)
__field(u32, vm_hub)
@ -227,14 +228,14 @@ TRACE_EVENT(amdgpu_vm_grab_id,
TP_fast_assign(
__entry->pasid = vm->pasid;
__entry->ring = ring->idx;
__assign_str(ring, ring->name)
__entry->vmid = job->vmid;
__entry->vm_hub = ring->funcs->vmhub,
__entry->pd_addr = job->vm_pd_addr;
__entry->needs_flush = job->vm_needs_flush;
),
TP_printk("pasid=%d, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
__entry->pasid, __entry->ring, __entry->vmid,
TP_printk("pasid=%d, ring=%s, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
__entry->pasid, __get_str(ring), __entry->vmid,
__entry->vm_hub, __entry->pd_addr, __entry->needs_flush)
);
@ -366,20 +367,20 @@ TRACE_EVENT(amdgpu_vm_flush,
uint64_t pd_addr),
TP_ARGS(ring, vmid, pd_addr),
TP_STRUCT__entry(
__field(u32, ring)
__string(ring, ring->name)
__field(u32, vmid)
__field(u32, vm_hub)
__field(u64, pd_addr)
),
TP_fast_assign(
__entry->ring = ring->idx;
__assign_str(ring, ring->name)
__entry->vmid = vmid;
__entry->vm_hub = ring->funcs->vmhub;
__entry->pd_addr = pd_addr;
),
TP_printk("ring=%u, id=%u, hub=%u, pd_addr=%010Lx",
__entry->ring, __entry->vmid,
TP_printk("ring=%s, id=%u, hub=%u, pd_addr=%010Lx",
__get_str(ring), __entry->vmid,
__entry->vm_hub,__entry->pd_addr)
);


@ -61,100 +61,6 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
/*
* Global memory.
*/
/**
* amdgpu_ttm_mem_global_init - Initialize and acquire reference to
* memory object
*
* @ref: Object for initialization.
*
* This is called by drm_global_item_ref() when an object is being
* initialized.
*/
static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
{
return ttm_mem_global_init(ref->object);
}
/**
* amdgpu_ttm_mem_global_release - Drop reference to a memory object
*
* @ref: Object being removed
*
* This is called by drm_global_item_unref() when an object is being
* released.
*/
static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
{
ttm_mem_global_release(ref->object);
}
/**
* amdgpu_ttm_global_init - Initialize global TTM memory reference structures.
*
* @adev: AMDGPU device for which the global structures need to be registered.
*
* This is called as part of the AMDGPU ttm init from amdgpu_ttm_init()
* during bring up.
*/
static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
{
struct drm_global_reference *global_ref;
int r;
/* ensure reference is false in case init fails */
adev->mman.mem_global_referenced = false;
global_ref = &adev->mman.mem_global_ref;
global_ref->global_type = DRM_GLOBAL_TTM_MEM;
global_ref->size = sizeof(struct ttm_mem_global);
global_ref->init = &amdgpu_ttm_mem_global_init;
global_ref->release = &amdgpu_ttm_mem_global_release;
r = drm_global_item_ref(global_ref);
if (r) {
DRM_ERROR("Failed setting up TTM memory accounting "
"subsystem.\n");
goto error_mem;
}
adev->mman.bo_global_ref.mem_glob =
adev->mman.mem_global_ref.object;
global_ref = &adev->mman.bo_global_ref.ref;
global_ref->global_type = DRM_GLOBAL_TTM_BO;
global_ref->size = sizeof(struct ttm_bo_global);
global_ref->init = &ttm_bo_global_init;
global_ref->release = &ttm_bo_global_release;
r = drm_global_item_ref(global_ref);
if (r) {
DRM_ERROR("Failed setting up TTM BO subsystem.\n");
goto error_bo;
}
mutex_init(&adev->mman.gtt_window_lock);
adev->mman.mem_global_referenced = true;
return 0;
error_bo:
drm_global_item_unref(&adev->mman.mem_global_ref);
error_mem:
return r;
}
static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
{
if (adev->mman.mem_global_referenced) {
mutex_destroy(&adev->mman.gtt_window_lock);
drm_global_item_unref(&adev->mman.bo_global_ref.ref);
drm_global_item_unref(&adev->mman.mem_global_ref);
adev->mman.mem_global_referenced = false;
}
}
static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
{
return 0;
@ -1758,14 +1664,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
int r;
u64 vis_vram_limit;
/* initialize global references for vram/gtt */
r = amdgpu_ttm_global_init(adev);
if (r) {
return r;
}
mutex_init(&adev->mman.gtt_window_lock);
/* No others user of address space so set it to 0 */
r = ttm_bo_device_init(&adev->mman.bdev,
adev->mman.bo_global_ref.ref.object,
&amdgpu_bo_driver,
adev->ddev->anon_inode->i_mapping,
DRM_FILE_PAGE_OFFSET,
@ -1922,7 +1824,6 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
ttm_bo_device_release(&adev->mman.bdev);
amdgpu_ttm_global_fini(adev);
adev->mman.initialized = false;
DRM_INFO("amdgpu: ttm finalized\n");
}
@ -2069,7 +1970,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
unsigned i;
int r;
if (direct_submit && !ring->ready) {
if (direct_submit && !ring->sched.ready) {
DRM_ERROR("Trying to move memory with ring turned off.\n");
return -EINVAL;
}


@ -39,8 +39,6 @@
#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
struct amdgpu_mman {
struct ttm_bo_global_ref bo_global_ref;
struct drm_global_reference mem_global_ref;
struct ttm_bo_device bdev;
bool mem_global_referenced;
bool initialized;


@ -57,6 +57,17 @@ struct psp_firmware_header_v1_0 {
uint32_t sos_size_bytes;
};
/* version_major=1, version_minor=0 */
struct ta_firmware_header_v1_0 {
struct common_firmware_header header;
uint32_t ta_xgmi_ucode_version;
uint32_t ta_xgmi_offset_bytes;
uint32_t ta_xgmi_size_bytes;
uint32_t ta_ras_ucode_version;
uint32_t ta_ras_offset_bytes;
uint32_t ta_ras_size_bytes;
};
/* version_major=1, version_minor=0 */
struct gfx_firmware_header_v1_0 {
struct common_firmware_header header;
@ -170,6 +181,7 @@ union amdgpu_firmware_header {
struct mc_firmware_header_v1_0 mc;
struct smc_firmware_header_v1_0 smc;
struct psp_firmware_header_v1_0 psp;
struct ta_firmware_header_v1_0 ta;
struct gfx_firmware_header_v1_0 gfx;
struct rlc_firmware_header_v1_0 rlc;
struct rlc_firmware_header_v2_0 rlc_v2_0;


@ -1243,30 +1243,20 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence;
long r;
uint32_t ip_instance = ring->me;
r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
if (r) {
DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ip_instance, r);
if (r)
goto error;
}
r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
if (r) {
DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r);
if (r)
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance);
if (r == 0)
r = -ETIMEDOUT;
} else if (r < 0) {
DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r);
} else {
DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx);
else if (r > 0)
r = 0;
}
dma_fence_put(fence);


@ -1032,8 +1032,10 @@ out:
* @ib: the IB to execute
*
*/
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
amdgpu_ring_write(ring, VCE_CMD_IB);
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
@ -1079,11 +1081,9 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
return 0;
r = amdgpu_ring_alloc(ring, 16);
if (r) {
DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n",
ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring, VCE_CMD_END);
amdgpu_ring_commit(ring);
@ -1093,14 +1093,8 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed\n",
ring->idx);
if (i >= timeout)
r = -ETIMEDOUT;
}
return r;
}
@ -1121,27 +1115,19 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
return 0;
r = amdgpu_vce_get_create_msg(ring, 1, NULL);
if (r) {
DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
if (r)
goto error;
}
r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
if (r)
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out.\n");
if (r == 0)
r = -ETIMEDOUT;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
} else {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
else if (r > 0)
r = 0;
}
error:
dma_fence_put(fence);
return r;


@ -65,8 +65,8 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch);
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
struct amdgpu_ib *ib, bool ctx_switch);
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
unsigned flags);
int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);


@ -425,11 +425,9 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring,
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
@ -441,14 +439,9 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
return r;
}
@ -570,30 +563,20 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
if (r) {
DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
if (r)
goto error;
}
r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
if (r)
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out.\n");
if (r == 0)
r = -ETIMEDOUT;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
} else {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
else if (r > 0)
r = 0;
}
dma_fence_put(fence);
error:
return r;
}
@ -606,11 +589,9 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
int r;
r = amdgpu_ring_alloc(ring, 16);
if (r) {
DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n",
ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring, VCN_ENC_CMD_END);
amdgpu_ring_commit(ring);
@ -620,14 +601,8 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed\n",
ring->idx);
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
}
return r;
}
@ -742,27 +717,19 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
if (r) {
DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
if (r)
goto error;
}
r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
if (r)
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out.\n");
if (r == 0)
r = -ETIMEDOUT;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
} else {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
else if (r > 0)
r = 0;
}
error:
dma_fence_put(fence);
return r;
@ -778,11 +745,8 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring,
PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0));
@ -796,14 +760,8 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
return r;
}
@ -856,21 +814,18 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r = 0;
r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to set jpeg register (%ld).\n", r);
if (r)
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out.\n");
r = -ETIMEDOUT;
goto error;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto error;
} else
} else {
r = 0;
}
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
@ -879,15 +834,10 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout)
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
else {
DRM_ERROR("ib test failed (0x%08X)\n", tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
dma_fence_put(fence);
error:
return r;
}


@ -23,16 +23,6 @@
#include "amdgpu.h"
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
{
uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
addr -= AMDGPU_VA_RESERVED_SIZE;
addr = amdgpu_gmc_sign_extend(addr);
return addr;
}
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
{
/* By now all MMIO pages except mailbox are blocked */
@ -41,88 +31,6 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
return RREG32_NO_KIQ(0xc040) == 0xffffffff;
}
int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
{
int r;
void *ptr;
r = amdgpu_bo_create_kernel(adev, AMDGPU_CSA_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, &adev->virt.csa_obj,
&adev->virt.csa_vmid0_addr, &ptr);
if (r)
return r;
memset(ptr, 0, AMDGPU_CSA_SIZE);
return 0;
}
void amdgpu_free_static_csa(struct amdgpu_device *adev) {
amdgpu_bo_free_kernel(&adev->virt.csa_obj,
&adev->virt.csa_vmid0_addr,
NULL);
}
/*
* amdgpu_map_static_csa should be called during amdgpu_vm_init;
* it maps the virtual address amdgpu_csa_vaddr() into this VM, and each GFX
* command submission should use this virtual address within its META_DATA init
* package to support SR-IOV GFX preemption.
*/
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo_va **bo_va)
{
uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
struct ww_acquire_ctx ticket;
struct list_head list;
struct amdgpu_bo_list_entry pd;
struct ttm_validate_buffer csa_tv;
int r;
INIT_LIST_HEAD(&list);
INIT_LIST_HEAD(&csa_tv.head);
csa_tv.bo = &adev->virt.csa_obj->tbo;
csa_tv.shared = true;
list_add(&csa_tv.head, &list);
amdgpu_vm_get_pd_bo(vm, &list, &pd);
r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
if (r) {
DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
return r;
}
*bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj);
if (!*bo_va) {
ttm_eu_backoff_reservation(&ticket, &list);
DRM_ERROR("failed to create bo_va for static CSA\n");
return -ENOMEM;
}
r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
AMDGPU_CSA_SIZE);
if (r) {
DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
amdgpu_vm_bo_rmv(adev, *bo_va);
ttm_eu_backoff_reservation(&ticket, &list);
return r;
}
r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, AMDGPU_CSA_SIZE,
AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
AMDGPU_PTE_EXECUTABLE);
if (r) {
DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
amdgpu_vm_bo_rmv(adev, *bo_va);
ttm_eu_backoff_reservation(&ticket, &list);
return r;
}
ttm_eu_backoff_reservation(&ticket, &list);
return 0;
}
void amdgpu_virt_init_setting(struct amdgpu_device *adev)
{
/* enable virtual display */
@ -162,9 +70,7 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
goto failed_kiq_read;
if (in_interrupt())
might_sleep();
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
@ -210,9 +116,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
goto failed_kiq_write;
if (in_interrupt())
might_sleep();
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
@ -228,6 +132,46 @@ failed_kiq_write:
pr_err("failed to write reg:%x\n", reg);
}
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *ring = &kiq->ring;
signed long r, cnt = 0;
unsigned long flags;
uint32_t seq;
spin_lock_irqsave(&kiq->ring_lock, flags);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
ref, mask);
amdgpu_fence_emit_polling(ring, &seq);
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
/* don't wait anymore for IRQ context */
if (r < 1 && in_interrupt())
goto failed_kiq;
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
}
if (cnt > MAX_KIQ_REG_TRY)
goto failed_kiq;
return;
failed_kiq:
pr_err("failed to write reg %x wait reg %x\n", reg0, reg1);
}
/**
* amdgpu_virt_request_full_gpu() - request full gpu access
* @amdgpu: amdgpu device.


@ -238,7 +238,6 @@ typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info ;
struct amdgpu_virt {
uint32_t caps;
struct amdgpu_bo *csa_obj;
uint64_t csa_vmid0_addr;
bool chained_ib_support;
uint32_t reg_val_offs;
struct amdgpu_irq_src ack_irq;
@ -251,8 +250,6 @@ struct amdgpu_virt {
uint32_t gim_feature;
};
#define AMDGPU_CSA_SIZE (8 * 1024)
#define amdgpu_sriov_enabled(adev) \
((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)
@ -277,17 +274,13 @@ static inline bool is_virtual_machine(void)
#endif
}
struct amdgpu_vm;
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev);
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
int amdgpu_allocate_static_csa(struct amdgpu_device *adev);
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo_va **bo_va);
void amdgpu_free_static_csa(struct amdgpu_device *adev);
void amdgpu_virt_init_setting(struct amdgpu_device *adev);
uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t rreg1,
uint32_t ref, uint32_t mask);
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);


@ -63,7 +63,7 @@ static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
{
struct psp_xgmi_topology_info tmp_topology[AMDGPU_MAX_XGMI_DEVICE_PER_HIVE];
struct psp_xgmi_topology_info *tmp_topology;
struct amdgpu_hive_info *hive;
struct amdgpu_xgmi *entry;
struct amdgpu_device *tmp_adev;
@ -73,10 +73,12 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
if ((adev->asic_type < CHIP_VEGA20) ||
(adev->flags & AMD_IS_APU) )
return 0;
adev->gmc.xgmi.device_id = psp_xgmi_get_device_id(&adev->psp);
adev->gmc.xgmi.node_id = psp_xgmi_get_node_id(&adev->psp);
adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp);
memset(&tmp_topology[0], 0, sizeof(tmp_topology));
tmp_topology = kzalloc(sizeof(struct psp_xgmi_topology_info), GFP_KERNEL);
if (!tmp_topology)
return -ENOMEM;
mutex_lock(&xgmi_mutex);
hive = amdgpu_get_xgmi_hive(adev);
if (!hive)
@ -84,23 +86,28 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
list_for_each_entry(entry, &hive->device_list, head)
tmp_topology[count++].device_id = entry->device_id;
tmp_topology->nodes[count++].node_id = entry->node_id;
ret = psp_xgmi_get_topology_info(&adev->psp, count, tmp_topology);
if (ret) {
dev_err(adev->dev,
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
adev->gmc.xgmi.device_id,
adev->gmc.xgmi.hive_id, ret);
goto exit;
/* Each PSP needs to get the latest topology */
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, tmp_topology);
if (ret) {
dev_err(tmp_adev->dev,
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
tmp_adev->gmc.xgmi.node_id,
tmp_adev->gmc.xgmi.hive_id, ret);
/* To do : continue with some node failed or disable the whole hive */
break;
}
}
/* Each PSP needs to set the latest topology */
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology);
if (ret) {
dev_err(tmp_adev->dev,
"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
tmp_adev->gmc.xgmi.device_id,
tmp_adev->gmc.xgmi.node_id,
tmp_adev->gmc.xgmi.hive_id, ret);
/* To do : continue with some node failed or disable the whole hive */
break;
@ -113,7 +120,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
exit:
mutex_unlock(&xgmi_mutex);
kfree(tmp_topology);
return ret;
}


@ -743,19 +743,19 @@ static int ci_enable_didt(struct amdgpu_device *adev, bool enable)
if (pi->caps_sq_ramping || pi->caps_db_ramping ||
pi->caps_td_ramping || pi->caps_tcp_ramping) {
adev->gfx.rlc.funcs->enter_safe_mode(adev);
amdgpu_gfx_rlc_enter_safe_mode(adev);
if (enable) {
ret = ci_program_pt_config_registers(adev, didt_config_ci);
if (ret) {
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
return ret;
}
}
ci_do_enable_didt(adev, enable);
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
}
return 0;


@ -198,7 +198,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
int i;
for (i = 0; i < count; i++)
@ -218,9 +218,11 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
* Schedule an IB in the DMA ring (CIK).
*/
static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 extra_bits = vmid & 0xf;
/* IB packet must end on a 8 DW boundary */
@ -316,8 +318,8 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], 0);
}
sdma0->ready = false;
sdma1->ready = false;
sdma0->sched.ready = false;
sdma1->sched.ready = false;
}
/**
@ -494,18 +496,16 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
/* enable DMA IBs */
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
ring->ready = true;
ring->sched.ready = true;
}
cik_sdma_enable(adev, true);
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
}
if (adev->mman.buffer_funcs_ring == ring)
amdgpu_ttm_set_buffer_funcs_status(adev, true);
@ -618,21 +618,17 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
u64 gpu_addr;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
r = amdgpu_ring_alloc(ring, 5);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_device_wb_free(adev, index);
return r;
}
if (r)
goto error_free_wb;
amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
@ -647,15 +643,11 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
amdgpu_device_wb_free(adev, index);
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_wb:
amdgpu_device_wb_free(adev, index);
return r;
}
@ -678,20 +670,16 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
if (r)
goto err0;
}
ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE,
SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
@ -706,21 +694,16 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out\n");
r = -ETIMEDOUT;
goto err1;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err1;
}
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF) {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
if (tmp == 0xDEADBEEF)
r = 0;
} else {
DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
else
r = -EINVAL;
}
err1:
amdgpu_ib_free(adev, &ib, NULL);
@ -822,7 +805,7 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
*/
static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
u32 pad_count;
int i;
@ -1214,8 +1197,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
u8 instance_id;
DRM_ERROR("Illegal instruction in SDMA command stream\n");
schedule_work(&adev->reset_work);
instance_id = (entry->ring_id & 0x3) >> 0;
drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
return 0;
}


@ -1775,18 +1775,15 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
int r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
if (r) {
DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
if (r)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
if (r)
goto error_free_scratch;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
amdgpu_ring_write(ring, (scratch - PACKET3_SET_CONFIG_REG_START));
amdgpu_ring_write(ring, 0xDEADBEEF);
@ -1798,13 +1795,11 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
break;
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
ring->idx, scratch, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_scratch:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@ -1845,9 +1840,11 @@ static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
}
static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
/* insert SWITCH_BUFFER packet before first IB in the ring frame */
@ -1892,17 +1889,15 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
if (r) {
DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
if (r)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
if (r)
goto err1;
}
ib.ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1);
ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_START));
ib.ptr[2] = 0xDEADBEEF;
@ -1914,22 +1909,16 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out\n");
r = -ETIMEDOUT;
goto err2;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err2;
}
tmp = RREG32(scratch);
if (tmp == 0xDEADBEEF) {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
if (tmp == 0xDEADBEEF)
r = 0;
} else {
DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
scratch, tmp);
else
r = -EINVAL;
}
err2:
amdgpu_ib_free(adev, &ib, NULL);
@ -1950,9 +1939,9 @@ static void gfx_v6_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
CP_ME_CNTL__CE_HALT_MASK));
WREG32(mmSCRATCH_UMSK, 0);
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].ready = false;
adev->gfx.gfx_ring[i].sched.ready = false;
for (i = 0; i < adev->gfx.num_compute_rings; i++)
adev->gfx.compute_ring[i].ready = false;
adev->gfx.compute_ring[i].sched.ready = false;
}
udelay(50);
}
@ -2124,12 +2113,9 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the rings */
gfx_v6_0_cp_gfx_start(adev);
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
}
return 0;
}
@ -2227,14 +2213,11 @@ static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev)
WREG32(mmCP_RB2_CNTL, tmp);
WREG32(mmCP_RB2_BASE, ring->gpu_addr >> 8);
adev->gfx.compute_ring[0].ready = false;
adev->gfx.compute_ring[1].ready = false;
for (i = 0; i < 2; i++) {
r = amdgpu_ring_test_ring(&adev->gfx.compute_ring[i]);
r = amdgpu_ring_test_helper(&adev->gfx.compute_ring[i]);
if (r)
return r;
adev->gfx.compute_ring[i].ready = true;
}
return 0;
@ -2368,18 +2351,11 @@ static void gfx_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
{
const u32 *src_ptr;
volatile u32 *dst_ptr;
u32 dws, i;
u32 dws;
u64 reg_list_mc_addr;
const struct cs_section_def *cs_data;
int r;
@ -2394,26 +2370,10 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
cs_data = adev->gfx.rlc.cs_data;
if (src_ptr) {
/* save restore block */
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.save_restore_obj,
&adev->gfx.rlc.save_restore_gpu_addr,
(void **)&adev->gfx.rlc.sr_ptr);
if (r) {
dev_warn(adev->dev, "(%d) create RLC sr bo failed\n",
r);
gfx_v6_0_rlc_fini(adev);
/* init save restore block */
r = amdgpu_gfx_rlc_init_sr(adev, dws);
if (r)
return r;
}
/* write the sr buffer */
dst_ptr = adev->gfx.rlc.sr_ptr;
for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
dst_ptr[i] = cpu_to_le32(src_ptr[i]);
amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
}
if (cs_data) {
@ -2428,7 +2388,7 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
(void **)&adev->gfx.rlc.cs_ptr);
if (r) {
dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
gfx_v6_0_rlc_fini(adev);
amdgpu_gfx_rlc_fini(adev);
return r;
}
@ -2549,8 +2509,8 @@ static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev)
if (!adev->gfx.rlc_fw)
return -EINVAL;
gfx_v6_0_rlc_stop(adev);
gfx_v6_0_rlc_reset(adev);
adev->gfx.rlc.funcs->stop(adev);
adev->gfx.rlc.funcs->reset(adev);
gfx_v6_0_init_pg(adev);
gfx_v6_0_init_cg(adev);
@ -2578,7 +2538,7 @@ static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev)
WREG32(mmRLC_UCODE_ADDR, 0);
gfx_v6_0_enable_lbpw(adev, gfx_v6_0_lbpw_supported(adev));
gfx_v6_0_rlc_start(adev);
adev->gfx.rlc.funcs->start(adev);
return 0;
}
@ -3075,6 +3035,14 @@ static const struct amdgpu_gfx_funcs gfx_v6_0_gfx_funcs = {
.select_me_pipe_q = &gfx_v6_0_select_me_pipe_q
};
static const struct amdgpu_rlc_funcs gfx_v6_0_rlc_funcs = {
.init = gfx_v6_0_rlc_init,
.resume = gfx_v6_0_rlc_resume,
.stop = gfx_v6_0_rlc_stop,
.reset = gfx_v6_0_rlc_reset,
.start = gfx_v6_0_rlc_start
};
static int gfx_v6_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@ -3082,6 +3050,7 @@ static int gfx_v6_0_early_init(void *handle)
adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS;
adev->gfx.num_compute_rings = GFX6_NUM_COMPUTE_RINGS;
adev->gfx.funcs = &gfx_v6_0_gfx_funcs;
adev->gfx.rlc.funcs = &gfx_v6_0_rlc_funcs;
gfx_v6_0_set_ring_funcs(adev);
gfx_v6_0_set_irq_funcs(adev);
@ -3114,7 +3083,7 @@ static int gfx_v6_0_sw_init(void *handle)
return r;
}
r = gfx_v6_0_rlc_init(adev);
r = adev->gfx.rlc.funcs->init(adev);
if (r) {
DRM_ERROR("Failed to init rlc BOs!\n");
return r;
@ -3165,7 +3134,7 @@ static int gfx_v6_0_sw_fini(void *handle)
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
gfx_v6_0_rlc_fini(adev);
amdgpu_gfx_rlc_fini(adev);
return 0;
}
@ -3177,7 +3146,7 @@ static int gfx_v6_0_hw_init(void *handle)
gfx_v6_0_constants_init(adev);
r = gfx_v6_0_rlc_resume(adev);
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
return r;
@ -3195,7 +3164,7 @@ static int gfx_v6_0_hw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
gfx_v6_0_cp_enable(adev, false);
gfx_v6_0_rlc_stop(adev);
adev->gfx.rlc.funcs->stop(adev);
gfx_v6_0_fini_pg(adev);
return 0;
@ -3393,12 +3362,31 @@ static int gfx_v6_0_eop_irq(struct amdgpu_device *adev,
return 0;
}
static void gfx_v6_0_fault(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
struct amdgpu_ring *ring;
switch (entry->ring_id) {
case 0:
ring = &adev->gfx.gfx_ring[0];
break;
case 1:
case 2:
ring = &adev->gfx.compute_ring[entry->ring_id - 1];
break;
default:
return;
}
drm_sched_fault(&ring->sched);
}
static int gfx_v6_0_priv_reg_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
DRM_ERROR("Illegal register access in command stream\n");
schedule_work(&adev->reset_work);
gfx_v6_0_fault(adev, entry);
return 0;
}
@ -3407,7 +3395,7 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
DRM_ERROR("Illegal instruction in command stream\n");
schedule_work(&adev->reset_work);
gfx_v6_0_fault(adev, entry);
return 0;
}


@ -882,7 +882,6 @@ static const u32 kalindi_rlc_save_restore_register_list[] =
static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev);
static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
@ -2064,17 +2063,14 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
int r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
if (r) {
DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
if (r)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
if (r)
goto error_free_scratch;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
amdgpu_ring_write(ring, 0xDEADBEEF);
@ -2086,13 +2082,10 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
break;
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
ring->idx, scratch, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_scratch:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@ -2233,9 +2226,11 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
* on the gfx ring for execution by the GPU.
*/
static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
/* insert SWITCH_BUFFER packet before first IB in the ring frame */
@ -2262,9 +2257,11 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
}
static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
@ -2316,17 +2313,15 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
if (r) {
DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
if (r)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
if (r)
goto err1;
}
ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
ib.ptr[2] = 0xDEADBEEF;
@ -2338,22 +2333,16 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out\n");
r = -ETIMEDOUT;
goto err2;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err2;
}
tmp = RREG32(scratch);
if (tmp == 0xDEADBEEF) {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
if (tmp == 0xDEADBEEF)
r = 0;
} else {
DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
scratch, tmp);
else
r = -EINVAL;
}
err2:
amdgpu_ib_free(adev, &ib, NULL);
@ -2403,7 +2392,7 @@ static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
} else {
WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK));
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].ready = false;
adev->gfx.gfx_ring[i].sched.ready = false;
}
udelay(50);
}
@ -2613,12 +2602,9 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the ring */
gfx_v7_0_cp_gfx_start(adev);
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
}
return 0;
}
@ -2675,7 +2661,7 @@ static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
} else {
WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
for (i = 0; i < adev->gfx.num_compute_rings; i++)
adev->gfx.compute_ring[i].ready = false;
adev->gfx.compute_ring[i].sched.ready = false;
}
udelay(50);
}
@ -2781,7 +2767,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
* GFX7_MEC_HPD_SIZE * 2;
r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.mec.hpd_eop_obj,
&adev->gfx.mec.hpd_eop_gpu_addr,
(void **)&hpd);
@ -3106,10 +3092,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r)
ring->ready = false;
amdgpu_ring_test_helper(ring);
}
return 0;
@ -3268,18 +3251,10 @@ static void gfx_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
* The RLC is a multi-purpose microengine that handles a
* variety of functions.
*/
static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
{
const u32 *src_ptr;
volatile u32 *dst_ptr;
u32 dws, i;
u32 dws;
const struct cs_section_def *cs_data;
int r;
@ -3306,66 +3281,23 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
cs_data = adev->gfx.rlc.cs_data;
if (src_ptr) {
/* save restore block */
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.save_restore_obj,
&adev->gfx.rlc.save_restore_gpu_addr,
(void **)&adev->gfx.rlc.sr_ptr);
if (r) {
dev_warn(adev->dev, "(%d) create, pin or map of RLC sr bo failed\n", r);
gfx_v7_0_rlc_fini(adev);
/* init save restore block */
r = amdgpu_gfx_rlc_init_sr(adev, dws);
if (r)
return r;
}
/* write the sr buffer */
dst_ptr = adev->gfx.rlc.sr_ptr;
for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
dst_ptr[i] = cpu_to_le32(src_ptr[i]);
amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
}
if (cs_data) {
/* clear state block */
adev->gfx.rlc.clear_state_size = dws = gfx_v7_0_get_csb_size(adev);
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
if (r) {
dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
gfx_v7_0_rlc_fini(adev);
/* init clear state block */
r = amdgpu_gfx_rlc_init_csb(adev);
if (r)
return r;
}
/* set up the cs buffer */
dst_ptr = adev->gfx.rlc.cs_ptr;
gfx_v7_0_get_csb_buffer(adev, dst_ptr);
amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
}
if (adev->gfx.rlc.cp_table_size) {
r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
if (r) {
dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
gfx_v7_0_rlc_fini(adev);
r = amdgpu_gfx_rlc_init_cpt(adev);
if (r)
return r;
}
gfx_v7_0_init_cp_pg_table(adev);
amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
}
return 0;
@ -3446,7 +3378,12 @@ static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev)
return orig;
}
static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev)
{
return true;
}
static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev)
{
u32 tmp, i, mask;
@ -3468,7 +3405,7 @@ static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
}
}
static void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev)
{
u32 tmp;
@ -3545,13 +3482,13 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
adev->gfx.rlc_feature_version = le32_to_cpu(
hdr->ucode_feature_version);
gfx_v7_0_rlc_stop(adev);
adev->gfx.rlc.funcs->stop(adev);
/* disable CG */
tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc;
WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
gfx_v7_0_rlc_reset(adev);
adev->gfx.rlc.funcs->reset(adev);
gfx_v7_0_init_pg(adev);
@ -3582,7 +3519,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
if (adev->asic_type == CHIP_BONAIRE)
WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0);
gfx_v7_0_rlc_start(adev);
adev->gfx.rlc.funcs->start(adev);
return 0;
}
@ -3784,72 +3721,12 @@ static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable)
WREG32(mmRLC_PG_CNTL, data);
}
static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev)
static int gfx_v7_0_cp_pg_table_num(struct amdgpu_device *adev)
{
const __le32 *fw_data;
volatile u32 *dst_ptr;
int me, i, max_me = 4;
u32 bo_offset = 0;
u32 table_offset, table_size;
if (adev->asic_type == CHIP_KAVERI)
max_me = 5;
if (adev->gfx.rlc.cp_table_ptr == NULL)
return;
/* write the cp table buffer */
dst_ptr = adev->gfx.rlc.cp_table_ptr;
for (me = 0; me < max_me; me++) {
if (me == 0) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
fw_data = (const __le32 *)
(adev->gfx.ce_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 1) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
fw_data = (const __le32 *)
(adev->gfx.pfp_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 2) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
fw_data = (const __le32 *)
(adev->gfx.me_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 3) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
fw_data = (const __le32 *)
(adev->gfx.mec_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
fw_data = (const __le32 *)
(adev->gfx.mec2_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
}
for (i = 0; i < table_size; i ++) {
dst_ptr[bo_offset + i] =
cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
}
bo_offset += table_size;
}
return 5;
else
return 4;
}
static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
@ -4288,8 +4165,17 @@ static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
};
static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
.enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode,
.exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode
.is_rlc_enabled = gfx_v7_0_is_rlc_enabled,
.set_safe_mode = gfx_v7_0_set_safe_mode,
.unset_safe_mode = gfx_v7_0_unset_safe_mode,
.init = gfx_v7_0_rlc_init,
.get_csb_size = gfx_v7_0_get_csb_size,
.get_csb_buffer = gfx_v7_0_get_csb_buffer,
.get_cp_table_num = gfx_v7_0_cp_pg_table_num,
.resume = gfx_v7_0_rlc_resume,
.stop = gfx_v7_0_rlc_stop,
.reset = gfx_v7_0_rlc_reset,
.start = gfx_v7_0_rlc_start
};
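With the callbacks consolidated into this table, the safe-mode paths used later in the patch (amdgpu_gfx_rlc_enter_safe_mode() and friends) can live in common code and simply dispatch through it. A minimal sketch of the expected wrapper, assuming it mirrors the old per-ASIC logic; the real helper in amdgpu_gfx.c may differ in detail:

/* Sketch: common safe-mode entry dispatching through the per-ASIC
 * rlc_funcs table registered above. */
void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
{
	if (adev->gfx.rlc.in_safe_mode)
		return;

	/* Nothing to do if the RLC microcontroller is not running. */
	if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
		return;

	if (adev->cg_flags &
	    (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		adev->gfx.rlc.funcs->set_safe_mode(adev);
		adev->gfx.rlc.in_safe_mode = true;
	}
}

amdgpu_gfx_rlc_exit_safe_mode() would be the symmetric counterpart, calling ->unset_safe_mode() and clearing in_safe_mode.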
static int gfx_v7_0_early_init(void *handle)
@ -4540,7 +4426,7 @@ static int gfx_v7_0_sw_init(void *handle)
return r;
}
r = gfx_v7_0_rlc_init(adev);
r = adev->gfx.rlc.funcs->init(adev);
if (r) {
DRM_ERROR("Failed to init rlc BOs!\n");
return r;
@ -4604,7 +4490,7 @@ static int gfx_v7_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
gfx_v7_0_cp_compute_fini(adev);
gfx_v7_0_rlc_fini(adev);
amdgpu_gfx_rlc_fini(adev);
gfx_v7_0_mec_fini(adev);
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
@ -4627,7 +4513,7 @@ static int gfx_v7_0_hw_init(void *handle)
gfx_v7_0_constants_init(adev);
/* init rlc */
r = gfx_v7_0_rlc_resume(adev);
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
return r;
@ -4645,7 +4531,7 @@ static int gfx_v7_0_hw_fini(void *handle)
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
gfx_v7_0_cp_enable(adev, false);
gfx_v7_0_rlc_stop(adev);
adev->gfx.rlc.funcs->stop(adev);
gfx_v7_0_fini_pg(adev);
return 0;
@ -4730,7 +4616,7 @@ static int gfx_v7_0_soft_reset(void *handle)
gfx_v7_0_update_cg(adev, false);
/* stop the rlc */
gfx_v7_0_rlc_stop(adev);
adev->gfx.rlc.funcs->stop(adev);
/* Disable GFX parsing/prefetching */
WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
@ -4959,12 +4845,36 @@ static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
return 0;
}
static void gfx_v7_0_fault(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
struct amdgpu_ring *ring;
u8 me_id, pipe_id;
int i;
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
switch (me_id) {
case 0:
drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
break;
case 1:
case 2:
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
if ((ring->me == me_id) && (ring->pipe == pipe_id))
drm_sched_fault(&ring->sched);
}
break;
}
}
static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
DRM_ERROR("Illegal register access in command stream\n");
schedule_work(&adev->reset_work);
gfx_v7_0_fault(adev, entry);
return 0;
}
@ -4974,7 +4884,7 @@ static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
{
DRM_ERROR("Illegal instruction in command stream\n");
// XXX soft reset the gfx block only
schedule_work(&adev->reset_work);
gfx_v7_0_fault(adev, entry);
return 0;
}
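Both interrupt handlers now report the fault to the DRM GPU scheduler instead of queueing the driver's own reset work, tying into the scheduler's new timeout/fault handling. Conceptually, drm_sched_fault() just asks the scheduler to run its timeout handler immediately; a sketch of the idea (the actual scheduler code is authoritative):

/* Sketch: kick the ring's timeout work right away so the scheduler's
 * normal recovery path handles the faulting job. */
void drm_sched_fault(struct drm_gpu_scheduler *sched)
{
	mod_delayed_work(system_wq, &sched->work_tdr, 0);
}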


@ -54,7 +54,7 @@
#include "ivsrcid/ivsrcid_vislands30.h"
#define GFX8_NUM_GFX_RINGS 1
#define GFX8_MEC_HPD_SIZE 2048
#define GFX8_MEC_HPD_SIZE 4096
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
@ -839,18 +839,14 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
int r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
if (r) {
DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
if (r)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
if (r)
goto error_free_scratch;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
amdgpu_ring_write(ring, 0xDEADBEEF);
@ -862,14 +858,11 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
break;
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
ring->idx, scratch, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_scratch:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@ -886,19 +879,16 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 16, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
if (r)
goto err1;
}
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
ib.ptr[2] = lower_32_bits(gpu_addr);
@ -912,22 +902,17 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out.\n");
r = -ETIMEDOUT;
goto err2;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err2;
}
tmp = adev->wb.wb[index];
if (tmp == 0xDEADBEEF) {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
if (tmp == 0xDEADBEEF)
r = 0;
} else {
DRM_ERROR("ib test on ring %d failed\n", ring->idx);
else
r = -EINVAL;
}
err2:
amdgpu_ib_free(adev, &ib, NULL);
@ -1298,81 +1283,16 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
buffer[count++] = cpu_to_le32(0);
}
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
{
const __le32 *fw_data;
volatile u32 *dst_ptr;
int me, i, max_me = 4;
u32 bo_offset = 0;
u32 table_offset, table_size;
if (adev->asic_type == CHIP_CARRIZO)
max_me = 5;
/* write the cp table buffer */
dst_ptr = adev->gfx.rlc.cp_table_ptr;
for (me = 0; me < max_me; me++) {
if (me == 0) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
fw_data = (const __le32 *)
(adev->gfx.ce_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 1) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
fw_data = (const __le32 *)
(adev->gfx.pfp_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 2) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
fw_data = (const __le32 *)
(adev->gfx.me_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 3) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
fw_data = (const __le32 *)
(adev->gfx.mec_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 4) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
fw_data = (const __le32 *)
(adev->gfx.mec2_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
}
for (i = 0; i < table_size; i ++) {
dst_ptr[bo_offset + i] =
cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
}
bo_offset += table_size;
}
}
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
return 5;
else
return 4;
}
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
volatile u32 *dst_ptr;
u32 dws;
const struct cs_section_def *cs_data;
int r;
@ -1381,44 +1301,18 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
cs_data = adev->gfx.rlc.cs_data;
if (cs_data) {
/* clear state block */
adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
if (r) {
dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
gfx_v8_0_rlc_fini(adev);
/* init clear state block */
r = amdgpu_gfx_rlc_init_csb(adev);
if (r)
return r;
}
/* set up the cs buffer */
dst_ptr = adev->gfx.rlc.cs_ptr;
gfx_v8_0_get_csb_buffer(adev, dst_ptr);
amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
}
if ((adev->asic_type == CHIP_CARRIZO) ||
(adev->asic_type == CHIP_STONEY)) {
adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
if (r) {
dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
r = amdgpu_gfx_rlc_init_cpt(adev);
if (r)
return r;
}
cz_init_cp_jump_table(adev);
amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
}
return 0;
@ -1443,7 +1337,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.mec.hpd_eop_obj,
&adev->gfx.mec.hpd_eop_gpu_addr,
(void **)&hpd);
@ -1629,7 +1523,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
return 0;
/* bail if the compute ring is not ready */
if (!ring->ready)
if (!ring->sched.ready)
return 0;
tmp = RREG32(mmGB_EDC_MODE);
@ -2088,7 +1982,7 @@ static int gfx_v8_0_sw_init(void *handle)
return r;
}
r = gfx_v8_0_rlc_init(adev);
r = adev->gfx.rlc.funcs->init(adev);
if (r) {
DRM_ERROR("Failed to init rlc BOs!\n");
return r;
@ -2181,7 +2075,7 @@ static int gfx_v8_0_sw_fini(void *handle)
amdgpu_gfx_kiq_fini(adev);
gfx_v8_0_mec_fini(adev);
gfx_v8_0_rlc_fini(adev);
amdgpu_gfx_rlc_fini(adev);
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
@ -4175,10 +4069,10 @@ static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
gfx_v8_0_rlc_stop(adev);
gfx_v8_0_rlc_reset(adev);
adev->gfx.rlc.funcs->stop(adev);
adev->gfx.rlc.funcs->reset(adev);
gfx_v8_0_init_pg(adev);
gfx_v8_0_rlc_start(adev);
adev->gfx.rlc.funcs->start(adev);
return 0;
}
@ -4197,7 +4091,7 @@ static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].ready = false;
adev->gfx.gfx_ring[i].sched.ready = false;
}
WREG32(mmCP_ME_CNTL, tmp);
udelay(50);
@ -4379,10 +4273,8 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the ring */
amdgpu_ring_clear_ring(ring);
gfx_v8_0_cp_gfx_start(adev);
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r)
ring->ready = false;
ring->sched.ready = true;
r = amdgpu_ring_test_helper(ring);
return r;
}
@ -4396,8 +4288,8 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
} else {
WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
for (i = 0; i < adev->gfx.num_compute_rings; i++)
adev->gfx.compute_ring[i].ready = false;
adev->gfx.kiq.ring.ready = false;
adev->gfx.compute_ring[i].sched.ready = false;
adev->gfx.kiq.ring.sched.ready = false;
}
udelay(50);
}
@ -4473,11 +4365,9 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}
r = amdgpu_ring_test_ring(kiq_ring);
if (r) {
r = amdgpu_ring_test_helper(kiq_ring);
if (r)
DRM_ERROR("KCQ enable failed\n");
kiq_ring->ready = false;
}
return r;
}
@ -4781,7 +4671,7 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
amdgpu_bo_unreserve(ring->mqd_obj);
ring->ready = true;
ring->sched.ready = true;
return 0;
}
@ -4820,10 +4710,7 @@ static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
*/
for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) {
ring = &adev->gfx.compute_ring[i];
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r)
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
}
done:
@ -4867,7 +4754,7 @@ static int gfx_v8_0_hw_init(void *handle)
gfx_v8_0_init_golden_registers(adev);
gfx_v8_0_constants_init(adev);
r = gfx_v8_0_rlc_resume(adev);
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
return r;
@ -4899,7 +4786,7 @@ static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
}
r = amdgpu_ring_test_ring(kiq_ring);
r = amdgpu_ring_test_helper(kiq_ring);
if (r)
DRM_ERROR("KCQ disable failed\n");
@ -4973,16 +4860,16 @@ static int gfx_v8_0_hw_fini(void *handle)
pr_debug("For SRIOV client, shouldn't do anything.\n");
return 0;
}
adev->gfx.rlc.funcs->enter_safe_mode(adev);
amdgpu_gfx_rlc_enter_safe_mode(adev);
if (!gfx_v8_0_wait_for_idle(adev))
gfx_v8_0_cp_enable(adev, false);
else
pr_err("cp is busy, skip halt cp\n");
if (!gfx_v8_0_wait_for_rlc_idle(adev))
gfx_v8_0_rlc_stop(adev);
adev->gfx.rlc.funcs->stop(adev);
else
pr_err("rlc is busy, skip halt rlc\n");
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
return 0;
}
@ -5071,7 +4958,7 @@ static int gfx_v8_0_pre_soft_reset(void *handle)
srbm_soft_reset = adev->gfx.srbm_soft_reset;
/* stop the rlc */
gfx_v8_0_rlc_stop(adev);
adev->gfx.rlc.funcs->stop(adev);
if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
@ -5197,7 +5084,7 @@ static int gfx_v8_0_post_soft_reset(void *handle)
REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
gfx_v8_0_cp_gfx_resume(adev);
gfx_v8_0_rlc_start(adev);
adev->gfx.rlc.funcs->start(adev);
return 0;
}
@ -5445,7 +5332,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
AMD_PG_SUPPORT_RLC_SMU_HS |
AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_GFX_DMG))
adev->gfx.rlc.funcs->enter_safe_mode(adev);
amdgpu_gfx_rlc_enter_safe_mode(adev);
switch (adev->asic_type) {
case CHIP_CARRIZO:
case CHIP_STONEY:
@ -5499,7 +5386,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
AMD_PG_SUPPORT_RLC_SMU_HS |
AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_GFX_DMG))
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
return 0;
}
@ -5593,57 +5480,53 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
{
u32 data;
uint32_t rlc_setting;
rlc_setting = RREG32(mmRLC_CNTL);
if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
return false;
return true;
}
static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
{
uint32_t data;
unsigned i;
data = RREG32(mmRLC_CNTL);
if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
return;
data |= RLC_SAFE_MODE__CMD_MASK;
data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
WREG32(mmRLC_SAFE_MODE, data);
if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
data |= RLC_SAFE_MODE__CMD_MASK;
data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
WREG32(mmRLC_SAFE_MODE, data);
for (i = 0; i < adev->usec_timeout; i++) {
if ((RREG32(mmRLC_GPM_STAT) &
(RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
(RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
break;
udelay(1);
}
for (i = 0; i < adev->usec_timeout; i++) {
if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
break;
udelay(1);
}
adev->gfx.rlc.in_safe_mode = true;
/* wait for RLC_SAFE_MODE */
for (i = 0; i < adev->usec_timeout; i++) {
if ((RREG32(mmRLC_GPM_STAT) &
(RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
(RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
break;
udelay(1);
}
for (i = 0; i < adev->usec_timeout; i++) {
if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
break;
udelay(1);
}
}
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
{
u32 data = 0;
uint32_t data;
unsigned i;
data = RREG32(mmRLC_CNTL);
if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
return;
if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
if (adev->gfx.rlc.in_safe_mode) {
data |= RLC_SAFE_MODE__CMD_MASK;
data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
WREG32(mmRLC_SAFE_MODE, data);
adev->gfx.rlc.in_safe_mode = false;
}
}
data |= RLC_SAFE_MODE__CMD_MASK;
data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
WREG32(mmRLC_SAFE_MODE, data);
for (i = 0; i < adev->usec_timeout; i++) {
if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
@ -5653,8 +5536,17 @@ static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
}
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
.enter_safe_mode = iceland_enter_rlc_safe_mode,
.exit_safe_mode = iceland_exit_rlc_safe_mode
.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
.set_safe_mode = gfx_v8_0_set_safe_mode,
.unset_safe_mode = gfx_v8_0_unset_safe_mode,
.init = gfx_v8_0_rlc_init,
.get_csb_size = gfx_v8_0_get_csb_size,
.get_csb_buffer = gfx_v8_0_get_csb_buffer,
.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
.resume = gfx_v8_0_rlc_resume,
.stop = gfx_v8_0_rlc_stop,
.reset = gfx_v8_0_rlc_reset,
.start = gfx_v8_0_rlc_start
};
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
@ -5662,7 +5554,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t temp, data;
adev->gfx.rlc.funcs->enter_safe_mode(adev);
amdgpu_gfx_rlc_enter_safe_mode(adev);
/* It is disabled by HW by default */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
@ -5758,7 +5650,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
gfx_v8_0_wait_for_rlc_serdes(adev);
}
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
@ -5768,7 +5660,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
adev->gfx.rlc.funcs->enter_safe_mode(adev);
amdgpu_gfx_rlc_enter_safe_mode(adev);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
@ -5851,7 +5743,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
gfx_v8_0_wait_for_rlc_serdes(adev);
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
@ -6131,9 +6023,11 @@ static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
}
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
if (ib->flags & AMDGPU_IB_FLAG_CE)
@ -6161,9 +6055,11 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
}
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
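The emit_ib callbacks now take the amdgpu_job and pull the VMID out of it via AMDGPU_JOB_GET_VMID(). Presumably this macro simply guards against the NULL job used for direct kernel submissions, along the lines of the sketch below (the vmid field name is an assumption here):

/* Sketch: tolerate job == NULL for internal submissions that run on VMID 0. */
#define AMDGPU_JOB_GET_VMID(job)	((job) ? (job)->vmid : 0)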
@ -6738,12 +6634,39 @@ static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
return 0;
}
static void gfx_v8_0_fault(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
u8 me_id, pipe_id, queue_id;
struct amdgpu_ring *ring;
int i;
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
queue_id = (entry->ring_id & 0x70) >> 4;
switch (me_id) {
case 0:
drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
break;
case 1:
case 2:
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
if (ring->me == me_id && ring->pipe == pipe_id &&
ring->queue == queue_id)
drm_sched_fault(&ring->sched);
}
break;
}
}
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
DRM_ERROR("Illegal register access in command stream\n");
schedule_work(&adev->reset_work);
gfx_v8_0_fault(adev, entry);
return 0;
}
@ -6752,7 +6675,7 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
DRM_ERROR("Illegal instruction in command stream\n");
schedule_work(&adev->reset_work);
gfx_v8_0_fault(adev, entry);
return 0;
}
@ -6976,10 +6899,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
17 + /* gfx_v8_0_ring_emit_vm_flush */
7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
.emit_ib = gfx_v8_0_ring_emit_ib_compute,
.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
.test_ring = gfx_v8_0_ring_test_ring,
.test_ib = gfx_v8_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_rreg = gfx_v8_0_ring_emit_rreg,


@ -41,7 +41,7 @@
#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
#define GFX9_NUM_GFX_RINGS 1
#define GFX9_MEC_HPD_SIZE 2048
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
@ -396,18 +396,14 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
int r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
if (r) {
DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
if (r)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
if (r)
goto error_free_scratch;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
amdgpu_ring_write(ring, 0xDEADBEEF);
@ -419,14 +415,11 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
break;
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
ring->idx, scratch, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_scratch:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@ -443,19 +436,16 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 16, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
if (r)
goto err1;
}
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
ib.ptr[2] = lower_32_bits(gpu_addr);
@ -469,22 +459,17 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out.\n");
r = -ETIMEDOUT;
goto err2;
r = -ETIMEDOUT;
goto err2;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err2;
goto err2;
}
tmp = adev->wb.wb[index];
if (tmp == 0xDEADBEEF) {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
r = 0;
} else {
DRM_ERROR("ib test on ring %d failed\n", ring->idx);
r = -EINVAL;
}
if (tmp == 0xDEADBEEF)
r = 0;
else
r = -EINVAL;
err2:
amdgpu_ib_free(adev, &ib, NULL);
@ -1065,85 +1050,13 @@ static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
}
static void rv_init_cp_jump_table(struct amdgpu_device *adev)
static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
const __le32 *fw_data;
volatile u32 *dst_ptr;
int me, i, max_me = 5;
u32 bo_offset = 0;
u32 table_offset, table_size;
/* write the cp table buffer */
dst_ptr = adev->gfx.rlc.cp_table_ptr;
for (me = 0; me < max_me; me++) {
if (me == 0) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
fw_data = (const __le32 *)
(adev->gfx.ce_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 1) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
fw_data = (const __le32 *)
(adev->gfx.pfp_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 2) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
fw_data = (const __le32 *)
(adev->gfx.me_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 3) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
fw_data = (const __le32 *)
(adev->gfx.mec_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
} else if (me == 4) {
const struct gfx_firmware_header_v1_0 *hdr =
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
fw_data = (const __le32 *)
(adev->gfx.mec2_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
table_offset = le32_to_cpu(hdr->jt_offset);
table_size = le32_to_cpu(hdr->jt_size);
}
for (i = 0; i < table_size; i ++) {
dst_ptr[bo_offset + i] =
cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
}
bo_offset += table_size;
}
}
static void gfx_v9_0_rlc_fini(struct amdgpu_device *adev)
{
/* clear state block */
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
/* jump table block */
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
return 5;
}
static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
{
volatile u32 *dst_ptr;
u32 dws;
const struct cs_section_def *cs_data;
int r;
@ -1152,45 +1065,18 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
cs_data = adev->gfx.rlc.cs_data;
if (cs_data) {
/* clear state block */
adev->gfx.rlc.clear_state_size = dws = gfx_v9_0_get_csb_size(adev);
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
if (r) {
dev_err(adev->dev, "(%d) failed to create rlc csb bo\n",
r);
gfx_v9_0_rlc_fini(adev);
/* init clear state block */
r = amdgpu_gfx_rlc_init_csb(adev);
if (r)
return r;
}
/* set up the cs buffer */
dst_ptr = adev->gfx.rlc.cs_ptr;
gfx_v9_0_get_csb_buffer(adev, dst_ptr);
amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
}
if (adev->asic_type == CHIP_RAVEN) {
/* TODO: double check the cp_table_size for RV */
adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
if (r) {
dev_err(adev->dev,
"(%d) failed to create cp table bo\n", r);
gfx_v9_0_rlc_fini(adev);
r = amdgpu_gfx_rlc_init_cpt(adev);
if (r)
return r;
}
rv_init_cp_jump_table(adev);
amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
}
switch (adev->asic_type) {
@ -1264,7 +1150,7 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.mec.hpd_eop_obj,
&adev->gfx.mec.hpd_eop_gpu_addr,
(void **)&hpd);
@ -1635,8 +1521,8 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
/* Clear GDS reserved memory */
r = amdgpu_ring_alloc(ring, 17);
if (r) {
DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n",
ring->idx, r);
DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
ring->name, r);
return r;
}
@ -1748,7 +1634,7 @@ static int gfx_v9_0_sw_init(void *handle)
return r;
}
r = gfx_v9_0_rlc_init(adev);
r = adev->gfx.rlc.funcs->init(adev);
if (r) {
DRM_ERROR("Failed to init rlc BOs!\n");
return r;
@ -2498,12 +2384,12 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
return 0;
}
gfx_v9_0_rlc_stop(adev);
adev->gfx.rlc.funcs->stop(adev);
/* disable CG */
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
gfx_v9_0_rlc_reset(adev);
adev->gfx.rlc.funcs->reset(adev);
gfx_v9_0_init_pg(adev);
@ -2514,15 +2400,24 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
return r;
}
if (adev->asic_type == CHIP_RAVEN ||
adev->asic_type == CHIP_VEGA20) {
if (amdgpu_lbpw != 0)
switch (adev->asic_type) {
case CHIP_RAVEN:
if (amdgpu_lbpw == 0)
gfx_v9_0_enable_lbpw(adev, false);
else
gfx_v9_0_enable_lbpw(adev, true);
break;
case CHIP_VEGA20:
if (amdgpu_lbpw > 0)
gfx_v9_0_enable_lbpw(adev, true);
else
gfx_v9_0_enable_lbpw(adev, false);
break;
default:
break;
}
gfx_v9_0_rlc_start(adev);
adev->gfx.rlc.funcs->start(adev);
return 0;
}
@ -2537,7 +2432,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
if (!enable) {
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].ready = false;
adev->gfx.gfx_ring[i].sched.ready = false;
}
WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
udelay(50);
@ -2727,7 +2622,7 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the ring */
gfx_v9_0_cp_gfx_start(adev);
ring->ready = true;
ring->sched.ready = true;
return 0;
}
@ -2742,8 +2637,8 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
for (i = 0; i < adev->gfx.num_compute_rings; i++)
adev->gfx.compute_ring[i].ready = false;
adev->gfx.kiq.ring.ready = false;
adev->gfx.compute_ring[i].sched.ready = false;
adev->gfx.kiq.ring.sched.ready = false;
}
udelay(50);
}
@ -2866,11 +2761,9 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}
r = amdgpu_ring_test_ring(kiq_ring);
if (r) {
r = amdgpu_ring_test_helper(kiq_ring);
if (r)
DRM_ERROR("KCQ enable failed\n");
kiq_ring->ready = false;
}
return r;
}
@ -3249,7 +3142,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
amdgpu_bo_kunmap(ring->mqd_obj);
ring->mqd_ptr = NULL;
amdgpu_bo_unreserve(ring->mqd_obj);
ring->ready = true;
ring->sched.ready = true;
return 0;
}
@ -3314,19 +3207,13 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
return r;
ring = &adev->gfx.gfx_ring[0];
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
}
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r)
ring->ready = false;
amdgpu_ring_test_helper(ring);
}
gfx_v9_0_enable_gui_idle_interrupt(adev, true);
@ -3353,7 +3240,7 @@ static int gfx_v9_0_hw_init(void *handle)
if (r)
return r;
r = gfx_v9_0_rlc_resume(adev);
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
return r;
@ -3391,7 +3278,7 @@ static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
}
r = amdgpu_ring_test_ring(kiq_ring);
r = amdgpu_ring_test_helper(kiq_ring);
if (r)
DRM_ERROR("KCQ disable failed\n");
@ -3433,7 +3320,7 @@ static int gfx_v9_0_hw_fini(void *handle)
}
gfx_v9_0_cp_enable(adev, false);
gfx_v9_0_rlc_stop(adev);
adev->gfx.rlc.funcs->stop(adev);
gfx_v9_0_csb_vram_unpin(adev);
@ -3508,7 +3395,7 @@ static int gfx_v9_0_soft_reset(void *handle)
if (grbm_soft_reset) {
/* stop the rlc */
gfx_v9_0_rlc_stop(adev);
adev->gfx.rlc.funcs->stop(adev);
/* Disable GFX parsing/prefetching */
gfx_v9_0_cp_gfx_enable(adev, false);
@ -3607,64 +3494,47 @@ static int gfx_v9_0_late_init(void *handle)
return 0;
}
static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
{
uint32_t rlc_setting, data;
unsigned i;
if (adev->gfx.rlc.in_safe_mode)
return;
uint32_t rlc_setting;
/* if RLC is not enabled, do nothing */
rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
return;
return false;
if (adev->cg_flags &
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_3D_CGCG)) {
data = RLC_SAFE_MODE__CMD_MASK;
data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
return true;
}
/* wait for RLC_SAFE_MODE */
for (i = 0; i < adev->usec_timeout; i++) {
if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
break;
udelay(1);
}
adev->gfx.rlc.in_safe_mode = true;
static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
{
uint32_t data;
unsigned i;
data = RLC_SAFE_MODE__CMD_MASK;
data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
/* wait for RLC_SAFE_MODE */
for (i = 0; i < adev->usec_timeout; i++) {
if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
break;
udelay(1);
}
}
static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
{
uint32_t rlc_setting, data;
uint32_t data;
if (!adev->gfx.rlc.in_safe_mode)
return;
/* if RLC is not enabled, do nothing */
rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
return;
if (adev->cg_flags &
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
/*
* Try to exit safe mode only if it is already in safe
* mode.
*/
data = RLC_SAFE_MODE__CMD_MASK;
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
adev->gfx.rlc.in_safe_mode = false;
}
data = RLC_SAFE_MODE__CMD_MASK;
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
}
static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
bool enable)
{
gfx_v9_0_enter_rlc_safe_mode(adev);
amdgpu_gfx_rlc_enter_safe_mode(adev);
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
@ -3675,7 +3545,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
}
gfx_v9_0_exit_rlc_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
@ -3773,7 +3643,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
{
uint32_t data, def;
adev->gfx.rlc.funcs->enter_safe_mode(adev);
amdgpu_gfx_rlc_enter_safe_mode(adev);
/* Enable 3D CGCG/CGLS */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
@ -3813,7 +3683,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
}
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
@ -3821,7 +3691,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t def, data;
adev->gfx.rlc.funcs->enter_safe_mode(adev);
amdgpu_gfx_rlc_enter_safe_mode(adev);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
@ -3861,7 +3731,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
}
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
}
static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
@ -3890,8 +3760,17 @@ static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
}
static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
.enter_safe_mode = gfx_v9_0_enter_rlc_safe_mode,
.exit_safe_mode = gfx_v9_0_exit_rlc_safe_mode
.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
.set_safe_mode = gfx_v9_0_set_safe_mode,
.unset_safe_mode = gfx_v9_0_unset_safe_mode,
.init = gfx_v9_0_rlc_init,
.get_csb_size = gfx_v9_0_get_csb_size,
.get_csb_buffer = gfx_v9_0_get_csb_buffer,
.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
.resume = gfx_v9_0_rlc_resume,
.stop = gfx_v9_0_rlc_stop,
.reset = gfx_v9_0_rlc_reset,
.start = gfx_v9_0_rlc_start
};
static int gfx_v9_0_set_powergating_state(void *handle,
@ -4072,9 +3951,11 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
}
static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
if (ib->flags & AMDGPU_IB_FLAG_CE)
@ -4103,20 +3984,22 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
}
static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
(2 << 0) |
(2 << 0) |
#endif
lower_32_bits(ib->gpu_addr));
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
amdgpu_ring_write(ring, control);
lower_32_bits(ib->gpu_addr));
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
amdgpu_ring_write(ring, control);
}
static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
@ -4695,12 +4578,39 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
return 0;
}
static void gfx_v9_0_fault(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
u8 me_id, pipe_id, queue_id;
struct amdgpu_ring *ring;
int i;
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
queue_id = (entry->ring_id & 0x70) >> 4;
switch (me_id) {
case 0:
drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
break;
case 1:
case 2:
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
if (ring->me == me_id && ring->pipe == pipe_id &&
ring->queue == queue_id)
drm_sched_fault(&ring->sched);
}
break;
}
}
static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
DRM_ERROR("Illegal register access in command stream\n");
schedule_work(&adev->reset_work);
gfx_v9_0_fault(adev, entry);
return 0;
}
@ -4709,7 +4619,7 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
DRM_ERROR("Illegal instruction in command stream\n");
schedule_work(&adev->reset_work);
gfx_v9_0_fault(adev, entry);
return 0;
}
@ -4836,10 +4746,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
.emit_ib = gfx_v9_0_ring_emit_ib_compute,
.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
.test_ring = gfx_v9_0_ring_test_ring,
.test_ib = gfx_v9_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_rreg = gfx_v9_0_ring_emit_rreg,


@ -35,20 +35,25 @@ u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev)
return (u64)RREG32_SOC15(GC, 0, mmMC_VM_FB_OFFSET) << 24;
}
static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
/* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */
int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
- mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
lower_32_bits(value));
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
offset * vmid, lower_32_bits(page_table_base));
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
upper_32_bits(value));
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
offset * vmid, upper_32_bits(page_table_base));
}
static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
{
gfxhub_v1_0_init_gart_pt_regs(adev);
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
gfxhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base);
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->gmc.gart_start >> 12));


@ -30,5 +30,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
bool value);
void gfxhub_v1_0_init(struct amdgpu_device *adev);
u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev);
void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base);
#endif


@ -358,7 +358,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
return 0;
}
static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid)
static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev,
uint32_t vmid, uint32_t flush_type)
{
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
}
@ -580,7 +581,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
else
gmc_v6_0_set_fault_enable_default(adev, true);
gmc_v6_0_flush_gpu_tlb(adev, 0);
gmc_v6_0_flush_gpu_tlb(adev, 0, 0);
dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);


@ -430,7 +430,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
*
* Flush the TLB for the requested page table (CIK).
*/
static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid)
static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev,
uint32_t vmid, uint32_t flush_type)
{
/* bits 0-15 are the VM contexts0-15 */
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
@ -698,7 +699,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
WREG32(mmCHUB_CONTROL, tmp);
}
gmc_v7_0_flush_gpu_tlb(adev, 0);
gmc_v7_0_flush_gpu_tlb(adev, 0, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);


@ -611,7 +611,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
* Flush the TLB for the requested page table (CIK).
*/
static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
uint32_t vmid)
uint32_t vmid, uint32_t flush_type)
{
/* bits 0-15 are the VM contexts0-15 */
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
@ -920,7 +920,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
else
gmc_v8_0_set_fault_enable_default(adev, true);
gmc_v8_0_flush_gpu_tlb(adev, 0);
gmc_v8_0_flush_gpu_tlb(adev, 0, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);


@ -293,14 +293,14 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
}
static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
uint32_t flush_type)
{
u32 req = 0;
/* invalidate using legacy mode on vmid*/
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
PER_VMID_INVALIDATE_REQ, 1 << vmid);
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0);
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
@ -312,48 +312,6 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
return req;
}
static signed long amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask)
{
signed long r, cnt = 0;
unsigned long flags;
uint32_t seq;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *ring = &kiq->ring;
spin_lock_irqsave(&kiq->ring_lock, flags);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
ref, mask);
amdgpu_fence_emit_polling(ring, &seq);
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
/* don't wait anymore for IRQ context */
if (r < 1 && in_interrupt())
goto failed_kiq;
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
}
if (cnt > MAX_KIQ_REG_TRY)
goto failed_kiq;
return 0;
failed_kiq:
pr_err("failed to invalidate tlb with kiq\n");
return r;
}
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@ -362,64 +320,47 @@ failed_kiq:
*/
/**
* gmc_v9_0_flush_gpu_tlb - gart tlb flush callback
* gmc_v9_0_flush_gpu_tlb - tlb flush with certain type
*
* @adev: amdgpu_device pointer
* @vmid: vm instance to flush
* @flush_type: the flush type
*
* Flush the TLB for the requested page table.
* Flush the TLB for the requested page table using certain type.
*/
static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
uint32_t vmid)
uint32_t vmid, uint32_t flush_type)
{
/* Use register 17 for GART */
const unsigned eng = 17;
unsigned i, j;
int r;
for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
struct amdgpu_vmhub *hub = &adev->vmhub[i];
u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
if (adev->gfx.kiq.ring.ready &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
!adev->in_gpu_reset) {
r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng,
hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
if (!r)
continue;
if (i == AMDGPU_GFXHUB && !adev->in_gpu_reset &&
adev->gfx.kiq.ring.sched.ready &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
uint32_t req = hub->vm_inv_eng0_req + eng;
uint32_t ack = hub->vm_inv_eng0_ack + eng;
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
1 << vmid);
continue;
}
spin_lock(&adev->gmc.invalidate_lock);
WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
/* Busy wait for ACK.*/
for (j = 0; j < 100; j++) {
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
tmp &= 1 << vmid;
if (tmp)
break;
cpu_relax();
}
if (j < 100) {
spin_unlock(&adev->gmc.invalidate_lock);
continue;
}
/* Wait for ACK with a delay.*/
for (j = 0; j < adev->usec_timeout; j++) {
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
tmp &= 1 << vmid;
if (tmp)
if (tmp & (1 << vmid))
break;
udelay(1);
}
if (j < adev->usec_timeout) {
spin_unlock(&adev->gmc.invalidate_lock);
continue;
}
spin_unlock(&adev->gmc.invalidate_lock);
if (j < adev->usec_timeout)
continue;
DRM_ERROR("Timeout waiting for VM flush ACK!\n");
}
}
@ -429,7 +370,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
uint32_t req = gmc_v9_0_get_invalidate_req(vmid);
uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
unsigned eng = ring->vm_inv_eng;
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
@ -739,9 +680,8 @@ static int gmc_v9_0_late_init(void *handle)
unsigned vmhub = ring->funcs->vmhub;
ring->vm_inv_eng = vm_inv_eng[vmhub]++;
dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n",
ring->idx, ring->name, ring->vm_inv_eng,
ring->funcs->vmhub);
dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
}
/* Engine 16 is used for KFD and 17 for GART flushes */
@ -1122,7 +1062,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
gfxhub_v1_0_set_fault_enable_default(adev, value);
mmhub_v1_0_set_fault_enable_default(adev, value);
gmc_v9_0_flush_gpu_tlb(adev, 0);
gmc_v9_0_flush_gpu_tlb(adev, 0, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),


@ -508,19 +508,19 @@ static int kv_enable_didt(struct amdgpu_device *adev, bool enable)
pi->caps_db_ramping ||
pi->caps_td_ramping ||
pi->caps_tcp_ramping) {
adev->gfx.rlc.funcs->enter_safe_mode(adev);
amdgpu_gfx_rlc_enter_safe_mode(adev);
if (enable) {
ret = kv_program_pt_config_registers(adev, didt_config_kv);
if (ret) {
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
return ret;
}
}
kv_do_enable_didt(adev, enable);
adev->gfx.rlc.funcs->exit_safe_mode(adev);
amdgpu_gfx_rlc_exit_safe_mode(adev);
}
return 0;


@ -52,20 +52,25 @@ u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
return base;
}
static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
/* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */
int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
- mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
lower_32_bits(value));
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
offset * vmid, lower_32_bits(page_table_base));
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
upper_32_bits(value));
WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
offset * vmid, upper_32_bits(page_table_base));
}
static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
{
mmhub_v1_0_init_gart_pt_regs(adev);
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
mmhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base);
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->gmc.gart_start >> 12));


@ -34,5 +34,7 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
bool enable);
void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base);
#endif


@ -34,6 +34,7 @@
#include "nbio/nbio_7_4_offset.h"
MODULE_FIRMWARE("amdgpu/vega20_sos.bin");
MODULE_FIRMWARE("amdgpu/vega20_ta.bin");
/* address block */
#define smnMP1_FIRMWARE_FLAGS 0x3010024
@ -98,7 +99,8 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
const char *chip_name;
char fw_name[30];
int err = 0;
const struct psp_firmware_header_v1_0 *hdr;
const struct psp_firmware_header_v1_0 *sos_hdr;
const struct ta_firmware_header_v1_0 *ta_hdr;
DRM_DEBUG("\n");
@ -119,16 +121,32 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
if (err)
goto out;
hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data;
adev->psp.sos_fw_version = le32_to_cpu(hdr->header.ucode_version);
adev->psp.sos_feature_version = le32_to_cpu(hdr->ucode_feature_version);
adev->psp.sos_bin_size = le32_to_cpu(hdr->sos_size_bytes);
adev->psp.sys_bin_size = le32_to_cpu(hdr->header.ucode_size_bytes) -
le32_to_cpu(hdr->sos_size_bytes);
adev->psp.sys_start_addr = (uint8_t *)hdr +
le32_to_cpu(hdr->header.ucode_array_offset_bytes);
sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data;
adev->psp.sos_fw_version = le32_to_cpu(sos_hdr->header.ucode_version);
adev->psp.sos_feature_version = le32_to_cpu(sos_hdr->ucode_feature_version);
adev->psp.sos_bin_size = le32_to_cpu(sos_hdr->sos_size_bytes);
adev->psp.sys_bin_size = le32_to_cpu(sos_hdr->header.ucode_size_bytes) -
le32_to_cpu(sos_hdr->sos_size_bytes);
adev->psp.sys_start_addr = (uint8_t *)sos_hdr +
le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes);
adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr +
le32_to_cpu(hdr->sos_offset_bytes);
le32_to_cpu(sos_hdr->sos_offset_bytes);
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
if (err)
goto out;
err = amdgpu_ucode_validate(adev->psp.ta_fw);
if (err)
goto out;
ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data;
adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version);
adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes);
adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr +
le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
return 0;
out:
if (err) {
@ -167,7 +185,7 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
/* Copy PSP System Driver binary to memory */
memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
/* Provide the sys driver to bootrom */
/* Provide the sys driver to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
(uint32_t)(psp->fw_pri_mc_addr >> 20));
psp_gfxdrv_command_reg = 1 << 16;
@ -208,7 +226,7 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)
/* Copy Secure OS binary to PSP memory */
memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
/* Provide the PSP secure OS to bootrom */
/* Provide the PSP secure OS to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
(uint32_t)(psp->fw_pri_mc_addr >> 20));
psp_gfxdrv_command_reg = 2 << 16;
@ -552,24 +570,110 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp)
static int psp_v11_0_xgmi_get_topology_info(struct psp_context *psp,
int number_devices, struct psp_xgmi_topology_info *topology)
{
struct ta_xgmi_shared_memory *xgmi_cmd;
struct ta_xgmi_cmd_get_topology_info_input *topology_info_input;
struct ta_xgmi_cmd_get_topology_info_output *topology_info_output;
int i;
int ret;
if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)
return -EINVAL;
xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
/* Fill in the shared memory with topology information as input */
topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO;
topology_info_input->num_nodes = number_devices;
for (i = 0; i < topology_info_input->num_nodes; i++) {
topology_info_input->nodes[i].node_id = topology->nodes[i].node_id;
topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops;
topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled;
topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine;
}
/* Invoke xgmi ta to get the topology information */
ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO);
if (ret)
return ret;
/* Read the output topology information from the shared memory */
topology_info_output = &xgmi_cmd->xgmi_out_message.get_topology_info;
topology->num_nodes = xgmi_cmd->xgmi_out_message.get_topology_info.num_nodes;
for (i = 0; i < topology->num_nodes; i++) {
topology->nodes[i].node_id = topology_info_output->nodes[i].node_id;
topology->nodes[i].num_hops = topology_info_output->nodes[i].num_hops;
topology->nodes[i].is_sharing_enabled = topology_info_output->nodes[i].is_sharing_enabled;
topology->nodes[i].sdma_engine = topology_info_output->nodes[i].sdma_engine;
}
return 0;
}
static int psp_v11_0_xgmi_set_topology_info(struct psp_context *psp,
int number_devices, struct psp_xgmi_topology_info *topology)
{
return 0;
struct ta_xgmi_shared_memory *xgmi_cmd;
struct ta_xgmi_cmd_get_topology_info_input *topology_info_input;
int i;
if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)
return -EINVAL;
xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__SET_TOPOLOGY_INFO;
topology_info_input->num_nodes = number_devices;
for (i = 0; i < topology_info_input->num_nodes; i++) {
topology_info_input->nodes[i].node_id = topology->nodes[i].node_id;
topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops;
topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled;
topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine;
}
/* Invoke xgmi ta to set topology information */
return psp_xgmi_invoke(psp, TA_COMMAND_XGMI__SET_TOPOLOGY_INFO);
}
static u64 psp_v11_0_xgmi_get_hive_id(struct psp_context *psp)
{
u64 hive_id = 0;
struct ta_xgmi_shared_memory *xgmi_cmd;
int ret;
/* Remove me when we can get correct hive_id through PSP */
if (psp->adev->gmc.xgmi.num_physical_nodes)
hive_id = 0x123456789abcdef;
xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
return hive_id;
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID;
/* Invoke xgmi ta to get hive id */
ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
if (ret)
return 0;
else
return xgmi_cmd->xgmi_out_message.get_hive_id.hive_id;
}
static u64 psp_v11_0_xgmi_get_node_id(struct psp_context *psp)
{
struct ta_xgmi_shared_memory *xgmi_cmd;
int ret;
xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID;
/* Invoke xgmi ta to get the node id */
ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
if (ret)
return 0;
else
return xgmi_cmd->xgmi_out_message.get_node_id.node_id;
}
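
The new XGMI calls all follow the same shared-memory handshake with the trusted application: clear the buffer, set cmd_id, fill the input message, invoke the TA, then read the output message (falling back to 0 on error). The sketch below shows only that pattern; the structures are heavily trimmed, hypothetical stand-ins for ta_xgmi_shared_memory, and the invoke function is a stub in place of psp_xgmi_invoke().

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical, trimmed-down mirror of the TA shared buffer: a command id
 * plus an output union that the trusted application fills in. */
struct get_hive_id_output { uint64_t hive_id; };
struct get_node_id_output { uint64_t node_id; };

struct xgmi_shared_memory {
    uint32_t cmd_id;
    uint32_t status;
    union {
        struct get_hive_id_output get_hive_id;
        struct get_node_id_output get_node_id;
    } out;
};

enum { CMD_GET_NODE_ID = 1, CMD_GET_HIVE_ID = 2 };

/* Stand-in for psp_xgmi_invoke(): pretend the TA answered. */
static int xgmi_invoke(struct xgmi_shared_memory *buf, uint32_t cmd)
{
    if (cmd == CMD_GET_HIVE_ID)
        buf->out.get_hive_id.hive_id = 0x1122334455667788ull;
    else if (cmd == CMD_GET_NODE_ID)
        buf->out.get_node_id.node_id = 0x42;
    else
        return -1;
    return 0;
}

/* Same shape as psp_v11_0_xgmi_get_hive_id(): clear the shared buffer,
 * set the command id, invoke, then read the result (0 on failure). */
static uint64_t get_hive_id(struct xgmi_shared_memory *buf)
{
    memset(buf, 0, sizeof(*buf));
    buf->cmd_id = CMD_GET_HIVE_ID;
    if (xgmi_invoke(buf, buf->cmd_id))
        return 0;
    return buf->out.get_hive_id.hive_id;
}

int main(void)
{
    struct xgmi_shared_memory buf;

    printf("hive id: 0x%llx\n", (unsigned long long)get_hive_id(&buf));
    return 0;
}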
static const struct psp_funcs psp_v11_0_funcs = {
@ -587,6 +691,7 @@ static const struct psp_funcs psp_v11_0_funcs = {
.xgmi_get_topology_info = psp_v11_0_xgmi_get_topology_info,
.xgmi_set_topology_info = psp_v11_0_xgmi_set_topology_info,
.xgmi_get_hive_id = psp_v11_0_xgmi_get_hive_id,
.xgmi_get_node_id = psp_v11_0_xgmi_get_node_id,
};
void psp_v11_0_set_psp_funcs(struct psp_context *psp)

View File

@ -194,7 +194,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
/* Copy PSP System Driver binary to memory */
memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
/* Provide the sys driver to bootrom */
/* Provide the sys driver to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
(uint32_t)(psp->fw_pri_mc_addr >> 20));
psp_gfxdrv_command_reg = 1 << 16;
@ -254,7 +254,7 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
/* Copy Secure OS binary to PSP memory */
memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
/* Provide the PSP secure OS to bootrom */
/* Provide the PSP secure OS to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
(uint32_t)(psp->fw_pri_mc_addr >> 20));
psp_gfxdrv_command_reg = 2 << 16;

View File

@ -225,7 +225,7 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
int i;
for (i = 0; i < count; i++)
@ -245,9 +245,12 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
* Schedule an IB in the DMA ring (VI).
*/
static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
/* IB packet must end on a 8 DW boundary */
sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
@ -349,8 +352,8 @@ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
}
sdma0->ready = false;
sdma1->ready = false;
sdma0->sched.ready = false;
sdma1->sched.ready = false;
}
/**
@ -471,17 +474,15 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
/* enable DMA IBs */
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
ring->ready = true;
ring->sched.ready = true;
}
sdma_v2_4_enable(adev, true);
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
}
if (adev->mman.buffer_funcs_ring == ring)
amdgpu_ttm_set_buffer_funcs_status(adev, true);
@ -550,21 +551,16 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
u64 gpu_addr;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
r = amdgpu_ring_alloc(ring, 5);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_device_wb_free(adev, index);
return r;
}
if (r)
goto error_free_wb;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
@ -581,15 +577,11 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
amdgpu_device_wb_free(adev, index);
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_wb:
amdgpu_device_wb_free(adev, index);
return r;
}
@ -612,20 +604,16 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
if (r)
goto err0;
}
ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
@ -644,21 +632,16 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out\n");
r = -ETIMEDOUT;
goto err1;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err1;
}
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF) {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
if (tmp == 0xDEADBEEF)
r = 0;
} else {
DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
else
r = -EINVAL;
}
err1:
amdgpu_ib_free(adev, &ib, NULL);
@ -760,7 +743,7 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
*/
static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
u32 pad_count;
int i;
@ -1105,8 +1088,14 @@ static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
u8 instance_id, queue_id;
DRM_ERROR("Illegal instruction in SDMA command stream\n");
schedule_work(&adev->reset_work);
instance_id = (entry->ring_id & 0x3) >> 0;
queue_id = (entry->ring_id & 0xc) >> 2;
if (instance_id <= 1 && queue_id == 0)
drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
return 0;
}
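
Instead of scheduling a full GPU reset, the illegal-instruction handler now decodes which SDMA queue faulted from the packed ring_id and reports the fault to that ring's scheduler. A stand-alone decoder using the same masks and shifts (the example value passed in main() is arbitrary):

#include <stdint.h>
#include <stdio.h>

static void decode_ring_id(uint8_t ring_id, uint8_t *instance_id, uint8_t *queue_id)
{
    *instance_id = (ring_id & 0x3) >> 0;   /* bits 1:0 -> SDMA instance */
    *queue_id    = (ring_id & 0xc) >> 2;   /* bits 3:2 -> queue within it */
}

int main(void)
{
    uint8_t inst, queue;

    decode_ring_id(0x5, &inst, &queue);
    /* The handler above only forwards faults for queue 0 of instance 0/1. */
    printf("instance=%u queue=%u forwarded=%s\n",
           (unsigned)inst, (unsigned)queue,
           (inst <= 1 && queue == 0) ? "yes" : "no");
    return 0;
}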

View File

@ -399,7 +399,7 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
int i;
for (i = 0; i < count; i++)
@ -419,9 +419,12 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
* Schedule an IB in the DMA ring (VI).
*/
static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
/* IB packet must end on a 8 DW boundary */
sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
@ -523,8 +526,8 @@ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
}
sdma0->ready = false;
sdma1->ready = false;
sdma0->sched.ready = false;
sdma1->sched.ready = false;
}
/**
@ -739,7 +742,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
/* enable DMA IBs */
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
ring->ready = true;
ring->sched.ready = true;
}
/* unhalt the MEs */
@ -749,11 +752,9 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
}
if (adev->mman.buffer_funcs_ring == ring)
amdgpu_ttm_set_buffer_funcs_status(adev, true);
@ -822,21 +823,16 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
u64 gpu_addr;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
r = amdgpu_ring_alloc(ring, 5);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_device_wb_free(adev, index);
return r;
}
if (r)
goto error_free_wb;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
@ -853,15 +849,11 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
amdgpu_device_wb_free(adev, index);
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_wb:
amdgpu_device_wb_free(adev, index);
return r;
}
@ -884,20 +876,16 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
if (r)
goto err0;
}
ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
@ -916,21 +904,16 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out\n");
r = -ETIMEDOUT;
goto err1;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err1;
}
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF) {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
if (tmp == 0xDEADBEEF)
r = 0;
} else {
DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
else
r = -EINVAL;
}
err1:
amdgpu_ib_free(adev, &ib, NULL);
dma_fence_put(f);
@ -1031,7 +1014,7 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
*/
static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
u32 pad_count;
int i;
@ -1440,8 +1423,14 @@ static int sdma_v3_0_process_illegal_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
u8 instance_id, queue_id;
DRM_ERROR("Illegal instruction in SDMA command stream\n");
schedule_work(&adev->reset_work);
instance_id = (entry->ring_id & 0x3) >> 0;
queue_id = (entry->ring_id & 0xc) >> 2;
if (instance_id <= 1 && queue_id == 0)
drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
return 0;
}

View File

@ -54,6 +54,11 @@ MODULE_FIRMWARE("amdgpu/raven2_sdma.bin");
#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L
#define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L
#define WREG32_SDMA(instance, offset, value) \
WREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)), value)
#define RREG32_SDMA(instance, offset) \
RREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)))
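
The two wrapper macros above fold the per-instance register-offset lookup into the access itself (with adev picked up implicitly from the enclosing scope). A small user-space sketch of the same idea; the register layout, the mmio[] array and get_reg_offset() are made up for illustration, standing in for the real sdma_v4_0_get_reg_offset() and MMIO accessors.

#include <stdint.h>
#include <stdio.h>

#define NUM_INSTANCES     2
#define REGS_PER_INSTANCE 256

static uint32_t mmio[NUM_INSTANCES * REGS_PER_INSTANCE]; /* fake register space */

/* Stand-in for sdma_v4_0_get_reg_offset(): per-instance register blocks. */
static unsigned int get_reg_offset(unsigned int instance, unsigned int offset)
{
    return instance * REGS_PER_INSTANCE + offset;
}

/* Same idea as the WREG32_SDMA()/RREG32_SDMA() helpers above: hide the
 * offset computation so call sites read as "register X of instance i"
 * instead of repeating the lookup everywhere. */
#define WREG32_SDMA(instance, offset, value) \
    (mmio[get_reg_offset((instance), (offset))] = (value))
#define RREG32_SDMA(instance, offset) \
    (mmio[get_reg_offset((instance), (offset))])

#define mmSDMA0_GFX_RB_CNTL 0x10   /* made-up register number */

int main(void)
{
    WREG32_SDMA(1, mmSDMA0_GFX_RB_CNTL, 0xdeadbeef);
    printf("instance 1 RB_CNTL = 0x%x\n",
           (unsigned)RREG32_SDMA(1, mmSDMA0_GFX_RB_CNTL));
    return 0;
}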
static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
@ -367,16 +372,11 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
} else {
u32 lowbit, highbit;
lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2;
highbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
ring->me, highbit, lowbit);
wptr = highbit;
wptr = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI);
wptr = wptr << 32;
wptr |= lowbit;
wptr |= RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR);
DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n",
ring->me, wptr);
}
return wptr >> 2;
@ -417,14 +417,67 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
lower_32_bits(ring->wptr << 2),
ring->me,
upper_32_bits(ring->wptr << 2));
WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR,
lower_32_bits(ring->wptr << 2));
WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI,
upper_32_bits(ring->wptr << 2));
}
}
/**
* sdma_v4_0_page_ring_get_wptr - get the current write pointer
*
* @ring: amdgpu ring pointer
*
* Get the current wptr from the hardware (VEGA10+).
*/
static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u64 wptr;
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
} else {
wptr = RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI);
wptr = wptr << 32;
wptr |= RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR);
}
return wptr >> 2;
}
/**
* sdma_v4_0_page_ring_set_wptr - commit the write pointer
*
* @ring: amdgpu ring pointer
*
* Write the wptr back to the hardware (VEGA10+).
*/
static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
if (ring->use_doorbell) {
u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
/* XXX check if swapping is necessary on BE */
WRITE_ONCE(*wb, (ring->wptr << 2));
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
} else {
uint64_t wptr = ring->wptr << 2;
WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR,
lower_32_bits(wptr));
WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI,
upper_32_bits(wptr));
}
}
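
The page-queue wptr helpers mirror the gfx ones: in the register path the 64-bit write pointer lives in a LO/HI register pair, and the driver tracks wptr in dwords, hence the << 2 / >> 2 conversions. A self-contained round-trip sketch, with two plain variables standing in for the WPTR/WPTR_HI registers (no real hardware access is modelled):

#include <stdint.h>
#include <stdio.h>

static uint32_t reg_wptr_lo, reg_wptr_hi;   /* fake mmSDMA0_PAGE_RB_WPTR{,_HI} */

static void set_wptr(uint64_t wptr_dw)
{
    uint64_t bytes = wptr_dw << 2;          /* dwords -> bytes */

    reg_wptr_lo = (uint32_t)(bytes & 0xffffffffu);
    reg_wptr_hi = (uint32_t)(bytes >> 32);
}

static uint64_t get_wptr(void)
{
    uint64_t bytes = ((uint64_t)reg_wptr_hi << 32) | reg_wptr_lo;

    return bytes >> 2;                      /* bytes -> dwords */
}

int main(void)
{
    set_wptr(0x123456789abull);
    printf("round trip: 0x%llx\n", (unsigned long long)get_wptr());
    return 0;
}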
static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
int i;
for (i = 0; i < count; i++)
@ -444,9 +497,12 @@ static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
* Schedule an IB in the DMA ring (VEGA10).
*/
static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
/* IB packet must end on a 8 DW boundary */
sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
@ -568,16 +624,16 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
amdgpu_ttm_set_buffer_funcs_status(adev, false);
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
}
sdma0->ready = false;
sdma1->ready = false;
sdma0->sched.ready = false;
sdma1->sched.ready = false;
}
/**
@ -592,6 +648,39 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev)
/* XXX todo */
}
/**
* sdma_v4_0_page_stop - stop the page async dma engines
*
* @adev: amdgpu_device pointer
*
* Stop the page async dma ring buffers (VEGA10).
*/
static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
{
struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].page;
struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].page;
u32 rb_cntl, ib_cntl;
int i;
if ((adev->mman.buffer_funcs_ring == sdma0) ||
(adev->mman.buffer_funcs_ring == sdma1))
amdgpu_ttm_set_buffer_funcs_status(adev, false);
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
RB_ENABLE, 0);
WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL,
IB_ENABLE, 0);
WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
}
sdma0->sched.ready = false;
sdma1->sched.ready = false;
}
/**
* sdma_v4_0_ctx_switch_enable - enable/disable the async dma engines context switch
*
@ -630,18 +719,15 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
}
for (i = 0; i < adev->sdma.num_instances; i++) {
f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
f32_cntl = RREG32_SDMA(i, mmSDMA0_CNTL);
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
AUTO_CTXSW_ENABLE, enable ? 1 : 0);
if (enable && amdgpu_sdma_phase_quantum) {
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
phase_quantum);
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
phase_quantum);
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
phase_quantum);
WREG32_SDMA(i, mmSDMA0_PHASE0_QUANTUM, phase_quantum);
WREG32_SDMA(i, mmSDMA0_PHASE1_QUANTUM, phase_quantum);
WREG32_SDMA(i, mmSDMA0_PHASE2_QUANTUM, phase_quantum);
}
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
WREG32_SDMA(i, mmSDMA0_CNTL, f32_cntl);
}
}
@ -662,156 +748,217 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
if (enable == false) {
sdma_v4_0_gfx_stop(adev);
sdma_v4_0_rlc_stop(adev);
if (adev->sdma.has_page_queue)
sdma_v4_0_page_stop(adev);
}
for (i = 0; i < adev->sdma.num_instances; i++) {
f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
f32_cntl = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
WREG32_SDMA(i, mmSDMA0_F32_CNTL, f32_cntl);
}
}
/**
* sdma_v4_0_rb_cntl - get parameters for rb_cntl
*/
static uint32_t sdma_v4_0_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
{
/* Set ring buffer size in dwords */
uint32_t rb_bufsz = order_base_2(ring->ring_size / 4);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
return rb_cntl;
}
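
sdma_v4_0_rb_cntl() factors out the RB_CNTL setup shared by the gfx and page rings; the RB_SIZE field it computes is the log2 of the ring size in dwords. A sketch of that encoding, with a local stand-in assumed to behave like the kernel's order_base_2() (ceiling log2):

#include <stdint.h>
#include <stdio.h>

/* Local stand-in for order_base_2(): smallest order with 2^order >= n. */
static unsigned int order_base_2(uint32_t n)
{
    unsigned int order = 0;

    while ((1u << order) < n)
        order++;
    return order;
}

int main(void)
{
    uint32_t ring_size = 1024 * 4;                      /* bytes */
    unsigned int rb_bufsz = order_base_2(ring_size / 4); /* log2(dwords) */

    printf("ring_size=%u bytes -> RB_SIZE field = %u (2^%u dwords)\n",
           (unsigned)ring_size, rb_bufsz, rb_bufsz);
    return 0;
}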
/**
* sdma_v4_0_gfx_resume - setup and start the async dma engines
*
* @adev: amdgpu_device pointer
* @i: instance to resume
*
* Set up the gfx DMA ring buffer for the given instance and enable it (VEGA10).
*/
static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
{
struct amdgpu_ring *ring;
struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
u32 rb_bufsz;
u32 wb_offset;
u32 doorbell;
u32 doorbell_offset;
u32 temp;
u64 wptr_gpu_addr;
int i, r;
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
wb_offset = (ring->rptr_offs * 4);
wb_offset = (ring->rptr_offs * 4);
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
/* Set ring buffer size in dwords */
rb_bufsz = order_base_2(ring->ring_size / 4);
rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
/* Initialize the ring buffer's read and write pointers */
WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR, 0);
WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_HI, 0);
WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR, 0);
WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_HI, 0);
/* Initialize the ring buffer's read and write pointers */
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
/* set the wb address whether it's enabled or not */
WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI,
upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO,
lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
/* set the wb address whether it's enabled or not */
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
RPTR_WRITEBACK_ENABLE, 1);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE, ring->gpu_addr >> 8);
WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
ring->wptr = 0;
ring->wptr = 0;
/* before programing wptr to a less value, need set minor_ptr_update first */
WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 1);
/* before programing wptr to a less value, need set minor_ptr_update first */
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
doorbell = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL);
doorbell_offset = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET);
if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
}
doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
if (ring->use_doorbell) {
doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE,
ring->use_doorbell);
doorbell_offset = REG_SET_FIELD(doorbell_offset,
SDMA0_GFX_DOORBELL_OFFSET,
OFFSET, ring->doorbell_index);
} else {
doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
}
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
ring->doorbell_index);
WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL, doorbell);
WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET, doorbell_offset);
adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
ring->doorbell_index);
if (amdgpu_sriov_vf(adev))
sdma_v4_0_ring_set_wptr(ring);
sdma_v4_0_ring_set_wptr(ring);
/* set minor_ptr_update to 0 after wptr programed */
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
/* set minor_ptr_update to 0 after wptr programed */
WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 0);
/* set utc l1 enable flag always to 1 */
temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
/* setup the wptr shadow polling */
wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO,
lower_32_bits(wptr_gpu_addr));
WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI,
upper_32_bits(wptr_gpu_addr));
wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL);
wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
SDMA0_GFX_RB_WPTR_POLL_CNTL,
F32_POLL_ENABLE, amdgpu_sriov_vf(adev));
WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
if (!amdgpu_sriov_vf(adev)) {
/* unhalt engine */
temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
}
/* enable DMA RB */
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
/* setup the wptr shadow polling */
wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
lower_32_bits(wptr_gpu_addr));
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
upper_32_bits(wptr_gpu_addr));
wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
if (amdgpu_sriov_vf(adev))
wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
else
wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
/* enable DMA RB */
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
/* enable DMA IBs */
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
/* enable DMA IBs */
WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
ring->ready = true;
ring->sched.ready = true;
}
if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
sdma_v4_0_ctx_switch_enable(adev, true);
sdma_v4_0_enable(adev, true);
}
/**
* sdma_v4_0_page_resume - setup and start the async dma engines
*
* @adev: amdgpu_device pointer
* @i: instance to resume
*
* Set up the page DMA ring buffer for the given instance and enable it (VEGA10).
*/
static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
{
struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
u32 wb_offset;
u32 doorbell;
u32 doorbell_offset;
u64 wptr_gpu_addr;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
return r;
}
wb_offset = (ring->rptr_offs * 4);
if (adev->mman.buffer_funcs_ring == ring)
amdgpu_ttm_set_buffer_funcs_status(adev, true);
rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
}
/* Initialize the ring buffer's read and write pointers */
WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR, 0);
WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_HI, 0);
WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR, 0);
WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_HI, 0);
return 0;
/* set the wb address whether it's enabled or not */
WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_HI,
upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_LO,
lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
RPTR_WRITEBACK_ENABLE, 1);
WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE, ring->gpu_addr >> 8);
WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);
ring->wptr = 0;
/* before programing wptr to a less value, need set minor_ptr_update first */
WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 1);
doorbell = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL);
doorbell_offset = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET);
doorbell = REG_SET_FIELD(doorbell, SDMA0_PAGE_DOORBELL, ENABLE,
ring->use_doorbell);
doorbell_offset = REG_SET_FIELD(doorbell_offset,
SDMA0_PAGE_DOORBELL_OFFSET,
OFFSET, ring->doorbell_index);
WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL, doorbell);
WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET, doorbell_offset);
/* TODO: enable doorbell support */
/*adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
ring->doorbell_index);*/
sdma_v4_0_ring_set_wptr(ring);
/* set minor_ptr_update to 0 after wptr programed */
WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 0);
/* setup the wptr shadow polling */
wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_LO,
lower_32_bits(wptr_gpu_addr));
WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_HI,
upper_32_bits(wptr_gpu_addr));
wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL);
wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
SDMA0_PAGE_RB_WPTR_POLL_CNTL,
F32_POLL_ENABLE, amdgpu_sriov_vf(adev));
WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
/* enable DMA RB */
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 1);
WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
/* enable DMA IBs */
WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
ring->sched.ready = true;
}
static void
@ -922,12 +1069,14 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
(adev->sdma.instance[i].fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0);
WREG32_SDMA(i, mmSDMA0_UCODE_ADDR, 0);
for (j = 0; j < fw_size; j++)
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
WREG32_SDMA(i, mmSDMA0_UCODE_DATA,
le32_to_cpup(fw_data++));
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
WREG32_SDMA(i, mmSDMA0_UCODE_ADDR,
adev->sdma.instance[i].fw_version);
}
return 0;
@ -943,33 +1092,78 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
*/
static int sdma_v4_0_start(struct amdgpu_device *adev)
{
int r = 0;
struct amdgpu_ring *ring;
int i, r;
if (amdgpu_sriov_vf(adev)) {
sdma_v4_0_ctx_switch_enable(adev, false);
sdma_v4_0_enable(adev, false);
} else {
/* set RB registers */
r = sdma_v4_0_gfx_resume(adev);
return r;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
r = sdma_v4_0_load_microcode(adev);
if (r)
return r;
}
/* unhalt the MEs */
sdma_v4_0_enable(adev, true);
/* enable sdma ring preemption */
sdma_v4_0_ctx_switch_enable(adev, true);
}
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
r = sdma_v4_0_load_microcode(adev);
/* start the gfx rings and rlc compute queues */
for (i = 0; i < adev->sdma.num_instances; i++) {
uint32_t temp;
WREG32_SDMA(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL, 0);
sdma_v4_0_gfx_resume(adev, i);
if (adev->sdma.has_page_queue)
sdma_v4_0_page_resume(adev, i);
/* set utc l1 enable flag always to 1 */
temp = RREG32_SDMA(i, mmSDMA0_CNTL);
temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
WREG32_SDMA(i, mmSDMA0_CNTL, temp);
if (!amdgpu_sriov_vf(adev)) {
/* unhalt engine */
temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
WREG32_SDMA(i, mmSDMA0_F32_CNTL, temp);
}
}
if (amdgpu_sriov_vf(adev)) {
sdma_v4_0_ctx_switch_enable(adev, true);
sdma_v4_0_enable(adev, true);
} else {
r = sdma_v4_0_rlc_resume(adev);
if (r)
return r;
}
/* unhalt the MEs */
sdma_v4_0_enable(adev, true);
/* enable sdma ring preemption */
sdma_v4_0_ctx_switch_enable(adev, true);
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
/* start the gfx rings and rlc compute queues */
r = sdma_v4_0_gfx_resume(adev);
if (r)
return r;
r = sdma_v4_0_rlc_resume(adev);
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
if (adev->sdma.has_page_queue) {
struct amdgpu_ring *page = &adev->sdma.instance[i].page;
r = amdgpu_ring_test_helper(page);
if (r)
return r;
if (adev->mman.buffer_funcs_ring == page)
amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
if (adev->mman.buffer_funcs_ring == ring)
amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return r;
}
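
Throughout this series the open-coded "run the ring test, clear the ready flag on failure" sequences are replaced by amdgpu_ring_test_helper(). That helper's body is not part of this diff, so the following is only a hypothetical mock of the pattern the callers are converted to; the struct and function names are invented.

#include <stdio.h>

struct ring {
    const char *name;
    int ready;                          /* stands in for ring->sched.ready */
    int (*test_ring)(struct ring *ring);
};

/* One place that runs a ring's self test and records the outcome, so
 * callers no longer open-code the failure path. */
static int ring_test_helper(struct ring *ring)
{
    int r = ring->test_ring(ring);

    ring->ready = (r == 0);
    if (r)
        fprintf(stderr, "ring %s test failed (%d)\n", ring->name, r);
    return r;
}

static int fake_test_ok(struct ring *ring)  { (void)ring; return 0; }
static int fake_test_bad(struct ring *ring) { (void)ring; return -22; }

int main(void)
{
    struct ring good = { "sdma0", 0, fake_test_ok };
    struct ring bad  = { "page1", 0, fake_test_bad };

    ring_test_helper(&good);
    ring_test_helper(&bad);
    printf("sdma0 ready=%d, page1 ready=%d\n", good.ready, bad.ready);
    return 0;
}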
@ -993,21 +1187,16 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring)
u64 gpu_addr;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
r = amdgpu_ring_alloc(ring, 5);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_device_wb_free(adev, index);
return r;
}
if (r)
goto error_free_wb;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
@ -1024,15 +1213,11 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
amdgpu_device_wb_free(adev, index);
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_wb:
amdgpu_device_wb_free(adev, index);
return r;
}
@ -1055,20 +1240,16 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
u64 gpu_addr;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
if (r)
goto err0;
}
ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
@ -1087,21 +1268,17 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out\n");
r = -ETIMEDOUT;
goto err1;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err1;
}
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF) {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
if (tmp == 0xDEADBEEF)
r = 0;
} else {
DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
else
r = -EINVAL;
}
err1:
amdgpu_ib_free(adev, &ib, NULL);
dma_fence_put(f);
@ -1206,7 +1383,7 @@ static void sdma_v4_0_vm_set_pte_pde(struct amdgpu_ib *ib,
*/
static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
u32 pad_count;
int i;
@ -1276,10 +1453,18 @@ static int sdma_v4_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->asic_type == CHIP_RAVEN)
if (adev->asic_type == CHIP_RAVEN) {
adev->sdma.num_instances = 1;
else
adev->sdma.has_page_queue = false;
} else {
adev->sdma.num_instances = 2;
/* TODO: Page queue breaks driver reload under SRIOV */
if ((adev->asic_type == CHIP_VEGA10) && amdgpu_sriov_vf((adev)))
adev->sdma.has_page_queue = false;
else if (adev->asic_type != CHIP_VEGA20 &&
adev->asic_type != CHIP_VEGA12)
adev->sdma.has_page_queue = true;
}
sdma_v4_0_set_ring_funcs(adev);
sdma_v4_0_set_buffer_funcs(adev);
@ -1340,6 +1525,21 @@ static int sdma_v4_0_sw_init(void *handle)
AMDGPU_SDMA_IRQ_TRAP1);
if (r)
return r;
if (adev->sdma.has_page_queue) {
ring = &adev->sdma.instance[i].page;
ring->ring_obj = NULL;
ring->use_doorbell = false;
sprintf(ring->name, "page%d", i);
r = amdgpu_ring_init(adev, ring, 1024,
&adev->sdma.trap_irq,
(i == 0) ?
AMDGPU_SDMA_IRQ_TRAP0 :
AMDGPU_SDMA_IRQ_TRAP1);
if (r)
return r;
}
}
return r;
@ -1350,8 +1550,11 @@ static int sdma_v4_0_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
for (i = 0; i < adev->sdma.num_instances; i++) {
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
if (adev->sdma.has_page_queue)
amdgpu_ring_fini(&adev->sdma.instance[i].page);
}
for (i = 0; i < adev->sdma.num_instances; i++) {
release_firmware(adev->sdma.instance[i].fw);
@ -1414,7 +1617,7 @@ static bool sdma_v4_0_is_idle(void *handle)
u32 i;
for (i = 0; i < adev->sdma.num_instances; i++) {
u32 tmp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG));
u32 tmp = RREG32_SDMA(i, mmSDMA0_STATUS_REG);
if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
return false;
@ -1430,8 +1633,8 @@ static int sdma_v4_0_wait_for_idle(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
for (i = 0; i < adev->usec_timeout; i++) {
sdma0 = RREG32(sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
sdma1 = RREG32(sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG));
sdma0 = RREG32_SDMA(0, mmSDMA0_STATUS_REG);
sdma1 = RREG32_SDMA(1, mmSDMA0_STATUS_REG);
if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
return 0;
@ -1452,16 +1655,13 @@ static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev,
unsigned type,
enum amdgpu_interrupt_state state)
{
unsigned int instance = (type == AMDGPU_SDMA_IRQ_TRAP0) ? 0 : 1;
u32 sdma_cntl;
u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ?
sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) :
sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_CNTL);
sdma_cntl = RREG32(reg_offset);
sdma_cntl = RREG32_SDMA(instance, mmSDMA0_CNTL);
sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
WREG32(reg_offset, sdma_cntl);
WREG32_SDMA(instance, mmSDMA0_CNTL, sdma_cntl);
return 0;
}
@ -1470,39 +1670,32 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
uint32_t instance;
DRM_DEBUG("IH: SDMA trap\n");
switch (entry->client_id) {
case SOC15_IH_CLIENTID_SDMA0:
switch (entry->ring_id) {
case 0:
amdgpu_fence_process(&adev->sdma.instance[0].ring);
break;
case 1:
/* XXX compute */
break;
case 2:
/* XXX compute */
break;
case 3:
/* XXX page queue*/
break;
}
instance = 0;
break;
case SOC15_IH_CLIENTID_SDMA1:
switch (entry->ring_id) {
case 0:
amdgpu_fence_process(&adev->sdma.instance[1].ring);
break;
case 1:
/* XXX compute */
break;
case 2:
/* XXX compute */
break;
case 3:
/* XXX page queue*/
break;
}
instance = 1;
break;
default:
return 0;
}
switch (entry->ring_id) {
case 0:
amdgpu_fence_process(&adev->sdma.instance[instance].ring);
break;
case 1:
/* XXX compute */
break;
case 2:
/* XXX compute */
break;
case 3:
amdgpu_fence_process(&adev->sdma.instance[instance].page);
break;
}
return 0;
@ -1512,12 +1705,29 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
int instance;
DRM_ERROR("Illegal instruction in SDMA command stream\n");
schedule_work(&adev->reset_work);
switch (entry->client_id) {
case SOC15_IH_CLIENTID_SDMA0:
instance = 0;
break;
case SOC15_IH_CLIENTID_SDMA1:
instance = 1;
break;
default:
return 0;
}
switch (entry->ring_id) {
case 0:
drm_sched_fault(&adev->sdma.instance[instance].ring.sched);
break;
}
return 0;
}
static void sdma_v4_0_update_medium_grain_clock_gating(
struct amdgpu_device *adev,
bool enable)
@ -1730,6 +1940,38 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = {
.type = AMDGPU_RING_TYPE_SDMA,
.align_mask = 0xf,
.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
.support_64bit_ptrs = true,
.vmhub = AMDGPU_MMHUB,
.get_rptr = sdma_v4_0_ring_get_rptr,
.get_wptr = sdma_v4_0_page_ring_get_wptr,
.set_wptr = sdma_v4_0_page_ring_set_wptr,
.emit_frame_size =
6 + /* sdma_v4_0_ring_emit_hdp_flush */
3 + /* hdp invalidate */
6 + /* sdma_v4_0_ring_emit_pipeline_sync */
/* sdma_v4_0_ring_emit_vm_flush */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
.emit_ib = sdma_v4_0_ring_emit_ib,
.emit_fence = sdma_v4_0_ring_emit_fence,
.emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
.emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
.test_ring = sdma_v4_0_ring_test_ring,
.test_ib = sdma_v4_0_ring_test_ib,
.insert_nop = sdma_v4_0_ring_insert_nop,
.pad_ib = sdma_v4_0_ring_pad_ib,
.emit_wreg = sdma_v4_0_ring_emit_wreg,
.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
@ -1737,6 +1979,10 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs;
adev->sdma.instance[i].ring.me = i;
if (adev->sdma.has_page_queue) {
adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs;
adev->sdma.instance[i].page.me = i;
}
}
}
@ -1818,7 +2064,10 @@ static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = {
static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
{
adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
if (adev->sdma.has_page_queue)
adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;
else
adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
@ -1836,7 +2085,10 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
for (i = 0; i < adev->sdma.num_instances; i++) {
sched = &adev->sdma.instance[i].ring.sched;
if (adev->sdma.has_page_queue)
sched = &adev->sdma.instance[i].page.sched;
else
sched = &adev->sdma.instance[i].ring.sched;
adev->vm_manager.vm_pte_rqs[i] =
&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
}

View File

@ -61,9 +61,11 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
}
static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
* Pad as necessary with NOPs.
*/
@ -122,7 +124,7 @@ static void si_dma_stop(struct amdgpu_device *adev)
if (adev->mman.buffer_funcs_ring == ring)
amdgpu_ttm_set_buffer_funcs_status(adev, false);
ring->ready = false;
ring->sched.ready = false;
}
}
@ -175,13 +177,11 @@ static int si_dma_start(struct amdgpu_device *adev)
WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
ring->ready = true;
ring->sched.ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
}
if (adev->mman.buffer_funcs_ring == ring)
amdgpu_ttm_set_buffer_funcs_status(adev, true);
@ -209,21 +209,16 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
u64 gpu_addr;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
r = amdgpu_ring_alloc(ring, 4);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_device_wb_free(adev, index);
return r;
}
if (r)
goto error_free_wb;
amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1));
amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
@ -238,15 +233,11 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
amdgpu_device_wb_free(adev, index);
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
error_free_wb:
amdgpu_device_wb_free(adev, index);
return r;
}
@ -269,20 +260,16 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
if (r)
return r;
}
gpu_addr = adev->wb.gpu_addr + (index * 4);
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
if (r)
goto err0;
}
ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1);
ib.ptr[1] = lower_32_bits(gpu_addr);
@ -295,21 +282,16 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out\n");
r = -ETIMEDOUT;
goto err1;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err1;
}
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF) {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
if (tmp == 0xDEADBEEF)
r = 0;
} else {
DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
else
r = -EINVAL;
}
err1:
amdgpu_ib_free(adev, &ib, NULL);
@ -658,15 +640,6 @@ static int si_dma_process_trap_irq(struct amdgpu_device *adev,
return 0;
}
static int si_dma_process_illegal_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
DRM_ERROR("Illegal instruction in SDMA command stream\n");
schedule_work(&adev->reset_work);
return 0;
}
static int si_dma_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
@ -781,15 +754,10 @@ static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = {
.process = si_dma_process_trap_irq,
};
static const struct amdgpu_irq_src_funcs si_dma_illegal_inst_irq_funcs = {
.process = si_dma_process_illegal_inst_irq,
};
static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
{
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs;
adev->sdma.illegal_inst_irq.funcs = &si_dma_illegal_inst_irq_funcs;
}
/**

View File

@ -0,0 +1,130 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef _TA_XGMI_IF_H
#define _TA_XGMI_IF_H
/* Responses have bit 31 set */
#define RSP_ID_MASK (1U << 31)
#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK)
enum ta_command_xgmi {
TA_COMMAND_XGMI__INITIALIZE = 0x00,
TA_COMMAND_XGMI__GET_NODE_ID = 0x01,
TA_COMMAND_XGMI__GET_HIVE_ID = 0x02,
TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO = 0x03,
TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04
};
/* XGMI related enumerations */
/**********************************************************/;
enum ta_xgmi_connected_nodes {
TA_XGMI__MAX_CONNECTED_NODES = 64
};
enum ta_xgmi_status {
TA_XGMI_STATUS__SUCCESS = 0x00,
TA_XGMI_STATUS__GENERIC_FAILURE = 0x01,
TA_XGMI_STATUS__NULL_POINTER = 0x02,
TA_XGMI_STATUS__INVALID_PARAMETER = 0x03,
TA_XGMI_STATUS__NOT_INITIALIZED = 0x04,
TA_XGMI_STATUS__INVALID_NODE_NUM = 0x05,
TA_XGMI_STATUS__INVALID_NODE_ID = 0x06,
TA_XGMI_STATUS__INVALID_TOPOLOGY = 0x07,
TA_XGMI_STATUS__FAILED_ID_GEN = 0x08,
TA_XGMI_STATUS__FAILED_TOPOLOGY_INIT = 0x09,
TA_XGMI_STATUS__SET_SHARING_ERROR = 0x0A
};
enum ta_xgmi_assigned_sdma_engine {
TA_XGMI_ASSIGNED_SDMA_ENGINE__NOT_ASSIGNED = -1,
TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA0 = 0,
TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA1 = 1,
TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA2 = 2,
TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA3 = 3,
TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA4 = 4,
TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA5 = 5
};
/* input/output structures for XGMI commands */
/**********************************************************/
struct ta_xgmi_node_info {
uint64_t node_id;
uint8_t num_hops;
uint8_t is_sharing_enabled;
enum ta_xgmi_assigned_sdma_engine sdma_engine;
};
struct ta_xgmi_cmd_initialize_output {
uint32_t status;
};
struct ta_xgmi_cmd_get_node_id_output {
uint64_t node_id;
};
struct ta_xgmi_cmd_get_hive_id_output {
uint64_t hive_id;
};
struct ta_xgmi_cmd_get_topology_info_input {
uint32_t num_nodes;
struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
};
struct ta_xgmi_cmd_get_topology_info_output {
uint32_t num_nodes;
struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
};
struct ta_xgmi_cmd_set_topology_info_input {
uint32_t num_nodes;
struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
};
/**********************************************************/
/* Common input structure for XGMI callbacks */
union ta_xgmi_cmd_input {
struct ta_xgmi_cmd_get_topology_info_input get_topology_info;
struct ta_xgmi_cmd_set_topology_info_input set_topology_info;
};
/* Common output structure for XGMI callbacks */
union ta_xgmi_cmd_output {
struct ta_xgmi_cmd_initialize_output initialize;
struct ta_xgmi_cmd_get_node_id_output get_node_id;
struct ta_xgmi_cmd_get_hive_id_output get_hive_id;
struct ta_xgmi_cmd_get_topology_info_output get_topology_info;
};
/**********************************************************/
struct ta_xgmi_shared_memory {
uint32_t cmd_id;
uint32_t resp_id;
enum ta_xgmi_status xgmi_status;
uint32_t reserved;
union ta_xgmi_cmd_input xgmi_in_message;
union ta_xgmi_cmd_output xgmi_out_message;
};
#endif //_TA_XGMI_IF_H
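
The RSP_ID()/RSP_ID_MASK convention defined above means a response is simply the command id with bit 31 set, so matching a response back to its command is a mask-and-compare. A stand-alone demonstration using the same two macros:

#include <stdint.h>
#include <stdio.h>

#define RSP_ID_MASK (1U << 31)
#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK)

int main(void)
{
    uint32_t cmd = 0x03;                 /* e.g. GET_TOPOLOGY_INFO */
    uint32_t resp = RSP_ID(cmd);

    printf("cmd 0x%08x -> resp 0x%08x\n", (unsigned)cmd, (unsigned)resp);
    printf("is response: %s, original cmd: 0x%08x\n",
           (resp & RSP_ID_MASK) ? "yes" : "no",
           (unsigned)(resp & ~RSP_ID_MASK));
    return 0;
}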

View File

@ -162,12 +162,9 @@ static int uvd_v4_2_hw_init(void *handle)
uvd_v4_2_enable_mgcg(adev, true);
amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
r = amdgpu_ring_alloc(ring, 10);
if (r) {
@ -218,7 +215,7 @@ static int uvd_v4_2_hw_fini(void *handle)
if (RREG32(mmUVD_STATUS) != 0)
uvd_v4_2_stop(adev);
ring->ready = false;
ring->sched.ready = false;
return 0;
}
@ -484,11 +481,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
@ -499,14 +494,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
return r;
}
@ -519,8 +509,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
* Write ring commands to execute the indirect buffer
*/
static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0));
amdgpu_ring_write(ring, ib->gpu_addr);
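The hw_init, hw_fini and ring-test hunks above (and the matching ones in the UVD/VCE/VCN files that follow) replace the open-coded pattern of setting ring->ready, calling amdgpu_ring_test_ring() and clearing the flag on failure with a single amdgpu_ring_test_helper() call, with the ready flag moving into ring->sched.ready. A sketch of what such a helper amounts to (assumed to live in the common ring code):

int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
{
	int r;

	r = amdgpu_ring_test_ring(ring);
	if (r)
		DRM_DEV_ERROR(ring->adev->dev, "ring %s test failed (%d)\n",
			      ring->name, r);

	/* The scheduler only accepts work for rings that passed the test. */
	ring->sched.ready = !r;

	return r;
}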

View File

@ -158,12 +158,9 @@ static int uvd_v5_0_hw_init(void *handle)
uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
uvd_v5_0_enable_mgcg(adev, true);
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
r = amdgpu_ring_alloc(ring, 10);
if (r) {
@ -215,7 +212,7 @@ static int uvd_v5_0_hw_fini(void *handle)
if (RREG32(mmUVD_STATUS) != 0)
uvd_v5_0_stop(adev);
ring->ready = false;
ring->sched.ready = false;
return 0;
}
@ -500,11 +497,8 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
@ -515,14 +509,9 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
return r;
}
@ -535,8 +524,9 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
* Write ring commands to execute the indirect buffer
*/
static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0));
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));

View File

@ -175,11 +175,8 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring)
int r;
r = amdgpu_ring_alloc(ring, 16);
if (r) {
DRM_ERROR("amdgpu: uvd enc failed to lock ring %d (%d).\n",
ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
amdgpu_ring_commit(ring);
@ -189,14 +186,8 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed\n",
ring->idx);
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
}
return r;
}
@ -336,31 +327,24 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL);
if (r) {
DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
if (r)
goto error;
}
r = uvd_v6_0_enc_get_destroy_msg(ring, 1, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
if (r)
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out.\n");
if (r == 0)
r = -ETIMEDOUT;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
} else {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
else if (r > 0)
r = 0;
}
error:
dma_fence_put(fence);
return r;
}
static int uvd_v6_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@ -476,12 +460,9 @@ static int uvd_v6_0_hw_init(void *handle)
uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
uvd_v6_0_enable_mgcg(adev, true);
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
r = amdgpu_ring_alloc(ring, 10);
if (r) {
@ -513,12 +494,9 @@ static int uvd_v6_0_hw_init(void *handle)
if (uvd_v6_0_enc_support(adev)) {
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
ring = &adev->uvd.inst->ring_enc[i];
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
}
}
@ -548,7 +526,7 @@ static int uvd_v6_0_hw_fini(void *handle)
if (RREG32(mmUVD_STATUS) != 0)
uvd_v6_0_stop(adev);
ring->ready = false;
ring->sched.ready = false;
return 0;
}
@ -969,11 +947,9 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
@ -984,14 +960,9 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
return r;
}
@ -1004,9 +975,12 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
* Write ring commands to execute the indirect buffer
*/
static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID, 0));
amdgpu_ring_write(ring, vmid);
@ -1027,8 +1001,12 @@ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
* Write enc ring commands to execute the indirect buffer
*/
static void uvd_v6_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM);
amdgpu_ring_write(ring, vmid);
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
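The emit_ib hooks above now receive the amdgpu_job pointer and derive the VMID from it via AMDGPU_JOB_GET_VMID(). A plausible definition of that macro (an assumption here; the real one belongs in the job header) is simply a NULL-safe field access:

/* Assumed definition, shown for illustration only. */
#define AMDGPU_JOB_GET_VMID(job)	((job) ? (job)->vmid : 0)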

View File

@ -183,11 +183,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
return 0;
r = amdgpu_ring_alloc(ring, 16);
if (r) {
DRM_ERROR("amdgpu: uvd enc failed to lock (%d)ring %d (%d).\n",
ring->me, ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
amdgpu_ring_commit(ring);
@ -197,14 +194,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n",
ring->me, ring->idx, i);
} else {
DRM_ERROR("amdgpu: (%d)ring %d test failed\n",
ring->me, ring->idx);
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
}
return r;
}
@ -343,27 +334,19 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL);
if (r) {
DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ring->me, r);
if (r)
goto error;
}
r = uvd_v7_0_enc_get_destroy_msg(ring, 1, &fence);
if (r) {
DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ring->me, r);
if (r)
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ring->me);
if (r == 0)
r = -ETIMEDOUT;
} else if (r < 0) {
DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ring->me, r);
} else {
DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ring->me, ring->idx);
else if (r > 0)
r = 0;
}
error:
dma_fence_put(fence);
return r;
@ -540,12 +523,9 @@ static int uvd_v7_0_hw_init(void *handle)
ring = &adev->uvd.inst[j].ring;
if (!amdgpu_sriov_vf(adev)) {
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
r = amdgpu_ring_alloc(ring, 10);
if (r) {
@ -582,12 +562,9 @@ static int uvd_v7_0_hw_init(void *handle)
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
ring = &adev->uvd.inst[j].ring_enc[i];
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
}
}
done:
@ -619,7 +596,7 @@ static int uvd_v7_0_hw_fini(void *handle)
for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
if (adev->uvd.harvest_config & (1 << i))
continue;
adev->uvd.inst[i].ring.ready = false;
adev->uvd.inst[i].ring.sched.ready = false;
}
return 0;
@ -1235,11 +1212,9 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: (%d)cp failed to lock ring %d (%d).\n",
ring->me, ring->idx, r);
if (r)
return r;
}
amdgpu_ring_write(ring,
PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
@ -1251,14 +1226,9 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n",
ring->me, ring->idx, i);
} else {
DRM_ERROR("(%d)amdgpu: ring %d test failed (0x%08X)\n",
ring->me, ring->idx, tmp);
r = -EINVAL;
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
return r;
}
@ -1300,10 +1270,12 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
* Write ring commands to execute the indirect buffer
*/
static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
bool ctx_switch)
{
struct amdgpu_device *adev = ring->adev;
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
amdgpu_ring_write(ring,
PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0));
@ -1329,8 +1301,12 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
* Write enc ring commands to execute the indirect buffer
*/
static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM);
amdgpu_ring_write(ring, vmid);
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));

View File

@ -463,15 +463,11 @@ static int vce_v2_0_hw_init(void *handle)
amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
vce_v2_0_enable_mgcg(adev, true, false);
for (i = 0; i < adev->vce.num_rings; i++)
adev->vce.ring[i].ready = false;
for (i = 0; i < adev->vce.num_rings; i++) {
r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
if (r)
return r;
else
adev->vce.ring[i].ready = true;
}
DRM_INFO("VCE initialized successfully.\n");

View File

@ -474,15 +474,10 @@ static int vce_v3_0_hw_init(void *handle)
amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
for (i = 0; i < adev->vce.num_rings; i++)
adev->vce.ring[i].ready = false;
for (i = 0; i < adev->vce.num_rings; i++) {
r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
if (r)
return r;
else
adev->vce.ring[i].ready = true;
}
DRM_INFO("VCE initialized successfully.\n");
@ -838,8 +833,12 @@ out:
}
static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
amdgpu_ring_write(ring, VCE_CMD_IB_VM);
amdgpu_ring_write(ring, vmid);
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));

View File

@ -519,15 +519,10 @@ static int vce_v4_0_hw_init(void *handle)
if (r)
return r;
for (i = 0; i < adev->vce.num_rings; i++)
adev->vce.ring[i].ready = false;
for (i = 0; i < adev->vce.num_rings; i++) {
r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
if (r)
return r;
else
adev->vce.ring[i].ready = true;
}
DRM_INFO("VCE initialized successfully.\n");
@ -549,7 +544,7 @@ static int vce_v4_0_hw_fini(void *handle)
}
for (i = 0; i < adev->vce.num_rings; i++)
adev->vce.ring[i].ready = false;
adev->vce.ring[i].sched.ready = false;
return 0;
}
@ -951,9 +946,11 @@ static int vce_v4_0_set_powergating_state(void *handle,
}
#endif
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
struct amdgpu_ib *ib, bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
amdgpu_ring_write(ring, VCE_CMD_IB_VM);
amdgpu_ring_write(ring, vmid);
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));

View File

@ -176,30 +176,22 @@ static int vcn_v1_0_hw_init(void *handle)
struct amdgpu_ring *ring = &adev->vcn.ring_dec;
int i, r;
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
ring = &adev->vcn.ring_enc[i];
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
ring->sched.ready = true;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
}
ring = &adev->vcn.ring_jpeg;
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
done:
if (!r)
@ -224,7 +216,7 @@ static int vcn_v1_0_hw_fini(void *handle)
if (RREG32_SOC15(VCN, 0, mmUVD_STATUS))
vcn_v1_0_stop(adev);
ring->ready = false;
ring->sched.ready = false;
return 0;
}
@ -1366,10 +1358,12 @@ static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64
* Write ring commands to execute the indirect buffer
*/
static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
struct amdgpu_device *adev = ring->adev;
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
amdgpu_ring_write(ring,
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0));
@ -1524,8 +1518,12 @@ static void vcn_v1_0_enc_ring_insert_end(struct amdgpu_ring *ring)
* Write enc ring commands to execute the indirect buffer
*/
static void vcn_v1_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
amdgpu_ring_write(ring, VCN_ENC_CMD_IB);
amdgpu_ring_write(ring, vmid);
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
@ -1725,10 +1723,12 @@ static void vcn_v1_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u6
* Write ring commands to execute the indirect buffer.
*/
static void vcn_v1_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
{
struct amdgpu_device *adev = ring->adev;
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
amdgpu_ring_write(ring,
PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0));

View File

@ -23,6 +23,7 @@
#include "kfd_priv.h"
#include "kfd_events.h"
#include "cik_int.h"
#include "amdgpu_amdkfd.h"
static bool cik_event_interrupt_isr(struct kfd_dev *dev,
const uint32_t *ih_ring_entry,
@ -107,7 +108,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
kfd_process_vm_fault(dev->dqm, pasid);
memset(&info, 0, sizeof(info));
dev->kfd2kgd->get_vm_fault_info(dev->kgd, &info);
amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->kgd, &info);
if (!info.page_addr && !info.status)
return;

View File

@ -68,6 +68,4 @@
#define GRBM_GFX_INDEX 0x30800
#define ATC_VMID_PASID_MAPPING_VALID (1U << 31)
#endif

View File

@ -37,6 +37,7 @@
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "amdgpu_amdkfd.h"
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
@ -834,8 +835,7 @@ static int kfd_ioctl_get_clock_counters(struct file *filep,
dev = kfd_device_by_id(args->gpu_id);
if (dev)
/* Reading GPU clock counter from KGD */
args->gpu_clock_counter =
dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
else
/* Node without GPU resource */
args->gpu_clock_counter = 0;
@ -1042,7 +1042,7 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
}
mutex_unlock(&p->mutex);
err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
mem, &kern_addr, &size);
if (err) {
pr_err("Failed to map event page to kernel\n");
@ -1240,7 +1240,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev)
if (dev->device_info->needs_iommu_device)
return false;
dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
if (mem_info.local_mem_size_private == 0 &&
mem_info.local_mem_size_public > 0)
return true;
@ -1281,7 +1281,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
goto err_unlock;
}
err = dev->kfd2kgd->alloc_memory_of_gpu(
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
dev->kgd, args->va_addr, args->size,
pdd->vm, (struct kgd_mem **) &mem, &offset,
flags);
@ -1303,7 +1303,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
return 0;
err_free:
dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
err_unlock:
mutex_unlock(&p->mutex);
return err;
@ -1338,7 +1338,8 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
goto err_unlock;
}
ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
(struct kgd_mem *)mem);
/* If freeing the buffer failed, leave the handle in place for
* clean-up during process tear-down.
@ -1418,7 +1419,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
err = PTR_ERR(peer_pdd);
goto get_mem_obj_from_handle_failed;
}
err = peer->kfd2kgd->map_memory_to_gpu(
err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
if (err) {
pr_err("Failed to map to gpu %d/%d\n",
@ -1430,7 +1431,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
mutex_unlock(&p->mutex);
err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
if (err) {
pr_debug("Sync memory failed, wait interrupted by user signal\n");
goto sync_memory_failed;
@ -1525,7 +1526,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
err = -ENODEV;
goto get_mem_obj_from_handle_failed;
}
err = dev->kfd2kgd->unmap_memory_to_gpu(
err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
if (err) {
pr_err("Failed to unmap from gpu %d/%d\n",

View File

@ -26,6 +26,7 @@
#include "kfd_priv.h"
#include "kfd_topology.h"
#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"
/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
* GPU processor ID are expressed with Bit[31]=1.
@ -753,12 +754,10 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
return -ENODATA;
}
pcrat_image = kmalloc(crat_table->length, GFP_KERNEL);
pcrat_image = kmemdup(crat_table, crat_table->length, GFP_KERNEL);
if (!pcrat_image)
return -ENOMEM;
memcpy(pcrat_image, crat_table, crat_table->length);
*crat_image = pcrat_image;
*size = crat_table->length;
@ -1161,7 +1160,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
cu->proximity_domain = proximity_domain;
kdev->kfd2kgd->get_cu_info(kdev->kgd, &cu_info);
amdgpu_amdkfd_get_cu_info(kdev->kgd, &cu_info);
cu->num_simd_per_cu = cu_info.simd_per_cu;
cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
cu->max_waves_simd = cu_info.max_waves_per_simd;
@ -1192,7 +1191,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* report the total FB size (public+private) as a single
* private heap.
*/
kdev->kfd2kgd->get_local_mem_info(kdev->kgd, &local_mem_info);
amdgpu_amdkfd_get_local_mem_info(kdev->kgd, &local_mem_info);
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
sub_type_hdr->length);

View File

@ -28,6 +28,7 @@
#include "kfd_pm4_headers_vi.h"
#include "cwsr_trap_handler.h"
#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"
#define MQD_SIZE_ALIGNED 768
@ -478,7 +479,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
/* add another 512KB for all other allocations on gart (HPD, fences) */
size += 512 * 1024;
if (kfd->kfd2kgd->init_gtt_mem_allocation(
if (amdgpu_amdkfd_alloc_gtt_mem(
kfd->kgd, size, &kfd->gtt_mem,
&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
false)) {
@ -552,7 +553,7 @@ kfd_topology_add_device_error:
kfd_doorbell_error:
kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
dev_err(kfd_device,
"device %x:%x NOT added due to errors\n",
kfd->pdev->vendor, kfd->pdev->device);
@ -569,7 +570,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
kfd_topology_remove_device(kfd);
kfd_doorbell_fini(kfd);
kfd_gtt_sa_fini(kfd);
kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
}
kfree(kfd);
@ -681,6 +682,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
bool is_patched = false;
unsigned long flags;
if (!kfd->init_complete)
return;
@ -690,7 +692,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
return;
}
spin_lock(&kfd->interrupt_lock);
spin_lock_irqsave(&kfd->interrupt_lock, flags);
if (kfd->interrupts_active
&& interrupt_is_wanted(kfd, ih_ring_entry,
@ -699,7 +701,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
is_patched ? patched_ihre : ih_ring_entry))
queue_work(kfd->ih_wq, &kfd->interrupt_work);
spin_unlock(&kfd->interrupt_lock);
spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
}
int kgd2kfd_quiesce_mm(struct mm_struct *mm)

View File

@ -33,6 +33,7 @@
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
@ -219,7 +220,7 @@ static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
if (ret)
return ret;
return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
pmf->release_mem_size / sizeof(uint32_t));
}
@ -672,7 +673,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
pdd = qpd_to_pdd(qpd);
/* Retrieve PD base */
pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
dqm_lock(dqm);
if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
@ -743,7 +744,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
pdd = qpd_to_pdd(qpd);
/* Retrieve PD base */
pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
dqm_lock(dqm);
if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
@ -793,7 +794,7 @@ static int register_process(struct device_queue_manager *dqm,
pdd = qpd_to_pdd(qpd);
/* Retrieve PD base */
pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
dqm_lock(dqm);
list_add(&n->list, &dqm->queues);
@ -805,7 +806,7 @@ static int register_process(struct device_queue_manager *dqm,
retval = dqm->asic_ops.update_qpd(dqm, qpd);
if (dqm->processes_count++ == 0)
dqm->dev->kfd2kgd->set_compute_idle(dqm->dev->kgd, false);
amdgpu_amdkfd_set_compute_idle(dqm->dev->kgd, false);
dqm_unlock(dqm);
@ -829,7 +830,7 @@ static int unregister_process(struct device_queue_manager *dqm,
list_del(&cur->list);
kfree(cur);
if (--dqm->processes_count == 0)
dqm->dev->kfd2kgd->set_compute_idle(
amdgpu_amdkfd_set_compute_idle(
dqm->dev->kgd, true);
goto out;
}
@ -845,15 +846,8 @@ static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
unsigned int vmid)
{
uint32_t pasid_mapping;
pasid_mapping = (pasid == 0) ? 0 :
(uint32_t)pasid |
ATC_VMID_PASID_MAPPING_VALID;
return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
dqm->dev->kgd, pasid_mapping,
vmid);
dqm->dev->kgd, pasid, vmid);
}
static void init_interrupts(struct device_queue_manager *dqm)
@ -1796,7 +1790,7 @@ static void kfd_process_hw_exception(struct work_struct *work)
{
struct device_queue_manager *dqm = container_of(work,
struct device_queue_manager, hw_exception_work);
dqm->dev->kfd2kgd->gpu_recover(dqm->dev->kgd);
amdgpu_amdkfd_gpu_reset(dqm->dev->kgd);
}
#if defined(CONFIG_DEBUG_FS)

View File

@ -22,6 +22,7 @@
*/
#include "kfd_mqd_manager.h"
#include "amdgpu_amdkfd.h"
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
struct kfd_dev *dev)
@ -58,7 +59,7 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
uint32_t cu_per_sh[4] = {0};
int i, se, cu = 0;
mm->dev->kfd2kgd->get_cu_info(mm->dev->kgd, &cu_info);
amdgpu_amdkfd_get_cu_info(mm->dev->kgd, &cu_info);
if (cu_mask_count > cu_info.cu_active_number)
cu_mask_count = cu_info.cu_active_number;

View File

@ -30,6 +30,7 @@
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "sdma0/sdma0_4_0_sh_mask.h"
#include "amdgpu_amdkfd.h"
static inline struct v9_mqd *get_mqd(void *mqd)
{
@ -83,7 +84,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
*mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
if (!*mqd_mem_obj)
return -ENOMEM;
retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd,
retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
ALIGN(q->ctl_stack_size, PAGE_SIZE) +
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
&((*mqd_mem_obj)->gtt_mem),
@ -250,7 +251,7 @@ static void uninit_mqd(struct mqd_manager *mm, void *mqd,
struct kfd_dev *kfd = mm->dev;
if (mqd_mem_obj->gtt_mem) {
kfd->kfd2kgd->free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
kfree(mqd_mem_obj);
} else {
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);

View File

@ -22,6 +22,7 @@
#include <linux/types.h>
#include "kfd_priv.h"
#include "amdgpu_ids.h"
static unsigned int pasid_bits = 16;
static const struct kfd2kgd_calls *kfd2kgd;
@ -71,7 +72,7 @@ unsigned int kfd_pasid_alloc(void)
return false;
}
r = kfd2kgd->alloc_pasid(pasid_bits);
r = amdgpu_pasid_alloc(pasid_bits);
return r > 0 ? r : 0;
}
@ -79,5 +80,5 @@ unsigned int kfd_pasid_alloc(void)
void kfd_pasid_free(unsigned int pasid)
{
if (kfd2kgd)
kfd2kgd->free_pasid(pasid);
amdgpu_pasid_free(pasid);
}

View File

@ -507,6 +507,7 @@ struct qcm_process_device {
* All the memory management data should be here too
*/
uint64_t gds_context_area;
/* Contains page table flags such as AMDGPU_PTE_VALID since gfx9 */
uint64_t page_table_base;
uint32_t sh_mem_config;
uint32_t sh_mem_bases;

View File

@ -31,6 +31,7 @@
#include <linux/compat.h>
#include <linux/mman.h>
#include <linux/file.h>
#include "amdgpu_amdkfd.h"
struct mm_struct;
@ -100,8 +101,8 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
{
struct kfd_dev *dev = pdd->dev;
dev->kfd2kgd->unmap_memory_to_gpu(dev->kgd, mem, pdd->vm);
dev->kfd2kgd->free_memory_of_gpu(dev->kgd, mem);
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem);
}
/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
@ -119,16 +120,16 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
int handle;
int err;
err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
pdd->vm, &mem, NULL, flags);
if (err)
goto err_alloc_mem;
err = kdev->kfd2kgd->map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
if (err)
goto err_map_mem;
err = kdev->kfd2kgd->sync_memory(kdev->kgd, mem, true);
err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
if (err) {
pr_debug("Sync memory failed, wait interrupted by user signal\n");
goto sync_memory_failed;
@ -147,7 +148,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
}
if (kptr) {
err = kdev->kfd2kgd->map_gtt_bo_to_kernel(kdev->kgd,
err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
(struct kgd_mem *)mem, kptr, NULL);
if (err) {
pr_debug("Map GTT BO to kernel failed\n");
@ -165,7 +166,7 @@ sync_memory_failed:
return err;
err_map_mem:
kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem);
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem);
err_alloc_mem:
*kptr = NULL;
return err;
@ -296,11 +297,11 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
per_device_list) {
if (!peer_pdd->vm)
continue;
peer_pdd->dev->kfd2kgd->unmap_memory_to_gpu(
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
peer_pdd->dev->kgd, mem, peer_pdd->vm);
}
pdd->dev->kfd2kgd->free_memory_of_gpu(pdd->dev->kgd, mem);
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem);
kfd_process_device_remove_obj_handle(pdd, id);
}
}
@ -323,11 +324,12 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
pdd->dev->id, p->pasid);
if (pdd->drm_file) {
pdd->dev->kfd2kgd->release_process_vm(pdd->dev->kgd, pdd->vm);
amdgpu_amdkfd_gpuvm_release_process_vm(
pdd->dev->kgd, pdd->vm);
fput(pdd->drm_file);
}
else if (pdd->vm)
pdd->dev->kfd2kgd->destroy_process_vm(
amdgpu_amdkfd_gpuvm_destroy_process_vm(
pdd->dev->kgd, pdd->vm);
list_del(&pdd->per_device_list);
@ -688,12 +690,12 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
dev = pdd->dev;
if (drm_file)
ret = dev->kfd2kgd->acquire_process_vm(
ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
dev->kgd, drm_file, p->pasid,
&pdd->vm, &p->kgd_process_info, &p->ef);
else
ret = dev->kfd2kgd->create_process_vm(
dev->kgd, p->pasid, &pdd->vm, &p->kgd_process_info, &p->ef);
ret = amdgpu_amdkfd_gpuvm_create_process_vm(dev->kgd, p->pasid,
&pdd->vm, &p->kgd_process_info, &p->ef);
if (ret) {
pr_err("Failed to create process VM object\n");
return ret;
@ -714,7 +716,7 @@ err_init_cwsr:
err_reserve_ib_mem:
kfd_process_device_free_bos(pdd);
if (!drm_file)
dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm);
amdgpu_amdkfd_gpuvm_destroy_process_vm(dev->kgd, pdd->vm);
pdd->vm = NULL;
return ret;
@ -972,7 +974,7 @@ static void restore_process_worker(struct work_struct *work)
*/
p->last_restore_timestamp = get_jiffies_64();
ret = pdd->dev->kfd2kgd->restore_process_bos(p->kgd_process_info,
ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
&p->ef);
if (ret) {
pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n",

View File

@ -36,6 +36,7 @@
#include "kfd_topology.h"
#include "kfd_device_queue_manager.h"
#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"
/* topology_device_list - Master list of all topology devices */
static struct list_head topology_device_list;
@ -1052,7 +1053,7 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
if (!gpu)
return 0;
gpu->kfd2kgd->get_local_mem_info(gpu->kgd, &local_mem_info);
amdgpu_amdkfd_get_local_mem_info(gpu->kgd, &local_mem_info);
local_mem_size = local_mem_info.local_mem_size_private +
local_mem_info.local_mem_size_public;
@ -1118,8 +1119,7 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
* for APUs - If CRAT from ACPI reports more than one bank, then
* all the banks will report the same mem_clk_max information
*/
dev->gpu->kfd2kgd->get_local_mem_info(dev->gpu->kgd,
&local_mem_info);
amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd, &local_mem_info);
list_for_each_entry(mem, &dev->mem_props, list)
mem->mem_clk_max = local_mem_info.mem_clk_max;
@ -1240,7 +1240,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
* needed for the topology
*/
dev->gpu->kfd2kgd->get_cu_info(dev->gpu->kgd, &cu_info);
amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info);
dev->node_props.simd_arrays_per_engine =
cu_info.num_shader_arrays_per_engine;
@ -1249,7 +1249,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->node_props.location_id = PCI_DEVID(gpu->pdev->bus->number,
gpu->pdev->devfn);
dev->node_props.max_engine_clk_fcompute =
dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(dev->gpu->kgd);
amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd);
dev->node_props.max_engine_clk_ccompute =
cpufreq_quick_get_max(0) / 1000;
dev->node_props.drm_render_minor =

View File

@ -76,6 +76,16 @@
#define FIRMWARE_RAVEN_DMCU "amdgpu/raven_dmcu.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN_DMCU);
/**
* DOC: overview
*
* The AMDgpu display manager, **amdgpu_dm** (or even simpler,
* **dm**) sits between DRM and DC. It acts as a liaison, converting DRM
* requests into DC requests, and DC responses into DRM responses.
*
* The root control structure is &struct amdgpu_display_manager.
*/
/* basic init/fini API */
static int amdgpu_dm_init(struct amdgpu_device *adev);
static void amdgpu_dm_fini(struct amdgpu_device *adev);
@ -95,7 +105,7 @@ static void
amdgpu_dm_update_connector_after_detect(struct amdgpu_dm_connector *aconnector);
static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
struct amdgpu_plane *aplane,
struct drm_plane *plane,
unsigned long possible_crtcs);
static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
struct drm_plane *plane,
@ -379,11 +389,6 @@ static void amdgpu_dm_fbc_init(struct drm_connector *connector)
}
/*
* Init display KMS
*
* Returns 0 on success
*/
static int amdgpu_dm_init(struct amdgpu_device *adev)
{
struct dc_init_data init_data;
@ -663,6 +668,26 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend)
drm_modeset_unlock(&dev->mode_config.connection_mutex);
}
/**
* dm_hw_init() - Initialize DC device
* @handle: The base driver device containing the amdgpu_dm device.
*
* Initialize the &struct amdgpu_display_manager device. This involves calling
* the initializers of each DM component, then populating the struct with them.
*
* Although the function implies hardware initialization, both hardware and
* software are initialized here. Splitting them out to their relevant init
* hooks is a future TODO item.
*
* Some notable things that are initialized here:
*
* - Display Core, both software and hardware
* - DC modules that we need (freesync and color management)
* - DRM software states
* - Interrupt sources and handlers
* - Vblank support
* - Debug FS entries, if enabled
*/
static int dm_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@ -673,6 +698,14 @@ static int dm_hw_init(void *handle)
return 0;
}
/**
* dm_hw_fini() - Teardown DC device
* @handle: The base driver device containing the amdgpu_dm device.
*
* Teardown components within &struct amdgpu_display_manager that require
* cleanup. This involves cleaning up the DRM device, DC, and any modules that
* were loaded. Also flush IRQ workqueues and disable them.
*/
static int dm_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@ -898,6 +931,16 @@ static int dm_resume(void *handle)
return ret;
}
/**
* DOC: DM Lifecycle
*
* DM (and consequently DC) is registered in the amdgpu base driver as an IP
* block. When CONFIG_DRM_AMD_DC is enabled, the DM device IP block is added to
* the base driver's device list to be initialized and torn down accordingly.
*
* The functions to do so are provided as hooks in &struct amd_ip_funcs.
*/
static const struct amd_ip_funcs amdgpu_dm_funcs = {
.name = "dm",
.early_init = dm_early_init,
@ -965,6 +1008,12 @@ dm_atomic_state_alloc_free(struct drm_atomic_state *state)
kfree(dm_state);
}
/**
* DOC: atomic
*
* *WIP*
*/
static const struct drm_mode_config_funcs amdgpu_dm_mode_funcs = {
.fb_create = amdgpu_display_user_framebuffer_create,
.output_poll_changed = drm_fb_helper_output_poll_changed,
@ -1527,8 +1576,23 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd)
{
struct amdgpu_display_manager *dm = bl_get_data(bd);
/* backlight_pwm_u16_16 parameter is in unsigned 32 bit, 16 bit integer
* and 16 bit fractional, where 1.0 is max backlight value.
* bd->props.brightness is 8 bit format and needs to be converted by
* scaling via copy lower byte to upper byte of 16 bit value.
*/
uint32_t brightness = bd->props.brightness * 0x101;
/*
* PWM interprets 0 as 100% rather than 0% because of a HW
* limitation for level 0, so limit the minimum brightness level
* to 1.
*/
if (bd->props.brightness < 1)
brightness = 0x101;
if (dc_link_set_backlight_level(dm->backlight_link,
bd->props.brightness, 0, 0))
brightness, 0, 0))
return 0;
else
return 1;
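For reference, the 0x101 multiplier above simply replicates the 8-bit brightness into both bytes of the 16-bit value: 0xFF * 0x101 = 0xFFFF (full scale), 0x80 * 0x101 = 0x8080 (about 50.2% of 0xFFFF), and an input of 0 is clamped to 0x101 so the PWM's "0 means 100%" quirk is never hit (plain arithmetic, shown only to make the conversion concrete).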
@ -1580,18 +1644,18 @@ static int initialize_plane(struct amdgpu_display_manager *dm,
struct amdgpu_mode_info *mode_info,
int plane_id)
{
struct amdgpu_plane *plane;
struct drm_plane *plane;
unsigned long possible_crtcs;
int ret = 0;
plane = kzalloc(sizeof(struct amdgpu_plane), GFP_KERNEL);
plane = kzalloc(sizeof(struct drm_plane), GFP_KERNEL);
mode_info->planes[plane_id] = plane;
if (!plane) {
DRM_ERROR("KMS: Failed to allocate plane\n");
return -ENOMEM;
}
plane->base.type = mode_info->plane_type[plane_id];
plane->type = mode_info->plane_type[plane_id];
/*
* HACK: IGT tests expect that each plane can only have
@ -1682,7 +1746,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
}
for (i = 0; i < dm->dc->caps.max_streams; i++)
if (amdgpu_dm_crtc_init(dm, &mode_info->planes[i]->base, i)) {
if (amdgpu_dm_crtc_init(dm, mode_info->planes[i], i)) {
DRM_ERROR("KMS: Failed to initialize crtc\n");
goto fail;
}
@ -3457,49 +3521,49 @@ static const u32 cursor_formats[] = {
};
static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
struct amdgpu_plane *aplane,
struct drm_plane *plane,
unsigned long possible_crtcs)
{
int res = -EPERM;
switch (aplane->base.type) {
switch (plane->type) {
case DRM_PLANE_TYPE_PRIMARY:
res = drm_universal_plane_init(
dm->adev->ddev,
&aplane->base,
plane,
possible_crtcs,
&dm_plane_funcs,
rgb_formats,
ARRAY_SIZE(rgb_formats),
NULL, aplane->base.type, NULL);
NULL, plane->type, NULL);
break;
case DRM_PLANE_TYPE_OVERLAY:
res = drm_universal_plane_init(
dm->adev->ddev,
&aplane->base,
plane,
possible_crtcs,
&dm_plane_funcs,
yuv_formats,
ARRAY_SIZE(yuv_formats),
NULL, aplane->base.type, NULL);
NULL, plane->type, NULL);
break;
case DRM_PLANE_TYPE_CURSOR:
res = drm_universal_plane_init(
dm->adev->ddev,
&aplane->base,
plane,
possible_crtcs,
&dm_plane_funcs,
cursor_formats,
ARRAY_SIZE(cursor_formats),
NULL, aplane->base.type, NULL);
NULL, plane->type, NULL);
break;
}
drm_plane_helper_add(&aplane->base, &dm_plane_helper_funcs);
drm_plane_helper_add(plane, &dm_plane_helper_funcs);
/* Create (reset) the plane state */
if (aplane->base.funcs->reset)
aplane->base.funcs->reset(&aplane->base);
if (plane->funcs->reset)
plane->funcs->reset(plane);
return res;
@ -3510,7 +3574,7 @@ static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
uint32_t crtc_index)
{
struct amdgpu_crtc *acrtc = NULL;
struct amdgpu_plane *cursor_plane;
struct drm_plane *cursor_plane;
int res = -ENOMEM;
@ -3518,7 +3582,7 @@ static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
if (!cursor_plane)
goto fail;
cursor_plane->base.type = DRM_PLANE_TYPE_CURSOR;
cursor_plane->type = DRM_PLANE_TYPE_CURSOR;
res = amdgpu_dm_plane_init(dm, cursor_plane, 0);
acrtc = kzalloc(sizeof(struct amdgpu_crtc), GFP_KERNEL);
@ -3529,7 +3593,7 @@ static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
dm->ddev,
&acrtc->base,
plane,
&cursor_plane->base,
cursor_plane,
&amdgpu_dm_crtc_funcs, NULL);
if (res)
@ -3768,12 +3832,12 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
case DRM_MODE_CONNECTOR_HDMIA:
aconnector->base.polled = DRM_CONNECTOR_POLL_HPD;
aconnector->base.ycbcr_420_allowed =
link->link_enc->features.ycbcr420_supported ? true : false;
link->link_enc->features.hdmi_ycbcr420_supported ? true : false;
break;
case DRM_MODE_CONNECTOR_DisplayPort:
aconnector->base.polled = DRM_CONNECTOR_POLL_HPD;
aconnector->base.ycbcr_420_allowed =
link->link_enc->features.ycbcr420_supported ? true : false;
link->link_enc->features.dp_ycbcr420_supported ? true : false;
break;
case DRM_MODE_CONNECTOR_DVID:
aconnector->base.polled = DRM_CONNECTOR_POLL_HPD;
@ -4531,6 +4595,14 @@ static int amdgpu_dm_atomic_commit(struct drm_device *dev,
/*TODO Handle EINTR, reenable IRQ*/
}
/**
* amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation.
* @state: The atomic state to commit
*
* This will tell DC to commit the constructed DC state from atomic_check,
* programming the hardware. Any failure here implies a hardware failure, since
* atomic check should have filtered anything non-kosher.
*/
static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
{
struct drm_device *dev = state->dev;
@ -5302,6 +5374,12 @@ enum surface_update_type dm_determine_update_type_for_commit(struct dc *dc, stru
struct dc_stream_update stream_update;
enum surface_update_type update_type = UPDATE_TYPE_FAST;
if (!updates || !surface) {
DRM_ERROR("Plane or surface update failed to allocate");
/* Set type to FULL to avoid crashing in DC*/
update_type = UPDATE_TYPE_FULL;
goto ret;
}
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
new_dm_crtc_state = to_dm_crtc_state(new_crtc_state);
@ -5377,6 +5455,31 @@ ret:
return update_type;
}
/**
* amdgpu_dm_atomic_check() - Atomic check implementation for AMDgpu DM.
* @dev: The DRM device
* @state: The atomic state to commit
*
* Validate that the given atomic state is programmable by DC into hardware.
* This involves constructing a &struct dc_state reflecting the new hardware
* state we wish to commit, then querying DC to see if it is programmable. It's
* important not to modify the existing DC state. Otherwise, atomic_check
* may unexpectedly commit hardware changes.
*
* When validating the DC state, it's important that the right locks are
* acquired. For full updates case which removes/adds/updates streams on one
* CRTC while flipping on another CRTC, acquiring global lock will guarantee
* that any such full update commit will wait for completion of any outstanding
* flip using DRMs synchronization events. See
* dm_determine_update_type_for_commit()
*
* Note that DM adds the affected connectors for all CRTCs in the state, even when that
* might not seem necessary. This is because DC stream creation requires the
* DC sink, which is tied to the DRM connector state. Cleaning this up should
* be possible but non-trivial - a possible TODO item.
*
* Return: Negative error code if validation failed, zero otherwise.
*/
static int amdgpu_dm_atomic_check(struct drm_device *dev,
struct drm_atomic_state *state)
{
@ -5479,15 +5582,6 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
lock_and_validation_needed = true;
}
/*
* For full updates case when
* removing/adding/updating streams on one CRTC while flipping
* on another CRTC,
* acquiring global lock will guarantee that any such full
* update commit
* will wait for completion of any outstanding flip using DRMs
* synchronization events.
*/
update_type = dm_determine_update_type_for_commit(dc, state);
if (overall_update_type < update_type)

View File

@ -59,49 +59,100 @@ struct common_irq_params {
enum dc_irq_source irq_src;
};
/**
* struct irq_list_head - Linked-list for low context IRQ handlers.
*
* @head: The list_head within &struct handler_data
* @work: A work_struct containing the deferred handler work
*/
struct irq_list_head {
struct list_head head;
/* In case this interrupt needs post-processing, 'work' will be queued*/
struct work_struct work;
};
/**
* struct dm_compressor_info - Buffer info used by frame buffer compression
* @cpu_addr: MMIO cpu addr
* @bo_ptr: Pointer to the buffer object
* @gpu_addr: MMIO gpu addr
*/
struct dm_comressor_info {
void *cpu_addr;
struct amdgpu_bo *bo_ptr;
uint64_t gpu_addr;
};
/**
* struct amdgpu_display_manager - Central amdgpu display manager device
*
* @dc: Display Core control structure
* @adev: AMDGPU base driver structure
* @ddev: DRM base driver structure
* @display_indexes_num: Max number of display streams supported
* @irq_handler_list_table_lock: Synchronizes access to IRQ tables
* @backlight_dev: Backlight control device
* @cached_state: Caches device atomic state for suspend/resume
* @compressor: Frame buffer compression buffer. See &struct dm_comressor_info
*/
struct amdgpu_display_manager {
struct dc *dc;
/**
* @cgs_device:
*
* The Common Graphics Services device. It provides an interface for
* accessing registers.
*/
struct cgs_device *cgs_device;
struct amdgpu_device *adev; /*AMD base driver*/
struct drm_device *ddev; /*DRM base driver*/
struct amdgpu_device *adev;
struct drm_device *ddev;
u16 display_indexes_num;
/*
* 'irq_source_handler_table' holds a list of handlers
* per (DAL) IRQ source.
/**
* @irq_handler_list_low_tab:
*
* Each IRQ source may need to be handled at different contexts.
* By 'context' we mean, for example:
* - The ISR context, which is the direct interrupt handler.
* - The 'deferred' context - this is the post-processing of the
* interrupt, but at a lower priority.
* Low priority IRQ handler table.
*
* It is an n*m table consisting of n IRQ sources and m handlers per IRQ
* source. Low priority IRQ handlers are deferred to a workqueue to be
* processed. Hence, they can sleep.
*
* Note that handlers are called in the same order as they were
* registered (FIFO).
*/
struct irq_list_head irq_handler_list_low_tab[DAL_IRQ_SOURCES_NUMBER];
/**
* @irq_handler_list_high_tab:
*
* High priority IRQ handler table.
*
* It is an n*m table, same as &irq_handler_list_low_tab. However,
* handlers in this table are not deferred and are called immediately.
*/
struct list_head irq_handler_list_high_tab[DAL_IRQ_SOURCES_NUMBER];
/**
* @pflip_params:
*
* Page flip IRQ parameters, passed to registered handlers when
* triggered.
*/
struct common_irq_params
pflip_params[DC_IRQ_SOURCE_PFLIP_LAST - DC_IRQ_SOURCE_PFLIP_FIRST + 1];
/**
* @vblank_params:
*
* Vertical blanking IRQ parameters, passed to registered handlers when
* triggered.
*/
struct common_irq_params
vblank_params[DC_IRQ_SOURCE_VBLANK6 - DC_IRQ_SOURCE_VBLANK1 + 1];
/* this spin lock synchronizes access to 'irq_handler_list_table' */
spinlock_t irq_handler_list_table_lock;
struct backlight_device *backlight_dev;
@ -110,9 +161,6 @@ struct amdgpu_display_manager {
struct mod_freesync *freesync_module;
/**
* Caches device atomic state for suspend/resume
*/
struct drm_atomic_state *cached_state;
struct dm_comressor_info compressor;

View File

@ -164,7 +164,7 @@ int amdgpu_dm_set_regamma_lut(struct dm_crtc_state *crtc)
*/
stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
ret = mod_color_calculate_regamma_params(stream->out_transfer_func,
gamma, true, adev->asic_type <= CHIP_RAVEN);
gamma, true, adev->asic_type <= CHIP_RAVEN, NULL);
dc_gamma_release(&gamma);
if (!ret) {
stream->out_transfer_func->type = old_type;

View File

@ -75,6 +75,11 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
return -EINVAL;
}
if (!stream_state) {
DRM_ERROR("No stream state for CRTC%d\n", crtc->index);
return -EINVAL;
}
/* When enabling CRC, we should also disable dithering. */
if (source == AMDGPU_DM_PIPE_CRC_SOURCE_AUTO) {
if (dc_stream_configure_crc(stream_state->ctx->dc,

View File

@ -32,16 +32,55 @@
#include "amdgpu_dm.h"
#include "amdgpu_dm_irq.h"
/**
* DOC: overview
*
* DM provides another layer of IRQ management on top of what the base driver
* already provides. This is something that could be cleaned up, and is a
* future TODO item.
*
* The base driver provides IRQ source registration with DRM, handler
* registration into the base driver's IRQ table, and a handler callback
* amdgpu_irq_handler(), with which DRM calls on interrupts. This generic
* handler looks up the IRQ table, and calls the respective
* &amdgpu_irq_src_funcs.process hookups.
*
* What DM provides on top are two IRQ tables specifically for top-half and
* bottom-half IRQ handling, with the bottom-half implementing workqueues:
*
* - &amdgpu_display_manager.irq_handler_list_high_tab
* - &amdgpu_display_manager.irq_handler_list_low_tab
*
* They override the base driver's IRQ table, and the effect can be seen
* in the hooks that DM provides for &amdgpu_irq_src_funcs.process. They
* are all set to the DM generic handler amdgpu_dm_irq_handler(), which looks up
* DM's IRQ tables. However, in order for the base driver to recognize this hook, DM
* still needs to register the IRQ with the base driver. See
* dce110_register_irq_handlers() and dcn10_register_irq_handlers().
*
* To expose DC's hardware interrupt toggle to the base driver, DM implements
* &amdgpu_irq_src_funcs.set hooks. Base driver calls it through
* amdgpu_irq_update() to enable or disable the interrupt.
*/
/******************************************************************************
* Private declarations.
*****************************************************************************/
/**
* struct amdgpu_dm_irq_handler_data - Data for DM interrupt handlers.
*
* @list: Linked list entry referencing the next/previous handler
* @handler: Handler function
* @handler_arg: Argument passed to the handler when triggered
* @dm: DM which this handler belongs to
* @irq_source: DC interrupt source that this handler is registered for
*/
struct amdgpu_dm_irq_handler_data {
struct list_head list;
interrupt_handler handler;
void *handler_arg;
/* DM which this handler belongs to */
struct amdgpu_display_manager *dm;
/* DAL irq source which registered for this interrupt. */
enum dc_irq_source irq_source;
@ -68,7 +107,7 @@ static void init_handler_common_data(struct amdgpu_dm_irq_handler_data *hcd,
}
/**
* dm_irq_work_func - Handle an IRQ outside of the interrupt handler proper.
* dm_irq_work_func() - Handle an IRQ outside of the interrupt handler proper.
*
* @work: work struct
*/
@ -99,8 +138,8 @@ static void dm_irq_work_func(struct work_struct *work)
* (The most common use is HPD interrupt) */
}
/**
* Remove a handler and return a pointer to hander list from which the
/*
* Remove a handler and return a pointer to handler list from which the
* handler was removed.
*/
static struct list_head *remove_irq_handler(struct amdgpu_device *adev,
@ -203,6 +242,24 @@ static bool validate_irq_unregistration_params(enum dc_irq_source irq_source,
* Note: caller is responsible for input validation.
*****************************************************************************/
/**
* amdgpu_dm_irq_register_interrupt() - Register a handler within DM.
* @adev: The base driver device containing the DM device.
* @int_params: Interrupt parameters containing the source, and handler context
* @ih: Function pointer to the interrupt handler to register
* @handler_args: Arguments passed to the handler when the interrupt occurs
*
* Register an interrupt handler for the given IRQ source, under the given
* context. The context can either be high or low. High context handlers are
* executed directly within ISR context, while low context is executed within a
* workqueue, thereby allowing operations that sleep.
*
* Registered handlers are called in FIFO order, i.e. the handler registered
* first is called first.
*
* Return: Handler data &struct amdgpu_dm_irq_handler_data containing the IRQ
* source, handler function, and args
*/
void *amdgpu_dm_irq_register_interrupt(struct amdgpu_device *adev,
struct dc_interrupt_params *int_params,
void (*ih)(void *),
@ -261,6 +318,15 @@ void *amdgpu_dm_irq_register_interrupt(struct amdgpu_device *adev,
return handler_data;
}
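To make the registration flow above concrete, a caller wanting a deferred (low-context) handler fills a struct dc_interrupt_params and passes its callback in. A rough sketch follows; the dc_interrupt_params field names and INTERRUPT_LOW_IRQ_CONTEXT are assumed from DC's irq types, and my_hpd_work() / my_handle_hpd() are made up:

/* Illustrative only; field names are assumptions, callbacks are made up. */
static void my_hpd_work(void *param)
{
	struct amdgpu_dm_connector *aconnector = param;

	/* Runs from the DM workqueue, so it may sleep (e.g. DDC/AUX reads). */
	my_handle_hpd(aconnector);	/* hypothetical */
}

static void example_register_hpd(struct amdgpu_device *adev,
				 struct amdgpu_dm_connector *aconnector,
				 enum dc_irq_source src)
{
	struct dc_interrupt_params int_params = {0};

	int_params.int_context = INTERRUPT_LOW_IRQ_CONTEXT;	/* deferred */
	int_params.irq_source = src;

	amdgpu_dm_irq_register_interrupt(adev, &int_params,
					 my_hpd_work, aconnector);
}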
/**
* amdgpu_dm_irq_unregister_interrupt() - Remove a handler from the DM IRQ table
* @adev: The base driver device containing the DM device
* @irq_source: IRQ source to remove the given handler from
* @ih: Function pointer to the interrupt handler to unregister
*
* Go through both low and high context IRQ tables, and find the given handler
* for the given irq source. If found, remove it. Otherwise, do nothing.
*/
void amdgpu_dm_irq_unregister_interrupt(struct amdgpu_device *adev,
enum dc_irq_source irq_source,
void *ih)
@ -295,6 +361,20 @@ void amdgpu_dm_irq_unregister_interrupt(struct amdgpu_device *adev,
}
}
/**
* amdgpu_dm_irq_init() - Initialize DM IRQ management
* @adev: The base driver device containing the DM device
*
* Initialize DM's high and low context IRQ tables.
*
* The N by M table contains N IRQ sources, with M
* &struct amdgpu_dm_irq_handler_data hooked together in a linked list. The
* list_heads are initialized here. When an interrupt n is triggered, all m
* handlers are called in sequence, FIFO according to registration order.
*
* The low context table requires special steps to initialize, since handlers
* will be deferred to a workqueue. See &struct irq_list_head.
*/
int amdgpu_dm_irq_init(struct amdgpu_device *adev)
{
int src;
@ -317,7 +397,12 @@ int amdgpu_dm_irq_init(struct amdgpu_device *adev)
return 0;
}
/* DM IRQ and timer resource release */
/**
* amdgpu_dm_irq_fini() - Tear down DM IRQ management
* @adev: The base driver device containing the DM device
*
* Flush all work within the low context IRQ table.
*/
void amdgpu_dm_irq_fini(struct amdgpu_device *adev)
{
int src;
@ -414,7 +499,7 @@ int amdgpu_dm_irq_resume_late(struct amdgpu_device *adev)
return 0;
}
/**
/*
* amdgpu_dm_irq_schedule_work - schedule all work items registered for the
* "irq_source".
*/
@ -439,8 +524,9 @@ static void amdgpu_dm_irq_schedule_work(struct amdgpu_device *adev,
}
/** amdgpu_dm_irq_immediate_work
* Callback high irq work immediately, don't send to work queue
/*
* amdgpu_dm_irq_immediate_work
* Callback high irq work immediately, don't send to work queue
*/
static void amdgpu_dm_irq_immediate_work(struct amdgpu_device *adev,
enum dc_irq_source irq_source)
@ -467,11 +553,14 @@ static void amdgpu_dm_irq_immediate_work(struct amdgpu_device *adev,
DM_IRQ_TABLE_UNLOCK(adev, irq_table_flags);
}
/*
* amdgpu_dm_irq_handler
/**
* amdgpu_dm_irq_handler - Generic DM IRQ handler
* @adev: amdgpu base driver device containing the DM device
* @source: Unused
* @entry: Data about the triggered interrupt
*
* Generic IRQ handler, calls all registered high irq work immediately, and
* schedules work for low irq
* Calls all registered high irq work immediately, and schedules work for low
* irq. The DM IRQ table is used to find the corresponding handlers.
*/
static int amdgpu_dm_irq_handler(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
@ -613,7 +702,7 @@ void amdgpu_dm_set_irq_funcs(struct amdgpu_device *adev)
adev->hpd_irq.funcs = &dm_hpd_irq_funcs;
}
/*
/**
* amdgpu_dm_hpd_init - hpd setup callback.
*
* @adev: amdgpu_device pointer

View File

@ -485,11 +485,11 @@ void pp_rv_set_display_requirement(struct pp_smu *pp,
return;
clock.clock_type = amd_pp_dcf_clock;
clock.clock_freq_in_khz = req->hard_min_dcefclk_khz;
clock.clock_freq_in_khz = req->hard_min_dcefclk_mhz * 1000;
pp_funcs->display_clock_voltage_request(pp_handle, &clock);
clock.clock_type = amd_pp_f_clock;
clock.clock_freq_in_khz = req->hard_min_fclk_khz;
clock.clock_freq_in_khz = req->hard_min_fclk_mhz * 1000;
pp_funcs->display_clock_voltage_request(pp_handle, &clock);
}
@ -518,13 +518,13 @@ void pp_rv_set_wm_ranges(struct pp_smu *pp,
wm_dce_clocks[i].wm_set_id =
ranges->reader_wm_sets[i].wm_inst;
wm_dce_clocks[i].wm_max_dcfclk_clk_in_khz =
ranges->reader_wm_sets[i].max_drain_clk_khz;
ranges->reader_wm_sets[i].max_drain_clk_mhz * 1000;
wm_dce_clocks[i].wm_min_dcfclk_clk_in_khz =
ranges->reader_wm_sets[i].min_drain_clk_khz;
ranges->reader_wm_sets[i].min_drain_clk_mhz * 1000;
wm_dce_clocks[i].wm_max_mem_clk_in_khz =
ranges->reader_wm_sets[i].max_fill_clk_khz;
ranges->reader_wm_sets[i].max_fill_clk_mhz * 1000;
wm_dce_clocks[i].wm_min_mem_clk_in_khz =
ranges->reader_wm_sets[i].min_fill_clk_khz;
ranges->reader_wm_sets[i].min_fill_clk_mhz * 1000;
}
for (i = 0; i < wm_with_clock_ranges.num_wm_mcif_sets; i++) {
@ -534,13 +534,13 @@ void pp_rv_set_wm_ranges(struct pp_smu *pp,
wm_soc_clocks[i].wm_set_id =
ranges->writer_wm_sets[i].wm_inst;
wm_soc_clocks[i].wm_max_socclk_clk_in_khz =
ranges->writer_wm_sets[i].max_fill_clk_khz;
ranges->writer_wm_sets[i].max_fill_clk_mhz * 1000;
wm_soc_clocks[i].wm_min_socclk_clk_in_khz =
ranges->writer_wm_sets[i].min_fill_clk_khz;
ranges->writer_wm_sets[i].min_fill_clk_mhz * 1000;
wm_soc_clocks[i].wm_max_mem_clk_in_khz =
ranges->writer_wm_sets[i].max_drain_clk_khz;
ranges->writer_wm_sets[i].max_drain_clk_mhz * 1000;
wm_soc_clocks[i].wm_min_mem_clk_in_khz =
ranges->writer_wm_sets[i].min_drain_clk_khz;
ranges->writer_wm_sets[i].min_drain_clk_mhz * 1000;
}
pp_funcs->set_watermarks_for_clocks_ranges(pp_handle, &wm_with_clock_ranges);

View File

@ -2030,7 +2030,7 @@ static uint32_t get_src_obj_list(struct bios_parser *bp, ATOM_OBJECT *object,
static struct device_id device_type_from_device_id(uint16_t device_id)
{
struct device_id result_device_id;
struct device_id result_device_id = {0};
switch (device_id) {
case ATOM_DEVICE_LCD1_SUPPORT:

View File

@ -1884,6 +1884,8 @@ static const struct dc_vbios_funcs vbios_funcs = {
.is_accelerated_mode = bios_parser_is_accelerated_mode,
.is_active_display = bios_is_active_display,
.set_scratch_critical_state = bios_parser_set_scratch_critical_state,

View File

@ -88,3 +88,96 @@ uint32_t bios_get_vga_enabled_displays(
return active_disp;
}
bool bios_is_active_display(
struct dc_bios *bios,
enum signal_type signal,
const struct connector_device_tag_info *device_tag)
{
uint32_t active = 0;
uint32_t connected = 0;
uint32_t bios_scratch_0 = 0;
uint32_t bios_scratch_3 = 0;
switch (signal) {
case SIGNAL_TYPE_DVI_SINGLE_LINK:
case SIGNAL_TYPE_DVI_DUAL_LINK:
case SIGNAL_TYPE_HDMI_TYPE_A:
case SIGNAL_TYPE_DISPLAY_PORT:
case SIGNAL_TYPE_DISPLAY_PORT_MST:
{
if (device_tag->dev_id.device_type == DEVICE_TYPE_DFP) {
switch (device_tag->dev_id.enum_id) {
case 1:
{
active = ATOM_S3_DFP1_ACTIVE;
connected = 0x0008; //ATOM_DISPLAY_DFP1_CONNECT
}
break;
case 2:
{
active = ATOM_S3_DFP2_ACTIVE;
connected = 0x0080; //ATOM_DISPLAY_DFP2_CONNECT
}
break;
case 3:
{
active = ATOM_S3_DFP3_ACTIVE;
connected = 0x0200; //ATOM_DISPLAY_DFP3_CONNECT
}
break;
case 4:
{
active = ATOM_S3_DFP4_ACTIVE;
connected = 0x0400; //ATOM_DISPLAY_DFP4_CONNECT
}
break;
case 5:
{
active = ATOM_S3_DFP5_ACTIVE;
connected = 0x0800; //ATOM_DISPLAY_DFP5_CONNECT
}
break;
case 6:
{
active = ATOM_S3_DFP6_ACTIVE;
connected = 0x0040; //ATOM_DISPLAY_DFP6_CONNECT
}
break;
default:
break;
}
}
}
break;
case SIGNAL_TYPE_LVDS:
case SIGNAL_TYPE_EDP:
{
active = ATOM_S3_LCD1_ACTIVE;
connected = 0x0002; //ATOM_DISPLAY_LCD1_CONNECT
}
break;
default:
break;
}
if (bios->regs->BIOS_SCRATCH_0) /*follow up with other asic, todo*/
bios_scratch_0 = REG_READ(BIOS_SCRATCH_0);
if (bios->regs->BIOS_SCRATCH_3) /*follow up with other asic, todo*/
bios_scratch_3 = REG_READ(BIOS_SCRATCH_3);
bios_scratch_3 &= ATOM_S3_DEVICE_ACTIVE_MASK;
if ((active & bios_scratch_3) && (connected & bios_scratch_0))
return true;
return false;
}

View File

@ -35,6 +35,10 @@ bool bios_is_accelerated_mode(struct dc_bios *bios);
void bios_set_scratch_acc_mode_change(struct dc_bios *bios);
void bios_set_scratch_critical_state(struct dc_bios *bios, bool state);
uint32_t bios_get_vga_enabled_displays(struct dc_bios *bios);
bool bios_is_active_display(
struct dc_bios *bios,
enum signal_type signal,
const struct connector_device_tag_info *device_tag);
#define GET_IMAGE(type, offset) ((type *) bios_get_image(&bp->base, offset, sizeof(type)))

Some files were not shown because too many files have changed in this diff.