
drm/amdgpu: Implement get num of hops between two xgmi device

KFD needs to provide this info to the upper level to determine the data path

Signed-off-by: shaoyunl <shaoyun.liu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
shaoyunl 2019-04-17 14:28:18 -04:00 committed by Alex Deucher
parent d8e408a827
commit da361dd13f
5 changed files with 50 additions and 18 deletions

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

@@ -27,6 +27,7 @@
 #include "amdgpu_gfx.h"
 #include <linux/module.h>
 #include <linux/dma-buf.h>
+#include "amdgpu_xgmi.h"
 static const unsigned int compute_vmid_bitmap = 0xFF00;
@@ -518,6 +519,20 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
 	return adev->gmc.xgmi.hive_id;
 }
+uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
+{
+	struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
+	struct amdgpu_device *adev = (struct amdgpu_device *)dst;
+	int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
+	if (ret < 0) {
+		DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
+			  adev->gmc.xgmi.physical_node_id,
+			  peer_adev->gmc.xgmi.physical_node_id, ret);
+		ret = 0;
+	}
+	return (uint8_t)ret;
+}
 uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
 {
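For context, a minimal sketch of how a KFD-side caller might consume the new wrapper; per the commit message, KFD is the intended upper-level user. The helper name and the weight-per-hop convention below are hypothetical and not part of this patch; only amdgpu_amdkfd_get_xgmi_hops_count comes from the change above.

/* Hypothetical example, not part of this commit: translate the hop count
 * into a link weight when describing an XGMI link between two GPUs.
 * The 20-per-hop convention is illustrative only.
 */
static uint32_t example_kfd_xgmi_link_weight(struct kgd_dev *dst, struct kgd_dev *src)
{
	/* The wrapper returns 0 when the lookup fails or dst == src. */
	uint8_t hops = amdgpu_amdkfd_get_xgmi_hops_count(dst, src);

	/* Fall back to a direct-link weight when no hop count is available. */
	return hops ? 20 * hops : 20;
}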

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

@@ -170,6 +170,7 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
 uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
 uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
 uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
+uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
 #define read_user_wptr(mmptr, wptr, dst) \
 	({ \

drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h

@@ -95,12 +95,26 @@ struct psp_funcs
 	int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr);
 };
+#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
+struct psp_xgmi_node_info {
+	uint64_t node_id;
+	uint8_t num_hops;
+	uint8_t is_sharing_enabled;
+	enum ta_xgmi_assigned_sdma_engine sdma_engine;
+};
+struct psp_xgmi_topology_info {
+	uint32_t num_nodes;
+	struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
+};
 struct psp_xgmi_context {
 	uint8_t initialized;
 	uint32_t session_id;
 	struct amdgpu_bo *xgmi_shared_bo;
 	uint64_t xgmi_shared_mc_addr;
 	void *xgmi_shared_buf;
+	struct psp_xgmi_topology_info top_info;
 };
 struct psp_ras_context {
@@ -181,18 +195,6 @@ struct amdgpu_psp_funcs {
 			enum AMDGPU_UCODE_ID);
 };
-#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
-struct psp_xgmi_node_info {
-	uint64_t node_id;
-	uint8_t num_hops;
-	uint8_t is_sharing_enabled;
-	enum ta_xgmi_assigned_sdma_engine sdma_engine;
-};
-struct psp_xgmi_topology_info {
-	uint32_t num_nodes;
-	struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
-};
 #define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
 #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
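With the topology snapshot now cached per device in psp.xgmi_context.top_info, the same lookup pattern used for hop counts (see amdgpu_xgmi_get_hops_count in the next file) also works for the other per-node fields. A hedged sketch with a hypothetical helper name; nothing below is part of this patch:

/* Illustrative only: walk the cached topology and report whether a given
 * peer node has sharing enabled. Mirrors the hop-count lookup this patch
 * adds in amdgpu_xgmi.c.
 */
static bool example_xgmi_peer_sharing_enabled(struct amdgpu_device *adev,
					      uint64_t peer_node_id)
{
	struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
	int i;

	for (i = 0; i < top->num_nodes; i++)
		if (top->nodes[i].node_id == peer_node_id)
			return top->nodes[i].is_sharing_enabled;

	return false;
}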

drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c

@@ -238,7 +238,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
 	/* Each psp need to set the latest topology */
 	ret = psp_xgmi_set_topology_info(&adev->psp,
 					 hive->number_devices,
-					 &hive->topology_info);
+					 &adev->psp.xgmi_context.top_info);
 	if (ret)
 		dev_err(adev->dev,
 			"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
@@ -248,9 +248,22 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
 	return ret;
 }
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
+			       struct amdgpu_device *peer_adev)
+{
+	struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+	int i;
+	for (i = 0 ; i < top->num_nodes; ++i)
+		if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
+			return top->nodes[i].num_hops;
+	return -EINVAL;
+}
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 {
-	struct psp_xgmi_topology_info *hive_topology;
+	struct psp_xgmi_topology_info *top_info;
 	struct amdgpu_hive_info *hive;
 	struct amdgpu_xgmi *entry;
 	struct amdgpu_device *tmp_adev = NULL;
@@ -283,16 +296,16 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 		goto exit;
 	}
-	hive_topology = &hive->topology_info;
+	top_info = &adev->psp.xgmi_context.top_info;
 	list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
 	list_for_each_entry(entry, &hive->device_list, head)
-		hive_topology->nodes[count++].node_id = entry->node_id;
+		top_info->nodes[count++].node_id = entry->node_id;
 	hive->number_devices = count;
 	/* Each psp need to get the latest topology */
 	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
-		ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology);
+		ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, top_info);
 		if (ret) {
 			dev_err(tmp_adev->dev,
 				"XGMI: Get topology failure on device %llx, hive %llx, ret %d",

drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h

@@ -27,7 +27,6 @@
 struct amdgpu_hive_info {
 	uint64_t hive_id;
 	struct list_head device_list;
-	struct psp_xgmi_topology_info topology_info;
 	int number_devices;
 	struct mutex hive_lock, reset_lock;
 	struct kobject *kobj;
@@ -41,6 +40,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
 void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
 int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
+		struct amdgpu_device *peer_adev);
 static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
 		struct amdgpu_device *bo_adev)
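To tie the pieces together, a minimal usage sketch (not from this patch), assuming both devices have already been probed and added to their hive: first confirm the two GPUs share a hive, then ask for the hop count between them. The caller name is hypothetical; only the two amdgpu_xgmi helpers come from the headers above.

/* Hypothetical caller, for illustration only. */
static int example_query_peer_hops(struct amdgpu_device *adev,
				   struct amdgpu_device *peer_adev)
{
	if (!amdgpu_xgmi_same_hive(adev, peer_adev))
		return -ENODEV; /* not reachable over XGMI */

	/* Returns the cached num_hops, or -EINVAL if the peer is unknown. */
	return amdgpu_xgmi_get_hops_count(adev, peer_adev);
}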