diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 758398bdb39b..14d5b5fa822d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -447,6 +447,24 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p, return retval; } +static int kfd_ioctl_get_queue_wave_state(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_get_queue_wave_state_args *args = data; + int r; + + mutex_lock(&p->mutex); + + r = pqm_get_wave_state(&p->pqm, args->queue_id, + (void __user *)args->ctl_stack_address, + &args->ctl_stack_used_size, + &args->save_area_used_size); + + mutex_unlock(&p->mutex); + + return r; +} + static int kfd_ioctl_set_memory_policy(struct file *filep, struct kfd_process *p, void *data) { @@ -1615,6 +1633,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK, kfd_ioctl_set_cu_mask, 0), + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE, + kfd_ioctl_get_queue_wave_state, 0) + }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index ec0d62a16e53..408888911361 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1528,6 +1528,41 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm, return retval; } +static int get_wave_state(struct device_queue_manager *dqm, + struct queue *q, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size) +{ + struct mqd_manager *mqd; + int r; + + dqm_lock(dqm); + + if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || + q->properties.is_active || !q->device->cwsr_enabled) { + r = -EINVAL; + goto dqm_unlock; + } + + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); + if (!mqd) { + r = -ENOMEM; + goto dqm_unlock; + } + + if (!mqd->get_wave_state) { + r = -EINVAL; + goto dqm_unlock; + } + + r = mqd->get_wave_state(mqd, q->mqd, ctl_stack, ctl_stack_used_size, + save_area_used_size); + +dqm_unlock: + dqm_unlock(dqm); + return r; +} static int process_termination_cpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) @@ -1649,6 +1684,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.process_termination = process_termination_cpsch; dqm->ops.evict_process_queues = evict_process_queues_cpsch; dqm->ops.restore_process_queues = restore_process_queues_cpsch; + dqm->ops.get_wave_state = get_wave_state; break; case KFD_SCHED_POLICY_NO_HWS: /* initialize dqm for no cp scheduling */ @@ -1668,6 +1704,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.evict_process_queues = evict_process_queues_nocpsch; dqm->ops.restore_process_queues = restore_process_queues_nocpsch; + dqm->ops.get_wave_state = get_wave_state; break; default: pr_err("Invalid scheduling policy %d\n", dqm->sched_policy); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 00da3169a004..e7bd19d09845 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -82,6 +82,8 @@ struct device_process_node { * * @restore_process_queues: Restore all evicted queues queues of a process * + * @get_wave_state: Retrieves context save state and optionally copies the + * control stack, if kept in the MQD, to the given userspace address. */ struct device_queue_manager_ops { @@ -137,6 +139,12 @@ struct device_queue_manager_ops { struct qcm_process_device *qpd); int (*restore_process_queues)(struct device_queue_manager *dqm, struct qcm_process_device *qpd); + + int (*get_wave_state)(struct device_queue_manager *dqm, + struct queue *q, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size); }; struct device_queue_manager_asic_ops { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index 4e84052d4e21..f8261313ae7b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -43,6 +43,9 @@ * * @is_occupied: Checks if the relevant HQD slot is occupied. * + * @get_wave_state: Retrieves context save state and optionally copies the + * control stack, if kept in the MQD, to the given userspace address. + * * @mqd_mutex: Mqd manager mutex. * * @dev: The kfd device structure coupled with this module. @@ -85,6 +88,11 @@ struct mqd_manager { uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); + int (*get_wave_state)(struct mqd_manager *mm, void *mqd, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size); + #if defined(CONFIG_DEBUG_FS) int (*debugfs_show_mqd)(struct seq_file *m, void *data); #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 0cedb37cf513..f381c1cb27bd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -266,6 +266,28 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, pipe_id, queue_id); } +static int get_wave_state(struct mqd_manager *mm, void *mqd, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size) +{ + struct v9_mqd *m; + + /* Control stack is located one page after MQD. */ + void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE); + + m = get_mqd(mqd); + + *ctl_stack_used_size = m->cp_hqd_cntl_stack_size - + m->cp_hqd_cntl_stack_offset; + *save_area_used_size = m->cp_hqd_wg_state_offset; + + if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size)) + return -EFAULT; + + return 0; +} + static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -435,6 +457,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->get_wave_state = get_wave_state; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index b81fda3754da..6469b3456f00 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -269,6 +269,28 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, pipe_id, queue_id); } +static int get_wave_state(struct mqd_manager *mm, void *mqd, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size) +{ + struct vi_mqd *m; + + m = get_mqd(mqd); + + *ctl_stack_used_size = m->cp_hqd_cntl_stack_size - + m->cp_hqd_cntl_stack_offset; + *save_area_used_size = m->cp_hqd_wg_state_offset - + m->cp_hqd_cntl_stack_size; + + /* Control stack is not copied to user mode for GFXv8 because + * it's part of the context save area that is already + * accessible to user mode + */ + + return 0; +} + static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -436,6 +458,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->get_wave_state = get_wave_state; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 6f3a5bd489bd..968098bf76dc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -862,6 +862,11 @@ int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid, struct queue_properties *p); struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, unsigned int qid); +int pqm_get_wave_state(struct process_queue_manager *pqm, + unsigned int qid, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size); int amdkfd_fence_wait_timeout(unsigned int *fence_addr, unsigned int fence_value, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index c8cad9c078ae..fcaaf93681ac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -408,6 +408,28 @@ struct kernel_queue *pqm_get_kernel_queue( return NULL; } +int pqm_get_wave_state(struct process_queue_manager *pqm, + unsigned int qid, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size) +{ + struct process_queue_node *pqn; + + pqn = get_queue_by_qid(pqm, qid); + if (!pqn) { + pr_debug("amdkfd: No queue %d exists for operation\n", + qid); + return -EFAULT; + } + + return pqn->q->device->dqm->ops.get_wave_state(pqn->q->device->dqm, + pqn->q, + ctl_stack, + ctl_stack_used_size, + save_area_used_size); +} + #if defined(CONFIG_DEBUG_FS) int pqm_debugfs_mqds(struct seq_file *m, void *data) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 01674b56e14f..f5ff8a76e208 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -82,6 +82,14 @@ struct kfd_ioctl_set_cu_mask_args { __u64 cu_mask_ptr; /* to KFD */ }; +struct kfd_ioctl_get_queue_wave_state_args { + uint64_t ctl_stack_address; /* to KFD */ + uint32_t ctl_stack_used_size; /* from KFD */ + uint32_t save_area_used_size; /* from KFD */ + uint32_t queue_id; /* to KFD */ + uint32_t pad; +}; + /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ #define KFD_IOC_CACHE_POLICY_COHERENT 0 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 @@ -475,7 +483,10 @@ struct kfd_ioctl_unmap_memory_from_gpu_args { #define AMDKFD_IOC_SET_CU_MASK \ AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args) +#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \ + AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x1B +#define AMDKFD_COMMAND_END 0x1C #endif