From 32fa821958755a30a9a05ed258e1539c945851d4 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 15 Aug 2017 23:00:12 -0400 Subject: [PATCH] drm/amdkfd: Handle remaining BUG_ONs more gracefully v2 In most cases, BUG_ONs can be replaced with WARN_ON with an error return. In some void functions just turn them into a WARN_ON and possibly an early exit. v2: * Cleaned up error handling in pm_send_unmap_queue * Removed redundant WARN_ON in kfd_process_destroy_delayed Signed-off-by: Felix Kuehling Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 16 ++++--- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 19 ++++---- .../amd/amdkfd/kfd_device_queue_manager_cik.c | 2 +- .../amd/amdkfd/kfd_device_queue_manager_vi.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 20 ++++++--- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 3 +- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 3 +- .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 44 ++++++++++++------- drivers/gpu/drm/amd/amdkfd/kfd_pasid.c | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 9 ++-- .../amd/amdkfd/kfd_process_queue_manager.c | 7 +-- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 4 +- 14 files changed, 84 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index 3841cad0a290..0aa021aa0aa1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -60,7 +60,8 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, unsigned int *ib_packet_buff; int status; - BUG_ON(!size_in_bytes); + if (WARN_ON(!size_in_bytes)) + return -EINVAL; kq = dbgdev->kq; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c index 2d5555c5dfae..3da25f7bda6b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c @@ -64,7 +64,8 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ; struct kfd_dbgmgr *new_buff; - BUG_ON(!pdev->init_complete); + if (WARN_ON(!pdev->init_complete)) + return false; new_buff = kfd_alloc_struct(new_buff); if (!new_buff) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 416955f7c7d4..f628ac38e0c8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -98,7 +98,7 @@ static const struct kfd_device_info *lookup_device_info(unsigned short did) for (i = 0; i < ARRAY_SIZE(supported_devices); i++) { if (supported_devices[i].did == did) { - BUG_ON(!supported_devices[i].device_info); + WARN_ON(!supported_devices[i].device_info); return supported_devices[i].device_info; } } @@ -212,9 +212,8 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, flags); dev = kfd_device_by_pci_dev(pdev); - BUG_ON(!dev); - - kfd_signal_iommu_event(dev, pasid, address, + if (!WARN_ON(!dev)) + kfd_signal_iommu_event(dev, pasid, address, flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC); return AMD_IOMMU_INV_PRI_RSP_INVALID; @@ -397,9 +396,12 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, { unsigned int num_of_longs; - BUG_ON(buf_size < chunk_size); - BUG_ON(buf_size == 0); - BUG_ON(chunk_size == 0); + if (WARN_ON(buf_size < chunk_size)) + return -EINVAL; + if (WARN_ON(buf_size == 0)) + return -EINVAL; + if (WARN_ON(chunk_size == 0)) + return -EINVAL; kfd->gtt_sa_chunk_size = chunk_size; kfd->gtt_sa_num_of_chunks = buf_size / chunk_size; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 2486dfb5b17f..e553c5e45264 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -388,7 +388,8 @@ static struct mqd_manager *get_mqd_manager_nocpsch( { struct mqd_manager *mqd; - BUG_ON(type >= KFD_MQD_TYPE_MAX); + if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) + return NULL; pr_debug("mqd type %d\n", type); @@ -513,7 +514,7 @@ static void uninitialize_nocpsch(struct device_queue_manager *dqm) { int i; - BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0); + WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0); kfree(dqm->allocated_queues); for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) @@ -1129,8 +1130,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.set_cache_memory_policy = set_cache_memory_policy; break; default: - BUG(); - break; + pr_err("Invalid scheduling policy %d\n", sched_policy); + goto out_free; } switch (dev->device_info->asic_family) { @@ -1143,12 +1144,12 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) break; } - if (dqm->ops.initialize(dqm)) { - kfree(dqm); - return NULL; - } + if (!dqm->ops.initialize(dqm)) + return dqm; - return dqm; +out_free: + kfree(dqm); + return NULL; } void device_queue_manager_uninit(struct device_queue_manager *dqm) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index 43194b43eea4..fadc56a8be71 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -65,7 +65,7 @@ static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) * for LDS/Scratch and GPUVM. */ - BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE || + WARN_ON((top_address_nybble & 1) || top_address_nybble > 0xE || top_address_nybble == 0); return PRIVATE_BASE(top_address_nybble << 12) | diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index 47ef910b1663..15e81ae9d2f4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -67,7 +67,7 @@ static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) * for LDS/Scratch and GPUVM. */ - BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE || + WARN_ON((top_address_nybble & 1) || top_address_nybble > 0xE || top_address_nybble == 0); return top_address_nybble << 12 | diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 970bc07ac370..0e4d4a98dc2b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -41,7 +41,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, int retval; union PM4_MES_TYPE_3_HEADER nop; - BUG_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ); + if (WARN_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ)) + return false; pr_debug("Initializing queue type %d size %d\n", KFD_QUEUE_TYPE_HIQ, queue_size); @@ -62,8 +63,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, KFD_MQD_TYPE_HIQ); break; default: - BUG(); - break; + pr_err("Invalid queue type %d\n", type); + return false; } if (!kq->mqd) @@ -305,6 +306,7 @@ void kernel_queue_uninit(struct kernel_queue *kq) kfree(kq); } +/* FIXME: Can this test be removed? */ static __attribute__((unused)) void test_kq(struct kfd_dev *dev) { struct kernel_queue *kq; @@ -314,10 +316,18 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev) pr_err("Starting kernel queue test\n"); kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ); - BUG_ON(!kq); + if (unlikely(!kq)) { + pr_err(" Failed to initialize HIQ\n"); + pr_err("Kernel queue test failed\n"); + return; + } retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer); - BUG_ON(retval != 0); + if (unlikely(retval != 0)) { + pr_err(" Failed to acquire packet buffer\n"); + pr_err("Kernel queue test failed\n"); + return; + } for (i = 0; i < 5; i++) buffer[i] = kq->nop_packet; kq->ops.submit_packet(kq); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index a11477dc1047..7e0ec6bb1637 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -387,7 +387,8 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, { struct mqd_manager *mqd; - BUG_ON(type >= KFD_MQD_TYPE_MAX); + if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) + return NULL; mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); if (!mqd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index d638c2c92234..f4c8c2324d77 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -233,7 +233,8 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, { struct mqd_manager *mqd; - BUG_ON(type >= KFD_MQD_TYPE_MAX); + if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) + return NULL; mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); if (!mqd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index aacd5a3d92b7..0816d11e469d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -35,7 +35,8 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, { unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t); - BUG_ON((temp * sizeof(uint32_t)) > buffer_size_bytes); + WARN((temp * sizeof(uint32_t)) > buffer_size_bytes, + "Runlist IB overflow"); *wptr = temp; } @@ -94,7 +95,8 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, { int retval; - BUG_ON(pm->allocated); + if (WARN_ON(pm->allocated)) + return -EINVAL; pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription); @@ -119,7 +121,8 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, { struct pm4_runlist *packet; - BUG_ON(!ib); + if (WARN_ON(!ib)) + return -EFAULT; packet = (struct pm4_runlist *)buffer; @@ -211,9 +214,8 @@ static int pm_create_map_queue_vi(struct packet_manager *pm, uint32_t *buffer, use_static = false; /* no static queues under SDMA */ break; default: - pr_err("queue type %d\n", q->properties.type); - BUG(); - break; + WARN(1, "queue type %d", q->properties.type); + return -EINVAL; } packet->bitfields3.doorbell_offset = q->properties.doorbell_off; @@ -266,8 +268,8 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, use_static = false; /* no static queues under SDMA */ break; default: - BUG(); - break; + WARN(1, "queue type %d", q->properties.type); + return -EINVAL; } packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset = @@ -392,14 +394,16 @@ static int pm_create_runlist_ib(struct packet_manager *pm, pr_debug("Finished map process and queues to runlist\n"); if (is_over_subscription) - pm_create_runlist(pm, &rl_buffer[rl_wptr], *rl_gpu_addr, - alloc_size_bytes / sizeof(uint32_t), true); + retval = pm_create_runlist(pm, &rl_buffer[rl_wptr], + *rl_gpu_addr, + alloc_size_bytes / sizeof(uint32_t), + true); for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++) pr_debug("0x%2X ", rl_buffer[i]); pr_debug("\n"); - return 0; + return retval; } int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) @@ -512,7 +516,8 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, int retval; struct pm4_query_status *packet; - BUG_ON(!fence_address); + if (WARN_ON(!fence_address)) + return -EFAULT; mutex_lock(&pm->lock); retval = pm->priv_queue->ops.acquire_packet_buffer( @@ -577,8 +582,9 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, engine_sel__mes_unmap_queues__sdma0 + sdma_engine; break; default: - BUG(); - break; + WARN(1, "queue type %d", type); + retval = -EINVAL; + goto err_invalid; } if (reset) @@ -610,12 +616,18 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only; break; default: - BUG(); - break; + WARN(1, "filter %d", mode); + retval = -EINVAL; + goto err_invalid; } pm->priv_queue->ops.submit_packet(pm->priv_queue); + mutex_unlock(&pm->lock); + return 0; + +err_invalid: + pm->priv_queue->ops.rollback_packet(pm->priv_queue); err_acquire_packet_buffer: mutex_unlock(&pm->lock); return retval; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c index b3f7d431b9a6..1e06de0bc673 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c @@ -92,6 +92,6 @@ unsigned int kfd_pasid_alloc(void) void kfd_pasid_free(unsigned int pasid) { - BUG_ON(pasid == 0 || pasid >= pasid_limit); - clear_bit(pasid, pasid_bitmap); + if (!WARN_ON(pasid == 0 || pasid >= pasid_limit)) + clear_bit(pasid, pasid_bitmap); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index d030d76cef46..c74cf22a1ed9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -79,8 +79,6 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread) { struct kfd_process *process; - BUG_ON(!kfd_process_wq); - if (!thread->mm) return ERR_PTR(-EINVAL); @@ -202,10 +200,8 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu) struct kfd_process_release_work *work; struct kfd_process *p; - BUG_ON(!kfd_process_wq); - p = container_of(rcu, struct kfd_process, rcu); - BUG_ON(atomic_read(&p->mm->mm_count) <= 0); + WARN_ON(atomic_read(&p->mm->mm_count) <= 0); mmdrop(p->mm); @@ -229,7 +225,8 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, * mmu_notifier srcu is read locked */ p = container_of(mn, struct kfd_process, mmu_notifier); - BUG_ON(p->mm != mm); + if (WARN_ON(p->mm != mm)) + return; mutex_lock(&kfd_processes_mutex); hash_del_rcu(&p->kfd_processes); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index f6ecdffbd415..1cae95e2b13a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -218,8 +218,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, kq, &pdd->qpd); break; default: - BUG(); - break; + WARN(1, "Invalid queue type %d", type); + retval = -EINVAL; } if (retval != 0) { @@ -272,7 +272,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) dev = pqn->kq->dev; if (pqn->q) dev = pqn->q->device; - BUG_ON(!dev); + if (WARN_ON(!dev)) + return -ENODEV; pdd = kfd_get_process_device_data(dev, pqm->process); if (!pdd) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index e5486f494c47..19ce59028d6b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -799,10 +799,12 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, int ret; uint32_t i; + if (WARN_ON(dev->kobj_node)) + return -EEXIST; + /* * Creating the sysfs folders */ - BUG_ON(dev->kobj_node); dev->kobj_node = kfd_alloc_struct(dev->kobj_node); if (!dev->kobj_node) return -ENOMEM;