diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index da10e6ccb43c..5920c0085d35 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -4399,6 +4399,7 @@ error2: error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; + kfree(port); while (--i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; @@ -4407,6 +4408,7 @@ error1: ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); + kfree(port); } free: kfree(cm_dev); @@ -4460,6 +4462,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) spin_unlock_irq(&cm.state_lock); ib_unregister_mad_agent(cur_mad_agent); cm_remove_port_fs(port); + kfree(port); } kfree(cm_dev); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 0e3cf3461999..d78f67623f24 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2396,9 +2396,10 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, conn_id->cm_id.iw = NULL; cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); + mutex_unlock(&listen_id->handler_mutex); cma_deref_id(conn_id); rdma_destroy_id(&conn_id->id); - goto out; + return ret; } mutex_unlock(&conn_id->handler_mutex); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 99c4a55545cf..2dd2cfe9b561 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1987,8 +1987,6 @@ static int iw_query_port(struct ib_device *device, if (!netdev) return -ENODEV; - dev_put(netdev); - port_attr->max_mtu = IB_MTU_4096; port_attr->active_mtu = ib_mtu_int_to_enum(netdev->mtu); @@ -1996,19 +1994,22 @@ static int iw_query_port(struct ib_device *device, port_attr->state = IB_PORT_DOWN; port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; } else { - inetdev = in_dev_get(netdev); + rcu_read_lock(); + inetdev = __in_dev_get_rcu(netdev); if (inetdev && inetdev->ifa_list) { port_attr->state = IB_PORT_ACTIVE; port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; - in_dev_put(inetdev); } else { port_attr->state = IB_PORT_INIT; port_attr->phys_state = IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING; } + + rcu_read_unlock(); } + dev_put(netdev); err = device->ops.query_port(device, port_num, port_attr); if (err) return err; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 7a7474000100..65b36548bc17 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1230,7 +1230,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto err; + goto err_get; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, @@ -1787,10 +1787,6 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); - ret = rdma_counter_unbind_qpn(device, port, qpn, cntn); - if (ret) - goto err_unbind; - if (fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || @@ -1799,13 +1795,15 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err_fill; } + ret = rdma_counter_unbind_qpn(device, port, qpn, cntn); + if (ret) + goto err_fill; + nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_fill: - rdma_counter_bind_qpn(device, port, qpn, cntn); -err_unbind: nlmsg_free(msg); err: ib_device_put(device); diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 1ab423b19f77..6eb6d2717ca5 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -426,7 +426,7 @@ int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev) int ret; rdma_for_each_port (dev, i) { - is_ib = rdma_protocol_ib(dev, i++); + is_ib = rdma_protocol_ib(dev, i); if (is_ib) break; } diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index f67a30fda1ed..163ff7ba92b7 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -451,8 +451,10 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp) * that the hardware will not attempt to access the MR any more. */ if (!umem_odp->is_implicit_odp) { + mutex_lock(&umem_odp->umem_mutex); ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp)); + mutex_unlock(&umem_odp->umem_mutex); kvfree(umem_odp->dma_list); kvfree(umem_odp->page_list); } @@ -719,6 +721,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, u64 addr; struct ib_device *dev = umem_odp->umem.ibdev; + lockdep_assert_held(&umem_odp->umem_mutex); + virt = max_t(u64, virt, ib_umem_start(umem_odp)); bound = min_t(u64, bound, ib_umem_end(umem_odp)); /* Note that during the run of this function, the @@ -726,7 +730,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, * faults from completion. We might be racing with other * invalidations, so we must make sure we free each page only * once. */ - mutex_lock(&umem_odp->umem_mutex); for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) { idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift; if (umem_odp->page_list[idx]) { @@ -757,7 +760,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, umem_odp->npages--; } } - mutex_unlock(&umem_odp->umem_mutex); } EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index a8b9548bd1a2..599340c1f0b8 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -242,10 +242,13 @@ static void set_ep_sin6_addrs(struct c4iw_ep *ep, } } -static int dump_qp(struct c4iw_qp *qp, struct c4iw_debugfs_data *qpd) +static int dump_qp(unsigned long id, struct c4iw_qp *qp, + struct c4iw_debugfs_data *qpd) { int space; int cc; + if (id != qp->wq.sq.qid) + return 0; space = qpd->bufsize - qpd->pos - 1; if (space == 0) @@ -350,7 +353,7 @@ static int qp_open(struct inode *inode, struct file *file) xa_lock_irq(&qpd->devp->qps); xa_for_each(&qpd->devp->qps, index, qp) - dump_qp(qp, qpd); + dump_qp(index, qp, qpd); xa_unlock_irq(&qpd->devp->qps); qpd->buf[qpd->pos++] = 0; diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index aa772ee0706f..35c284af574d 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -275,13 +275,17 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, struct sk_buff *skb, struct c4iw_wr_wait *wr_waitp) { int err; - struct fw_ri_tpte tpt; + struct fw_ri_tpte *tpt; u32 stag_idx; static atomic_t key; if (c4iw_fatal_error(rdev)) return -EIO; + tpt = kmalloc(sizeof(*tpt), GFP_KERNEL); + if (!tpt) + return -ENOMEM; + stag_state = stag_state > 0; stag_idx = (*stag) >> 8; @@ -291,6 +295,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, mutex_lock(&rdev->stats.lock); rdev->stats.stag.fail++; mutex_unlock(&rdev->stats.lock); + kfree(tpt); return -ENOMEM; } mutex_lock(&rdev->stats.lock); @@ -305,28 +310,28 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, /* write TPT entry */ if (reset_tpt_entry) - memset(&tpt, 0, sizeof(tpt)); + memset(tpt, 0, sizeof(*tpt)); else { - tpt.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | + tpt->valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | FW_RI_TPTE_STAGKEY_V((*stag & FW_RI_TPTE_STAGKEY_M)) | FW_RI_TPTE_STAGSTATE_V(stag_state) | FW_RI_TPTE_STAGTYPE_V(type) | FW_RI_TPTE_PDID_V(pdid)); - tpt.locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) | + tpt->locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) | (bind_enabled ? FW_RI_TPTE_MWBINDEN_F : 0) | FW_RI_TPTE_ADDRTYPE_V((zbva ? FW_RI_ZERO_BASED_TO : FW_RI_VA_BASED_TO))| FW_RI_TPTE_PS_V(page_size)); - tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32( + tpt->nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32( FW_RI_TPTE_PBLADDR_V(PBL_OFF(rdev, pbl_addr)>>3)); - tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL)); - tpt.va_hi = cpu_to_be32((u32)(to >> 32)); - tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL)); - tpt.dca_mwbcnt_pstag = cpu_to_be32(0); - tpt.len_hi = cpu_to_be32((u32)(len >> 32)); + tpt->len_lo = cpu_to_be32((u32)(len & 0xffffffffUL)); + tpt->va_hi = cpu_to_be32((u32)(to >> 32)); + tpt->va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL)); + tpt->dca_mwbcnt_pstag = cpu_to_be32(0); + tpt->len_hi = cpu_to_be32((u32)(len >> 32)); } err = write_adapter_mem(rdev, stag_idx + (rdev->lldi.vr->stag.start >> 5), - sizeof(tpt), &tpt, skb, wr_waitp); + sizeof(*tpt), tpt, skb, wr_waitp); if (reset_tpt_entry) { c4iw_put_resource(&rdev->resource.tpt_table, stag_idx); @@ -334,6 +339,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, rdev->stats.stag.cur -= 32; mutex_unlock(&rdev->stats.lock); } + kfree(tpt); return err; } diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index eb9368be28c1..bbcac539777a 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2737,15 +2737,11 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs, if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6) srq->flags = T4_SRQ_LIMIT_SUPPORT; - ret = xa_insert_irq(&rhp->qps, srq->wq.qid, srq, GFP_KERNEL); - if (ret) - goto err_free_queue; - if (udata) { srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL); if (!srq_key_mm) { ret = -ENOMEM; - goto err_remove_handle; + goto err_free_queue; } srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL); if (!srq_db_key_mm) { @@ -2789,8 +2785,6 @@ err_free_srq_db_key_mm: kfree(srq_db_key_mm); err_free_srq_key_mm: kfree(srq_key_mm); -err_remove_handle: - xa_erase_irq(&rhp->qps, srq->wq.qid); err_free_queue: free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, srq->wr_waitp); @@ -2813,8 +2807,6 @@ void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) rhp = srq->rhp; pr_debug("%s id %d\n", __func__, srq->wq.qid); - - xa_erase_irq(&rhp->qps, srq->wq.qid); ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, ibucontext); free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 2395fd4233a7..2ed7bfd5feea 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1526,8 +1526,11 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) } ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params); - if (ret < 0) + if (ret < 0) { + kfree(tmp_sdma_rht); goto bail; + } + dd->sdma_rht = tmp_sdma_rht; dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 8056930bbe2c..cd9ee1664a69 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2773,6 +2773,10 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev) return -ENOMEM; iwibdev = iwdev->iwibdev; rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group); + ret = ib_device_set_netdev(&iwibdev->ibdev, iwdev->netdev, 1); + if (ret) + goto error; + ret = ib_register_device(&iwibdev->ibdev, "i40iw%d"); if (ret) goto error; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 59022b744144..d609f4659afb 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1298,29 +1298,6 @@ static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, return 0; } -static void devx_free_indirect_mkey(struct rcu_head *rcu) -{ - kfree(container_of(rcu, struct devx_obj, devx_mr.rcu)); -} - -/* This function to delete from the radix tree needs to be called before - * destroying the underlying mkey. Otherwise a race might occur in case that - * other thread will get the same mkey before this one will be deleted, - * in that case it will fail via inserting to the tree its own data. - * - * Note: - * An error in the destroy is not expected unless there is some other indirect - * mkey which points to this one. In a kernel cleanup flow it will be just - * destroyed in the iterative destruction call. In a user flow, in case - * the application didn't close in the expected order it's its own problem, - * the mkey won't be part of the tree, in both cases the kernel is safe. - */ -static void devx_cleanup_mkey(struct devx_obj *obj) -{ - xa_erase(&obj->ib_dev->mdev->priv.mkey_table, - mlx5_base_mkey(obj->devx_mr.mmkey.key)); -} - static void devx_cleanup_subscription(struct mlx5_ib_dev *dev, struct devx_event_subscription *sub) { @@ -1362,8 +1339,16 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, int ret; dev = mlx5_udata_to_mdev(&attrs->driver_udata); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) - devx_cleanup_mkey(obj); + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + /* + * The pagefault_single_data_segment() does commands against + * the mmkey, we must wait for that to stop before freeing the + * mkey, as another allocation could get the same mkey #. + */ + xa_erase(&obj->ib_dev->mdev->priv.mkey_table, + mlx5_base_mkey(obj->devx_mr.mmkey.key)); + synchronize_srcu(&dev->mr_srcu); + } if (obj->flags & DEVX_OBJ_FLAGS_DCT) ret = mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct); @@ -1382,12 +1367,6 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, devx_cleanup_subscription(dev, sub_entry); mutex_unlock(&devx_event_table->event_xa_lock); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { - call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu, - devx_free_indirect_mkey); - return ret; - } - kfree(obj); return ret; } @@ -1491,26 +1470,21 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( &obj_id); WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); + if (err) + goto obj_destroy; + + if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJECT) + obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type); + obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id); + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out); if (err) goto obj_destroy; } - - err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); - if (err) - goto err_copy; - - if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJECT) - obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type); - - obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id); - return 0; -err_copy: - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) - devx_cleanup_mkey(obj); obj_destroy: if (obj->flags & DEVX_OBJ_FLAGS_DCT) mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 2ceaef3ea3fb..1a98ee2e01c4 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -606,7 +606,7 @@ struct mlx5_ib_mr { struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; - int live; + unsigned int live; void *descs_alloc; int access_flags; /* Needed for rereg MR */ @@ -639,7 +639,6 @@ struct mlx5_ib_mw { struct mlx5_ib_devx_mr { struct mlx5_core_mkey mmkey; int ndescs; - struct rcu_head rcu; }; struct mlx5_ib_umr_context { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1eff031ef048..630599311586 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -84,32 +84,6 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); } -static void update_odp_mr(struct mlx5_ib_mr *mr) -{ - if (is_odp_mr(mr)) { - /* - * This barrier prevents the compiler from moving the - * setting of umem->odp_data->private to point to our - * MR, before reg_umr finished, to ensure that the MR - * initialization have finished before starting to - * handle invalidations. - */ - smp_wmb(); - to_ib_umem_odp(mr->umem)->private = mr; - /* - * Make sure we will see the new - * umem->odp_data->private value in the invalidation - * routines, before we can get page faults on the - * MR. Page faults can happen once we put the MR in - * the tree, below this line. Without the barrier, - * there can be a fault handling and an invalidation - * before umem->odp_data->private == mr is visible to - * the invalidation handler. - */ - smp_wmb(); - } -} - static void reg_mr_callback(int status, struct mlx5_async_work *context) { struct mlx5_ib_mr *mr = @@ -1346,8 +1320,6 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->umem = umem; set_mr_fields(dev, mr, npages, length, access_flags); - update_odp_mr(mr); - if (use_umr) { int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; @@ -1363,10 +1335,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } } - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - mr->live = 1; + if (is_odp_mr(mr)) { + to_ib_umem_odp(mr->umem)->private = mr; atomic_set(&mr->num_pending_prefetch, 0); } + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + smp_store_release(&mr->live, 1); return &mr->ibmr; error: @@ -1441,6 +1415,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (!mr->umem) return -EINVAL; + if (is_odp_mr(mr)) + return -EOPNOTSUPP; + if (flags & IB_MR_REREG_TRANS) { addr = virt_addr; len = length; @@ -1486,8 +1463,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, } mr->allocated_from_cache = 0; - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - mr->live = 1; } else { /* * Send a UMR WQE @@ -1516,7 +1491,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, set_mr_fields(dev, mr, npages, len, access_flags); - update_odp_mr(mr); return 0; err: @@ -1607,15 +1581,16 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) /* Prevent new page faults and * prefetch requests from succeeding */ - mr->live = 0; + WRITE_ONCE(mr->live, 0); + + /* Wait for all running page-fault handlers to finish. */ + synchronize_srcu(&dev->mr_srcu); /* dequeue pending prefetch requests for the mr */ if (atomic_read(&mr->num_pending_prefetch)) flush_workqueue(system_unbound_wq); WARN_ON(atomic_read(&mr->num_pending_prefetch)); - /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&dev->mr_srcu); /* Destroy all page mappings */ if (!umem_odp->is_implicit_odp) mlx5_ib_invalidate_range(umem_odp, @@ -1987,14 +1962,25 @@ free: int mlx5_ib_dealloc_mw(struct ib_mw *mw) { + struct mlx5_ib_dev *dev = to_mdev(mw->device); struct mlx5_ib_mw *mmw = to_mmw(mw); int err; - err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev, - &mmw->mmkey); - if (!err) - kfree(mmw); - return err; + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + xa_erase(&dev->mdev->priv.mkey_table, + mlx5_base_mkey(mmw->mmkey.key)); + /* + * pagefault_single_data_segment() may be accessing mmw under + * SRCU if the user bound an ODP MR to this MW. + */ + synchronize_srcu(&dev->mr_srcu); + } + + err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); + if (err) + return err; + kfree(mmw); + return 0; } int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 2e9b43061797..3f9478d19376 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -178,6 +178,29 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, return; } + /* + * The locking here is pretty subtle. Ideally the implicit children + * list would be protected by the umem_mutex, however that is not + * possible. Instead this uses a weaker update-then-lock pattern: + * + * srcu_read_lock() + * + * mutex_lock(umem_mutex) + * mlx5_ib_update_xlt() + * mutex_unlock(umem_mutex) + * destroy lkey + * + * ie any change the children list must be followed by the locked + * update_xlt before destroying. + * + * The umem_mutex provides the acquire/release semantic needed to make + * the children list visible to a racing thread. While SRCU is not + * technically required, using it gives consistent use of the SRCU + * locking around the children list. + */ + lockdep_assert_held(&to_ib_umem_odp(mr->umem)->umem_mutex); + lockdep_assert_held(&mr->dev->mr_srcu); + odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE, nentries * MLX5_IMR_MTT_SIZE, mr); @@ -202,15 +225,22 @@ static void mr_leaf_free_action(struct work_struct *work) struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work); int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent; + struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + int srcu_key; mr->parent = NULL; synchronize_srcu(&mr->dev->mr_srcu); - ib_umem_odp_release(odp); - if (imr->live) + if (smp_load_acquire(&imr->live)) { + srcu_key = srcu_read_lock(&mr->dev->mr_srcu); + mutex_lock(&odp_imr->umem_mutex); mlx5_ib_update_xlt(imr, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); + mutex_unlock(&odp_imr->umem_mutex); + srcu_read_unlock(&mr->dev->mr_srcu, srcu_key); + } + ib_umem_odp_release(odp); mlx5_mr_cache_free(mr->dev, mr); if (atomic_dec_and_test(&imr->num_leaf_free)) @@ -278,7 +308,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, idx - blk_start_idx + 1, 0, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); - mutex_unlock(&umem_odp->umem_mutex); /* * We are now sure that the device will not access the * memory. We can safely unmap it, and mark it as dirty if @@ -289,10 +318,12 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, if (unlikely(!umem_odp->npages && mr->parent && !umem_odp->dying)) { - WRITE_ONCE(umem_odp->dying, 1); + WRITE_ONCE(mr->live, 0); + umem_odp->dying = 1; atomic_inc(&mr->parent->num_leaf_free); schedule_work(&umem_odp->work); } + mutex_unlock(&umem_odp->umem_mutex); } void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) @@ -429,8 +460,6 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; - mr->live = 1; - mlx5_ib_dbg(dev, "key %x dev %p mr %p\n", mr->mmkey.key, dev->mdev, mr); @@ -484,6 +513,8 @@ next_mr: mtt->parent = mr; INIT_WORK(&odp->work, mr_leaf_free_action); + smp_store_release(&mtt->live, 1); + if (!nentries) start_idx = addr >> MLX5_IMR_MTT_SHIFT; nentries++; @@ -536,6 +567,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, init_waitqueue_head(&imr->q_leaf_free); atomic_set(&imr->num_leaf_free, 0); atomic_set(&imr->num_pending_prefetch, 0); + smp_store_release(&imr->live, 1); return imr; } @@ -555,15 +587,19 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) if (mr->parent != imr) continue; + mutex_lock(&umem_odp->umem_mutex); ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp)); - if (umem_odp->dying) + if (umem_odp->dying) { + mutex_unlock(&umem_odp->umem_mutex); continue; + } - WRITE_ONCE(umem_odp->dying, 1); + umem_odp->dying = 1; atomic_inc(&imr->num_leaf_free); schedule_work(&umem_odp->work); + mutex_unlock(&umem_odp->umem_mutex); } up_read(&per_mm->umem_rwsem); @@ -773,7 +809,7 @@ next_mr: switch (mmkey->type) { case MLX5_MKEY_MR: mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (!mr->live || !mr->ibmr.pd) { + if (!smp_load_acquire(&mr->live) || !mr->ibmr.pd) { mlx5_ib_dbg(dev, "got dead MR\n"); ret = -EFAULT; goto srcu_unlock; @@ -1641,12 +1677,12 @@ static bool num_pending_prefetch_inc(struct ib_pd *pd, mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (mr->ibmr.pd != pd) { + if (!smp_load_acquire(&mr->live)) { ret = false; break; } - if (!mr->live) { + if (mr->ibmr.pd != pd) { ret = false; break; } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 6cac0c88cf39..36cdfbdbd325 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -230,8 +230,6 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) pvrdma_page_dir_cleanup(dev, &srq->pdir); - kfree(srq); - atomic_dec(&dev->num_srqs); } diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 430314c8abd9..52d402f39df9 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -182,12 +182,19 @@ void siw_qp_llp_close(struct siw_qp *qp) */ void siw_qp_llp_write_space(struct sock *sk) { - struct siw_cep *cep = sk_to_cep(sk); + struct siw_cep *cep; - cep->sk_write_space(sk); + read_lock(&sk->sk_callback_lock); - if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) - (void)siw_sq_start(cep->qp); + cep = sk_to_cep(sk); + if (cep) { + cep->sk_write_space(sk); + + if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) + (void)siw_sq_start(cep->qp); + } + + read_unlock(&sk->sk_callback_lock); } static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 9231b39d18b2..c501bf2a0252 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -112,17 +112,11 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0}; struct xarray *mkeys = &dev->priv.mkey_table; - struct mlx5_core_mkey *deleted_mkey; unsigned long flags; xa_lock_irqsave(mkeys, flags); - deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key)); + __xa_erase(mkeys, mlx5_base_mkey(mkey->key)); xa_unlock_irqrestore(mkeys, flags); - if (!deleted_mkey) { - mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n", - mlx5_base_mkey(mkey->key)); - return -ENOENT; - } MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));