diff --git a/MAINTAINERS b/MAINTAINERS index 7f72d54f065d..42e65d128d01 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6027,7 +6027,7 @@ F: include/scsi/*iscsi* ISCSI EXTENSIONS FOR RDMA (ISER) INITIATOR M: Or Gerlitz -M: Sagi Grimberg +M: Sagi Grimberg M: Roi Dayan L: linux-rdma@vger.kernel.org S: Supported @@ -6037,7 +6037,7 @@ Q: http://patchwork.kernel.org/project/linux-rdma/list/ F: drivers/infiniband/ulp/iser/ ISCSI EXTENSIONS FOR RDMA (ISER) TARGET -M: Sagi Grimberg +M: Sagi Grimberg T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git master L: linux-rdma@vger.kernel.org L: target-devel@vger.kernel.org diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index cb00d59da456..c2e257d97eff 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -691,7 +691,8 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, NULL); /* Coudn't find default GID location */ - WARN_ON(ix < 0); + if (WARN_ON(ix < 0)) + goto release; zattr_type.gid_type = gid_type; diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 4a9aa0433b07..7713ef089c3c 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -48,6 +48,7 @@ #include +#include #include #include #include @@ -1103,6 +1104,9 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, struct ib_ucm_cmd_hdr hdr; ssize_t result; + if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + return -EACCES; + if (len < sizeof(hdr)) return -EINVAL; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index dd3bcceadfde..c0f3826abb30 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1574,6 +1574,9 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf, struct rdma_ucm_cmd_hdr hdr; ssize_t ret; + if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + return -EACCES; + if (len < sizeof(hdr)) return -EINVAL; diff --git 
a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 28ba2cc81535..31f422a70623 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -48,6 +48,8 @@ #include +#include + #include "uverbs.h" MODULE_AUTHOR("Roland Dreier"); @@ -709,6 +711,9 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, int srcu_key; ssize_t ret; + if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + return -EACCES; + if (count < sizeof hdr) return -EINVAL; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 15b8adbf39c0..b65b3541e732 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1860,6 +1860,7 @@ EXPORT_SYMBOL(ib_drain_rq); void ib_drain_qp(struct ib_qp *qp) { ib_drain_sq(qp); - ib_drain_rq(qp); + if (!qp->srq) + ib_drain_rq(qp); } EXPORT_SYMBOL(ib_drain_qp); diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 42a7b8952d13..3234a8be16f6 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1390,6 +1390,8 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.iwcm->add_ref = iwch_qp_add_ref; dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref; dev->ibdev.iwcm->get_qp = iwch_get_qp; + memcpy(dev->ibdev.iwcm->ifname, dev->rdev.t3cdev_p->lldev->name, + sizeof(dev->ibdev.iwcm->ifname)); ret = ib_register_device(&dev->ibdev, NULL); if (ret) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index b4eeb783573c..b0b955724458 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -162,7 +162,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS, &cq->bar2_qid, user ? 
&cq->bar2_pa : NULL); - if (user && !cq->bar2_va) { + if (user && !cq->bar2_pa) { pr_warn(MOD "%s: cqid %u not in BAR2 range.\n", pci_name(rdev->lldi.pdev), cq->cqid); ret = -EINVAL; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 124682dc5709..7574f394fdac 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -580,6 +580,8 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref; dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref; dev->ibdev.iwcm->get_qp = c4iw_get_qp; + memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name, + sizeof(dev->ibdev.iwcm->ifname)); ret = ib_register_device(&dev->ibdev, NULL); if (ret) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index e17fb5d5e033..e8993e49b8b3 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -185,6 +185,10 @@ void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, if (pbar2_pa) *pbar2_pa = (rdev->bar2_pa + bar2_qoffset) & PAGE_MASK; + + if (is_t4(rdev->lldi.adapter_type)) + return NULL; + return rdev->bar2_kva + bar2_qoffset; } @@ -270,7 +274,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, /* * User mode must have bar2 access. 
*/ - if (user && (!wq->sq.bar2_va || !wq->rq.bar2_va)) { + if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) { pr_warn(MOD "%s: sqid %u or rqid %u not in BAR2 range.\n", pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid); goto free_dma; @@ -1895,13 +1899,27 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, void c4iw_drain_sq(struct ib_qp *ibqp) { struct c4iw_qp *qp = to_c4iw_qp(ibqp); + unsigned long flag; + bool need_to_wait; - wait_for_completion(&qp->sq_drained); + spin_lock_irqsave(&qp->lock, flag); + need_to_wait = !t4_sq_empty(&qp->wq); + spin_unlock_irqrestore(&qp->lock, flag); + + if (need_to_wait) + wait_for_completion(&qp->sq_drained); } void c4iw_drain_rq(struct ib_qp *ibqp) { struct c4iw_qp *qp = to_c4iw_qp(ibqp); + unsigned long flag; + bool need_to_wait; - wait_for_completion(&qp->rq_drained); + spin_lock_irqsave(&qp->lock, flag); + need_to_wait = !t4_rq_empty(&qp->wq); + spin_unlock_irqrestore(&qp->lock, flag); + + if (need_to_wait) + wait_for_completion(&qp->rq_drained); } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 99eb1c1a3b7b..6ad0489cb3c5 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -530,7 +530,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, sizeof(struct mlx5_wqe_ctrl_seg)) / sizeof(struct mlx5_wqe_data_seg); props->max_sge = min(max_rq_sg, max_sq_sg); - props->max_sge_rd = props->max_sge; + props->max_sge_rd = MLX5_MAX_SGE_RD; props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 3ea9e055fdd3..92914539edc7 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -500,9 +500,6 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) * 
skb_shinfo(skb)->nr_frags, skb_is_gso(skb)); */ - if (!netif_carrier_ok(netdev)) - return NETDEV_TX_OK; - if (netif_queue_stopped(netdev)) return NETDEV_TX_BUSY; diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index e449e394963f..24f4a782e0f4 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -45,6 +45,8 @@ #include #include +#include + #include "qib.h" #include "qib_common.h" #include "qib_user_sdma.h" @@ -2067,6 +2069,9 @@ static ssize_t qib_write(struct file *fp, const char __user *data, ssize_t ret = 0; void *dest; + if (WARN_ON_ONCE(!ib_safe_file_access(fp))) + return -EACCES; + if (count < sizeof(cmd.type)) { ret = -EINVAL; goto bail; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index bd82a6948dc8..a9e3bcc522c4 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1637,9 +1637,9 @@ bail: spin_unlock_irqrestore(&qp->s_hlock, flags); if (nreq) { if (call_send) - rdi->driver_f.schedule_send_no_lock(qp); - else rdi->driver_f.do_send(qp); + else + rdi->driver_f.schedule_send_no_lock(qp); } return err; } diff --git a/drivers/staging/rdma/hfi1/TODO b/drivers/staging/rdma/hfi1/TODO index 05de0dad8762..4c6f1d7d2eaf 100644 --- a/drivers/staging/rdma/hfi1/TODO +++ b/drivers/staging/rdma/hfi1/TODO @@ -3,4 +3,4 @@ July, 2015 - Remove unneeded file entries in sysfs - Remove software processing of IB protocol and place in library for use by qib, ipath (if still present), hfi1, and eventually soft-roce - +- Replace incorrect uAPI diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index 8396dc5fb6c1..c1c5bf82addb 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -49,6 +49,8 @@ #include #include +#include + #include "hfi.h" #include "pio.h" #include "device.h" @@ -190,6 +192,10 @@ static ssize_t hfi1_file_write(struct 
file *fp, const char __user *data, int uctxt_required = 1; int must_be_root = 0; + /* FIXME: This interface cannot continue out of staging */ + if (WARN_ON_ONCE(!ib_safe_file_access(fp))) + return -EACCES; + if (count < sizeof(cmd)) { ret = -EINVAL; goto bail; @@ -791,15 +797,16 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) spin_unlock_irqrestore(&dd->uctxt_lock, flags); dd->rcd[uctxt->ctxt] = NULL; + + hfi1_user_exp_rcv_free(fdata); + hfi1_clear_ctxt_pkey(dd, uctxt->ctxt); + uctxt->rcvwait_to = 0; uctxt->piowait_to = 0; uctxt->rcvnowait = 0; uctxt->pionowait = 0; uctxt->event_flags = 0; - hfi1_user_exp_rcv_free(fdata); - hfi1_clear_ctxt_pkey(dd, uctxt->ctxt); - hfi1_stats.sps_ctxts--; if (++dd->freectxts == dd->num_user_contexts) aspm_enable_all(dd); @@ -1127,27 +1134,13 @@ bail: static int user_init(struct file *fp) { - int ret; unsigned int rcvctrl_ops = 0; struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; /* make sure that the context has already been setup */ - if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags)) { - ret = -EFAULT; - goto done; - } - - /* - * Subctxts don't need to initialize anything since master - * has done it. - */ - if (fd->subctxt) { - ret = wait_event_interruptible(uctxt->wait, !test_bit( - HFI1_CTXT_MASTER_UNINIT, - &uctxt->event_flags)); - goto expected; - } + if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags)) + return -EFAULT; /* initialize poll variables... */ uctxt->urgent = 0; @@ -1202,19 +1195,7 @@ static int user_init(struct file *fp) wake_up(&uctxt->wait); } -expected: - /* - * Expected receive has to be setup for all processes (including - * shared contexts). However, it has to be done after the master - * context has been fully configured as it depends on the - * eager/expected split of the RcvArray entries. - * Setting it up here ensures that the subcontexts will be waiting - * (due to the above wait_event_interruptible() until the master - * is setup. 
- */ - ret = hfi1_user_exp_rcv_init(fp); -done: - return ret; + return 0; } static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len) @@ -1261,7 +1242,7 @@ static int setup_ctxt(struct file *fp) int ret = 0; /* - * Context should be set up only once (including allocation and + * Context should be set up only once, including allocation and * programming of eager buffers. This is done if context sharing * is not requested or by the master process. */ @@ -1282,8 +1263,27 @@ static int setup_ctxt(struct file *fp) if (ret) goto done; } + } else { + ret = wait_event_interruptible(uctxt->wait, !test_bit( + HFI1_CTXT_MASTER_UNINIT, + &uctxt->event_flags)); + if (ret) + goto done; } + ret = hfi1_user_sdma_alloc_queues(uctxt, fp); + if (ret) + goto done; + /* + * Expected receive has to be setup for all processes (including + * shared contexts). However, it has to be done after the master + * context has been fully configured as it depends on the + * eager/expected split of the RcvArray entries. + * Setting it up here ensures that the subcontexts will be waiting + * (due to the above wait_event_interruptible()) until the master + * is setup. 
+ */ + ret = hfi1_user_exp_rcv_init(fp); if (ret) goto done; @@ -1565,29 +1565,8 @@ static loff_t ui_lseek(struct file *filp, loff_t offset, int whence) { struct hfi1_devdata *dd = filp->private_data; - switch (whence) { - case SEEK_SET: - break; - case SEEK_CUR: - offset += filp->f_pos; - break; - case SEEK_END: - offset = ((dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE) - - offset; - break; - default: - return -EINVAL; - } - - if (offset < 0) - return -EINVAL; - - if (offset >= (dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE) - return -EINVAL; - - filp->f_pos = offset; - - return filp->f_pos; + return fixed_size_llseek(filp, offset, whence, + (dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE); } /* NOTE: assumes unsigned long is 8 bytes */ diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c index c7ad0164ea9a..b3f0682a36c9 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.c +++ b/drivers/staging/rdma/hfi1/mmu_rb.c @@ -71,6 +71,7 @@ static inline void mmu_notifier_range_start(struct mmu_notifier *, struct mm_struct *, unsigned long, unsigned long); static void mmu_notifier_mem_invalidate(struct mmu_notifier *, + struct mm_struct *, unsigned long, unsigned long); static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *, unsigned long, unsigned long); @@ -137,7 +138,7 @@ void hfi1_mmu_rb_unregister(struct rb_root *root) rbnode = rb_entry(node, struct mmu_rb_node, node); rb_erase(node, root); if (handler->ops->remove) - handler->ops->remove(root, rbnode, false); + handler->ops->remove(root, rbnode, NULL); } } @@ -176,7 +177,7 @@ unlock: return ret; } -/* Caller must host handler lock */ +/* Caller must hold handler lock */ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, unsigned long addr, unsigned long len) @@ -200,15 +201,21 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, return node; } +/* Caller must *not* hold handler lock. 
*/ static void __mmu_rb_remove(struct mmu_rb_handler *handler, - struct mmu_rb_node *node, bool arg) + struct mmu_rb_node *node, struct mm_struct *mm) { + unsigned long flags; + /* Validity of handler and node pointers has been checked by caller. */ hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr, node->len); + spin_lock_irqsave(&handler->lock, flags); __mmu_int_rb_remove(node, handler->root); + spin_unlock_irqrestore(&handler->lock, flags); + if (handler->ops->remove) - handler->ops->remove(handler->root, node, arg); + handler->ops->remove(handler->root, node, mm); } struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr, @@ -231,14 +238,11 @@ struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr, void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node) { struct mmu_rb_handler *handler = find_mmu_handler(root); - unsigned long flags; if (!handler || !node) return; - spin_lock_irqsave(&handler->lock, flags); - __mmu_rb_remove(handler, node, false); - spin_unlock_irqrestore(&handler->lock, flags); + __mmu_rb_remove(handler, node, NULL); } static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root) @@ -260,7 +264,7 @@ unlock: static inline void mmu_notifier_page(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long addr) { - mmu_notifier_mem_invalidate(mn, addr, addr + PAGE_SIZE); + mmu_notifier_mem_invalidate(mn, mm, addr, addr + PAGE_SIZE); } static inline void mmu_notifier_range_start(struct mmu_notifier *mn, @@ -268,25 +272,31 @@ static inline void mmu_notifier_range_start(struct mmu_notifier *mn, unsigned long start, unsigned long end) { - mmu_notifier_mem_invalidate(mn, start, end); + mmu_notifier_mem_invalidate(mn, mm, start, end); } static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, + struct mm_struct *mm, unsigned long start, unsigned long end) { struct mmu_rb_handler *handler = container_of(mn, struct mmu_rb_handler, mn); struct rb_root *root = 
handler->root; - struct mmu_rb_node *node; + struct mmu_rb_node *node, *ptr = NULL; unsigned long flags; spin_lock_irqsave(&handler->lock, flags); - for (node = __mmu_int_rb_iter_first(root, start, end - 1); node; - node = __mmu_int_rb_iter_next(node, start, end - 1)) { + for (node = __mmu_int_rb_iter_first(root, start, end - 1); + node; node = ptr) { + /* Guard against node removal. */ + ptr = __mmu_int_rb_iter_next(node, start, end - 1); hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u", node->addr, node->len); - if (handler->ops->invalidate(root, node)) - __mmu_rb_remove(handler, node, true); + if (handler->ops->invalidate(root, node)) { + spin_unlock_irqrestore(&handler->lock, flags); + __mmu_rb_remove(handler, node, mm); + spin_lock_irqsave(&handler->lock, flags); + } } spin_unlock_irqrestore(&handler->lock, flags); } diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/staging/rdma/hfi1/mmu_rb.h index f8523fdb8a18..19a306e83c7d 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.h +++ b/drivers/staging/rdma/hfi1/mmu_rb.h @@ -59,7 +59,8 @@ struct mmu_rb_node { struct mmu_rb_ops { bool (*filter)(struct mmu_rb_node *, unsigned long, unsigned long); int (*insert)(struct rb_root *, struct mmu_rb_node *); - void (*remove)(struct rb_root *, struct mmu_rb_node *, bool); + void (*remove)(struct rb_root *, struct mmu_rb_node *, + struct mm_struct *); int (*invalidate)(struct rb_root *, struct mmu_rb_node *); }; diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 29a5ad28019b..dc9119e1b458 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -519,10 +519,12 @@ static void iowait_sdma_drained(struct iowait *wait) * do the flush work until that QP's * sdma work has finished. 
*/ + spin_lock(&qp->s_lock); if (qp->s_flags & RVT_S_WAIT_DMA) { qp->s_flags &= ~RVT_S_WAIT_DMA; hfi1_schedule_send(qp); } + spin_unlock(&qp->s_lock); } /** diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c index 0861e095df8d..8bd56d5c783d 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.c +++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c @@ -87,7 +87,8 @@ static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *); static int set_rcvarray_entry(struct file *, unsigned long, u32, struct tid_group *, struct page **, unsigned); static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *); -static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, bool); +static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, + struct mm_struct *); static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *); static int program_rcvarray(struct file *, unsigned long, struct tid_group *, struct tid_pageset *, unsigned, u16, struct page **, @@ -254,6 +255,8 @@ int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) struct hfi1_ctxtdata *uctxt = fd->uctxt; struct tid_group *grp, *gptr; + if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags)) + return 0; /* * The notifier would have been removed when the process'es mm * was freed. 
@@ -899,7 +902,7 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo, if (!node || node->rcventry != (uctxt->expected_base + rcventry)) return -EBADF; if (HFI1_CAP_IS_USET(TID_UNMAP)) - mmu_rb_remove(&fd->tid_rb_root, &node->mmu, false); + mmu_rb_remove(&fd->tid_rb_root, &node->mmu, NULL); else hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu); @@ -965,7 +968,7 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, continue; if (HFI1_CAP_IS_USET(TID_UNMAP)) mmu_rb_remove(&fd->tid_rb_root, - &node->mmu, false); + &node->mmu, NULL); else hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu); @@ -1032,7 +1035,7 @@ static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node) } static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node, - bool notifier) + struct mm_struct *mm) { struct hfi1_filedata *fdata = container_of(root, struct hfi1_filedata, tid_rb_root); diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index ab6b6a42000f..d53a659548e0 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -278,7 +278,8 @@ static inline void pq_update(struct hfi1_user_sdma_pkt_q *); static void user_sdma_free_request(struct user_sdma_request *, bool); static int pin_vector_pages(struct user_sdma_request *, struct user_sdma_iovec *); -static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned); +static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned, + unsigned); static int check_header_template(struct user_sdma_request *, struct hfi1_pkt_header *, u32, u32); static int set_txreq_header(struct user_sdma_request *, @@ -299,7 +300,8 @@ static int defer_packet_queue( static void activate_packet_queue(struct iowait *, int); static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long); static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *); -static void sdma_rb_remove(struct rb_root *, struct 
mmu_rb_node *, bool); +static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, + struct mm_struct *); static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *); static struct mmu_rb_ops sdma_rb_ops = { @@ -1063,8 +1065,10 @@ static int pin_vector_pages(struct user_sdma_request *req, rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root, (unsigned long)iovec->iov.iov_base, iovec->iov.iov_len); - if (rb_node) + if (rb_node && !IS_ERR(rb_node)) node = container_of(rb_node, struct sdma_mmu_node, rb); + else + rb_node = NULL; if (!node) { node = kzalloc(sizeof(*node), GFP_KERNEL); @@ -1107,7 +1111,8 @@ retry: goto bail; } if (pinned != npages) { - unpin_vector_pages(current->mm, pages, pinned); + unpin_vector_pages(current->mm, pages, node->npages, + pinned); ret = -EFAULT; goto bail; } @@ -1147,9 +1152,9 @@ bail: } static void unpin_vector_pages(struct mm_struct *mm, struct page **pages, - unsigned npages) + unsigned start, unsigned npages) { - hfi1_release_user_pages(mm, pages, npages, 0); + hfi1_release_user_pages(mm, pages + start, npages, 0); kfree(pages); } @@ -1502,7 +1507,7 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) &req->pq->sdma_rb_root, (unsigned long)req->iovs[i].iov.iov_base, req->iovs[i].iov.iov_len); - if (!mnode) + if (!mnode || IS_ERR(mnode)) continue; node = container_of(mnode, struct sdma_mmu_node, rb); @@ -1547,7 +1552,7 @@ static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) } static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode, - bool notifier) + struct mm_struct *mm) { struct sdma_mmu_node *node = container_of(mnode, struct sdma_mmu_node, rb); @@ -1557,14 +1562,20 @@ static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode, node->pq->n_locked -= node->npages; spin_unlock(&node->pq->evict_lock); - unpin_vector_pages(notifier ? 
NULL : current->mm, node->pages, + /* + * If mm is set, we are being called by the MMU notifier and we + * should not pass a mm_struct to unpin_vector_pages(). This is to + * prevent a deadlock when hfi1_release_user_pages() attempts to + * take the mmap_sem, which the MMU notifier has already taken. + */ + unpin_vector_pages(mm ? NULL : current->mm, node->pages, 0, node->npages); /* * If called by the MMU notifier, we have to adjust the pinned * page count ourselves. */ - if (notifier) - current->mm->pinned_vm -= node->npages; + if (mm) + mm->pinned_vm -= node->npages; kfree(node); } diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8156e3c9239c..b3575f392492 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -392,6 +392,17 @@ enum { MLX5_CAP_OFF_CMDIF_CSUM = 46, }; +enum { + /* + * Max wqe size for rdma read is 512 bytes, so this + * limits our max_sge_rd as the wqe needs to fit: + * - ctrl segment (16 bytes) + * - rdma segment (16 bytes) + * - scatter elements (16 bytes each) + */ + MLX5_MAX_SGE_RD = (512 - 16 - 16) / 16 +}; + struct mlx5_inbox_hdr { __be16 opcode; u8 rsvd[4]; diff --git a/include/rdma/ib.h b/include/rdma/ib.h index cf8f9e700e48..a6b93706b0fc 100644 --- a/include/rdma/ib.h +++ b/include/rdma/ib.h @@ -34,6 +34,7 @@ #define _RDMA_IB_H #include +#include struct ib_addr { union { @@ -86,4 +87,19 @@ struct sockaddr_ib { __u64 sib_scope_id; }; +/* + * The IB interfaces that use write() as bi-directional ioctl() are + * fundamentally unsafe, since there are lots of ways to trigger "write()" + * calls from various contexts with elevated privileges. That includes the + * traditional suid executable error message writes, but also various kernel + * interfaces that can write to file descriptors. + * + * This function provides protection for the legacy API by restricting the + * calling context. 
+ */ +static inline bool ib_safe_file_access(struct file *filp) +{ + return filp->f_cred == current_cred() && segment_eq(get_fs(), USER_DS); +} + #endif /* _RDMA_IB_H */