From 063af59597492d31c44e549d6c773b6485f7dc53 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 28 Sep 2016 20:24:12 +0000 Subject: [PATCH 1/6] IB/rxe: Avoid scheduling tasklet for userspace QP This patch avoids scheduing tasklet for WQE and protocol processing for user space QP. It performs the task in calling process context. To improve code readability kernel specific post_send handling moved to post_send_kernel() function. Signed-off-by: Parav Pandit Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_verbs.c | 38 ++++++++++++++++++--------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 4552be960c6a..a5af6917fc1c 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -801,26 +801,15 @@ err1: return err; } -static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) { int err = 0; - struct rxe_qp *qp = to_rqp(ibqp); unsigned int mask; unsigned int length = 0; int i; int must_sched; - if (unlikely(!qp->valid)) { - *bad_wr = wr; - return -EINVAL; - } - - if (unlikely(qp->req.state < QP_STATE_READY)) { - *bad_wr = wr; - return -EINVAL; - } - while (wr) { mask = wr_opcode_mask(wr->opcode, qp); if (unlikely(!mask)) { @@ -861,6 +850,29 @@ static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, return err; } +static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct rxe_qp *qp = to_rqp(ibqp); + + if (unlikely(!qp->valid)) { + *bad_wr = wr; + return -EINVAL; + } + + if (unlikely(qp->req.state < QP_STATE_READY)) { + *bad_wr = wr; + return -EINVAL; + } + + if (qp->is_user) { + /* Utilize process context to do protocol processing */ + rxe_run_task(&qp->req.task, 0); + return 0; + } else + return rxe_post_send_kernel(qp, wr, bad_wr); +} + static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { From ffae955d49e6cff28589d6e7388bb9275f9ce2d1 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 28 Sep 2016 20:24:42 +0000 Subject: [PATCH 2/6] IB/rxe: Fix sending out loopback packet on netdev interface. Both prepare4 and prepare6 sets loopback mask in pkt_info structure instance of skb. The xmit_packet and other requester side functions use a pkt_info struct from the stack without the proper mask. This results in sending out the packet to the actual netdev device and loopback functionality is broken. Modify prepare() to pass its correctly marked pkt_info struct to prepare4() and prepare6() instead of them using SKB_TO_PKT(skb) and getting an incorrectly set mask. Verified with perftest applications. Signed-off-by: Parav Pandit Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_net.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index eedf2f1cafdf..549aa84f9f9f 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -350,14 +350,14 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); } -static int prepare4(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av) +static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct sk_buff *skb, struct rxe_av *av) { struct dst_entry *dst; bool xnet = false; __be16 df = htons(IP_DF); struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; - struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); dst = rxe_find_route4(rxe->ndev, saddr, daddr); if (!dst) { @@ -376,12 +376,12 @@ static int prepare4(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av) return 0; } -static int prepare6(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av) +static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct sk_buff *skb, struct rxe_av *av) { struct dst_entry *dst; struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; - struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); dst = rxe_find_route6(rxe->ndev, saddr, daddr); if (!dst) { @@ -408,9 +408,9 @@ static int prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct rxe_av *av = rxe_get_av(pkt); if (av->network_type == RDMA_NETWORK_IPV4) - err = prepare4(rxe, skb, av); + err = prepare4(rxe, pkt, skb, av); else if (av->network_type == RDMA_NETWORK_IPV6) - err = prepare6(rxe, skb, av); + err = prepare6(rxe, pkt, skb, av); *crc = rxe_icrc_hdr(pkt, skb); From d9703650f4aba7555fde92636d8d9a689029e8f8 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 28 Sep 2016 20:25:47 +0000 Subject: [PATCH 3/6] IB/{rxe,core,rdmavt}: Fix kernel crash for reg MR This patch fixes below kernel crash on memory registration for rxe and other transport drivers which has dma_ops extension. IB/core invokes ib_map_sg_attrs() in generic manner with dma attributes which is used by mlx5 and mthca adapters. However in doing so it ignored honoring dma_ops extension of software based transports for sg map/unmap operation. This results in calling dma_map_sg_attrs of hardware virtual device resulting in crash for null reference. We extend the core to support sg_map/unmap_attrs and transport drivers to implement those dma_ops callback functions. Verified usign perftest applications. BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] check_addr+0x35/0x60 ... Call Trace: [] ? nommu_map_sg+0x99/0xd0 [] ib_umem_get+0x3d6/0x470 [ib_core] [] rxe_mem_init_user+0x49/0x270 [rdma_rxe] [] ? rxe_add_index+0xca/0x100 [rdma_rxe] [] rxe_reg_user_mr+0x9f/0x130 [rdma_rxe] [] ib_uverbs_reg_mr+0x14e/0x2c0 [ib_uverbs] [] ib_uverbs_write+0x15b/0x3b0 [ib_uverbs] [] ? mem_cgroup_commit_charge+0x76/0xe0 [] ? page_add_new_anon_rmap+0x89/0xc0 [] ? lru_cache_add_active_or_unevictable+0x39/0xc0 [] __vfs_write+0x28/0x120 [] ? rw_verify_area+0x49/0xb0 [] vfs_write+0xb2/0x1b0 [] SyS_write+0x46/0xa0 [] entry_SYSCALL_64_fastpath+0x1a/0xa4 Signed-off-by: Parav Pandit Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/dma.c | 17 +++++++++++++++++ drivers/infiniband/sw/rxe/rxe_dma.c | 17 +++++++++++++++++ include/rdma/ib_verbs.h | 23 ++++++++++++++++++++--- 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c index 33076a5eee2f..01f71caa3ac4 100644 --- a/drivers/infiniband/sw/rdmavt/dma.c +++ b/drivers/infiniband/sw/rdmavt/dma.c @@ -138,6 +138,21 @@ static void rvt_unmap_sg(struct ib_device *dev, /* This is a stub, nothing to be done here */ } +static int rvt_map_sg_attrs(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction, + unsigned long attrs) +{ + return rvt_map_sg(dev, sgl, nents, direction); +} + +static void rvt_unmap_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long attrs) +{ + return rvt_unmap_sg(dev, sg, nents, direction); +} + static void rvt_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) { @@ -177,6 +192,8 @@ struct ib_dma_mapping_ops rvt_default_dma_mapping_ops = { .unmap_page = rvt_dma_unmap_page, .map_sg = rvt_map_sg, .unmap_sg = rvt_unmap_sg, + .map_sg_attrs = rvt_map_sg_attrs, + .unmap_sg_attrs = rvt_unmap_sg_attrs, .sync_single_for_cpu = rvt_sync_single_for_cpu, .sync_single_for_device = rvt_sync_single_for_device, .alloc_coherent = rvt_dma_alloc_coherent, diff --git a/drivers/infiniband/sw/rxe/rxe_dma.c b/drivers/infiniband/sw/rxe/rxe_dma.c index 7634c1a81b2b..a0f8af5851ae 100644 --- a/drivers/infiniband/sw/rxe/rxe_dma.c +++ b/drivers/infiniband/sw/rxe/rxe_dma.c @@ -117,6 +117,21 @@ static void rxe_unmap_sg(struct ib_device *dev, WARN_ON(!valid_dma_direction(direction)); } +static int rxe_map_sg_attrs(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction, + unsigned long attrs) +{ + return rxe_map_sg(dev, sgl, nents, direction); +} + +static void rxe_unmap_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long attrs) +{ + rxe_unmap_sg(dev, sg, nents, direction); +} + static void rxe_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) @@ -159,6 +174,8 @@ struct ib_dma_mapping_ops rxe_dma_mapping_ops = { .unmap_page = rxe_dma_unmap_page, .map_sg = rxe_map_sg, .unmap_sg = rxe_unmap_sg, + .map_sg_attrs = rxe_map_sg_attrs, + .unmap_sg_attrs = rxe_unmap_sg_attrs, .sync_single_for_cpu = rxe_sync_single_for_cpu, .sync_single_for_device = rxe_sync_single_for_device, .alloc_coherent = rxe_dma_alloc_coherent, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e1f96737c2a1..9e935655fccb 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1739,6 +1739,14 @@ struct ib_dma_mapping_ops { void (*unmap_sg)(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction); + int (*map_sg_attrs)(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long attrs); + void (*unmap_sg_attrs)(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long attrs); void (*sync_single_for_cpu)(struct ib_device *dev, u64 dma_handle, size_t size, @@ -3000,8 +3008,12 @@ static inline int ib_dma_map_sg_attrs(struct ib_device *dev, enum dma_data_direction direction, unsigned long dma_attrs) { - return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, - dma_attrs); + if (dev->dma_ops) + return dev->dma_ops->map_sg_attrs(dev, sg, nents, direction, + dma_attrs); + else + return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); } static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, @@ -3009,7 +3021,12 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, enum dma_data_direction direction, unsigned long dma_attrs) { - dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); + if (dev->dma_ops) + return dev->dma_ops->unmap_sg_attrs(dev, sg, nents, direction, + dma_attrs); + else + dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); } /** * ib_sg_dma_address - Return the DMA address from a scatter/gather entry From b6bbee0d2438a2c9c7525f5bd7047a8b2ce4f38f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 28 Sep 2016 20:26:44 +0000 Subject: [PATCH 4/6] IB/rxe: Properly honor max IRD value for rd/atomic. This patch honoris the max incoming read request count instead of outgoing read req count (a) during modify qp by allocating response queue metadata (b) during incoming read request processing Signed-off-by: Parav Pandit Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_loc.h | 2 +- drivers/infiniband/sw/rxe/rxe_qp.c | 28 +++++++++++++++------------- drivers/infiniband/sw/rxe/rxe_resp.c | 2 +- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 4a5484ef604f..73849a5a91b3 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -198,7 +198,7 @@ void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res); static inline void rxe_advance_resp_resource(struct rxe_qp *qp) { qp->resp.res_head++; - if (unlikely(qp->resp.res_head == qp->attr.max_rd_atomic)) + if (unlikely(qp->resp.res_head == qp->attr.max_dest_rd_atomic)) qp->resp.res_head = 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 22ba24f2a2c1..62c37a563df9 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -146,7 +146,7 @@ static void free_rd_atomic_resources(struct rxe_qp *qp) if (qp->resp.resources) { int i; - for (i = 0; i < qp->attr.max_rd_atomic; i++) { + for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) { struct resp_res *res = &qp->resp.resources[i]; free_rd_atomic_resource(qp, res); @@ -174,7 +174,7 @@ static void cleanup_rd_atomic_resources(struct rxe_qp *qp) struct resp_res *res; if (qp->resp.resources) { - for (i = 0; i < qp->attr.max_rd_atomic; i++) { + for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) { res = &qp->resp.resources[i]; free_rd_atomic_resource(qp, res); } @@ -596,16 +596,23 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, if (mask & IB_QP_MAX_QP_RD_ATOMIC) { int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic); - free_rd_atomic_resources(qp); - - err = alloc_rd_atomic_resources(qp, max_rd_atomic); - if (err) - return err; - qp->attr.max_rd_atomic = max_rd_atomic; atomic_set(&qp->req.rd_atomic, max_rd_atomic); } + if (mask & IB_QP_MAX_DEST_RD_ATOMIC) { + int max_dest_rd_atomic = + __roundup_pow_of_two(attr->max_dest_rd_atomic); + + qp->attr.max_dest_rd_atomic = max_dest_rd_atomic; + + free_rd_atomic_resources(qp); + + err = alloc_rd_atomic_resources(qp, max_dest_rd_atomic); + if (err) + return err; + } + if (mask & IB_QP_CUR_STATE) qp->attr.cur_qp_state = attr->qp_state; @@ -701,11 +708,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, pr_debug("set req psn = 0x%x\n", qp->req.psn); } - if (mask & IB_QP_MAX_DEST_RD_ATOMIC) { - qp->attr.max_dest_rd_atomic = - __roundup_pow_of_two(attr->max_dest_rd_atomic); - } - if (mask & IB_QP_PATH_MIG_STATE) qp->attr.path_mig_state = attr->path_mig_state; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 3e0f0f2baace..d544b5e84797 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -383,7 +383,7 @@ static enum resp_states check_resource(struct rxe_qp *qp, * too many read/atomic ops, we just * recycle the responder resource queue */ - if (likely(qp->attr.max_rd_atomic > 0)) + if (likely(qp->attr.max_dest_rd_atomic > 0)) return RESPST_CHK_LENGTH; else return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ; From b9fe856e545491fd57fc7ca195d0123df0124e7c Mon Sep 17 00:00:00 2001 From: Stephen Bates Date: Fri, 23 Sep 2016 09:32:11 -0600 Subject: [PATCH 5/6] rdma_rxe: Ensure rdma_rxe init occurs at correct time There is a problem when CONFIG_RDMA_RXE=y and CONFIG_IPV6=y. This results in the rdma_rxe initialization occurring before the IPv6 services are ready. This patch delays the initialization of rdma_rxe until after the IPv6 services are ready. This fix is based on one proposed by Logan Gunthorpe on a much older code base. Signed-off-by: Stephen Bates Reviewed-by: Yonatan Cohen Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index ddd59270ff6d..9a48e7277673 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -401,5 +401,5 @@ static void __exit rxe_module_exit(void) pr_info("rxe: unloaded\n"); } -module_init(rxe_module_init); +late_initcall(rxe_module_init); module_exit(rxe_module_exit); From e404f945a6101a6f0f2a29aade6dc32015471daa Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 28 Sep 2016 20:26:26 +0000 Subject: [PATCH 6/6] IB/rxe: improved debug prints & code cleanup 1. Debugging qp state transitions and qp errors in loopback and multiple QP tests is difficult without qp numbers in debug logs. This patch adds qp number to important debug logs. 2. Instead of having rxe: prefix in few logs and not having in few logs, using uniform module name prefix using pr_fmt macro. 3. Code cleanup for various warnings reported by checkpatch for incomplete unsigned data type, line over 80 characters, return statements. Signed-off-by: Parav Pandit Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe.c | 34 +++++------------------ drivers/infiniband/sw/rxe/rxe.h | 5 ++++ drivers/infiniband/sw/rxe/rxe_av.c | 4 +-- drivers/infiniband/sw/rxe/rxe_comp.c | 6 +++-- drivers/infiniband/sw/rxe/rxe_mmap.c | 2 +- drivers/infiniband/sw/rxe/rxe_mr.c | 2 +- drivers/infiniband/sw/rxe/rxe_net.c | 39 ++++++++++++++++++++------- drivers/infiniband/sw/rxe/rxe_net.h | 3 +-- drivers/infiniband/sw/rxe/rxe_qp.c | 31 +++++++++++---------- drivers/infiniband/sw/rxe/rxe_recv.c | 3 ++- drivers/infiniband/sw/rxe/rxe_req.c | 19 ++++++------- drivers/infiniband/sw/rxe/rxe_resp.c | 25 +++++++++++------ drivers/infiniband/sw/rxe/rxe_sysfs.c | 12 ++++----- drivers/infiniband/sw/rxe/rxe_verbs.c | 12 +++++---- 14 files changed, 109 insertions(+), 88 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 9a48e7277673..ab6c3c25d7ff 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -358,38 +358,16 @@ static int __init rxe_module_init(void) /* initialize slab caches for managed objects */ err = rxe_cache_init(); if (err) { - pr_err("rxe: unable to init object pools\n"); + pr_err("unable to init object pools\n"); return err; } - err = rxe_net_ipv4_init(); - if (err) { - pr_err("rxe: unable to init ipv4 tunnel\n"); - rxe_cache_exit(); - goto exit; - } - - err = rxe_net_ipv6_init(); - if (err) { - pr_err("rxe: unable to init ipv6 tunnel\n"); - rxe_cache_exit(); - goto exit; - } - - err = register_netdevice_notifier(&rxe_net_notifier); - if (err) { - pr_err("rxe: Failed to rigister netdev notifier\n"); - goto exit; - } - - pr_info("rxe: loaded\n"); + err = rxe_net_init(); + if (err) + return err; + pr_info("loaded\n"); return 0; - -exit: - rxe_release_udp_tunnel(recv_sockets.sk4); - rxe_release_udp_tunnel(recv_sockets.sk6); - return err; } static void __exit rxe_module_exit(void) @@ -398,7 +376,7 @@ static void __exit rxe_module_exit(void) rxe_net_exit(); rxe_cache_exit(); - pr_info("rxe: unloaded\n"); + pr_info("unloaded\n"); } late_initcall(rxe_module_init); diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index 12c71c549f97..a696af81e4a5 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -34,6 +34,11 @@ #ifndef RXE_H #define RXE_H +#ifdef pr_fmt +#undef pr_fmt +#endif +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index 5c9474212d4e..604f6fee96bd 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -39,7 +39,7 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr) struct rxe_port *port; if (attr->port_num != 1) { - pr_info("rxe: invalid port_num = %d\n", attr->port_num); + pr_info("invalid port_num = %d\n", attr->port_num); return -EINVAL; } @@ -47,7 +47,7 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr) if (attr->ah_flags & IB_AH_GRH) { if (attr->grh.sgid_index > port->attr.gid_tbl_len) { - pr_info("rxe: invalid sgid index = %d\n", + pr_info("invalid sgid index = %d\n", attr->grh.sgid_index); return -EINVAL; } diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 1c59ef2c67aa..6c5e29db88e3 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -567,7 +567,8 @@ int rxe_completer(void *arg) state = COMPST_GET_ACK; while (1) { - pr_debug("state = %s\n", comp_state_name[state]); + pr_debug("qp#%d state = %s\n", qp_num(qp), + comp_state_name[state]); switch (state) { case COMPST_GET_ACK: skb = skb_dequeue(&qp->resp_pkts); @@ -709,7 +710,8 @@ int rxe_completer(void *arg) qp->comp.rnr_retry--; qp->req.need_retry = 1; - pr_debug("set rnr nak timer\n"); + pr_debug("qp#%d set rnr nak timer\n", + qp_num(qp)); mod_timer(&qp->rnr_nak_timer, jiffies + rnrnak_jiffies(aeth_syn(pkt) & ~AETH_TYPE_MASK)); diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index 54b3c7c99eff..c572a4c09359 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -126,7 +126,7 @@ found_it: ret = remap_vmalloc_range(vma, ip->obj, 0); if (ret) { - pr_err("rxe: err %d from remap_vmalloc_range\n", ret); + pr_err("err %d from remap_vmalloc_range\n", ret); goto done; } diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index f3dab6574504..1869152f1d23 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -39,7 +39,7 @@ */ static u8 rxe_get_key(void) { - static unsigned key = 1; + static u32 key = 1; key = key << 1; diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 549aa84f9f9f..b8258e4f0aea 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -65,7 +65,7 @@ struct rxe_dev *net_to_rxe(struct net_device *ndev) return found; } -struct rxe_dev *get_rxe_by_name(const char* name) +struct rxe_dev *get_rxe_by_name(const char *name) { struct rxe_dev *rxe; struct rxe_dev *found = NULL; @@ -601,8 +601,7 @@ void rxe_port_up(struct rxe_dev *rxe) port->attr.phys_state = IB_PHYS_STATE_LINK_UP; rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE); - pr_info("rxe: set %s active\n", rxe->ib_dev.name); - return; + pr_info("set %s active\n", rxe->ib_dev.name); } /* Caller must hold net_info_lock */ @@ -615,8 +614,7 @@ void rxe_port_down(struct rxe_dev *rxe) port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN; rxe_port_event(rxe, IB_EVENT_PORT_ERR); - pr_info("rxe: set %s down\n", rxe->ib_dev.name); - return; + pr_info("set %s down\n", rxe->ib_dev.name); } static int rxe_notify(struct notifier_block *not_blk, @@ -641,7 +639,7 @@ static int rxe_notify(struct notifier_block *not_blk, rxe_port_down(rxe); break; case NETDEV_CHANGEMTU: - pr_info("rxe: %s changed mtu to %d\n", ndev->name, ndev->mtu); + pr_info("%s changed mtu to %d\n", ndev->name, ndev->mtu); rxe_set_mtu(rxe, ndev->mtu); break; case NETDEV_REBOOT: @@ -651,7 +649,7 @@ static int rxe_notify(struct notifier_block *not_blk, case NETDEV_CHANGENAME: case NETDEV_FEAT_CHANGE: default: - pr_info("rxe: ignoring netdev event = %ld for %s\n", + pr_info("ignoring netdev event = %ld for %s\n", event, ndev->name); break; } @@ -671,7 +669,7 @@ int rxe_net_ipv4_init(void) htons(ROCE_V2_UDP_DPORT), false); if (IS_ERR(recv_sockets.sk4)) { recv_sockets.sk4 = NULL; - pr_err("rxe: Failed to create IPv4 UDP tunnel\n"); + pr_err("Failed to create IPv4 UDP tunnel\n"); return -1; } @@ -688,7 +686,7 @@ int rxe_net_ipv6_init(void) htons(ROCE_V2_UDP_DPORT), true); if (IS_ERR(recv_sockets.sk6)) { recv_sockets.sk6 = NULL; - pr_err("rxe: Failed to create IPv6 UDP tunnel\n"); + pr_err("Failed to create IPv6 UDP tunnel\n"); return -1; } #endif @@ -701,3 +699,26 @@ void rxe_net_exit(void) rxe_release_udp_tunnel(recv_sockets.sk4); unregister_netdevice_notifier(&rxe_net_notifier); } + +int rxe_net_init(void) +{ + int err; + + recv_sockets.sk6 = NULL; + + err = rxe_net_ipv4_init(); + if (err) + return err; + err = rxe_net_ipv6_init(); + if (err) + goto err_out; + err = register_netdevice_notifier(&rxe_net_notifier); + if (err) { + pr_err("Failed to register netdev notifier\n"); + goto err_out; + } + return 0; +err_out: + rxe_net_exit(); + return err; +} diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h index 0daf7f09e5b5..1c06b3bfe1b6 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.h +++ b/drivers/infiniband/sw/rxe/rxe_net.h @@ -49,8 +49,7 @@ void rxe_release_udp_tunnel(struct socket *sk); struct rxe_dev *rxe_net_add(struct net_device *ndev); -int rxe_net_ipv4_init(void); -int rxe_net_ipv6_init(void); +int rxe_net_init(void); void rxe_net_exit(void); #endif /* RXE_NET_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 62c37a563df9..b8036cfbce04 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -298,8 +298,8 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, wqe_size = rcv_wqe_size(qp->rq.max_sge); - pr_debug("max_wr = %d, max_sge = %d, wqe_size = %d\n", - qp->rq.max_wr, qp->rq.max_sge, wqe_size); + pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n", + qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size); qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, @@ -680,24 +680,27 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, if (mask & IB_QP_RETRY_CNT) { qp->attr.retry_cnt = attr->retry_cnt; qp->comp.retry_cnt = attr->retry_cnt; - pr_debug("set retry count = %d\n", attr->retry_cnt); + pr_debug("qp#%d set retry count = %d\n", qp_num(qp), + attr->retry_cnt); } if (mask & IB_QP_RNR_RETRY) { qp->attr.rnr_retry = attr->rnr_retry; qp->comp.rnr_retry = attr->rnr_retry; - pr_debug("set rnr retry count = %d\n", attr->rnr_retry); + pr_debug("qp#%d set rnr retry count = %d\n", qp_num(qp), + attr->rnr_retry); } if (mask & IB_QP_RQ_PSN) { qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK); qp->resp.psn = qp->attr.rq_psn; - pr_debug("set resp psn = 0x%x\n", qp->resp.psn); + pr_debug("qp#%d set resp psn = 0x%x\n", qp_num(qp), + qp->resp.psn); } if (mask & IB_QP_MIN_RNR_TIMER) { qp->attr.min_rnr_timer = attr->min_rnr_timer; - pr_debug("set min rnr timer = 0x%x\n", + pr_debug("qp#%d set min rnr timer = 0x%x\n", qp_num(qp), attr->min_rnr_timer); } @@ -705,7 +708,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK); qp->req.psn = qp->attr.sq_psn; qp->comp.psn = qp->attr.sq_psn; - pr_debug("set req psn = 0x%x\n", qp->req.psn); + pr_debug("qp#%d set req psn = 0x%x\n", qp_num(qp), qp->req.psn); } if (mask & IB_QP_PATH_MIG_STATE) @@ -719,38 +722,38 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, switch (attr->qp_state) { case IB_QPS_RESET: - pr_debug("qp state -> RESET\n"); + pr_debug("qp#%d state -> RESET\n", qp_num(qp)); rxe_qp_reset(qp); break; case IB_QPS_INIT: - pr_debug("qp state -> INIT\n"); + pr_debug("qp#%d state -> INIT\n", qp_num(qp)); qp->req.state = QP_STATE_INIT; qp->resp.state = QP_STATE_INIT; break; case IB_QPS_RTR: - pr_debug("qp state -> RTR\n"); + pr_debug("qp#%d state -> RTR\n", qp_num(qp)); qp->resp.state = QP_STATE_READY; break; case IB_QPS_RTS: - pr_debug("qp state -> RTS\n"); + pr_debug("qp#%d state -> RTS\n", qp_num(qp)); qp->req.state = QP_STATE_READY; break; case IB_QPS_SQD: - pr_debug("qp state -> SQD\n"); + pr_debug("qp#%d state -> SQD\n", qp_num(qp)); rxe_qp_drain(qp); break; case IB_QPS_SQE: - pr_warn("qp state -> SQE !!?\n"); + pr_warn("qp#%d state -> SQE !!?\n", qp_num(qp)); /* Not possible from modify_qp. */ break; case IB_QPS_ERR: - pr_debug("qp state -> ERR\n"); + pr_debug("qp#%d state -> ERR\n", qp_num(qp)); rxe_qp_error(qp); break; } diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 144d2f129fcd..46f062842a9a 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -387,7 +387,8 @@ int rxe_rcv(struct sk_buff *skb) pack_icrc = be32_to_cpu(*icrcp); calc_icrc = rxe_icrc_hdr(pkt, skb); - calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt), payload_size(pkt)); + calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt), + payload_size(pkt)); calc_icrc = cpu_to_be32(~calc_icrc); if (unlikely(calc_icrc != pack_icrc)) { char saddr[sizeof(struct in6_addr)]; diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 13a848a518e8..832846b73ea0 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -38,7 +38,7 @@ #include "rxe_queue.h" static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - unsigned opcode); + u32 opcode); static inline void retry_first_write_send(struct rxe_qp *qp, struct rxe_send_wqe *wqe, @@ -121,7 +121,7 @@ void rnr_nak_timer(unsigned long data) { struct rxe_qp *qp = (struct rxe_qp *)data; - pr_debug("rnr nak timer fired\n"); + pr_debug("qp#%d rnr nak timer fired\n", qp_num(qp)); rxe_run_task(&qp->req.task, 1); } @@ -187,7 +187,7 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) return wqe; } -static int next_opcode_rc(struct rxe_qp *qp, unsigned opcode, int fits) +static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits) { switch (opcode) { case IB_WR_RDMA_WRITE: @@ -259,7 +259,7 @@ static int next_opcode_rc(struct rxe_qp *qp, unsigned opcode, int fits) return -EINVAL; } -static int next_opcode_uc(struct rxe_qp *qp, unsigned opcode, int fits) +static int next_opcode_uc(struct rxe_qp *qp, u32 opcode, int fits) { switch (opcode) { case IB_WR_RDMA_WRITE: @@ -311,7 +311,7 @@ static int next_opcode_uc(struct rxe_qp *qp, unsigned opcode, int fits) } static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - unsigned opcode) + u32 opcode) { int fits = (wqe->dma.resid <= qp->mtu); @@ -588,7 +588,7 @@ int rxe_requester(void *arg) struct rxe_pkt_info pkt; struct sk_buff *skb; struct rxe_send_wqe *wqe; - unsigned mask; + enum rxe_hdr_mask mask; int payload; int mtu; int opcode; @@ -626,7 +626,8 @@ next_wqe: rmr = rxe_pool_get_index(&rxe->mr_pool, wqe->wr.ex.invalidate_rkey >> 8); if (!rmr) { - pr_err("No mr for key %#x\n", wqe->wr.ex.invalidate_rkey); + pr_err("No mr for key %#x\n", + wqe->wr.ex.invalidate_rkey); wqe->state = wqe_state_error; wqe->status = IB_WC_MW_BIND_ERR; goto exit; @@ -702,12 +703,12 @@ next_wqe: skb = init_req_packet(qp, wqe, opcode, payload, &pkt); if (unlikely(!skb)) { - pr_err("Failed allocating skb\n"); + pr_err("qp#%d Failed allocating skb\n", qp_num(qp)); goto err; } if (fill_packet(qp, wqe, &pkt, skb, payload)) { - pr_debug("Error during fill packet\n"); + pr_debug("qp#%d Error during fill packet\n", qp_num(qp)); goto err; } diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index d544b5e84797..dd3d88adc003 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -749,6 +749,18 @@ static enum resp_states read_reply(struct rxe_qp *qp, return state; } +static void build_rdma_network_hdr(union rdma_network_hdr *hdr, + struct rxe_pkt_info *pkt) +{ + struct sk_buff *skb = PKT_TO_SKB(pkt); + + memset(hdr, 0, sizeof(*hdr)); + if (skb->protocol == htons(ETH_P_IP)) + memcpy(&hdr->roce4grh, ip_hdr(skb), sizeof(hdr->roce4grh)); + else if (skb->protocol == htons(ETH_P_IPV6)) + memcpy(&hdr->ibgrh, ipv6_hdr(skb), sizeof(hdr->ibgrh)); +} + /* Executes a new request. A retried request never reach that function (send * and writes are discarded, and reads and atomics are retried elsewhere. */ @@ -761,13 +773,8 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) { union rdma_network_hdr hdr; - struct sk_buff *skb = PKT_TO_SKB(pkt); - memset(&hdr, 0, sizeof(hdr)); - if (skb->protocol == htons(ETH_P_IP)) - memcpy(&hdr.roce4grh, ip_hdr(skb), sizeof(hdr.roce4grh)); - else if (skb->protocol == htons(ETH_P_IPV6)) - memcpy(&hdr.ibgrh, ipv6_hdr(skb), sizeof(hdr.ibgrh)); + build_rdma_network_hdr(&hdr, pkt); err = send_data_in(qp, &hdr, sizeof(hdr)); if (err) @@ -881,7 +888,8 @@ static enum resp_states do_complete(struct rxe_qp *qp, rmr = rxe_pool_get_index(&rxe->mr_pool, wc->ex.invalidate_rkey >> 8); if (unlikely(!rmr)) { - pr_err("Bad rkey %#x invalidation\n", wc->ex.invalidate_rkey); + pr_err("Bad rkey %#x invalidation\n", + wc->ex.invalidate_rkey); return RESPST_ERROR; } rmr->state = RXE_MEM_STATE_FREE; @@ -1208,7 +1216,8 @@ int rxe_responder(void *arg) } while (1) { - pr_debug("state = %s\n", resp_state_name[state]); + pr_debug("qp#%d state = %s\n", qp_num(qp), + resp_state_name[state]); switch (state) { case RESPST_GET_REQ: state = get_req(qp, &pkt); diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c index cf8e77800046..d5ed7571128f 100644 --- a/drivers/infiniband/sw/rxe/rxe_sysfs.c +++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c @@ -79,7 +79,7 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp) len = sanitize_arg(val, intf, sizeof(intf)); if (!len) { - pr_err("rxe: add: invalid interface name\n"); + pr_err("add: invalid interface name\n"); err = -EINVAL; goto err; } @@ -92,20 +92,20 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp) } if (net_to_rxe(ndev)) { - pr_err("rxe: already configured on %s\n", intf); + pr_err("already configured on %s\n", intf); err = -EINVAL; goto err; } rxe = rxe_net_add(ndev); if (!rxe) { - pr_err("rxe: failed to add %s\n", intf); + pr_err("failed to add %s\n", intf); err = -EINVAL; goto err; } rxe_set_port_state(ndev); - pr_info("rxe: added %s to %s\n", rxe->ib_dev.name, intf); + pr_info("added %s to %s\n", rxe->ib_dev.name, intf); err: if (ndev) dev_put(ndev); @@ -120,7 +120,7 @@ static int rxe_param_set_remove(const char *val, const struct kernel_param *kp) len = sanitize_arg(val, intf, sizeof(intf)); if (!len) { - pr_err("rxe: add: invalid interface name\n"); + pr_err("add: invalid interface name\n"); return -EINVAL; } @@ -133,7 +133,7 @@ static int rxe_param_set_remove(const char *val, const struct kernel_param *kp) rxe = get_rxe_by_name(intf); if (!rxe) { - pr_err("rxe: not configured on %s\n", intf); + pr_err("not configured on %s\n", intf); return -EINVAL; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index a5af6917fc1c..19841c863daf 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -100,10 +100,12 @@ static int rxe_query_port(struct ib_device *dev, rxe->ndev->ethtool_ops->get_settings(rxe->ndev, &cmd); speed = cmd.speed; } else { - pr_warn("%s speed is unknown, defaulting to 1000\n", rxe->ndev->name); + pr_warn("%s speed is unknown, defaulting to 1000\n", + rxe->ndev->name); speed = 1000; } - rxe_eth_speed_to_ib_speed(speed, &attr->active_speed, &attr->active_width); + rxe_eth_speed_to_ib_speed(speed, &attr->active_speed, + &attr->active_width); mutex_unlock(&rxe->usdev_lock); return 0; @@ -761,7 +763,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr, } static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr, - unsigned mask, u32 length) + unsigned int mask, u32 length) { int err; struct rxe_sq *sq = &qp->sq; @@ -1145,8 +1147,8 @@ static int rxe_set_page(struct ib_mr *ibmr, u64 addr) return 0; } -static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, - unsigned int *sg_offset) +static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset) { struct rxe_mem *mr = to_rmr(ibmr); int n;