From 969a60f9db3f879f95bd37026a3c3bf02cc2568f Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Jul 2008 23:48:43 -0700 Subject: [PATCH 01/52] IB/srp: Remove use of cached P_Key/GID queries The SRP initiator is currently using ib_find_cached_pkey() and ib_get_cached_gid() in situations where the uncached ib_find_pkey() and ib_query_gid() functions serve just as well: sleeping is allowed and performance is not an issue. Since we want to eliminate the cached operations in the long term, convert SRP to use the uncached variants. Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 435145709dd6..81cc59ca5595 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -49,8 +49,6 @@ #include #include -#include - #include "ib_srp.h" #define DRV_NAME "ib_srp" @@ -183,10 +181,10 @@ static int srp_init_qp(struct srp_target_port *target, if (!attr) return -ENOMEM; - ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev, - target->srp_host->port, - be16_to_cpu(target->path.pkey), - &attr->pkey_index); + ret = ib_find_pkey(target->srp_host->srp_dev->dev, + target->srp_host->port, + be16_to_cpu(target->path.pkey), + &attr->pkey_index); if (ret) goto out; @@ -1883,8 +1881,7 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto err; - ib_get_cached_gid(host->srp_dev->dev, host->port, 0, - &target->path.sgid); + ib_query_gid(host->srp_dev->dev, host->port, 0, &target->path.sgid); shost_printk(KERN_DEBUG, target->scsi_host, PFX "new target: id_ext %016llx ioc_guid %016llx pkey %04x " From 929555a2baed9b0b050d03532655bfd721a43c44 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Mon, 14 Jul 2008 23:48:43 -0700 Subject: [PATCH 02/52] RDMA/nes: Remove unnecessary memset() Remove an explicit memset(..., 0, ...) of a 'listener' structure allocated with kzalloc(). Signed-off-by: Christophe Jaillet Acked-by: Faisal Latif Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_cm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 9a4b40fae40d..6aa531d5276d 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1603,7 +1603,6 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, return NULL; } - memset(listener, 0, sizeof(struct nes_cm_listener)); listener->loc_addr = htonl(cm_info->loc_addr); listener->loc_port = htons(cm_info->loc_port); listener->reused_node = 0; From a9474917099e007c0f51d5474394b5890111614f Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 14 Jul 2008 23:48:43 -0700 Subject: [PATCH 03/52] RDMA: Fix license text The license text for several files references a third software license that was inadvertently copied in. Update the license to what was intended. This update was based on a request from HP. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/addr.c | 41 ++++++++++++++++++--------------- drivers/infiniband/core/cma.c | 42 +++++++++++++++++++--------------- include/rdma/ib_addr.h | 42 +++++++++++++++++++--------------- include/rdma/rdma_cm.h | 42 +++++++++++++++++++--------------- include/rdma/rdma_cm_ib.h | 42 +++++++++++++++++++--------------- 5 files changed, 115 insertions(+), 94 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 781ea5950373..e4eb8be3bb0c 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -4,28 +4,33 @@ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * - * This Software is licensed under one of the following licenses: + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #include diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 671f13738054..e5bd6172a1f6 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4,29 +4,33 @@ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. * - * This Software is licensed under one of the following licenses: + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #include diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index c36750ff6ae8..b42bdd000419 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -2,29 +2,33 @@ * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * - * This Software is licensed under one of the following licenses: + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #if !defined(IB_ADDR_H) diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 010f876f41d8..d8f9a95541c0 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -2,29 +2,33 @@ * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * - * This Software is licensed under one of the following licenses: + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #if !defined(RDMA_CM_H) diff --git a/include/rdma/rdma_cm_ib.h b/include/rdma/rdma_cm_ib.h index 950424b38f16..2389c3b45404 100644 --- a/include/rdma/rdma_cm_ib.h +++ b/include/rdma/rdma_cm_ib.h @@ -1,29 +1,33 @@ /* * Copyright (c) 2006 Intel Corporation. All rights reserved. * - * This Software is licensed under one of the following licenses: + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #if !defined(RDMA_CM_IB_H) From 164ba0893c27a216557396320b6063fdac040392 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Mon, 14 Jul 2008 23:48:43 -0700 Subject: [PATCH 04/52] IB/sa: Fail requests made while creating new SM AH This patch solves a race that occurs after an event occurs that causes the SA query module to flush its SM address handle (AH). When SM AH becomes invalid and needs an update it is handled by the global workqueue. On the other hand this event is also handled in the IPoIB driver by queuing work in the ipoib_workqueue that does multicast joins. Although queuing is in the right order, it is done to 2 different workqueues and so there is no guarantee that the first to be queued is the first to be executed. This causes a problem because IPoIB may end up sending an request to the old SM, which will take a long time to time out (since the old SM is gone); this leads to a much longer than necessary interruption in multicast traffer. The patch sets the SA query module's SM AH to NULL when the event occurs, and until update_sm_ah() is done, any request that needs sm_ah fails with -EAGAIN return status. For consumers, the patch doesn't make things worse. Before the patch, MADs are sent to the wrong SM so the request gets lost. Consumers can be improved if they examine the return code and respond to EAGAIN properly but even without an improvement the situation is not getting worse. Signed-off-by: Moni Levy Signed-off-by: Moni Shoua Signed-off-by: Roland Dreier --- drivers/infiniband/core/sa_query.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index cf474ec27070..78ea8157d622 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -361,7 +361,7 @@ static void update_sm_ah(struct work_struct *work) { struct ib_sa_port *port = container_of(work, struct ib_sa_port, update_task); - struct ib_sa_sm_ah *new_ah, *old_ah; + struct ib_sa_sm_ah *new_ah; struct ib_port_attr port_attr; struct ib_ah_attr ah_attr; @@ -397,12 +397,9 @@ static void update_sm_ah(struct work_struct *work) } spin_lock_irq(&port->ah_lock); - old_ah = port->sm_ah; port->sm_ah = new_ah; spin_unlock_irq(&port->ah_lock); - if (old_ah) - kref_put(&old_ah->ref, free_sm_ah); } static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event) @@ -413,8 +410,17 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event event->event == IB_EVENT_PKEY_CHANGE || event->event == IB_EVENT_SM_CHANGE || event->event == IB_EVENT_CLIENT_REREGISTER) { - struct ib_sa_device *sa_dev; - sa_dev = container_of(handler, typeof(*sa_dev), event_handler); + unsigned long flags; + struct ib_sa_device *sa_dev = + container_of(handler, typeof(*sa_dev), event_handler); + struct ib_sa_port *port = + &sa_dev->port[event->element.port_num - sa_dev->start_port]; + + spin_lock_irqsave(&port->ah_lock, flags); + if (port->sm_ah) + kref_put(&port->sm_ah->ref, free_sm_ah); + port->sm_ah = NULL; + spin_unlock_irqrestore(&port->ah_lock, flags); schedule_work(&sa_dev->port[event->element.port_num - sa_dev->start_port].update_task); @@ -519,6 +525,10 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask) unsigned long flags; spin_lock_irqsave(&query->port->ah_lock, flags); + if (!query->port->sm_ah) { + spin_unlock_irqrestore(&query->port->ah_lock, flags); + return -EAGAIN; + } kref_get(&query->port->sm_ah->ref); query->sm_ah = query->port->sm_ah; spin_unlock_irqrestore(&query->port->ah_lock, flags); From 9670e553915e67fb68f13258644342c68dc26b84 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:44 -0700 Subject: [PATCH 05/52] IB/mlx4: Optimize QP stamping The idea is that for QPs with fixed size work requests (eg selective signaling QPs), before stamping the WQE, we read the value of the DS field, which gives the effective size of the descriptor as used in the previous post. Then we stamp only that area, since the rest of the descriptor is already stamped. When initializing the send queue buffer, make sure the DS field is initialized to the max descriptor size so that the subsequent stamping will be done on the entire descriptor area. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index a80df22deae8..4b0ac5d68c46 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -129,9 +129,10 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) int ind; void *buf; __be32 stamp; + struct mlx4_wqe_ctrl_seg *ctrl; - s = roundup(size, 1U << qp->sq.wqe_shift); if (qp->sq_max_wqes_per_wr > 1) { + s = roundup(size, 1U << qp->sq.wqe_shift); for (i = 0; i < s; i += 64) { ind = (i >> qp->sq.wqe_shift) + n; stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) : @@ -141,7 +142,8 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) *wqe = stamp; } } else { - buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); + ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); + s = (ctrl->fence_size & 0x3f) << 4; for (i = 64; i < s; i += 64) { wqe = buf + i; *wqe = cpu_to_be32(0xffffffff); @@ -1063,6 +1065,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, for (i = 0; i < qp->sq.wqe_cnt; ++i) { ctrl = get_send_wqe(qp, i); ctrl->owner_opcode = cpu_to_be32(1 << 31); + if (qp->sq_max_wqes_per_wr == 1) + ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4); stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift); } From fd91b1bf1bb6fb443cb8c7600c7314f093b31f40 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Mon, 14 Jul 2008 23:48:44 -0700 Subject: [PATCH 06/52] IB/ipath: Simplify code using ARRAY_SIZE() macro Signed-off-by: Robert P. J. Day Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_iba7220.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c index 8eee7830f042..fb70712ac85c 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba7220.c +++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c @@ -2228,8 +2228,8 @@ static void ipath_autoneg_send(struct ipath_devdata *dd, int which) 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x40000001, 0x1388, 0x15e, /* rest 0's */ }; - dcnt = sizeof(madpayload_start)/sizeof(madpayload_start[0]); - hcnt = sizeof(hdr)/sizeof(hdr[0]); + dcnt = ARRAY_SIZE(madpayload_start); + hcnt = ARRAY_SIZE(hdr); if (!swapped) { /* for maintainability, do it at runtime */ for (i = 0; i < hcnt; i++) { From 4deccd6d95f1f1536dad3c842e39c1ace577329d Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Mon, 14 Jul 2008 23:48:44 -0700 Subject: [PATCH 07/52] RDMA: Improve include file coding style Remove subversion $Id lines and improve readability by fixing other coding style problems pointed out by checkpatch.pl. Signed-off-by: Dotan Barak Signed-off-by: Roland Dreier --- include/rdma/ib_cache.h | 2 -- include/rdma/ib_cm.h | 2 -- include/rdma/ib_fmr_pool.h | 4 +--- include/rdma/ib_mad.h | 17 +++++++---------- include/rdma/ib_pack.h | 2 -- include/rdma/ib_sa.h | 2 -- include/rdma/ib_smi.h | 4 +--- include/rdma/ib_user_cm.h | 2 -- include/rdma/ib_user_mad.h | 2 -- include/rdma/ib_user_verbs.h | 2 -- include/rdma/ib_verbs.h | 6 ++---- include/rdma/iw_cm.h | 2 +- include/rdma/rdma_cm.h | 10 +++++----- 13 files changed, 17 insertions(+), 40 deletions(-) diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index f179d233ffc3..00a2b8ec327f 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_cache.h 1349 2004-12-16 21:09:43Z roland $ */ #ifndef _IB_CACHE_H diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index a627c8682d2f..ec7c6d99ed3f 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -31,8 +31,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_cm.h 4311 2005-12-05 18:42:01Z sean.hefty $ */ #if !defined(IB_CM_H) #define IB_CM_H diff --git a/include/rdma/ib_fmr_pool.h b/include/rdma/ib_fmr_pool.h index 00dadbf94e1d..f62b842e6596 100644 --- a/include/rdma/ib_fmr_pool.h +++ b/include/rdma/ib_fmr_pool.h @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_fmr_pool.h 2730 2005-06-28 16:43:03Z sean.hefty $ */ #if !defined(IB_FMR_POOL_H) @@ -61,7 +59,7 @@ struct ib_fmr_pool_param { int pool_size; int dirty_watermark; void (*flush_function)(struct ib_fmr_pool *pool, - void * arg); + void *arg); void *flush_arg; unsigned cache:1; }; diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 7228c056b9e9..5f6c40fffcf4 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -32,11 +32,9 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_mad.h 5596 2006-03-03 01:00:07Z sean.hefty $ */ -#if !defined( IB_MAD_H ) +#if !defined(IB_MAD_H) #define IB_MAD_H #include @@ -194,8 +192,7 @@ struct ib_vendor_mad { u8 data[IB_MGMT_VENDOR_DATA]; }; -struct ib_class_port_info -{ +struct ib_class_port_info { u8 base_version; u8 class_version; __be16 capability_mask; @@ -614,11 +611,11 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent, * any class specific header, and MAD data area. * If @rmpp_active is set, the RMPP header will be initialized for sending. */ -struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, - u32 remote_qpn, u16 pkey_index, - int rmpp_active, - int hdr_len, int data_len, - gfp_t gfp_mask); +struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent, + u32 remote_qpn, u16 pkey_index, + int rmpp_active, + int hdr_len, int data_len, + gfp_t gfp_mask); /** * ib_is_mad_class_rmpp - returns whether given management class diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index f926020d6331..d7fc45c4eba9 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_pack.h 1349 2004-12-16 21:09:43Z roland $ */ #ifndef IB_PACK_H diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 942692b0b92e..3841c1aff692 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_sa.h 2811 2005-07-06 18:11:43Z halr $ */ #ifndef IB_SA_H diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h index f29af135ba83..aaca0878668f 100644 --- a/include/rdma/ib_smi.h +++ b/include/rdma/ib_smi.h @@ -32,11 +32,9 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_smi.h 1389 2004-12-27 22:56:47Z roland $ */ -#if !defined( IB_SMI_H ) +#if !defined(IB_SMI_H) #define IB_SMI_H #include diff --git a/include/rdma/ib_user_cm.h b/include/rdma/ib_user_cm.h index 37650afb982c..bd3d380781e0 100644 --- a/include/rdma/ib_user_cm.h +++ b/include/rdma/ib_user_cm.h @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_user_cm.h 4019 2005-11-11 00:33:09Z sean.hefty $ */ #ifndef IB_USER_CM_H diff --git a/include/rdma/ib_user_mad.h b/include/rdma/ib_user_mad.h index 29d2c7205a90..d6fce1cbdb90 100644 --- a/include/rdma/ib_user_mad.h +++ b/include/rdma/ib_user_mad.h @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_user_mad.h 2814 2005-07-06 19:14:09Z halr $ */ #ifndef IB_USER_MAD_H diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index 8d65bf0a625b..885254f20bb3 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -31,8 +31,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_user_verbs.h 4019 2005-11-11 00:33:09Z sean.hefty $ */ #ifndef IB_USER_VERBS_H diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 31d30b1852e8..5f5621bf70bd 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -34,8 +34,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_verbs.h 1349 2004-12-16 21:09:43Z roland $ */ #if !defined(IB_VERBS_H) @@ -777,7 +775,7 @@ struct ib_cq { struct ib_uobject *uobject; ib_comp_handler comp_handler; void (*event_handler)(struct ib_event *, void *); - void * cq_context; + void *cq_context; int cqe; atomic_t usecnt; /* count number of work queues */ }; @@ -883,7 +881,7 @@ struct ib_dma_mapping_ops { void (*sync_single_for_cpu)(struct ib_device *dev, u64 dma_handle, size_t size, - enum dma_data_direction dir); + enum dma_data_direction dir); void (*sync_single_for_device)(struct ib_device *dev, u64 dma_handle, size_t size, diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h index aeefa9b740dc..cbb822e8d791 100644 --- a/include/rdma/iw_cm.h +++ b/include/rdma/iw_cm.h @@ -62,7 +62,7 @@ struct iw_cm_event { struct sockaddr_in remote_addr; void *private_data; u8 private_data_len; - void* provider_data; + void *provider_data; }; /** diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index d8f9a95541c0..22bb2e7bab1a 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -61,11 +61,11 @@ enum rdma_cm_event_type { }; enum rdma_port_space { - RDMA_PS_SDP = 0x0001, - RDMA_PS_IPOIB= 0x0002, - RDMA_PS_TCP = 0x0106, - RDMA_PS_UDP = 0x0111, - RDMA_PS_SCTP = 0x0183 + RDMA_PS_SDP = 0x0001, + RDMA_PS_IPOIB = 0x0002, + RDMA_PS_TCP = 0x0106, + RDMA_PS_UDP = 0x0111, + RDMA_PS_SCTP = 0x0183 }; struct rdma_addr { From f3781d2e89f12dd5afa046dc56032af6e39bd116 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Jul 2008 23:48:44 -0700 Subject: [PATCH 08/52] RDMA: Remove subversion $Id tags They don't get updated by git and so they're worse than useless. Signed-off-by: Roland Dreier --- drivers/infiniband/core/agent.h | 2 -- drivers/infiniband/core/cache.c | 2 -- drivers/infiniband/core/cm.c | 2 -- drivers/infiniband/core/core_priv.h | 2 -- drivers/infiniband/core/device.c | 2 -- drivers/infiniband/core/fmr_pool.c | 2 -- drivers/infiniband/core/mad_priv.h | 2 -- drivers/infiniband/core/mad_rmpp.c | 2 -- drivers/infiniband/core/mad_rmpp.h | 2 -- drivers/infiniband/core/packer.c | 2 -- drivers/infiniband/core/sa_query.c | 2 -- drivers/infiniband/core/sysfs.c | 2 -- drivers/infiniband/core/ucm.c | 2 -- drivers/infiniband/core/ud_header.c | 2 -- drivers/infiniband/core/umem.c | 2 -- drivers/infiniband/core/user_mad.c | 2 -- drivers/infiniband/core/uverbs.h | 2 -- drivers/infiniband/core/uverbs_cmd.c | 2 -- drivers/infiniband/core/uverbs_main.c | 2 -- drivers/infiniband/core/verbs.c | 2 -- drivers/infiniband/hw/mthca/mthca_allocator.c | 2 -- drivers/infiniband/hw/mthca/mthca_av.c | 2 -- drivers/infiniband/hw/mthca/mthca_catas.c | 2 -- drivers/infiniband/hw/mthca/mthca_cmd.c | 2 -- drivers/infiniband/hw/mthca/mthca_cmd.h | 2 -- drivers/infiniband/hw/mthca/mthca_config_reg.h | 2 -- drivers/infiniband/hw/mthca/mthca_cq.c | 2 -- drivers/infiniband/hw/mthca/mthca_dev.h | 2 -- drivers/infiniband/hw/mthca/mthca_doorbell.h | 2 -- drivers/infiniband/hw/mthca/mthca_eq.c | 2 -- drivers/infiniband/hw/mthca/mthca_mad.c | 2 -- drivers/infiniband/hw/mthca/mthca_main.c | 2 -- drivers/infiniband/hw/mthca/mthca_mcg.c | 2 -- drivers/infiniband/hw/mthca/mthca_memfree.c | 2 -- drivers/infiniband/hw/mthca/mthca_memfree.h | 2 -- drivers/infiniband/hw/mthca/mthca_mr.c | 2 -- drivers/infiniband/hw/mthca/mthca_pd.c | 2 -- drivers/infiniband/hw/mthca/mthca_profile.c | 2 -- drivers/infiniband/hw/mthca/mthca_profile.h | 2 -- drivers/infiniband/hw/mthca/mthca_provider.c | 2 -- drivers/infiniband/hw/mthca/mthca_provider.h | 2 -- drivers/infiniband/hw/mthca/mthca_qp.c | 2 -- drivers/infiniband/hw/mthca/mthca_reset.c | 2 -- drivers/infiniband/hw/mthca/mthca_srq.c | 2 -- drivers/infiniband/hw/mthca/mthca_uar.c | 2 -- drivers/infiniband/hw/mthca/mthca_user.h | 1 - drivers/infiniband/hw/mthca/mthca_wqe.h | 2 -- drivers/infiniband/ulp/ipoib/ipoib.h | 2 -- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 2 -- drivers/infiniband/ulp/ipoib/ipoib_fs.c | 2 -- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 2 -- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 -- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2 -- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 2 -- drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 2 -- drivers/infiniband/ulp/iser/iscsi_iser.c | 3 --- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 -- drivers/infiniband/ulp/iser/iser_initiator.c | 2 -- drivers/infiniband/ulp/iser/iser_memory.c | 2 -- drivers/infiniband/ulp/iser/iser_verbs.c | 2 -- drivers/infiniband/ulp/srp/ib_srp.c | 2 -- drivers/infiniband/ulp/srp/ib_srp.h | 2 -- 62 files changed, 124 deletions(-) diff --git a/drivers/infiniband/core/agent.h b/drivers/infiniband/core/agent.h index fb9ed1489f95..6669287009c2 100644 --- a/drivers/infiniband/core/agent.h +++ b/drivers/infiniband/core/agent.h @@ -32,8 +32,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: agent.h 1389 2004-12-27 22:56:47Z roland $ */ #ifndef __AGENT_H_ diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index e85f7013de57..68883565b725 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -31,8 +31,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: cache.c 1349 2004-12-16 21:09:43Z roland $ */ #include diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index a47fe64e5c39..55738eead3bf 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -31,8 +31,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: cm.c 4311 2005-12-05 18:42:01Z sean.hefty $ */ #include diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 7ad47a4b166b..05ac36e6acdb 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: core_priv.h 1349 2004-12-16 21:09:43Z roland $ */ #ifndef _CORE_PRIV_H diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 5ac5ffee05cb..7913b804311e 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: device.c 1349 2004-12-16 21:09:43Z roland $ */ #include diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 1286dc1b98b2..4507043d24c8 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: fmr_pool.c 2730 2005-06-28 16:43:03Z sean.hefty $ */ #include diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 8b75010016ec..05ce331733b0 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mad_priv.h 5596 2006-03-03 01:00:07Z sean.hefty $ */ #ifndef __IB_MAD_PRIV_H__ diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index a5e2a310f312..d0ef7d61c037 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mad_rmpp.c 1921 2005-03-02 22:58:44Z sean.hefty $ */ #include "mad_priv.h" diff --git a/drivers/infiniband/core/mad_rmpp.h b/drivers/infiniband/core/mad_rmpp.h index f0616fd22494..3d336bff1148 100644 --- a/drivers/infiniband/core/mad_rmpp.h +++ b/drivers/infiniband/core/mad_rmpp.h @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mad_rmpp.h 1921 2005-02-25 22:58:44Z sean.hefty $ */ #ifndef __MAD_RMPP_H__ diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c index c972d7235764..019bd4b0863e 100644 --- a/drivers/infiniband/core/packer.c +++ b/drivers/infiniband/core/packer.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: packer.c 1349 2004-12-16 21:09:43Z roland $ */ #include diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 78ea8157d622..1341de793e51 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: sa_query.c 2811 2005-07-06 18:11:43Z halr $ */ #include diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 95756551cf7c..36a0ef97c6a5 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: sysfs.c 1349 2004-12-16 21:09:43Z roland $ */ #include "core_priv.h" diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index b25675faaaf5..9494005d1c9a 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ucm.c 4311 2005-12-05 18:42:01Z sean.hefty $ */ #include diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index 997c07db6d8f..8ec7876bedcf 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ud_header.c 1349 2004-12-16 21:09:43Z roland $ */ #include diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index a1768dbb0720..6f7c096abf13 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: uverbs_mem.c 2743 2005-06-28 22:27:59Z roland $ */ #include diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 208c7f34323c..268a2d23b7c9 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -31,8 +31,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: user_mad.c 5596 2006-03-03 01:00:07Z sean.hefty $ */ #include diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 376a57ce1b40..b3ea9587dc80 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -32,8 +32,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: uverbs.h 2559 2005-06-06 19:43:16Z roland $ */ #ifndef UVERBS_H diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 2c3bff5fe867..112b37cd6895 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -31,8 +31,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $ */ #include diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 0f34858e31e7..aeee856c4060 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -32,8 +32,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: uverbs_main.c 2733 2005-06-28 19:14:34Z roland $ */ #include diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 05042089de6e..9f399d3a42b4 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -34,8 +34,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: verbs.c 1349 2004-12-16 21:09:43Z roland $ */ #include diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c index a76306709618..c5ccc2daab60 100644 --- a/drivers/infiniband/hw/mthca/mthca_allocator.c +++ b/drivers/infiniband/hw/mthca/mthca_allocator.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_allocator.c 1349 2004-12-16 21:09:43Z roland $ */ #include diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index 4b111a852ff6..32f6c6315454 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_av.c 1349 2004-12-16 21:09:43Z roland $ */ #include diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index e948158a28d9..40573e48439c 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id$ */ #include diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 54d230ee7d63..c33e1c53c799 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_cmd.c 1349 2004-12-16 21:09:43Z roland $ */ #include diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h index 8928ca4a9325..6efd3265f248 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.h +++ b/drivers/infiniband/hw/mthca/mthca_cmd.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_cmd.h 1349 2004-12-16 21:09:43Z roland $ */ #ifndef MTHCA_CMD_H diff --git a/drivers/infiniband/hw/mthca/mthca_config_reg.h b/drivers/infiniband/hw/mthca/mthca_config_reg.h index afa56bfaab2e..75671f75cac4 100644 --- a/drivers/infiniband/hw/mthca/mthca_config_reg.h +++ b/drivers/infiniband/hw/mthca/mthca_config_reg.h @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_config_reg.h 1349 2004-12-16 21:09:43Z roland $ */ #ifndef MTHCA_CONFIG_REG_H diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 20401d2ba6b2..f788fce71ac7 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -32,8 +32,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_cq.c 1369 2004-12-20 16:17:07Z roland $ */ #include diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 7bc32f8e377e..2997d8d564ea 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -32,8 +32,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_dev.h 1349 2004-12-16 21:09:43Z roland $ */ #ifndef MTHCA_DEV_H diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h index b374dc395be1..14f51ef97d7e 100644 --- a/drivers/infiniband/hw/mthca/mthca_doorbell.h +++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_doorbell.h 1349 2004-12-16 21:09:43Z roland $ */ #include diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 8bde7f98e58a..4e36aa7cb3d2 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_eq.c 1382 2004-12-24 02:21:02Z roland $ */ #include diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 8b7e83e6e88f..640449582aba 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_mad.c 1349 2004-12-16 21:09:43Z roland $ */ #include diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 200cf13fc9bb..fb9f91b60f30 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_main.c 1396 2004-12-28 04:10:27Z roland $ */ #include diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c index a8ad072be074..3f5f94879208 100644 --- a/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_mcg.c 1349 2004-12-16 21:09:43Z roland $ */ #include diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index d5862e5d99a0..1f7d1a29d2a8 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id$ */ #include diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h index a1ab06847b75..da9b8f9b884f 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.h +++ b/drivers/infiniband/hw/mthca/mthca_memfree.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id$ */ #ifndef MTHCA_MEMFREE_H diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 820205dec560..8489b1e81c0f 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_mr.c 1349 2004-12-16 21:09:43Z roland $ */ #include diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c index c1e950764bd8..266f14e47406 100644 --- a/drivers/infiniband/hw/mthca/mthca_pd.c +++ b/drivers/infiniband/hw/mthca/mthca_pd.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_pd.c 1349 2004-12-16 21:09:43Z roland $ */ #include diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c index 605a8d57fac6..d168c2540611 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.c +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_profile.c 1349 2004-12-16 21:09:43Z roland $ */ #include diff --git a/drivers/infiniband/hw/mthca/mthca_profile.h b/drivers/infiniband/hw/mthca/mthca_profile.h index e76cb62d8e32..62b009cc8730 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.h +++ b/drivers/infiniband/hw/mthca/mthca_profile.h @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_profile.h 1349 2004-12-16 21:09:43Z roland $ */ #ifndef MTHCA_PROFILE_H diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index be34f99ca625..87ad889e367b 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -32,8 +32,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_provider.c 4859 2006-01-09 21:55:10Z roland $ */ #include diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 934bf9544037..c621f8794b88 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_provider.h 1349 2004-12-16 21:09:43Z roland $ */ #ifndef MTHCA_PROVIDER_H diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 09dc3614cf2c..3b1c5baf13b1 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -31,8 +31,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_qp.c 1355 2004-12-17 15:23:43Z roland $ */ #include diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c index 91934f2d9dba..acb6817f6060 100644 --- a/drivers/infiniband/hw/mthca/mthca_reset.c +++ b/drivers/infiniband/hw/mthca/mthca_reset.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_reset.c 1349 2004-12-16 21:09:43Z roland $ */ #include diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index a5ffff6e1026..4fabe62aab8a 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_srq.c 3047 2005-08-10 03:59:35Z roland $ */ #include diff --git a/drivers/infiniband/hw/mthca/mthca_uar.c b/drivers/infiniband/hw/mthca/mthca_uar.c index 8b728486410d..ca5900c96fcf 100644 --- a/drivers/infiniband/hw/mthca/mthca_uar.c +++ b/drivers/infiniband/hw/mthca/mthca_uar.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id$ */ #include /* PAGE_SHIFT */ diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h index e1262c942db8..5fe56e810739 100644 --- a/drivers/infiniband/hw/mthca/mthca_user.h +++ b/drivers/infiniband/hw/mthca/mthca_user.h @@ -29,7 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * */ #ifndef MTHCA_USER_H diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h index b3551a8dea1d..341a5ae881c1 100644 --- a/drivers/infiniband/hw/mthca/mthca_wqe.h +++ b/drivers/infiniband/hw/mthca/mthca_wqe.h @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_wqe.h 3047 2005-08-10 03:59:35Z roland $ */ #ifndef MTHCA_WQE_H diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index ca126fc2b853..0dcbab3203c9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib.h 1358 2004-12-17 22:00:11Z roland $ */ #ifndef _IPOIB_H diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 97e67d36378f..91c959299910 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id$ */ #include diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 8b882bbd1d05..961c585da216 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_fs.c 1389 2004-12-27 22:56:47Z roland $ */ #include diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f429bce24c20..eca8518d79a0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -31,8 +31,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_ib.c 1386 2004-12-27 16:23:17Z roland $ */ #include diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 2442090ac8d1..f217b1edd0ac 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_main.c 1377 2004-12-23 19:57:12Z roland $ */ #include "ipoib.h" diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 3f663fb852c1..4a6538b9301a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_multicast.c 1362 2004-12-18 15:56:29Z roland $ */ #include diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 8766d29ce3b7..810790ae7530 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $ */ #include "ipoib.h" diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 1cdb5cfb0ff1..b08eb56196d3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib_vlan.c 1349 2004-12-16 21:09:43Z roland $ */ #include diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index aeb58cae9a3f..356fac6d105a 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -42,9 +42,6 @@ * Zhenyu Wang * Modified by: * Erez Zilber - * - * - * $Id: iscsi_iser.c 6965 2006-05-07 11:36:20Z ogerlitz $ */ #include diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index a8c1b300e34d..0e10703cf59e 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -36,8 +36,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: iscsi_iser.h 7051 2006-05-10 12:29:11Z ogerlitz $ */ #ifndef __ISCSI_ISER_H__ #define __ISCSI_ISER_H__ diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 08dc81c46f41..31ad498bdc51 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: iser_initiator.c 6964 2006-05-07 11:11:43Z ogerlitz $ */ #include #include diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index cac50c4dc159..81e49cb10ed3 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: iser_memory.c 6964 2006-05-07 11:11:43Z ogerlitz $ */ #include #include diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index d19cfe605ebb..77cabee7cc08 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: iser_verbs.c 7051 2006-05-10 12:29:11Z ogerlitz $ */ #include #include diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 81cc59ca5595..ed7c5f72cb8b 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_srp.c 3932 2005-11-01 17:19:29Z roland $ */ #include diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 63d2ae724061..e185b907fc12 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_srp.h 3932 2005-11-01 17:19:29Z roland $ */ #ifndef IB_SRP_H From f89271da32bc1a636cf4eb078e615930886cd013 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:44 -0700 Subject: [PATCH 09/52] IPoIB: Copy small received SKBs in connected mode The connected mode implementation in the IPoIB driver has a large overhead in the way SKBs are handled in the receive flow. It usually allocates an SKB with as big as was used in the currently received SKB and moves unused fragments from the old SKB to the new one. This involves a loop on all the remaining fragments and incurs overhead on the CPU. This patch, for small SKBs, allocates an SKB just large enough to contain the received data and copies to it the data from the received SKB. The newly allocated SKB is passed to the stack and the old SKB is reposted. When running netperf, UDP small messages, without this pach I get: UDP UNIDIRECTIONAL SEND TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 14.4.3.178 (14.4.3.178) port 0 AF_INET Socket Message Elapsed Messages Size Size Time Okay Errors Throughput bytes bytes secs # # 10^6bits/sec 114688 128 10.00 5142034 0 526.31 114688 10.00 1130489 115.71 With this patch I get both send and receive at ~315 mbps. The reason that send performance actually slows down is as follows: When using this patch, the overhead of the CPU for handling RX packets is dramatically reduced. As a result, we do not experience RNR NAK messages from the receiver which cause the connection to be closed and reopened again; when the patch is not used, the receiver cannot handle the packets fast enough so there is less time to post new buffers and hence the mentioned RNR NACKs. So what happens is that the application *thinks* it posted a certain number of packets for transmission but these packets are flushed and do not really get transmitted. Since the connection gets opened and closed many times, each time netperf gets the CPU time that otherwise would have been given to IPoIB to actually transmit the packets. This can be verified when looking at the port counters -- the output of ifconfig and the oputput of netperf (this is for the case without the patch): tx packets ========== port counter: 1,543,996 ifconfig: 1,581,426 netperf: 5,142,034 rx packets ========== netperf 1,1304,089 Signed-off-by: Eli Cohen --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 + drivers/infiniband/ulp/ipoib/ipoib_cm.c | 19 +++++++++++++++++++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 6 ++++++ 3 files changed, 26 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 0dcbab3203c9..8754b364f229 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -95,6 +95,7 @@ enum { IPOIB_MCAST_FLAG_ATTACHED = 3, MAX_SEND_CQE = 16, + IPOIB_CM_COPYBREAK = 256, }; #define IPOIB_OP_RECV (1ul << 31) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 91c959299910..6223fc39af70 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -523,6 +523,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) u64 mapping[IPOIB_CM_RX_SG]; int frags; int has_srq; + struct sk_buff *small_skb; ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n", wr_id, wc->status); @@ -577,6 +578,23 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } } + if (wc->byte_len < IPOIB_CM_COPYBREAK) { + int dlen = wc->byte_len; + + small_skb = dev_alloc_skb(dlen + 12); + if (small_skb) { + skb_reserve(small_skb, 12); + ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0], + dlen, DMA_FROM_DEVICE); + skb_copy_from_linear_data(skb, small_skb->data, dlen); + ib_dma_sync_single_for_device(priv->ca, rx_ring[wr_id].mapping[0], + dlen, DMA_FROM_DEVICE); + skb_put(small_skb, dlen); + skb = small_skb; + goto copied; + } + } + frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; @@ -599,6 +617,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb); +copied: skb->protocol = ((struct ipoib_header *) skb->data)->proto; skb_reset_mac_header(skb); skb_pull(skb, IPOIB_ENCAP_LEN); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index f217b1edd0ac..bfe1dbf99207 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1302,6 +1302,12 @@ static int __init ipoib_init_module(void) ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); #endif + /* + * When copying small received packets, we only copy from the + * linear data part of the SKB, so we rely on this condition. + */ + BUILD_BUG_ON(IPOIB_CM_COPYBREAK > IPOIB_CM_HEAD_SIZE); + ret = ipoib_register_debugfs(); if (ret) return ret; From 00f7ec36c9324928e4cd23f02e6d8550f30c32ca Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 14 Jul 2008 23:48:45 -0700 Subject: [PATCH 10/52] RDMA/core: Add memory management extensions support This patch adds support for the IB "base memory management extension" (BMME) and the equivalent iWARP operations (which the iWARP verbs mandates all devices must implement). The new operations are: - Allocate an ib_mr for use in fast register work requests. - Allocate/free a physical buffer lists for use in fast register work requests. This allows device drivers to allocate this memory as needed for use in posting send requests (eg via dma_alloc_coherent). - New send queue work requests: * send with remote invalidate * fast register memory region * local invalidate memory region * RDMA read with invalidate local memory region (iWARP only) Consumer interface details: - A new device capability flag IB_DEVICE_MEM_MGT_EXTENSIONS is added to indicate device support for these features. - New send work request opcodes IB_WR_FAST_REG_MR, IB_WR_LOCAL_INV, IB_WR_RDMA_READ_WITH_INV are added. - A new consumer API function, ib_alloc_mr() is added to allocate fast register memory regions. - New consumer API functions, ib_alloc_fast_reg_page_list() and ib_free_fast_reg_page_list() are added to allocate and free device-specific memory for fast registration page lists. - A new consumer API function, ib_update_fast_reg_key(), is added to allow the key portion of the R_Key and L_Key of a fast registration MR to be updated. Consumers call this if desired before posting a IB_WR_FAST_REG_MR work request. Consumers can use this as follows: - MR is allocated with ib_alloc_mr(). - Page list memory is allocated with ib_alloc_fast_reg_page_list(). - MR R_Key/L_Key "key" field is updated with ib_update_fast_reg_key(). - MR made VALID and bound to a specific page list via ib_post_send(IB_WR_FAST_REG_MR) - MR made INVALID via ib_post_send(IB_WR_LOCAL_INV), ib_post_send(IB_WR_RDMA_READ_WITH_INV) or an incoming send with invalidate operation. - MR is deallocated with ib_dereg_mr() - page lists dealloced via ib_free_fast_reg_page_list(). Applications can allocate a fast register MR once, and then can repeatedly bind the MR to different physical block lists (PBLs) via posting work requests to a send queue (SQ). For each outstanding MR-to-PBL binding in the SQ pipe, a fast_reg_page_list needs to be allocated (the fast_reg_page_list is owned by the low-level driver from the consumer posting a work request until the request completes). Thus pipelining can be achieved while still allowing device-specific page_list processing. The 32-bit fast register memory key/STag is composed of a 24-bit index and an 8-bit key. The application can change the key each time it fast registers thus allowing more control over the peer's use of the key/STag (ie it can effectively be changed each time the rkey is rebound to a page list). Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 2 +- drivers/infiniband/core/verbs.c | 46 ++++++++++++++ drivers/infiniband/hw/ehca/ehca_reqs.c | 2 +- drivers/infiniband/hw/ipath/ipath_cq.c | 2 +- drivers/infiniband/hw/ipath/ipath_rc.c | 4 +- drivers/infiniband/hw/ipath/ipath_ruc.c | 4 +- drivers/infiniband/hw/ipath/ipath_uc.c | 8 +-- drivers/infiniband/hw/ipath/ipath_ud.c | 8 +-- drivers/infiniband/hw/mlx4/cq.c | 12 ++-- drivers/infiniband/hw/mthca/mthca_cq.c | 4 +- include/rdma/ib_user_verbs.h | 5 +- include/rdma/ib_verbs.h | 83 ++++++++++++++++++++++++- 12 files changed, 154 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 112b37cd6895..56feab6c251e 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -917,7 +917,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, resp->wc[i].opcode = wc[i].opcode; resp->wc[i].vendor_err = wc[i].vendor_err; resp->wc[i].byte_len = wc[i].byte_len; - resp->wc[i].imm_data = (__u32 __force) wc[i].imm_data; + resp->wc[i].ex.imm_data = (__u32 __force) wc[i].ex.imm_data; resp->wc[i].qp_num = wc[i].qp->qp_num; resp->wc[i].src_qp = wc[i].src_qp; resp->wc[i].wc_flags = wc[i].wc_flags; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 9f399d3a42b4..e0fbe5975866 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -753,6 +753,52 @@ int ib_dereg_mr(struct ib_mr *mr) } EXPORT_SYMBOL(ib_dereg_mr); +struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) +{ + struct ib_mr *mr; + + if (!pd->device->alloc_fast_reg_mr) + return ERR_PTR(-ENOSYS); + + mr = pd->device->alloc_fast_reg_mr(pd, max_page_list_len); + + if (!IS_ERR(mr)) { + mr->device = pd->device; + mr->pd = pd; + mr->uobject = NULL; + atomic_inc(&pd->usecnt); + atomic_set(&mr->usecnt, 0); + } + + return mr; +} +EXPORT_SYMBOL(ib_alloc_fast_reg_mr); + +struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device *device, + int max_page_list_len) +{ + struct ib_fast_reg_page_list *page_list; + + if (!device->alloc_fast_reg_page_list) + return ERR_PTR(-ENOSYS); + + page_list = device->alloc_fast_reg_page_list(device, max_page_list_len); + + if (!IS_ERR(page_list)) { + page_list->device = device; + page_list->max_page_list_len = max_page_list_len; + } + + return page_list; +} +EXPORT_SYMBOL(ib_alloc_fast_reg_page_list); + +void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) +{ + page_list->device->free_fast_reg_page_list(page_list); +} +EXPORT_SYMBOL(ib_free_fast_reg_page_list); + /* Memory windows */ struct ib_mw *ib_alloc_mw(struct ib_pd *pd) diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index f093b0033daf..b799b2710210 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -681,7 +681,7 @@ poll_cq_one_read_cqe: wc->dlid_path_bits = cqe->dlid; wc->src_qp = cqe->remote_qp_number; wc->wc_flags = cqe->w_completion_flags; - wc->imm_data = cpu_to_be32(cqe->immediate_data); + wc->ex.imm_data = cpu_to_be32(cqe->immediate_data); wc->sl = cqe->service_level; poll_cq_one_exit0: diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index a03bd28d9b48..d385e4168c97 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -82,7 +82,7 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) wc->uqueue[head].opcode = entry->opcode; wc->uqueue[head].vendor_err = entry->vendor_err; wc->uqueue[head].byte_len = entry->byte_len; - wc->uqueue[head].imm_data = (__u32 __force)entry->imm_data; + wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data; wc->uqueue[head].qp_num = entry->qp->qp_num; wc->uqueue[head].src_qp = entry->src_qp; wc->uqueue[head].wc_flags = entry->wc_flags; diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 108df667d2ee..97710522624d 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -1703,11 +1703,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: if (header_in_data) { - wc.imm_data = *(__be32 *) data; + wc.ex.imm_data = *(__be32 *) data; data += sizeof(__be32); } else { /* Immediate data comes after BTH */ - wc.imm_data = ohdr->u.imm_data; + wc.ex.imm_data = ohdr->u.imm_data; } hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index a4b5521567fe..af051f757663 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -331,7 +331,7 @@ again: switch (wqe->wr.opcode) { case IB_WR_SEND_WITH_IMM: wc.wc_flags = IB_WC_WITH_IMM; - wc.imm_data = wqe->wr.ex.imm_data; + wc.ex.imm_data = wqe->wr.ex.imm_data; /* FALLTHROUGH */ case IB_WR_SEND: if (!ipath_get_rwqe(qp, 0)) @@ -342,7 +342,7 @@ again: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) goto inv_err; wc.wc_flags = IB_WC_WITH_IMM; - wc.imm_data = wqe->wr.ex.imm_data; + wc.ex.imm_data = wqe->wr.ex.imm_data; if (!ipath_get_rwqe(qp, 1)) goto rnr_nak; /* FALLTHROUGH */ diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index 0596ec16fcbd..82cc588b8bf2 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -379,11 +379,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: if (header_in_data) { - wc.imm_data = *(__be32 *) data; + wc.ex.imm_data = *(__be32 *) data; data += sizeof(__be32); } else { /* Immediate data comes after BTH */ - wc.imm_data = ohdr->u.imm_data; + wc.ex.imm_data = ohdr->u.imm_data; } hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; @@ -483,11 +483,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): rdma_last_imm: if (header_in_data) { - wc.imm_data = *(__be32 *) data; + wc.ex.imm_data = *(__be32 *) data; data += sizeof(__be32); } else { /* Immediate data comes after BTH */ - wc.imm_data = ohdr->u.imm_data; + wc.ex.imm_data = ohdr->u.imm_data; } hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 77ca8ca74e78..36aa242c487c 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -96,7 +96,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) { wc.wc_flags = IB_WC_WITH_IMM; - wc.imm_data = swqe->wr.ex.imm_data; + wc.ex.imm_data = swqe->wr.ex.imm_data; } /* @@ -492,14 +492,14 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, if (qp->ibqp.qp_num > 1 && opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { if (header_in_data) { - wc.imm_data = *(__be32 *) data; + wc.ex.imm_data = *(__be32 *) data; data += sizeof(__be32); } else - wc.imm_data = ohdr->u.ud.imm_data; + wc.ex.imm_data = ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; hdrsize += sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { - wc.imm_data = 0; + wc.ex.imm_data = 0; wc.wc_flags = 0; } else { dev->n_pkt_drops++; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 4521319b1406..299f20832ab6 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -663,18 +663,18 @@ repoll: switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { case MLX4_RECV_OPCODE_RDMA_WRITE_IMM: - wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; - wc->wc_flags = IB_WC_WITH_IMM; - wc->imm_data = cqe->immed_rss_invalid; + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = cqe->immed_rss_invalid; break; case MLX4_RECV_OPCODE_SEND: wc->opcode = IB_WC_RECV; wc->wc_flags = 0; break; case MLX4_RECV_OPCODE_SEND_IMM: - wc->opcode = IB_WC_RECV; - wc->wc_flags = IB_WC_WITH_IMM; - wc->imm_data = cqe->immed_rss_invalid; + wc->opcode = IB_WC_RECV; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = cqe->immed_rss_invalid; break; } diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index f788fce71ac7..d9f4735c2b37 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -620,13 +620,13 @@ static inline int mthca_poll_one(struct mthca_dev *dev, case IB_OPCODE_SEND_LAST_WITH_IMMEDIATE: case IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE: entry->wc_flags = IB_WC_WITH_IMM; - entry->imm_data = cqe->imm_etype_pkey_eec; + entry->ex.imm_data = cqe->imm_etype_pkey_eec; entry->opcode = IB_WC_RECV; break; case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE: case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE: entry->wc_flags = IB_WC_WITH_IMM; - entry->imm_data = cqe->imm_etype_pkey_eec; + entry->ex.imm_data = cqe->imm_etype_pkey_eec; entry->opcode = IB_WC_RECV_RDMA_WITH_IMM; break; default: diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index 885254f20bb3..a17f77106149 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -289,7 +289,10 @@ struct ib_uverbs_wc { __u32 opcode; __u32 vendor_err; __u32 byte_len; - __u32 imm_data; + union { + __u32 imm_data; + __u32 invalidate_rkey; + } ex; __u32 qp_num; __u32 src_qp; __u32 wc_flags; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 5f5621bf70bd..74c24b908908 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -103,6 +103,7 @@ enum ib_device_cap_flags { */ IB_DEVICE_UD_IP_CSUM = (1<<18), IB_DEVICE_UD_TSO = (1<<19), + IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21), }; enum ib_atomic_cap { @@ -148,6 +149,7 @@ struct ib_device_attr { int max_srq; int max_srq_wr; int max_srq_sge; + unsigned int max_fast_reg_page_list_len; u16 max_pkeys; u8 local_ca_ack_delay; }; @@ -411,6 +413,8 @@ enum ib_wc_opcode { IB_WC_FETCH_ADD, IB_WC_BIND_MW, IB_WC_LSO, + IB_WC_LOCAL_INV, + IB_WC_FAST_REG_MR, /* * Set value of IB_WC_RECV so consumers can test if a completion is a * receive by testing (opcode & IB_WC_RECV). @@ -421,7 +425,8 @@ enum ib_wc_opcode { enum ib_wc_flags { IB_WC_GRH = 1, - IB_WC_WITH_IMM = (1<<1) + IB_WC_WITH_IMM = (1<<1), + IB_WC_WITH_INVALIDATE = (1<<2), }; struct ib_wc { @@ -431,7 +436,10 @@ struct ib_wc { u32 vendor_err; u32 byte_len; struct ib_qp *qp; - __be32 imm_data; + union { + __be32 imm_data; + u32 invalidate_rkey; + } ex; u32 src_qp; int wc_flags; u16 pkey_index; @@ -625,6 +633,9 @@ enum ib_wr_opcode { IB_WR_ATOMIC_FETCH_AND_ADD, IB_WR_LSO, IB_WR_SEND_WITH_INV, + IB_WR_RDMA_READ_WITH_INV, + IB_WR_LOCAL_INV, + IB_WR_FAST_REG_MR, }; enum ib_send_flags { @@ -641,6 +652,12 @@ struct ib_sge { u32 lkey; }; +struct ib_fast_reg_page_list { + struct ib_device *device; + u64 *page_list; + unsigned int max_page_list_len; +}; + struct ib_send_wr { struct ib_send_wr *next; u64 wr_id; @@ -673,6 +690,15 @@ struct ib_send_wr { u16 pkey_index; /* valid for GSI only */ u8 port_num; /* valid for DR SMPs on switch only */ } ud; + struct { + u64 iova_start; + struct ib_fast_reg_page_list *page_list; + unsigned int page_shift; + unsigned int page_list_len; + u32 length; + int access_flags; + u32 rkey; + } fast_reg; } wr; }; @@ -1011,6 +1037,11 @@ struct ib_device { int (*query_mr)(struct ib_mr *mr, struct ib_mr_attr *mr_attr); int (*dereg_mr)(struct ib_mr *mr); + struct ib_mr * (*alloc_fast_reg_mr)(struct ib_pd *pd, + int max_page_list_len); + struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device, + int page_list_len); + void (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list); int (*rereg_phys_mr)(struct ib_mr *mr, int mr_rereg_mask, struct ib_pd *pd, @@ -1804,6 +1835,54 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); */ int ib_dereg_mr(struct ib_mr *mr); +/** + * ib_alloc_fast_reg_mr - Allocates memory region usable with the + * IB_WR_FAST_REG_MR send work request. + * @pd: The protection domain associated with the region. + * @max_page_list_len: requested max physical buffer list length to be + * used with fast register work requests for this MR. + */ +struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); + +/** + * ib_alloc_fast_reg_page_list - Allocates a page list array + * @device - ib device pointer. + * @page_list_len - size of the page list array to be allocated. + * + * This allocates and returns a struct ib_fast_reg_page_list * and a + * page_list array that is at least page_list_len in size. The actual + * size is returned in max_page_list_len. The caller is responsible + * for initializing the contents of the page_list array before posting + * a send work request with the IB_WC_FAST_REG_MR opcode. + * + * The page_list array entries must be translated using one of the + * ib_dma_*() functions just like the addresses passed to + * ib_map_phys_fmr(). Once the ib_post_send() is issued, the struct + * ib_fast_reg_page_list must not be modified by the caller until the + * IB_WC_FAST_REG_MR work request completes. + */ +struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list( + struct ib_device *device, int page_list_len); + +/** + * ib_free_fast_reg_page_list - Deallocates a previously allocated + * page list array. + * @page_list - struct ib_fast_reg_page_list pointer to be deallocated. + */ +void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); + +/** + * ib_update_fast_reg_key - updates the key portion of the fast_reg MR + * R_Key and L_Key. + * @mr - struct ib_mr pointer to be updated. + * @newkey - new key to be used. + */ +static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey) +{ + mr->lkey = (mr->lkey & 0xffffff00) | newkey; + mr->rkey = (mr->rkey & 0xffffff00) | newkey; +} + /** * ib_alloc_mw - Allocates a memory window. * @pd: The protection domain associated with the memory window. From e7e55829999deaab3f43e201a087731c02c54cf9 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 14 Jul 2008 23:48:45 -0700 Subject: [PATCH 11/52] RDMA/cxgb3: MEM_MGT_EXTENSIONS support - set IB_DEVICE_MEM_MGT_EXTENSIONS capability bit if fw supports it. - set max_fast_reg_page_list_len device attribute. - add iwch_alloc_fast_reg_mr function. - add iwch_alloc_fastreg_pbl - add iwch_free_fastreg_pbl - adjust the WQ depth for kernel mode work queues to account for fastreg possibly taking 2 WR slots. - add fastreg_mr work request support. - add local_inv work request support. - add send_with_inv and send_with_se_inv work request support. - removed useless duplicate enums/defines for TPT/MW/MR stuff. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/cxio_hal.c | 15 ++- drivers/infiniband/hw/cxgb3/cxio_hal.h | 1 + drivers/infiniband/hw/cxgb3/cxio_wr.h | 90 ++++++++++--- drivers/infiniband/hw/cxgb3/iwch_cq.c | 15 ++- drivers/infiniband/hw/cxgb3/iwch_provider.c | 104 +++++++++++++- drivers/infiniband/hw/cxgb3/iwch_provider.h | 8 -- drivers/infiniband/hw/cxgb3/iwch_qp.c | 142 +++++++++++++------- 7 files changed, 293 insertions(+), 82 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index 3f441fc57c17..340e4181c761 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -145,7 +145,9 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid) } wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe)); memset(wqe, 0, sizeof(*wqe)); - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, qpid, 7); + build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, + T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 0, qpid, 7, + T3_SOPEOP); wqe->flags = cpu_to_be32(MODQP_WRITE_EC); sge_cmd = qpid << 8 | 3; wqe->sge_cmd = cpu_to_be64(sge_cmd); @@ -558,7 +560,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe)); memset(wqe, 0, sizeof(*wqe)); build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0, - T3_CTL_QP_TID, 7); + T3_CTL_QP_TID, 7, T3_SOPEOP); wqe->flags = cpu_to_be32(MODQP_WRITE_EC); sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3; wqe->sge_cmd = cpu_to_be64(sge_cmd); @@ -674,7 +676,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag, Q_GENBIT(rdev_p->ctrl_qp.wptr, T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID, - wr_len); + wr_len, T3_SOPEOP); if (flag == T3_COMPLETION_FLAG) ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID); len -= 96; @@ -816,6 +818,13 @@ int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag) 0, 0); } +int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr) +{ + *stag = T3_STAG_UNSET; + return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR, + 0, 0, 0ULL, 0, 0, pbl_size, pbl_addr); +} + int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) { struct t3_rdma_init_wr *wqe; diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h index 6e128f6bab05..25a880664e6b 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -165,6 +165,7 @@ int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size, u32 pbl_addr); int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid); +int cxio_allocate_stag(struct cxio_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr); int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag); int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr); void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb); diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index f1a25a821a45..de760e9f1cc6 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -72,7 +72,8 @@ enum t3_wr_opcode { T3_WR_BIND = FW_WROPCODE_RI_BIND_MW, T3_WR_RCV = FW_WROPCODE_RI_RECEIVE, T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT, - T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP + T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP, + T3_WR_FASTREG = FW_WROPCODE_RI_FASTREGISTER_MR } __attribute__ ((packed)); enum t3_rdma_opcode { @@ -89,7 +90,8 @@ enum t3_rdma_opcode { T3_FAST_REGISTER, T3_LOCAL_INV, T3_QP_MOD, - T3_BYPASS + T3_BYPASS, + T3_RDMA_READ_REQ_WITH_INV, } __attribute__ ((packed)); static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop) @@ -103,6 +105,7 @@ static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop) case T3_WR_BIND: return T3_BIND_MW; case T3_WR_INIT: return T3_RDMA_INIT; case T3_WR_QP_MOD: return T3_QP_MOD; + case T3_WR_FASTREG: return T3_FAST_REGISTER; default: break; } return -1; @@ -170,11 +173,54 @@ struct t3_send_wr { struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */ }; +#define T3_MAX_FASTREG_DEPTH 24 +#define T3_MAX_FASTREG_FRAG 10 + +struct t3_fastreg_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + __be32 stag; /* 2 */ + __be32 len; + __be32 va_base_hi; /* 3 */ + __be32 va_base_lo_fbo; + __be32 page_type_perms; /* 4 */ + __be32 reserved1; + __be64 pbl_addrs[0]; /* 5+ */ +}; + +/* + * If a fastreg wr spans multiple wqes, then the 2nd fragment look like this. + */ +struct t3_pbl_frag { + struct fw_riwrh wrh; /* 0 */ + __be64 pbl_addrs[14]; /* 1..14 */ +}; + +#define S_FR_PAGE_COUNT 24 +#define M_FR_PAGE_COUNT 0xff +#define V_FR_PAGE_COUNT(x) ((x) << S_FR_PAGE_COUNT) +#define G_FR_PAGE_COUNT(x) ((((x) >> S_FR_PAGE_COUNT)) & M_FR_PAGE_COUNT) + +#define S_FR_PAGE_SIZE 16 +#define M_FR_PAGE_SIZE 0x1f +#define V_FR_PAGE_SIZE(x) ((x) << S_FR_PAGE_SIZE) +#define G_FR_PAGE_SIZE(x) ((((x) >> S_FR_PAGE_SIZE)) & M_FR_PAGE_SIZE) + +#define S_FR_TYPE 8 +#define M_FR_TYPE 0x1 +#define V_FR_TYPE(x) ((x) << S_FR_TYPE) +#define G_FR_TYPE(x) ((((x) >> S_FR_TYPE)) & M_FR_TYPE) + +#define S_FR_PERMS 0 +#define M_FR_PERMS 0xff +#define V_FR_PERMS(x) ((x) << S_FR_PERMS) +#define G_FR_PERMS(x) ((((x) >> S_FR_PERMS)) & M_FR_PERMS) + struct t3_local_inv_wr { struct fw_riwrh wrh; /* 0 */ union t3_wrid wrid; /* 1 */ __be32 stag; /* 2 */ - __be32 reserved3; + __be32 reserved; }; struct t3_rdma_write_wr { @@ -193,7 +239,8 @@ struct t3_rdma_read_wr { struct fw_riwrh wrh; /* 0 */ union t3_wrid wrid; /* 1 */ u8 rdmaop; /* 2 */ - u8 reserved[3]; + u8 local_inv; + u8 reserved[2]; __be32 rem_stag; __be64 rem_to; /* 3 */ __be32 local_stag; /* 4 */ @@ -201,18 +248,6 @@ struct t3_rdma_read_wr { __be64 local_to; /* 5 */ }; -enum t3_addr_type { - T3_VA_BASED_TO = 0x0, - T3_ZERO_BASED_TO = 0x1 -} __attribute__ ((packed)); - -enum t3_mem_perms { - T3_MEM_ACCESS_LOCAL_READ = 0x1, - T3_MEM_ACCESS_LOCAL_WRITE = 0x2, - T3_MEM_ACCESS_REM_READ = 0x4, - T3_MEM_ACCESS_REM_WRITE = 0x8 -} __attribute__ ((packed)); - struct t3_bind_mw_wr { struct fw_riwrh wrh; /* 0 */ union t3_wrid wrid; /* 1 */ @@ -336,6 +371,11 @@ struct t3_genbit { __be64 genbit; }; +struct t3_wq_in_err { + u64 flit[13]; + u64 err; +}; + enum rdma_init_wr_flags { MPA_INITIATOR = (1<<0), PRIV_QP = (1<<1), @@ -346,13 +386,16 @@ union t3_wr { struct t3_rdma_write_wr write; struct t3_rdma_read_wr read; struct t3_receive_wr recv; + struct t3_fastreg_wr fastreg; + struct t3_pbl_frag pbl_frag; struct t3_local_inv_wr local_inv; struct t3_bind_mw_wr bind; struct t3_bypass_wr bypass; struct t3_rdma_init_wr init; struct t3_modify_qp_wr qp_mod; struct t3_genbit genbit; - u64 flit[16]; + struct t3_wq_in_err wq_in_err; + __be64 flit[16]; }; #define T3_SQ_CQE_FLIT 13 @@ -366,12 +409,18 @@ static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe) return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags)); } +enum t3_wr_hdr_bits { + T3_EOP = 1, + T3_SOP = 2, + T3_SOPEOP = T3_EOP|T3_SOP, +}; + static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op, enum t3_wr_flags flags, u8 genbit, u32 tid, - u8 len) + u8 len, u8 sopeop) { wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) | - V_FW_RIWR_SOPEOP(M_FW_RIWR_SOPEOP) | + V_FW_RIWR_SOPEOP(sopeop) | V_FW_RIWR_FLAGS(flags)); wmb(); wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) | @@ -404,6 +453,7 @@ enum tpt_addr_type { }; enum tpt_mem_perm { + TPT_MW_BIND = 0x10, TPT_LOCAL_READ = 0x8, TPT_LOCAL_WRITE = 0x4, TPT_REMOTE_READ = 0x2, @@ -659,7 +709,7 @@ struct t3_cq { static inline void cxio_set_wq_in_error(struct t3_wq *wq) { - wq->queue->flit[13] = 1; + wq->queue->wq_in_err.err = 1; } static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c index 4ee8ccd0a9e5..cf5474ae68ff 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cq.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c @@ -81,6 +81,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, wc->wr_id = cookie; wc->qp = &qhp->ibqp; wc->vendor_err = CQE_STATUS(cqe); + wc->wc_flags = 0; PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x " "lo 0x%x cookie 0x%llx\n", __func__, @@ -94,6 +95,11 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, else wc->byte_len = 0; wc->opcode = IB_WC_RECV; + if (CQE_OPCODE(cqe) == T3_SEND_WITH_INV || + CQE_OPCODE(cqe) == T3_SEND_WITH_SE_INV) { + wc->ex.invalidate_rkey = CQE_WRID_STAG(cqe); + wc->wc_flags |= IB_WC_WITH_INVALIDATE; + } } else { switch (CQE_OPCODE(cqe)) { case T3_RDMA_WRITE: @@ -105,17 +111,20 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, break; case T3_SEND: case T3_SEND_WITH_SE: + case T3_SEND_WITH_INV: + case T3_SEND_WITH_SE_INV: wc->opcode = IB_WC_SEND; break; case T3_BIND_MW: wc->opcode = IB_WC_BIND_MW; break; - /* these aren't supported yet */ - case T3_SEND_WITH_INV: - case T3_SEND_WITH_SE_INV: case T3_LOCAL_INV: + wc->opcode = IB_WC_LOCAL_INV; + break; case T3_FAST_REGISTER: + wc->opcode = IB_WC_FAST_REG_MR; + break; default: printk(KERN_ERR MOD "Unexpected opcode %d " "in the CQE received for QPID=0x%0x\n", diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 95f82cfb6c54..5d504f3ed68b 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -768,6 +768,68 @@ static int iwch_dealloc_mw(struct ib_mw *mw) return 0; } +static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth) +{ + struct iwch_dev *rhp; + struct iwch_pd *php; + struct iwch_mr *mhp; + u32 mmid; + u32 stag = 0; + int ret; + + php = to_iwch_pd(pd); + rhp = php->rhp; + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); + if (!mhp) + return ERR_PTR(-ENOMEM); + + mhp->rhp = rhp; + ret = iwch_alloc_pbl(mhp, pbl_depth); + if (ret) { + kfree(mhp); + return ERR_PTR(ret); + } + mhp->attr.pbl_size = pbl_depth; + ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid, + mhp->attr.pbl_size, mhp->attr.pbl_addr); + if (ret) { + iwch_free_pbl(mhp); + kfree(mhp); + return ERR_PTR(ret); + } + mhp->attr.pdid = php->pdid; + mhp->attr.type = TPT_NON_SHARED_MR; + mhp->attr.stag = stag; + mhp->attr.state = 1; + mmid = (stag) >> 8; + mhp->ibmr.rkey = mhp->ibmr.lkey = stag; + insert_handle(rhp, &rhp->mmidr, mhp, mmid); + PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); + return &(mhp->ibmr); +} + +static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl( + struct ib_device *device, + int page_list_len) +{ + struct ib_fast_reg_page_list *page_list; + + page_list = kmalloc(sizeof *page_list + page_list_len * sizeof(u64), + GFP_KERNEL); + if (!page_list) + return ERR_PTR(-ENOMEM); + + page_list->page_list = (u64 *)(page_list + 1); + page_list->max_page_list_len = page_list_len; + + return page_list; +} + +static void iwch_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list) +{ + kfree(page_list); +} + static int iwch_destroy_qp(struct ib_qp *ib_qp) { struct iwch_dev *rhp; @@ -843,6 +905,15 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, */ sqsize = roundup_pow_of_two(attrs->cap.max_send_wr); wqsize = roundup_pow_of_two(rqsize + sqsize); + + /* + * Kernel users need more wq space for fastreg WRs which can take + * 2 WR fragments. + */ + ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL; + if (!ucontext && wqsize < (rqsize + (2 * sqsize))) + wqsize = roundup_pow_of_two(rqsize + + roundup_pow_of_two(attrs->cap.max_send_wr * 2)); PDBG("%s wqsize %d sqsize %d rqsize %d\n", __func__, wqsize, sqsize, rqsize); qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); @@ -851,7 +922,6 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, qhp->wq.size_log2 = ilog2(wqsize); qhp->wq.rq_size_log2 = ilog2(rqsize); qhp->wq.sq_size_log2 = ilog2(sqsize); - ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL; if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) { kfree(qhp); @@ -1048,6 +1118,7 @@ static int iwch_query_device(struct ib_device *ibdev, props->max_mr = dev->attr.max_mem_regs; props->max_pd = dev->attr.max_pds; props->local_ca_ack_delay = 0; + props->max_fast_reg_page_list_len = T3_MAX_FASTREG_DEPTH; return 0; } @@ -1088,6 +1159,28 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); } +static int fw_supports_fastreg(struct iwch_dev *iwch_dev) +{ + struct ethtool_drvinfo info; + struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; + char *cp, *next; + unsigned fw_maj, fw_min; + + rtnl_lock(); + lldev->ethtool_ops->get_drvinfo(lldev, &info); + rtnl_unlock(); + + next = info.fw_version+1; + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_maj); + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_min); + + PDBG("%s maj %u min %u\n", __func__, fw_maj, fw_min); + + return fw_maj > 6 || (fw_maj == 6 && fw_min > 0); +} + static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf) { struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, @@ -1149,8 +1242,10 @@ int iwch_register_device(struct iwch_dev *dev) memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); dev->ibdev.owner = THIS_MODULE; - dev->device_cap_flags = - (IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW); + dev->device_cap_flags = IB_DEVICE_ZERO_STAG | + IB_DEVICE_MEM_WINDOW; + if (fw_supports_fastreg(dev)) + dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; dev->ibdev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | @@ -1202,6 +1297,9 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.alloc_mw = iwch_alloc_mw; dev->ibdev.bind_mw = iwch_bind_mw; dev->ibdev.dealloc_mw = iwch_dealloc_mw; + dev->ibdev.alloc_fast_reg_mr = iwch_alloc_fast_reg_mr; + dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl; + dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl; dev->ibdev.attach_mcast = iwch_multicast_attach; dev->ibdev.detach_mcast = iwch_multicast_detach; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index 836163fc5429..f5ceca05c435 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -296,14 +296,6 @@ static inline u32 iwch_ib_to_tpt_access(int acc) TPT_LOCAL_READ; } -static inline u32 iwch_ib_to_mwbind_access(int acc) -{ - return (acc & IB_ACCESS_REMOTE_WRITE ? T3_MEM_ACCESS_REM_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? T3_MEM_ACCESS_REM_READ : 0) | - (acc & IB_ACCESS_LOCAL_WRITE ? T3_MEM_ACCESS_LOCAL_WRITE : 0) | - T3_MEM_ACCESS_LOCAL_READ; -} - enum iwch_mmid_state { IWCH_STAG_STATE_VALID, IWCH_STAG_STATE_INVALID diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 992613799228..3b44300a3036 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -44,54 +44,39 @@ static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_SEND: - case IB_WR_SEND_WITH_IMM: if (wr->send_flags & IB_SEND_SOLICITED) wqe->send.rdmaop = T3_SEND_WITH_SE; else wqe->send.rdmaop = T3_SEND; wqe->send.rem_stag = 0; break; -#if 0 /* Not currently supported */ - case TYPE_SEND_INVALIDATE: - case TYPE_SEND_INVALIDATE_IMMEDIATE: - wqe->send.rdmaop = T3_SEND_WITH_INV; - wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey); + case IB_WR_SEND_WITH_INV: + if (wr->send_flags & IB_SEND_SOLICITED) + wqe->send.rdmaop = T3_SEND_WITH_SE_INV; + else + wqe->send.rdmaop = T3_SEND_WITH_INV; + wqe->send.rem_stag = cpu_to_be32(wr->ex.invalidate_rkey); break; - case TYPE_SEND_SE_INVALIDATE: - wqe->send.rdmaop = T3_SEND_WITH_SE_INV; - wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey); - break; -#endif default: - break; + return -EINVAL; } if (wr->num_sge > T3_MAX_SGE) return -EINVAL; wqe->send.reserved[0] = 0; wqe->send.reserved[1] = 0; wqe->send.reserved[2] = 0; - if (wr->opcode == IB_WR_SEND_WITH_IMM) { - plen = 4; - wqe->send.sgl[0].stag = wr->ex.imm_data; - wqe->send.sgl[0].len = __constant_cpu_to_be32(0); - wqe->send.num_sgle = __constant_cpu_to_be32(0); - *flit_cnt = 5; - } else { - plen = 0; - for (i = 0; i < wr->num_sge; i++) { - if ((plen + wr->sg_list[i].length) < plen) { - return -EMSGSIZE; - } - plen += wr->sg_list[i].length; - wqe->send.sgl[i].stag = - cpu_to_be32(wr->sg_list[i].lkey); - wqe->send.sgl[i].len = - cpu_to_be32(wr->sg_list[i].length); - wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); - } - wqe->send.num_sgle = cpu_to_be32(wr->num_sge); - *flit_cnt = 4 + ((wr->num_sge) << 1); + plen = 0; + for (i = 0; i < wr->num_sge; i++) { + if ((plen + wr->sg_list[i].length) < plen) + return -EMSGSIZE; + + plen += wr->sg_list[i].length; + wqe->send.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); + wqe->send.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); + wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); } + wqe->send.num_sgle = cpu_to_be32(wr->num_sge); + *flit_cnt = 4 + ((wr->num_sge) << 1); wqe->send.plen = cpu_to_be32(plen); return 0; } @@ -143,9 +128,12 @@ static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, if (wr->num_sge > 1) return -EINVAL; wqe->read.rdmaop = T3_READ_REQ; + if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) + wqe->read.local_inv = 1; + else + wqe->read.local_inv = 0; wqe->read.reserved[0] = 0; wqe->read.reserved[1] = 0; - wqe->read.reserved[2] = 0; wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey); wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr); wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey); @@ -155,6 +143,57 @@ static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } +static int iwch_build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr, + u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) +{ + int i; + __be64 *p; + + if (wr->wr.fast_reg.page_list_len > T3_MAX_FASTREG_DEPTH) + return -EINVAL; + *wr_cnt = 1; + wqe->fastreg.stag = cpu_to_be32(wr->wr.fast_reg.rkey); + wqe->fastreg.len = cpu_to_be32(wr->wr.fast_reg.length); + wqe->fastreg.va_base_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); + wqe->fastreg.va_base_lo_fbo = + cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff); + wqe->fastreg.page_type_perms = cpu_to_be32( + V_FR_PAGE_COUNT(wr->wr.fast_reg.page_list_len) | + V_FR_PAGE_SIZE(wr->wr.fast_reg.page_shift-12) | + V_FR_TYPE(TPT_VATO) | + V_FR_PERMS(iwch_ib_to_tpt_access(wr->wr.fast_reg.access_flags))); + p = &wqe->fastreg.pbl_addrs[0]; + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) { + + /* If we need a 2nd WR, then set it up */ + if (i == T3_MAX_FASTREG_FRAG) { + *wr_cnt = 2; + wqe = (union t3_wr *)(wq->queue + + Q_PTR2IDX((wq->wptr+1), wq->size_log2)); + build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0, + Q_GENBIT(wq->wptr + 1, wq->size_log2), + 0, 1 + wr->wr.fast_reg.page_list_len - T3_MAX_FASTREG_FRAG, + T3_EOP); + + p = &wqe->pbl_frag.pbl_addrs[0]; + } + *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]); + } + *flit_cnt = 5 + wr->wr.fast_reg.page_list_len; + if (*flit_cnt > 15) + *flit_cnt = 15; + return 0; +} + +static int iwch_build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr, + u8 *flit_cnt) +{ + wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey); + wqe->local_inv.reserved = 0; + *flit_cnt = sizeof(struct t3_local_inv_wr) >> 3; + return 0; +} + /* * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now. */ @@ -238,6 +277,7 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, u32 num_wrs; unsigned long flag; struct t3_swsq *sqp; + int wr_cnt = 1; qhp = to_iwch_qp(ibqp); spin_lock_irqsave(&qhp->lock, flag); @@ -262,15 +302,15 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, t3_wr_flags = 0; if (wr->send_flags & IB_SEND_SOLICITED) t3_wr_flags |= T3_SOLICITED_EVENT_FLAG; - if (wr->send_flags & IB_SEND_FENCE) - t3_wr_flags |= T3_READ_FENCE_FLAG; if (wr->send_flags & IB_SEND_SIGNALED) t3_wr_flags |= T3_COMPLETION_FLAG; sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); switch (wr->opcode) { case IB_WR_SEND: - case IB_WR_SEND_WITH_IMM: + case IB_WR_SEND_WITH_INV: + if (wr->send_flags & IB_SEND_FENCE) + t3_wr_flags |= T3_READ_FENCE_FLAG; t3_wr_opcode = T3_WR_SEND; err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt); break; @@ -280,6 +320,7 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt); break; case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: t3_wr_opcode = T3_WR_READ; t3_wr_flags = 0; /* T3 reads are always signaled */ err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt); @@ -289,6 +330,17 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, if (!qhp->wq.oldest_read) qhp->wq.oldest_read = sqp; break; + case IB_WR_FAST_REG_MR: + t3_wr_opcode = T3_WR_FASTREG; + err = iwch_build_fastreg(wqe, wr, &t3_wr_flit_cnt, + &wr_cnt, &qhp->wq); + break; + case IB_WR_LOCAL_INV: + if (wr->send_flags & IB_SEND_FENCE) + t3_wr_flags |= T3_LOCAL_FENCE_FLAG; + t3_wr_opcode = T3_WR_INV_STAG; + err = iwch_build_inv_stag(wqe, wr, &t3_wr_flit_cnt); + break; default: PDBG("%s post of type=%d TBD!\n", __func__, wr->opcode); @@ -307,14 +359,15 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags, Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), - 0, t3_wr_flit_cnt); + 0, t3_wr_flit_cnt, + (wr_cnt == 1) ? T3_SOPEOP : T3_SOP); PDBG("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n", __func__, (unsigned long long) wr->wr_id, idx, Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2), sqp->opcode); wr = wr->next; num_wrs--; - ++(qhp->wq.wptr); + qhp->wq.wptr += wr_cnt; ++(qhp->wq.sq_wptr); } spin_unlock_irqrestore(&qhp->lock, flag); @@ -359,7 +412,7 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, wr->wr_id; build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG, Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), - 0, sizeof(struct t3_receive_wr) >> 3); + 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP); PDBG("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x " "wqe %p \n", __func__, (unsigned long long) wr->wr_id, idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe); @@ -419,10 +472,10 @@ int iwch_bind_mw(struct ib_qp *qp, sgl.lkey = mw_bind->mr->lkey; sgl.length = mw_bind->length; wqe->bind.reserved = 0; - wqe->bind.type = T3_VA_BASED_TO; + wqe->bind.type = TPT_VATO; /* TBD: check perms */ - wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags); + wqe->bind.perms = iwch_ib_to_tpt_access(mw_bind->mw_access_flags); wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey); wqe->bind.mw_stag = cpu_to_be32(mw->rkey); wqe->bind.mw_len = cpu_to_be32(mw_bind->length); @@ -430,7 +483,7 @@ int iwch_bind_mw(struct ib_qp *qp, err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size); if (err) { spin_unlock_irqrestore(&qhp->lock, flag); - return err; + return err; } wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); @@ -441,10 +494,9 @@ int iwch_bind_mw(struct ib_qp *qp, sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED); wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr); wqe->bind.mr_pagesz = page_size; - wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id; build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags, Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0, - sizeof(struct t3_bind_mw_wr) >> 3); + sizeof(struct t3_bind_mw_wr) >> 3, T3_SOPEOP); ++(qhp->wq.wptr); ++(qhp->wq.sq_wptr); spin_unlock_irqrestore(&qhp->lock, flag); From d1f2cd895f8733faa9d79d09d825a2ed80002ac7 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:45 -0700 Subject: [PATCH 12/52] IB/mlx4: Configure QPs' max message size based on real device capability ConnectX returns the max message size it supports through the QUERY_DEV_CAP firmware command. When modifying a QP to RTR, the max message size for the QP must be specified. This value must not exceed the value declared through QUERY_DEV_CAP. The current code ignores the max allowed size and unconditionally sets the value to 2^31. This patch sets all QPs to the max value allowed as returned from firmware. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 4b0ac5d68c46..76054a72f71b 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -908,7 +908,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, attr->path_mtu); goto out; } - context->mtu_msgmax = (attr->path_mtu << 5) | 31; + context->mtu_msgmax = (attr->path_mtu << 5) | + ilog2(dev->dev->caps.max_msg_sz); } if (qp->rq.wqe_cnt) From 6578cf33989a594bab37af988d45d87812b946b8 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:45 -0700 Subject: [PATCH 13/52] IB/mlx4: Pass congestion management class MADs to the HCA ConnectX HCAs support the IB_MGMT_CLASS_CONG_MGMT management class, so process MADs of this class through the MAD_IFC firmware command. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 4c1e72fc8f57..cdca3a511e1c 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -255,7 +255,8 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS; } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 || - in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2) { + in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2 || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) { if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) return IB_MAD_RESULT_SUCCESS; From e5a5e7d59af5944a674b9cea420a1fedc60496f2 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Mon, 14 Jul 2008 23:48:46 -0700 Subject: [PATCH 14/52] IB/core: Reset to error QP state transition is not allowed I was reviewing the QP state transition diagram in the IB 1.2.1 spec and the code for qp_state_table[], and noticed that the code allows a QP to be modified from IB_QPS_RESET to IB_QPS_ERR whereas the notes for figure 124 (pg 457) specifically says that this transition isn't allowed. This is a clarification from earlier versions of the IB spec, which were ambiguous in this area and suggested that the RESET to ERR transition was allowed. Fix up the qp_state_table[] to make RESET->ERR not allowed. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/core/verbs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index e0fbe5975866..a7da9be43e61 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -315,7 +315,6 @@ static const struct { } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, - [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_INIT] = { .valid = 1, .req_param = { From d3809ad0972297fbc7ef0585049ef465d9d8d79d Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Jul 2008 23:48:46 -0700 Subject: [PATCH 15/52] IB/mthca: Remove extra code for RESET->ERR QP state transition Commit b18aad71 ("IB/mthca: Fix RESET to ERROR transition") added some extra code to handle a QP state transition from RESET to ERROR. However, the latest 1.2.1 version of the IB spec has clarified that this transition is actually not allowed, so we can remove this extra code again. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 3b1c5baf13b1..77b52b3adcc7 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -848,23 +848,6 @@ out: return err; } -static const struct ib_qp_attr dummy_init_attr = { .port_num = 1 }; -static const int dummy_init_attr_mask[] = { - [IB_QPT_UD] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_QKEY), - [IB_QPT_UC] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_ACCESS_FLAGS), - [IB_QPT_RC] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_ACCESS_FLAGS), - [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), - [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), -}; - int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -926,15 +909,6 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, goto out; } - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) { - err = __mthca_modify_qp(ibqp, &dummy_init_attr, - dummy_init_attr_mask[ibqp->qp_type], - IB_QPS_RESET, IB_QPS_INIT); - if (err) - goto out; - cur_state = IB_QPS_INIT; - } - err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); out: From 7c27f358209a8ce7c57b584346d7b611e823f1b1 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Jul 2008 23:48:46 -0700 Subject: [PATCH 16/52] IB/mlx4: Remove extra code for RESET->ERR QP state transition Commit 65adfa91 ("IB/mlx4: Fix RESET to RESET and RESET to ERROR transitions") added some extra code to handle a QP state transition from RESET to ERROR. However, the latest 1.2.1 version of the IB spec has clarified that this transition is actually not allowed, so we can remove this extra code again. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 76054a72f71b..44bbd6c2e315 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1132,23 +1132,6 @@ out: return err; } -static const struct ib_qp_attr mlx4_ib_qp_attr = { .port_num = 1 }; -static const int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1] = { - [IB_QPT_UD] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_QKEY), - [IB_QPT_UC] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_ACCESS_FLAGS), - [IB_QPT_RC] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_ACCESS_FLAGS), - [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), - [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), -}; - int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -1191,15 +1174,6 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) { - err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr, - mlx4_ib_qp_attr_mask_table[ibqp->qp_type], - IB_QPS_RESET, IB_QPS_INIT); - if (err) - goto out; - cur_state = IB_QPS_INIT; - } - err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); out: From 3e255eac561672cbc92844b9f16cae9304c2a783 Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Mon, 14 Jul 2008 23:48:47 -0700 Subject: [PATCH 17/52] IB/ehca: Reject receive work requests if QP is in RESET state Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_reqs.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index b799b2710210..dd9bc68f1c7b 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -544,8 +544,16 @@ int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr) { - return internal_post_recv(container_of(qp, struct ehca_qp, ib_qp), - qp->device, recv_wr, bad_recv_wr); + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + + /* Reject WR if QP is in RESET state */ + if (unlikely(my_qp->state == IB_QPS_RESET)) { + ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", + my_qp->state, qp->qp_num); + return -EINVAL; + } + + return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr); } int ehca_post_srq_recv(struct ib_srq *srq, From 6f7bc01a7382641c61ec036d68ff3a9140b48a1c Mon Sep 17 00:00:00 2001 From: Stefan Roscher Date: Mon, 14 Jul 2008 23:48:47 -0700 Subject: [PATCH 18/52] IB/ehca: In case of lost interrupts, trigger EOI to reenable interrupts During corner case testing, we noticed that some versions of ehca do not properly transition to interrupt done in special load situations. This can be resolved by periodically triggering EOI through H_EOI, if EQEs are pending. Signed-off-by: Stefan Roscher Acked-by: Benjamin Herrenschmidt Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_irq.c | 9 +++++++-- drivers/infiniband/hw/ehca/hcp_if.c | 10 ++++++++++ drivers/infiniband/hw/ehca/hcp_if.h | 1 + 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index ce1ab0571be3..0792d930c481 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -531,7 +531,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) { struct ehca_eq *eq = &shca->eq; struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; - u64 eqe_value; + u64 eqe_value, ret; unsigned long flags; int eqe_cnt, i; int eq_empty = 0; @@ -583,8 +583,13 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) ehca_dbg(&shca->ib_device, "No eqe found for irq event"); goto unlock_irq_spinlock; - } else if (!is_irq) + } else if (!is_irq) { + ret = hipz_h_eoi(eq->ist); + if (ret != H_SUCCESS) + ehca_err(&shca->ib_device, + "bad return code EOI -rc = %ld\n", ret); ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt); + } if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE)) ehca_dbg(&shca->ib_device, "too many eqes for one irq event"); /* enable irq for new packets */ diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 5245e13c3a30..415d3a465de6 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -933,3 +933,13 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, r_cb, 0, 0, 0, 0); } + +u64 hipz_h_eoi(int irq) +{ + unsigned long xirr; + + iosync(); + xirr = (0xffULL << 24) | irq; + + return plpar_hcall_norets(H_EOI, xirr); +} diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h index 60ce02b70663..2c3c6e0ea5c2 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.h +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ -260,5 +260,6 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, const u64 ressource_handle, void *rblock, unsigned long *byte_count); +u64 hipz_h_eoi(int irq); #endif /* __HCP_IF_H__ */ From 97d1cc8055c7b3fbd35bf693775d61102e65d174 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 14 Jul 2008 23:48:47 -0700 Subject: [PATCH 19/52] RDMA/cxgb3: Fix up some ib_device_attr fields - set fw_ver - set hw_ver - set max_qp_wr to something reasonable - set max_cqe to something reasonable Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/cxio_hal.h | 3 ++- drivers/infiniband/hw/cxgb3/iwch.c | 4 ++-- drivers/infiniband/hw/cxgb3/iwch_provider.c | 25 +++++++++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h index 25a880664e6b..a9ff32c36211 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -45,13 +45,14 @@ #define T3_CTRL_QP_SIZE_LOG2 8 #define T3_CTRL_CQ_ID 0 -/* TBD */ #define T3_MAX_NUM_RI (1<<15) #define T3_MAX_NUM_QP (1<<15) #define T3_MAX_NUM_CQ (1<<15) #define T3_MAX_NUM_PD (1<<15) #define T3_MAX_PBL_SIZE 256 #define T3_MAX_RQ_SIZE 1024 +#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1) +#define T3_MAX_CQ_DEPTH 8192 #define T3_MAX_NUM_STAG (1<<15) #define T3_MAX_MR_SIZE 0x100000000ULL diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 71554eacb13c..e09cc1a81999 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -74,11 +74,11 @@ static void rnic_init(struct iwch_dev *rnicp) rnicp->attr.vendor_id = 0x168; rnicp->attr.vendor_part_id = 7; rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; - rnicp->attr.max_wrs = (1UL << 24) - 1; + rnicp->attr.max_wrs = T3_MAX_QP_DEPTH; rnicp->attr.max_sge_per_wr = T3_MAX_SGE; rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE; rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1; - rnicp->attr.max_cqes_per_cq = (1UL << 24) - 1; + rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH; rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev); rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE; rnicp->attr.max_pds = T3_MAX_NUM_PD - 1; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 5d504f3ed68b..5f4380657392 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1093,6 +1093,29 @@ static int iwch_query_gid(struct ib_device *ibdev, u8 port, return 0; } +static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev) +{ + struct ethtool_drvinfo info; + struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; + char *cp, *next; + unsigned fw_maj, fw_min, fw_mic; + + rtnl_lock(); + lldev->ethtool_ops->get_drvinfo(lldev, &info); + rtnl_unlock(); + + next = info.fw_version + 1; + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_maj); + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_min); + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_mic); + + return (((u64)fw_maj & 0xffff) << 32) | ((fw_min & 0xffff) << 16) | + (fw_mic & 0xffff); +} + static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { @@ -1103,6 +1126,8 @@ static int iwch_query_device(struct ib_device *ibdev, dev = to_iwch_dev(ibdev); memset(props, 0, sizeof *props); memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); + props->hw_ver = dev->rdev.t3cdev_p->type; + props->fw_ver = fw_vers_string_to_u64(dev); props->device_cap_flags = dev->device_cap_flags; props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor; props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device; From eec8845d29504a12fbd434e192d61aed3d9d74fa Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Jul 2008 23:48:47 -0700 Subject: [PATCH 20/52] RDMA/cxgb3: Remove write-only iwch_rnic_attributes fields The members struct iwch_rnic_attributes.vendor_id and .vendor_part_id are write-only, so we might as well get rid of them. Signed-off-by: Roland Dreier Acked-by: Steve Wise --- drivers/infiniband/hw/cxgb3/iwch.c | 2 -- drivers/infiniband/hw/cxgb3/iwch.h | 2 -- 2 files changed, 4 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index e09cc1a81999..06086256ca1d 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -71,8 +71,6 @@ static void rnic_init(struct iwch_dev *rnicp) idr_init(&rnicp->mmidr); spin_lock_init(&rnicp->lock); - rnicp->attr.vendor_id = 0x168; - rnicp->attr.vendor_part_id = 7; rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; rnicp->attr.max_wrs = T3_MAX_QP_DEPTH; rnicp->attr.max_sge_per_wr = T3_MAX_SGE; diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index d2409a505e8d..3773453b2cf0 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -48,8 +48,6 @@ struct iwch_qp; struct iwch_mr; struct iwch_rnic_attributes { - u32 vendor_id; - u32 vendor_part_id; u32 max_qps; u32 max_wrs; /* Max for any SQ/RQ */ u32 max_sge_per_wr; From 468f2239bcc71ae0f345c3fe58c797cf4627daf4 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Jul 2008 23:48:47 -0700 Subject: [PATCH 21/52] RDMA/cma: Add missing newlines to printk()s Signed-off-by: Roland Dreier Acked-by: Sean Hefty --- drivers/infiniband/core/cma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e5bd6172a1f6..44d190f67810 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -974,7 +974,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; break; default: - printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d", + printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n", ib_event->event); goto out; } @@ -1450,7 +1450,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, ret = rdma_listen(id, id_priv->backlog); if (ret) printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, " - "listening on device %s", ret, cma_dev->device->name); + "listening on device %s\n", ret, cma_dev->device->name); } static void cma_listen_on_all(struct rdma_id_private *id_priv) @@ -2155,7 +2155,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, event.status = 0; break; default: - printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d", + printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n", ib_event->event); goto out; } From a7d834c4bc6be73e8f83eaa5072fac3c5549f7f2 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Jul 2008 23:48:47 -0700 Subject: [PATCH 22/52] IPoIB/cm: Fix racy use of receive WR/SGL in ipoib_cm_post_receive_nonsrq() For devices that don't support SRQs, ipoib_cm_post_receive_nonsrq() is called from both ipoib_cm_handle_rx_wc() and ipoib_cm_nonsrq_init_rx(), and these two callers are not synchronized against each other. However, ipoib_cm_post_receive_nonsrq() always reuses the same receive work request and scatter list structures, so multiple callers can end up stepping on each other, which leads to posting garbled work requests. Fix this by having the caller pass in the ib_recv_wr and ib_sge structures to use, and allocating new local structures in ipoib_cm_nonsrq_init_rx(). Based on a patch by Pradeep Satyanarayana and David Wilder , with debugging help from Hoang-Nam Nguyen . Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 63 ++++++++++++++++++------- 1 file changed, 47 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 6223fc39af70..37bf67b2a26f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -111,18 +111,20 @@ static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) } static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, - struct ipoib_cm_rx *rx, int id) + struct ipoib_cm_rx *rx, + struct ib_recv_wr *wr, + struct ib_sge *sge, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_recv_wr *bad_wr; int i, ret; - priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; + wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; for (i = 0; i < IPOIB_CM_RX_SG; ++i) - priv->cm.rx_sge[i].addr = rx->rx_ring[id].mapping[i]; + sge[i].addr = rx->rx_ring[id].mapping[i]; - ret = ib_post_recv(rx->qp, &priv->cm.rx_wr, &bad_wr); + ret = ib_post_recv(rx->qp, wr, &bad_wr); if (unlikely(ret)) { ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret); ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, @@ -320,10 +322,33 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev, return 0; } +static void ipoib_cm_init_rx_wr(struct net_device *dev, + struct ib_recv_wr *wr, + struct ib_sge *sge) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int i; + + for (i = 0; i < priv->cm.num_frags; ++i) + sge[i].lkey = priv->mr->lkey; + + sge[0].length = IPOIB_CM_HEAD_SIZE; + for (i = 1; i < priv->cm.num_frags; ++i) + sge[i].length = PAGE_SIZE; + + wr->next = NULL; + wr->sg_list = priv->cm.rx_sge; + wr->num_sge = priv->cm.num_frags; +} + static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id, struct ipoib_cm_rx *rx) { struct ipoib_dev_priv *priv = netdev_priv(dev); + struct { + struct ib_recv_wr wr; + struct ib_sge sge[IPOIB_CM_RX_SG]; + } *t; int ret; int i; @@ -331,6 +356,14 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i if (!rx->rx_ring) return -ENOMEM; + t = kmalloc(sizeof *t, GFP_KERNEL); + if (!t) { + ret = -ENOMEM; + goto err_free; + } + + ipoib_cm_init_rx_wr(dev, &t->wr, t->sge); + spin_lock_irq(&priv->lock); if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) { @@ -349,8 +382,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); ret = -ENOMEM; goto err_count; - } - ret = ipoib_cm_post_receive_nonsrq(dev, rx, i); + } + ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i); if (ret) { ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq " "failed for buf %d\n", i); @@ -361,6 +394,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i rx->recv_count = ipoib_recvq_size; + kfree(t); + return 0; err_count: @@ -369,6 +404,7 @@ err_count: spin_unlock_irq(&priv->lock); err_free: + kfree(t); ipoib_cm_free_rx_ring(dev, rx->rx_ring); return ret; @@ -637,7 +673,10 @@ repost: ipoib_warn(priv, "ipoib_cm_post_receive_srq failed " "for buf %d\n", wr_id); } else { - if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, wr_id))) { + if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, + &priv->cm.rx_wr, + priv->cm.rx_sge, + wr_id))) { --p->recv_count; ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed " "for buf %d\n", wr_id); @@ -1502,15 +1541,7 @@ int ipoib_cm_dev_init(struct net_device *dev) priv->cm.num_frags = IPOIB_CM_RX_SG; } - for (i = 0; i < priv->cm.num_frags; ++i) - priv->cm.rx_sge[i].lkey = priv->mr->lkey; - - priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE; - for (i = 1; i < priv->cm.num_frags; ++i) - priv->cm.rx_sge[i].length = PAGE_SIZE; - priv->cm.rx_wr.next = NULL; - priv->cm.rx_wr.sg_list = priv->cm.rx_sge; - priv->cm.rx_wr.num_sge = priv->cm.num_frags; + ipoib_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge); if (ipoib_cm_has_srq(dev)) { for (i = 0; i < ipoib_recvq_size; ++i) { From 7f624d023b5fb150831e02c1e4c0f2619ade72c2 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 14 Jul 2008 23:48:48 -0700 Subject: [PATCH 23/52] RDMA/core: Add iWARP protocol statistics attributes in sysfs This patch adds a sysfs attribute group called "proto_stats" under /sys/class/infiniband/$device/ and populates this group with protocol statistics if they exist for a given device. Currently, only iWARP stats are defined, but the code is designed to allow InfiniBand protocol stats if they become available. These stats are per-device and more importantly -not- per port. Details: - Add union rdma_protocol_stats in ib_verbs.h. This union allows defining transport-specific stats. Currently only iwarp stats are defined. - Add struct iw_protocol_stats to define the current set of iwarp protocol stats. - Add new ib_device method called get_proto_stats() to return protocol statistics. - Add logic in core/sysfs.c to create iwarp protocol stats attributes if the device is an RNIC and has a get_proto_stats() method. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/core/sysfs.c | 120 ++++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 53 ++++++++++++++ 2 files changed, 173 insertions(+) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 36a0ef97c6a5..4d1042115598 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -663,6 +663,120 @@ static struct class ib_class = { .dev_uevent = ib_device_uevent, }; +/* Show a given an attribute in the statistics group */ +static ssize_t show_protocol_stat(const struct device *device, + struct device_attribute *attr, char *buf, + unsigned offset) +{ + struct ib_device *dev = container_of(device, struct ib_device, dev); + union rdma_protocol_stats stats; + ssize_t ret; + + ret = dev->get_protocol_stats(dev, &stats); + if (ret) + return ret; + + return sprintf(buf, "%llu\n", + (unsigned long long) ((u64 *) &stats)[offset]); +} + +/* generate a read-only iwarp statistics attribute */ +#define IW_STATS_ENTRY(name) \ +static ssize_t show_##name(struct device *device, \ + struct device_attribute *attr, char *buf) \ +{ \ + return show_protocol_stat(device, attr, buf, \ + offsetof(struct iw_protocol_stats, name) / \ + sizeof (u64)); \ +} \ +static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) + +IW_STATS_ENTRY(ipInReceives); +IW_STATS_ENTRY(ipInHdrErrors); +IW_STATS_ENTRY(ipInTooBigErrors); +IW_STATS_ENTRY(ipInNoRoutes); +IW_STATS_ENTRY(ipInAddrErrors); +IW_STATS_ENTRY(ipInUnknownProtos); +IW_STATS_ENTRY(ipInTruncatedPkts); +IW_STATS_ENTRY(ipInDiscards); +IW_STATS_ENTRY(ipInDelivers); +IW_STATS_ENTRY(ipOutForwDatagrams); +IW_STATS_ENTRY(ipOutRequests); +IW_STATS_ENTRY(ipOutDiscards); +IW_STATS_ENTRY(ipOutNoRoutes); +IW_STATS_ENTRY(ipReasmTimeout); +IW_STATS_ENTRY(ipReasmReqds); +IW_STATS_ENTRY(ipReasmOKs); +IW_STATS_ENTRY(ipReasmFails); +IW_STATS_ENTRY(ipFragOKs); +IW_STATS_ENTRY(ipFragFails); +IW_STATS_ENTRY(ipFragCreates); +IW_STATS_ENTRY(ipInMcastPkts); +IW_STATS_ENTRY(ipOutMcastPkts); +IW_STATS_ENTRY(ipInBcastPkts); +IW_STATS_ENTRY(ipOutBcastPkts); +IW_STATS_ENTRY(tcpRtoAlgorithm); +IW_STATS_ENTRY(tcpRtoMin); +IW_STATS_ENTRY(tcpRtoMax); +IW_STATS_ENTRY(tcpMaxConn); +IW_STATS_ENTRY(tcpActiveOpens); +IW_STATS_ENTRY(tcpPassiveOpens); +IW_STATS_ENTRY(tcpAttemptFails); +IW_STATS_ENTRY(tcpEstabResets); +IW_STATS_ENTRY(tcpCurrEstab); +IW_STATS_ENTRY(tcpInSegs); +IW_STATS_ENTRY(tcpOutSegs); +IW_STATS_ENTRY(tcpRetransSegs); +IW_STATS_ENTRY(tcpInErrs); +IW_STATS_ENTRY(tcpOutRsts); + +static struct attribute *iw_proto_stats_attrs[] = { + &dev_attr_ipInReceives.attr, + &dev_attr_ipInHdrErrors.attr, + &dev_attr_ipInTooBigErrors.attr, + &dev_attr_ipInNoRoutes.attr, + &dev_attr_ipInAddrErrors.attr, + &dev_attr_ipInUnknownProtos.attr, + &dev_attr_ipInTruncatedPkts.attr, + &dev_attr_ipInDiscards.attr, + &dev_attr_ipInDelivers.attr, + &dev_attr_ipOutForwDatagrams.attr, + &dev_attr_ipOutRequests.attr, + &dev_attr_ipOutDiscards.attr, + &dev_attr_ipOutNoRoutes.attr, + &dev_attr_ipReasmTimeout.attr, + &dev_attr_ipReasmReqds.attr, + &dev_attr_ipReasmOKs.attr, + &dev_attr_ipReasmFails.attr, + &dev_attr_ipFragOKs.attr, + &dev_attr_ipFragFails.attr, + &dev_attr_ipFragCreates.attr, + &dev_attr_ipInMcastPkts.attr, + &dev_attr_ipOutMcastPkts.attr, + &dev_attr_ipInBcastPkts.attr, + &dev_attr_ipOutBcastPkts.attr, + &dev_attr_tcpRtoAlgorithm.attr, + &dev_attr_tcpRtoMin.attr, + &dev_attr_tcpRtoMax.attr, + &dev_attr_tcpMaxConn.attr, + &dev_attr_tcpActiveOpens.attr, + &dev_attr_tcpPassiveOpens.attr, + &dev_attr_tcpAttemptFails.attr, + &dev_attr_tcpEstabResets.attr, + &dev_attr_tcpCurrEstab.attr, + &dev_attr_tcpInSegs.attr, + &dev_attr_tcpOutSegs.attr, + &dev_attr_tcpRetransSegs.attr, + &dev_attr_tcpInErrs.attr, + &dev_attr_tcpOutRsts.attr, + NULL +}; + +static struct attribute_group iw_stats_group = { + .name = "proto_stats", + .attrs = iw_proto_stats_attrs, +}; + int ib_device_register_sysfs(struct ib_device *device) { struct device *class_dev = &device->dev; @@ -705,6 +819,12 @@ int ib_device_register_sysfs(struct ib_device *device) } } + if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) { + ret = sysfs_create_group(&class_dev->kobj, &iw_stats_group); + if (ret) + goto err_put; + } + return 0; err_put: diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 74c24b908908..10ebaaae0161 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -226,6 +226,57 @@ static inline int ib_width_enum_to_int(enum ib_port_width width) } } +struct ib_protocol_stats { + /* TBD... */ +}; + +struct iw_protocol_stats { + u64 ipInReceives; + u64 ipInHdrErrors; + u64 ipInTooBigErrors; + u64 ipInNoRoutes; + u64 ipInAddrErrors; + u64 ipInUnknownProtos; + u64 ipInTruncatedPkts; + u64 ipInDiscards; + u64 ipInDelivers; + u64 ipOutForwDatagrams; + u64 ipOutRequests; + u64 ipOutDiscards; + u64 ipOutNoRoutes; + u64 ipReasmTimeout; + u64 ipReasmReqds; + u64 ipReasmOKs; + u64 ipReasmFails; + u64 ipFragOKs; + u64 ipFragFails; + u64 ipFragCreates; + u64 ipInMcastPkts; + u64 ipOutMcastPkts; + u64 ipInBcastPkts; + u64 ipOutBcastPkts; + + u64 tcpRtoAlgorithm; + u64 tcpRtoMin; + u64 tcpRtoMax; + u64 tcpMaxConn; + u64 tcpActiveOpens; + u64 tcpPassiveOpens; + u64 tcpAttemptFails; + u64 tcpEstabResets; + u64 tcpCurrEstab; + u64 tcpInSegs; + u64 tcpOutSegs; + u64 tcpRetransSegs; + u64 tcpInErrs; + u64 tcpOutRsts; +}; + +union rdma_protocol_stats { + struct ib_protocol_stats ib; + struct iw_protocol_stats iw; +}; + struct ib_port_attr { enum ib_port_state state; enum ib_mtu max_mtu; @@ -943,6 +994,8 @@ struct ib_device { struct iw_cm_verbs *iwcm; + int (*get_protocol_stats)(struct ib_device *device, + union rdma_protocol_stats *stats); int (*query_device)(struct ib_device *device, struct ib_device_attr *device_attr); int (*query_port)(struct ib_device *device, From 14cc180f7b032f8484c1a3d0533b1129ffe307fd Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 14 Jul 2008 23:48:48 -0700 Subject: [PATCH 24/52] RDMA/cxgb3: Add support for protocol statistics - Add a new rdma ctl command called RDMA_GET_MIB to the cxgb3 low level driver to obtain the protocol mib from the rnic hardware. - Add new iw_cxgb3 provider method to get the MIB from the low level driver. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_provider.c | 62 +++++++++++++++++++-- drivers/net/cxgb3/cxgb3_ctl_defs.h | 1 + drivers/net/cxgb3/cxgb3_offload.c | 7 +++ 3 files changed, 66 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 5f4380657392..18a6609f5e01 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -56,6 +56,7 @@ #include "iwch_provider.h" #include "iwch_cm.h" #include "iwch_user.h" +#include "common.h" static int iwch_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, @@ -1245,6 +1246,61 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr, iwch_dev->rdev.rnic_info.pdev->device); } +static int iwch_get_mib(struct ib_device *ibdev, + union rdma_protocol_stats *stats) +{ + struct iwch_dev *dev; + struct tp_mib_stats m; + int ret; + + PDBG("%s ibdev %p\n", __func__, ibdev); + dev = to_iwch_dev(ibdev); + ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m); + if (ret) + return -ENOSYS; + + memset(stats, 0, sizeof *stats); + stats->iw.ipInReceives = ((u64) m.ipInReceive_hi << 32) + + m.ipInReceive_lo; + stats->iw.ipInHdrErrors = ((u64) m.ipInHdrErrors_hi << 32) + + m.ipInHdrErrors_lo; + stats->iw.ipInAddrErrors = ((u64) m.ipInAddrErrors_hi << 32) + + m.ipInAddrErrors_lo; + stats->iw.ipInUnknownProtos = ((u64) m.ipInUnknownProtos_hi << 32) + + m.ipInUnknownProtos_lo; + stats->iw.ipInDiscards = ((u64) m.ipInDiscards_hi << 32) + + m.ipInDiscards_lo; + stats->iw.ipInDelivers = ((u64) m.ipInDelivers_hi << 32) + + m.ipInDelivers_lo; + stats->iw.ipOutRequests = ((u64) m.ipOutRequests_hi << 32) + + m.ipOutRequests_lo; + stats->iw.ipOutDiscards = ((u64) m.ipOutDiscards_hi << 32) + + m.ipOutDiscards_lo; + stats->iw.ipOutNoRoutes = ((u64) m.ipOutNoRoutes_hi << 32) + + m.ipOutNoRoutes_lo; + stats->iw.ipReasmTimeout = (u64) m.ipReasmTimeout; + stats->iw.ipReasmReqds = (u64) m.ipReasmReqds; + stats->iw.ipReasmOKs = (u64) m.ipReasmOKs; + stats->iw.ipReasmFails = (u64) m.ipReasmFails; + stats->iw.tcpActiveOpens = (u64) m.tcpActiveOpens; + stats->iw.tcpPassiveOpens = (u64) m.tcpPassiveOpens; + stats->iw.tcpAttemptFails = (u64) m.tcpAttemptFails; + stats->iw.tcpEstabResets = (u64) m.tcpEstabResets; + stats->iw.tcpOutRsts = (u64) m.tcpOutRsts; + stats->iw.tcpCurrEstab = (u64) m.tcpCurrEstab; + stats->iw.tcpInSegs = ((u64) m.tcpInSegs_hi << 32) + + m.tcpInSegs_lo; + stats->iw.tcpOutSegs = ((u64) m.tcpOutSegs_hi << 32) + + m.tcpOutSegs_lo; + stats->iw.tcpRetransSegs = ((u64) m.tcpRetransSeg_hi << 32) + + m.tcpRetransSeg_lo; + stats->iw.tcpInErrs = ((u64) m.tcpInErrs_hi << 32) + + m.tcpInErrs_lo; + stats->iw.tcpRtoMin = (u64) m.tcpRtoMin; + stats->iw.tcpRtoMax = (u64) m.tcpRtoMax; + return 0; +} + static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); @@ -1254,7 +1310,7 @@ static struct device_attribute *iwch_class_attributes[] = { &dev_attr_hw_rev, &dev_attr_fw_ver, &dev_attr_hca_type, - &dev_attr_board_id + &dev_attr_board_id, }; int iwch_register_device(struct iwch_dev *dev) @@ -1325,15 +1381,13 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.alloc_fast_reg_mr = iwch_alloc_fast_reg_mr; dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl; dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl; - dev->ibdev.attach_mcast = iwch_multicast_attach; dev->ibdev.detach_mcast = iwch_multicast_detach; dev->ibdev.process_mad = iwch_process_mad; - dev->ibdev.req_notify_cq = iwch_arm_cq; dev->ibdev.post_send = iwch_post_send; dev->ibdev.post_recv = iwch_post_receive; - + dev->ibdev.get_protocol_stats = iwch_get_mib; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) diff --git a/drivers/net/cxgb3/cxgb3_ctl_defs.h b/drivers/net/cxgb3/cxgb3_ctl_defs.h index 6c4f32066919..ed0ecd9679cb 100644 --- a/drivers/net/cxgb3/cxgb3_ctl_defs.h +++ b/drivers/net/cxgb3/cxgb3_ctl_defs.h @@ -54,6 +54,7 @@ enum { RDMA_CQ_DISABLE = 16, RDMA_CTRL_QP_SETUP = 17, RDMA_GET_MEM = 18, + RDMA_GET_MIB = 19, GET_RX_PAGE_INFO = 50, }; diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c index ff9c013ce535..cf2696873796 100644 --- a/drivers/net/cxgb3/cxgb3_offload.c +++ b/drivers/net/cxgb3/cxgb3_offload.c @@ -303,6 +303,12 @@ static int cxgb_rdma_ctl(struct adapter *adapter, unsigned int req, void *data) spin_unlock_irq(&adapter->sge.reg_lock); break; } + case RDMA_GET_MIB: { + spin_lock(&adapter->stats_lock); + t3_tp_get_mib_stats(adapter, (struct tp_mib_stats *)data); + spin_unlock(&adapter->stats_lock); + break; + } default: ret = -EOPNOTSUPP; } @@ -381,6 +387,7 @@ static int cxgb_offload_ctl(struct t3cdev *tdev, unsigned int req, void *data) case RDMA_CQ_DISABLE: case RDMA_CTRL_QP_SETUP: case RDMA_GET_MEM: + case RDMA_GET_MIB: if (!offload_running(adapter)) return -EAGAIN; return cxgb_rdma_ctl(adapter, req, data); From 47ee1b9f2e7bf73950602efe0b74fa1a8481f222 Mon Sep 17 00:00:00 2001 From: Ron Livne Date: Mon, 14 Jul 2008 23:48:48 -0700 Subject: [PATCH 25/52] IB/core: Add support for multicast loopback blocking This patch also adds a creation flag for QPs, IB_QP_CREATE_MULTICAST_BLOCK_LOOPBACK, which when set means that multicast sends from the QP to a group that the QP is attached to will not be looped back to the QP's receive queue. This can be used to save receive resources when a consumer does not want a local copy of multicast traffic; for example IPoIB must waste CPU time throwing away such local copies of multicast traffic. This patch also adds a device capability flag that shows whether a device supports this feature or not. Signed-off-by: Ron Livne Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 10ebaaae0161..07b41e05565a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -104,6 +104,7 @@ enum ib_device_cap_flags { IB_DEVICE_UD_IP_CSUM = (1<<18), IB_DEVICE_UD_TSO = (1<<19), IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21), + IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22), }; enum ib_atomic_cap { @@ -555,7 +556,8 @@ enum ib_qp_type { }; enum ib_qp_create_flags { - IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0, + IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0, + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, }; struct ib_qp_init_attr { From 521e575b9a7324a0bca762622139f69582a042bf Mon Sep 17 00:00:00 2001 From: Ron Livne Date: Mon, 14 Jul 2008 23:48:48 -0700 Subject: [PATCH 26/52] IB/mlx4: Add support for blocking multicast loopback packets Add support for handling the IB_QP_CREATE_MULTICAST_BLOCK_LOOPBACK flag by using the per-multicast group loopback blocking feature of mlx4 hardware. Signed-off-by: Ron Livne Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 7 +++++-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 3 ++- drivers/infiniband/hw/mlx4/qp.c | 21 ++++++++++++++++++--- drivers/net/mlx4/mcg.c | 17 +++++++++++++---- include/linux/mlx4/device.h | 3 ++- 5 files changed, 40 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 4d61e32866c6..bcf50648fa18 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -90,7 +90,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | - IB_DEVICE_RC_RNR_NAK_GEN; + IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR) props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR) @@ -437,7 +438,9 @@ static int mlx4_ib_dealloc_pd(struct ib_pd *pd) static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { return mlx4_multicast_attach(to_mdev(ibqp->device)->dev, - &to_mqp(ibqp)->mqp, gid->raw); + &to_mqp(ibqp)->mqp, gid->raw, + !!(to_mqp(ibqp)->flags & + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)); } static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 5cf994794d25..c4cf5b69eefa 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -101,7 +101,8 @@ struct mlx4_ib_wq { }; enum mlx4_ib_qp_flags { - MLX4_IB_QP_LSO = 1 << 0 + MLX4_IB_QP_LSO = 1 << 0, + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1, }; struct mlx4_ib_qp { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 44bbd6c2e315..91590e7fba0c 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -511,6 +511,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } else { qp->sq_no_prefetch = 0; + if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) + qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; + if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) qp->flags |= MLX4_IB_QP_LSO; @@ -684,10 +687,15 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp; int err; - /* We only support LSO, and only for kernel UD QPs. */ - if (init_attr->create_flags & ~IB_QP_CREATE_IPOIB_UD_LSO) + /* + * We only support LSO and multicast loopback blocking, and + * only for kernel UD QPs. + */ + if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO | + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) return ERR_PTR(-EINVAL); - if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO && + + if (init_attr->create_flags && (pd->uobject || init_attr->qp_type != IB_QPT_UD)) return ERR_PTR(-EINVAL); @@ -1844,6 +1852,13 @@ done: qp_init_attr->cap = qp_attr->cap; + qp_init_attr->create_flags = 0; + if (qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) + qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; + + if (qp->flags & MLX4_IB_QP_LSO) + qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; + out: mutex_unlock(&qp->mutex); return err; diff --git a/drivers/net/mlx4/mcg.c b/drivers/net/mlx4/mcg.c index 57f7f1f0d4ec..b4b57870ddfd 100644 --- a/drivers/net/mlx4/mcg.c +++ b/drivers/net/mlx4/mcg.c @@ -38,6 +38,9 @@ #include "mlx4.h" +#define MGM_QPN_MASK 0x00FFFFFF +#define MGM_BLCK_LB_BIT 30 + struct mlx4_mgm { __be32 next_gid_index; __be32 members_count; @@ -153,7 +156,8 @@ static int find_mgm(struct mlx4_dev *dev, return err; } -int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]) +int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + int block_mcast_loopback) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *mailbox; @@ -202,13 +206,18 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]) } for (i = 0; i < members_count; ++i) - if (mgm->qp[i] == cpu_to_be32(qp->qpn)) { + if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn) { mlx4_dbg(dev, "QP %06x already a member of MGM\n", qp->qpn); err = 0; goto out; } - mgm->qp[members_count++] = cpu_to_be32(qp->qpn); + if (block_mcast_loopback) + mgm->qp[members_count++] = cpu_to_be32((qp->qpn & MGM_QPN_MASK) | + (1 << MGM_BLCK_LB_BIT)); + else + mgm->qp[members_count++] = cpu_to_be32(qp->qpn & MGM_QPN_MASK); + mgm->members_count = cpu_to_be32(members_count); err = mlx4_WRITE_MCG(dev, index, mailbox); @@ -283,7 +292,7 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]) members_count = be32_to_cpu(mgm->members_count); for (loc = -1, i = 0; i < members_count; ++i) - if (mgm->qp[i] == cpu_to_be32(qp->qpn)) + if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn) loc = i; if (loc == -1) { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index a744383d16e9..81b3dd5206e0 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -398,7 +398,8 @@ int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_waterm int mlx4_INIT_PORT(struct mlx4_dev *dev, int port); int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port); -int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); +int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + int block_mcast_loopback); int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list, From 12406734051a26e9fe4c8568e931dfddbb72d431 Mon Sep 17 00:00:00 2001 From: Ron Livne Date: Mon, 14 Jul 2008 23:48:48 -0700 Subject: [PATCH 27/52] IPoIB: Use multicast loopback blocking if available Set IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK for IPoIB's UD QPs if supported by the underlying device. This creates an improvement of up to 39% in bandwidth when sending multicast packets with IPoIB, and an improvment of 12% in cpu usage. Signed-off-by: Ron Livne Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 810790ae7530..7b8fa36f509b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -199,7 +199,10 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) init_attr.recv_cq = priv->recv_cq; if (priv->hca_caps & IB_DEVICE_UD_TSO) - init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO; + init_attr.create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; + + if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK) + init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; if (dev->features & NETIF_F_SG) init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; From af40da894e96d5c826d38be3ea53ee00d9de0367 Mon Sep 17 00:00:00 2001 From: Vladimir Sokolovsky Date: Mon, 14 Jul 2008 23:48:48 -0700 Subject: [PATCH 28/52] IPoIB: add LRO support Add "ipoib_use_lro" module parameter to enable LRO and an "ipoib_lro_max_aggr" module parameter to set the max number of packets to be aggregated. Make LRO controllable and LRO statistics accessible through ethtool. Signed-off-by: Vladimir Sokolovsky Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/Kconfig | 1 + drivers/infiniband/ulp/ipoib/ipoib.h | 11 ++++ drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 46 +++++++++++++++ drivers/infiniband/ulp/ipoib/ipoib_ib.c | 8 ++- drivers/infiniband/ulp/ipoib/ipoib_main.c | 62 ++++++++++++++++++++ 5 files changed, 127 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index 1f76bad020f3..691525cf394a 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_IPOIB tristate "IP-over-InfiniBand" depends on NETDEVICES && INET && (IPV6 || IPV6=n) + select INET_LRO ---help--- Support for the IP-over-InfiniBand protocol (IPoIB). This transports IP packets over InfiniBand so you can use your IB diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 8754b364f229..2c522572e3c5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -50,6 +50,7 @@ #include #include #include +#include /* constants */ @@ -94,6 +95,9 @@ enum { IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ IPOIB_MCAST_FLAG_ATTACHED = 3, + IPOIB_MAX_LRO_DESCRIPTORS = 8, + IPOIB_LRO_MAX_AGGR = 64, + MAX_SEND_CQE = 16, IPOIB_CM_COPYBREAK = 256, }; @@ -248,6 +252,11 @@ struct ipoib_ethtool_st { u16 max_coalesced_frames; }; +struct ipoib_lro { + struct net_lro_mgr lro_mgr; + struct net_lro_desc lro_desc[IPOIB_MAX_LRO_DESCRIPTORS]; +}; + /* * Device private locking: tx_lock protects members used in TX fast * path (and we use LLTX so upper layers don't do extra locking). @@ -334,6 +343,8 @@ struct ipoib_dev_priv { int hca_caps; struct ipoib_ethtool_st ethtool; struct timer_list poll_timer; + + struct ipoib_lro lro; }; struct ipoib_ah { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 10279b79c44d..66af5c1a76e5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -86,11 +86,57 @@ static int ipoib_set_coalesce(struct net_device *dev, return 0; } +static const char ipoib_stats_keys[][ETH_GSTRING_LEN] = { + "LRO aggregated", "LRO flushed", + "LRO avg aggr", "LRO no desc" +}; + +static void ipoib_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + switch (stringset) { + case ETH_SS_STATS: + memcpy(data, *ipoib_stats_keys, sizeof(ipoib_stats_keys)); + break; + } +} + +static int ipoib_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(ipoib_stats_keys); + default: + return -EOPNOTSUPP; + } +} + +static void ipoib_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, uint64_t *data) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int index = 0; + + /* Get LRO statistics */ + data[index++] = priv->lro.lro_mgr.stats.aggregated; + data[index++] = priv->lro.lro_mgr.stats.flushed; + if (priv->lro.lro_mgr.stats.flushed) + data[index++] = priv->lro.lro_mgr.stats.aggregated / + priv->lro.lro_mgr.stats.flushed; + else + data[index++] = 0; + data[index++] = priv->lro.lro_mgr.stats.no_desc; +} + static const struct ethtool_ops ipoib_ethtool_ops = { .get_drvinfo = ipoib_get_drvinfo, .get_tso = ethtool_op_get_tso, .get_coalesce = ipoib_get_coalesce, .set_coalesce = ipoib_set_coalesce, + .get_flags = ethtool_op_get_flags, + .set_flags = ethtool_op_set_flags, + .get_strings = ipoib_get_strings, + .get_sset_count = ipoib_get_sset_count, + .get_ethtool_stats = ipoib_get_ethtool_stats, }; void ipoib_set_ethtool_ops(struct net_device *dev) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index eca8518d79a0..5d50e5261eed 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -288,7 +288,10 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok)) skb->ip_summed = CHECKSUM_UNNECESSARY; - netif_receive_skb(skb); + if (dev->features & NETIF_F_LRO) + lro_receive_skb(&priv->lro.lro_mgr, skb, NULL); + else + netif_receive_skb(skb); repost: if (unlikely(ipoib_ib_post_receive(dev, wr_id))) @@ -440,6 +443,9 @@ poll_more: } if (done < budget) { + if (dev->features & NETIF_F_LRO) + lro_flush_all(&priv->lro.lro_mgr); + netif_rx_complete(dev, napi); if (unlikely(ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP | diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index bfe1dbf99207..fead88f7fb17 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -60,6 +60,15 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue"); module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); +static int lro; +module_param(lro, bool, 0444); +MODULE_PARM_DESC(lro, "Enable LRO (Large Receive Offload)"); + +static int lro_max_aggr = IPOIB_LRO_MAX_AGGR; +module_param(lro_max_aggr, int, 0644); +MODULE_PARM_DESC(lro_max_aggr, "LRO: Max packets to be aggregated " + "(default = 64)"); + #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG int ipoib_debug_level; @@ -936,6 +945,54 @@ static const struct header_ops ipoib_header_ops = { .create = ipoib_hard_header, }; +static int get_skb_hdr(struct sk_buff *skb, void **iphdr, + void **tcph, u64 *hdr_flags, void *priv) +{ + unsigned int ip_len; + struct iphdr *iph; + + if (unlikely(skb->protocol != htons(ETH_P_IP))) + return -1; + + /* + * In the future we may add an else clause that verifies the + * checksum and allows devices which do not calculate checksum + * to use LRO. + */ + if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY)) + return -1; + + /* Check for non-TCP packet */ + skb_reset_network_header(skb); + iph = ip_hdr(skb); + if (iph->protocol != IPPROTO_TCP) + return -1; + + ip_len = ip_hdrlen(skb); + skb_set_transport_header(skb, ip_len); + *tcph = tcp_hdr(skb); + + /* check if IP header and TCP header are complete */ + if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb)) + return -1; + + *hdr_flags = LRO_IPV4 | LRO_TCP; + *iphdr = iph; + + return 0; +} + +static void ipoib_lro_setup(struct ipoib_dev_priv *priv) +{ + priv->lro.lro_mgr.max_aggr = lro_max_aggr; + priv->lro.lro_mgr.max_desc = IPOIB_MAX_LRO_DESCRIPTORS; + priv->lro.lro_mgr.lro_arr = priv->lro.lro_desc; + priv->lro.lro_mgr.get_skb_header = get_skb_hdr; + priv->lro.lro_mgr.features = LRO_F_NAPI; + priv->lro.lro_mgr.dev = priv->dev; + priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY; +} + static void ipoib_setup(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -975,6 +1032,8 @@ static void ipoib_setup(struct net_device *dev) priv->dev = dev; + ipoib_lro_setup(priv); + spin_lock_init(&priv->lock); spin_lock_init(&priv->tx_lock); @@ -1152,6 +1211,9 @@ static struct net_device *ipoib_add_port(const char *format, priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; } + if (lro) + priv->dev->features |= NETIF_F_LRO; + /* * Set the full membership bit, so that we join the right * broadcast group, etc. From 038919f29682b00ea95506e959210fc72d1aaf64 Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Mon, 14 Jul 2008 23:48:49 -0700 Subject: [PATCH 29/52] IB/ehca: Make device table externally visible This gives ehca an autogenerated modalias and therefore enables automatic loading. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 482103eb6eac..598844d2edc9 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -923,6 +923,7 @@ static struct of_device_id ehca_device_table[] = }, {}, }; +MODULE_DEVICE_TABLE(of, ehca_device_table); static struct of_platform_driver ehca_driver = { .name = "ehca", From ee1e2c82c245a5fb2864e9dbcdaab3390fde3fcc Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Mon, 14 Jul 2008 23:48:49 -0700 Subject: [PATCH 30/52] IPoIB: Refresh paths instead of flushing them on SM change events The patch tries to solve the problem of device going down and paths being flushed on an SM change event. The method is to mark the paths as candidates for refresh (by setting the new valid flag to 0), and wait for an ARP probe a new path record query. The solution requires a different and less intrusive handling of SM change event. For that, the second argument of the flush function changes its meaning from a boolean flag to a level. In most cases, SM failover doesn't cause LID change so traffic won't stop. In the rare cases of LID change, the remote host (the one that hadn't changed its LID) will lose connectivity until paths are refreshed. This is no worse than the current state. In fact, preventing the device from going down saves packets that otherwise would be lost. Signed-off-by: Moni Levy Signed-off-by: Moni Shoua Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 17 +++++++-- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 42 +++++++++++++-------- drivers/infiniband/ulp/ipoib/ipoib_main.c | 44 ++++++++++++++++++++-- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 18 +++++---- 4 files changed, 91 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 2c522572e3c5..bb19587c5eaf 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -54,6 +54,12 @@ /* constants */ +enum ipoib_flush_level { + IPOIB_FLUSH_LIGHT, + IPOIB_FLUSH_NORMAL, + IPOIB_FLUSH_HEAVY +}; + enum { IPOIB_ENCAP_LEN = 4, @@ -284,10 +290,11 @@ struct ipoib_dev_priv { struct delayed_work pkey_poll_task; struct delayed_work mcast_task; - struct work_struct flush_task; + struct work_struct flush_light; + struct work_struct flush_normal; + struct work_struct flush_heavy; struct work_struct restart_task; struct delayed_work ah_reap_task; - struct work_struct pkey_event_task; struct ib_device *ca; u8 port; @@ -369,6 +376,7 @@ struct ipoib_path { struct rb_node rb_node; struct list_head list; + int valid; }; struct ipoib_neigh { @@ -433,11 +441,14 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_ah *address, u32 qpn); void ipoib_reap_ah(struct work_struct *work); +void ipoib_mark_paths_invalid(struct net_device *dev); void ipoib_flush_paths(struct net_device *dev); struct ipoib_dev_priv *ipoib_intf_alloc(const char *format); int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port); -void ipoib_ib_dev_flush(struct work_struct *work); +void ipoib_ib_dev_flush_light(struct work_struct *work); +void ipoib_ib_dev_flush_normal(struct work_struct *work); +void ipoib_ib_dev_flush_heavy(struct work_struct *work); void ipoib_pkey_event(struct work_struct *work); void ipoib_ib_dev_cleanup(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 5d50e5261eed..66cafa20c246 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -902,7 +902,8 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port) return 0; } -static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) +static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, + enum ipoib_flush_level level) { struct ipoib_dev_priv *cpriv; struct net_device *dev = priv->dev; @@ -915,7 +916,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) * the parent is down. */ list_for_each_entry(cpriv, &priv->child_intfs, list) - __ipoib_ib_dev_flush(cpriv, pkey_event); + __ipoib_ib_dev_flush(cpriv, level); mutex_unlock(&priv->vlan_mutex); @@ -929,7 +930,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) return; } - if (pkey_event) { + if (level == IPOIB_FLUSH_HEAVY) { if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) { clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); ipoib_ib_dev_down(dev, 0); @@ -947,11 +948,15 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) priv->pkey_index = new_index; } - ipoib_dbg(priv, "flushing\n"); + if (level == IPOIB_FLUSH_LIGHT) { + ipoib_mark_paths_invalid(dev); + ipoib_mcast_dev_flush(dev); + } - ipoib_ib_dev_down(dev, 0); + if (level >= IPOIB_FLUSH_NORMAL) + ipoib_ib_dev_down(dev, 0); - if (pkey_event) { + if (level == IPOIB_FLUSH_HEAVY) { ipoib_ib_dev_stop(dev, 0); ipoib_ib_dev_open(dev); } @@ -961,27 +966,34 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) * we get here, don't bring it back up if it's not configured up */ if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) { - ipoib_ib_dev_up(dev); + if (level >= IPOIB_FLUSH_NORMAL) + ipoib_ib_dev_up(dev); ipoib_mcast_restart_task(&priv->restart_task); } } -void ipoib_ib_dev_flush(struct work_struct *work) +void ipoib_ib_dev_flush_light(struct work_struct *work) { struct ipoib_dev_priv *priv = - container_of(work, struct ipoib_dev_priv, flush_task); + container_of(work, struct ipoib_dev_priv, flush_light); - ipoib_dbg(priv, "Flushing %s\n", priv->dev->name); - __ipoib_ib_dev_flush(priv, 0); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT); } -void ipoib_pkey_event(struct work_struct *work) +void ipoib_ib_dev_flush_normal(struct work_struct *work) { struct ipoib_dev_priv *priv = - container_of(work, struct ipoib_dev_priv, pkey_event_task); + container_of(work, struct ipoib_dev_priv, flush_normal); - ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name); - __ipoib_ib_dev_flush(priv, 1); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL); +} + +void ipoib_ib_dev_flush_heavy(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = + container_of(work, struct ipoib_dev_priv, flush_heavy); + + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY); } void ipoib_ib_dev_cleanup(struct net_device *dev) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index fead88f7fb17..b3fd7e8333cf 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -357,6 +357,23 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter, #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ +void ipoib_mark_paths_invalid(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_path *path, *tp; + + spin_lock_irq(&priv->lock); + + list_for_each_entry_safe(path, tp, &priv->path_list, list) { + ipoib_dbg(priv, "mark path LID 0x%04x GID " IPOIB_GID_FMT " invalid\n", + be16_to_cpu(path->pathrec.dlid), + IPOIB_GID_ARG(path->pathrec.dgid)); + path->valid = 0; + } + + spin_unlock_irq(&priv->lock); +} + void ipoib_flush_paths(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -393,6 +410,7 @@ static void path_rec_completion(int status, struct net_device *dev = path->dev; struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_ah *ah = NULL; + struct ipoib_ah *old_ah; struct ipoib_neigh *neigh, *tn; struct sk_buff_head skqueue; struct sk_buff *skb; @@ -416,6 +434,7 @@ static void path_rec_completion(int status, spin_lock_irqsave(&priv->lock, flags); + old_ah = path->ah; path->ah = ah; if (ah) { @@ -428,6 +447,17 @@ static void path_rec_completion(int status, __skb_queue_tail(&skqueue, skb); list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) { + if (neigh->ah) { + WARN_ON(neigh->ah != old_ah); + /* + * Dropping the ah reference inside + * priv->lock is safe here, because we + * will hold one more reference from + * the original value of path->ah (ie + * old_ah). + */ + ipoib_put_ah(neigh->ah); + } kref_get(&path->ah->ref); neigh->ah = path->ah; memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, @@ -450,6 +480,7 @@ static void path_rec_completion(int status, while ((skb = __skb_dequeue(&neigh->queue))) __skb_queue_tail(&skqueue, skb); } + path->valid = 1; } path->query = NULL; @@ -457,6 +488,9 @@ static void path_rec_completion(int status, spin_unlock_irqrestore(&priv->lock, flags); + if (old_ah) + ipoib_put_ah(old_ah); + while ((skb = __skb_dequeue(&skqueue))) { skb->dev = dev; if (dev_queue_xmit(skb)) @@ -630,8 +664,9 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, spin_lock(&priv->lock); path = __path_find(dev, phdr->hwaddr + 4); - if (!path) { - path = path_rec_create(dev, phdr->hwaddr + 4); + if (!path || !path->valid) { + if (!path) + path = path_rec_create(dev, phdr->hwaddr + 4); if (path) { /* put pseudoheader back on for next time */ skb_push(skb, sizeof *phdr); @@ -1046,9 +1081,10 @@ static void ipoib_setup(struct net_device *dev) INIT_LIST_HEAD(&priv->multicast_list); INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); - INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event); INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); - INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush); + INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); + INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal); + INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 7b8fa36f509b..96f9aa79cbbe 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -290,15 +290,17 @@ void ipoib_event(struct ib_event_handler *handler, if (record->element.port_num != priv->port) return; - if (record->event == IB_EVENT_PORT_ERR || - record->event == IB_EVENT_PORT_ACTIVE || - record->event == IB_EVENT_LID_CHANGE || - record->event == IB_EVENT_SM_CHANGE || + ipoib_dbg(priv, "Event %d on device %s port %d\n", record->event, + record->device->name, record->element.port_num); + + if (record->event == IB_EVENT_SM_CHANGE || record->event == IB_EVENT_CLIENT_REREGISTER) { - ipoib_dbg(priv, "Port state change event\n"); - queue_work(ipoib_workqueue, &priv->flush_task); + queue_work(ipoib_workqueue, &priv->flush_light); + } else if (record->event == IB_EVENT_PORT_ERR || + record->event == IB_EVENT_PORT_ACTIVE || + record->event == IB_EVENT_LID_CHANGE) { + queue_work(ipoib_workqueue, &priv->flush_normal); } else if (record->event == IB_EVENT_PKEY_CHANGE) { - ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port); - queue_work(ipoib_workqueue, &priv->pkey_event_task); + queue_work(ipoib_workqueue, &priv->flush_heavy); } } From 1ff66e8c1faee7c2711b84b9c89e1c5fcd767839 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Jul 2008 23:48:49 -0700 Subject: [PATCH 31/52] RDMA/nes: Encapsulate logic nes_put_cqp_request() The iw_nes driver repeats the logic if (atomic_dec_and_test(&cqp_request->refcount)) { if (cqp_request->dynamic) { kfree(cqp_request); } else { spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); } } over and over. Wrap this up in functions nes_free_cqp_request() and nes_put_cqp_request() to simplify such code. In addition to making the source smaller and more readable, this shrinks the compiled code quite a bit: add/remove: 2/0 grow/shrink: 0/13 up/down: 164/-1692 (-1528) function old new delta nes_free_cqp_request - 147 +147 nes_put_cqp_request - 17 +17 nes_modify_qp 2316 2293 -23 nes_hw_modify_qp 737 657 -80 nes_dereg_mr 945 860 -85 flush_wqes 501 416 -85 nes_manage_apbvt 648 560 -88 nes_reg_mr 1117 1026 -91 nes_cqp_ce_handler 927 769 -158 nes_alloc_mw 1052 884 -168 nes_create_qp 5314 5141 -173 nes_alloc_fmr 2212 2035 -177 nes_destroy_cq 1097 918 -179 nes_create_cq 2787 2598 -189 nes_dealloc_mw 762 566 -196 Signed-off-by: Roland Dreier Acked-by: Faisal Latif --- drivers/infiniband/hw/nes/nes.h | 4 + drivers/infiniband/hw/nes/nes_hw.c | 60 ++------ drivers/infiniband/hw/nes/nes_utils.c | 24 ++++ drivers/infiniband/hw/nes/nes_verbs.c | 189 ++++---------------------- 4 files changed, 66 insertions(+), 211 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 61b46e9c7d2d..fe88bec48949 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -538,6 +538,10 @@ void nes_read_1G_phy_reg(struct nes_device *, u8, u8, u16 *); void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16, u16); void nes_read_10G_phy_reg(struct nes_device *, u8, u8, u16); struct nes_cqp_request *nes_get_cqp_request(struct nes_device *); +void nes_free_cqp_request(struct nes_device *nesdev, + struct nes_cqp_request *cqp_request); +void nes_put_cqp_request(struct nes_device *nesdev, + struct nes_cqp_request *cqp_request); void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *, int); int nes_arp_table(struct nes_device *, u32, u8 *, u32); void nes_mh_fix(unsigned long); diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index d3278f111ca7..80e486653ec9 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -2710,39 +2710,11 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) barrier(); cqp_request->request_done = 1; wake_up(&cqp_request->waitq); - if (atomic_dec_and_test(&cqp_request->refcount)) { - nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n", - cqp_request, - le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f); - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } - } else if (cqp_request->callback) { - /* Envoke the callback routine */ - cqp_request->cqp_callback(nesdev, cqp_request); - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } + nes_put_cqp_request(nesdev, cqp_request); } else { - nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n", - cqp_request, - le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f); - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } + if (cqp_request->callback) + cqp_request->cqp_callback(nesdev, cqp_request); + nes_free_cqp_request(nesdev, cqp_request); } } else { wake_up(&nesdev->cqp.waitq); @@ -3149,7 +3121,6 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port, { struct nes_device *nesdev = nesvnic->nesdev; struct nes_hw_cqp_wqe *cqp_wqe; - unsigned long flags; struct nes_cqp_request *cqp_request; int ret = 0; u16 major_code; @@ -3184,15 +3155,9 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port, nes_debug(NES_DBG_QP, "Completed, ret=%u, CQP Major:Minor codes = 0x%04X:0x%04X\n", ret, cqp_request->major_code, cqp_request->minor_code); major_code = cqp_request->major_code; - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + + nes_put_cqp_request(nesdev, cqp_request); + if (!ret) return -ETIME; else if (major_code) @@ -3262,7 +3227,6 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr, void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp, u32 which_wq, u32 wait_completion) { - unsigned long flags; struct nes_cqp_request *cqp_request; struct nes_hw_cqp_wqe *cqp_wqe; int ret; @@ -3294,14 +3258,6 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp, nes_debug(NES_DBG_QP, "Flush SQ QP WQEs completed, ret=%u," " CQP Major:Minor codes = 0x%04X:0x%04X\n", ret, cqp_request->major_code, cqp_request->minor_code); - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + nes_put_cqp_request(nesdev, cqp_request); } } diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index fe83d1b2b177..8f519c860872 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -567,6 +567,30 @@ struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev) return cqp_request; } +void nes_free_cqp_request(struct nes_device *nesdev, + struct nes_cqp_request *cqp_request) +{ + unsigned long flags; + + nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n", + cqp_request, + le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f); + + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } +} + +void nes_put_cqp_request(struct nes_device *nesdev, + struct nes_cqp_request *cqp_request) +{ + if (atomic_dec_and_test(&cqp_request->refcount)) + nes_free_cqp_request(nesdev, cqp_request); +} /** * nes_post_cqp_request diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index d617da9bd351..464a98a6e119 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -55,7 +55,6 @@ static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev); * nes_alloc_mw */ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { - unsigned long flags; struct nes_pd *nespd = to_nespd(ibpd); struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); struct nes_device *nesdev = nesvnic->nesdev; @@ -128,15 +127,7 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { " CQP Major:Minor codes = 0x%04X:0x%04X.\n", stag, ret, cqp_request->major_code, cqp_request->minor_code); if ((!ret) || (cqp_request->major_code)) { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + nes_put_cqp_request(nesdev, cqp_request); kfree(nesmr); nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); if (!ret) { @@ -144,17 +135,8 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { } else { return ERR_PTR(-ENOMEM); } - } else { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } } + nes_put_cqp_request(nesdev, cqp_request); nesmr->ibmw.rkey = stag; nesmr->mode = IWNES_MEMREG_TYPE_MW; @@ -178,7 +160,6 @@ static int nes_dealloc_mw(struct ib_mw *ibmw) struct nes_hw_cqp_wqe *cqp_wqe; struct nes_cqp_request *cqp_request; int err = 0; - unsigned long flags; int ret; /* Deallocate the window with the adapter */ @@ -204,32 +185,12 @@ static int nes_dealloc_mw(struct ib_mw *ibmw) nes_debug(NES_DBG_MR, "Deallocate STag completed, wait_event_timeout ret = %u," " CQP Major:Minor codes = 0x%04X:0x%04X.\n", ret, cqp_request->major_code, cqp_request->minor_code); - if ((!ret) || (cqp_request->major_code)) { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } - if (!ret) { - err = -ETIME; - } else { - err = -EIO; - } - } else { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } - } + if (!ret) + err = -ETIME; + else if (cqp_request->major_code) + err = -EIO; + + nes_put_cqp_request(nesdev, cqp_request); nes_free_resource(nesadapter, nesadapter->allocated_mrs, (ibmw->rkey & 0x0fffff00) >> 8); @@ -526,29 +487,11 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, stag, ret, cqp_request->major_code, cqp_request->minor_code); if ((!ret) || (cqp_request->major_code)) { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + nes_put_cqp_request(nesdev, cqp_request); ret = (!ret) ? -ETIME : -EIO; goto failed_leaf_vpbl_pages_alloc; - } else { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } } - + nes_put_cqp_request(nesdev, cqp_request); nesfmr->nesmr.ibfmr.lkey = stag; nesfmr->nesmr.ibfmr.rkey = stag; nesfmr->attr = *ibfmr_attr; @@ -1487,15 +1430,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, nesqp->hwqp.qp_id, ret, nesdev->cqp.sq_head, nesdev->cqp.sq_tail, cqp_request->major_code, cqp_request->minor_code); if ((!ret) || (cqp_request->major_code)) { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + nes_put_cqp_request(nesdev, cqp_request); nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); nes_free_qp_mem(nesdev, nesqp,virt_wqs); kfree(nesqp->allocated_buffer); @@ -1504,18 +1439,10 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, } else { return ERR_PTR(-EIO); } - } else { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } } + nes_put_cqp_request(nesdev, cqp_request); + if (ibpd->uobject) { uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index; uresp.actual_sq_size = sq_size; @@ -1827,32 +1754,15 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, nes_debug(NES_DBG_CQ, "Create iWARP CQ%u completed, wait_event_timeout ret = %d.\n", nescq->hw_cq.cq_number, ret); if ((!ret) || (cqp_request->major_code)) { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + nes_put_cqp_request(nesdev, cqp_request); if (!context) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); kfree(nescq); return ERR_PTR(-EIO); - } else { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } } + nes_put_cqp_request(nesdev, cqp_request); if (context) { /* free the nespbl */ @@ -1942,37 +1852,18 @@ static int nes_destroy_cq(struct ib_cq *ib_cq) " CQP Major:Minor codes = 0x%04X:0x%04X.\n", nescq->hw_cq.cq_number, ret, cqp_request->major_code, cqp_request->minor_code); - if ((!ret) || (cqp_request->major_code)) { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } - if (!ret) { - nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy timeout expired\n", + if (!ret) { + nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy timeout expired\n", nescq->hw_cq.cq_number); - ret = -ETIME; - } else { - nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy failed\n", + ret = -ETIME; + } else if (cqp_request->major_code) { + nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy failed\n", nescq->hw_cq.cq_number); - ret = -EIO; - } + ret = -EIO; } else { ret = 0; - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } } + nes_put_cqp_request(nesdev, cqp_request); if (nescq->cq_mem_size) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, @@ -2105,15 +1996,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, " CQP Major:Minor codes = 0x%04X:0x%04X.\n", stag, ret, cqp_request->major_code, cqp_request->minor_code); major_code = cqp_request->major_code; - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + nes_put_cqp_request(nesdev, cqp_request); + if (!ret) return -ETIME; else if (major_code) @@ -2771,15 +2655,9 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) major_code = cqp_request->major_code; minor_code = cqp_request->minor_code; - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + + nes_put_cqp_request(nesdev, cqp_request); + if (!ret) { nes_debug(NES_DBG_MR, "Timeout waiting to destroy STag," " ib_mr=%p, rkey = 0x%08X\n", @@ -2904,7 +2782,6 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, /* struct iw_cm_id *cm_id = nesqp->cm_id; */ /* struct iw_cm_event cm_event; */ struct nes_cqp_request *cqp_request; - unsigned long flags; int ret; u16 major_code; @@ -2950,15 +2827,9 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, nesqp->hwqp.qp_id, cqp_request->major_code, cqp_request->minor_code, next_iwarp_state); } - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + + nes_put_cqp_request(nesdev, cqp_request); + if (!ret) return -ETIME; else if (major_code) From 52c8084b740c42af27d5bfa62cec7079d12fbc2b Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Mon, 14 Jul 2008 23:48:49 -0700 Subject: [PATCH 32/52] RDMA/cxgb3: Propagate HW page size capabilities cxgb3 does not currently report the page size capabilities, and incorrectly reports them internally. This version changes the bit-shifting to a static value (per Steve's request). Signed-off-by: Jon Mason Acked-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/cxio_hal.h | 1 + drivers/infiniband/hw/cxgb3/iwch.c | 2 +- drivers/infiniband/hw/cxgb3/iwch_provider.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h index a9ff32c36211..656fe47bc84f 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -55,6 +55,7 @@ #define T3_MAX_CQ_DEPTH 8192 #define T3_MAX_NUM_STAG (1<<15) #define T3_MAX_MR_SIZE 0x100000000ULL +#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ #define T3_STAG_UNSET 0xffffffff diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 06086256ca1d..4489c89d6710 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -80,7 +80,7 @@ static void rnic_init(struct iwch_dev *rnicp) rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev); rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE; rnicp->attr.max_pds = T3_MAX_NUM_PD - 1; - rnicp->attr.mem_pgsizes_bitmask = 0x7FFF; /* 4KB-128MB */ + rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK; rnicp->attr.max_mr_size = T3_MAX_MR_SIZE; rnicp->attr.can_resize_wq = 0; rnicp->attr.max_rdma_reads_per_qp = 8; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 18a6609f5e01..249d99f4a3cb 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1130,6 +1130,7 @@ static int iwch_query_device(struct ib_device *ibdev, props->hw_ver = dev->rdev.t3cdev_p->type; props->fw_ver = fw_vers_string_to_u64(dev); props->device_cap_flags = dev->device_cap_flags; + props->page_size_cap = dev->attr.mem_pgsizes_bitmask; props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor; props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device; props->max_mr_size = dev->attr.max_mr_size; From 8294f29767c53e97664a27db9974adea8e2ea95b Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Jul 2008 23:48:49 -0700 Subject: [PATCH 33/52] RDMA/nes: Get rid of ring_doorbell parameter of nes_post_cqp_request() Every caller of nes_post_cqp_request() passed it NES_CQP_REQUEST_RING_DOORBELL, so just remove that parameter and always ring the doorbell. Signed-off-by: Roland Dreier Acked-by: Faisal Latif --- drivers/infiniband/hw/nes/nes.c | 2 +- drivers/infiniband/hw/nes/nes.h | 5 +---- drivers/infiniband/hw/nes/nes_hw.c | 6 +++--- drivers/infiniband/hw/nes/nes_hw.h | 2 +- drivers/infiniband/hw/nes/nes_utils.c | 9 ++++----- drivers/infiniband/hw/nes/nes_verbs.c | 18 +++++++++--------- 6 files changed, 19 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index a4e9269a29bd..d2884e778098 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -328,7 +328,7 @@ void nes_rem_ref(struct ib_qp *ibqp) set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); u64temp = (u64)nesqp->nesqp_context_pbase; set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); } } diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index fe88bec48949..39bd897b40c6 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -94,9 +94,6 @@ #define MAX_DPC_ITERATIONS 128 -#define NES_CQP_REQUEST_NO_DOORBELL_RING 0 -#define NES_CQP_REQUEST_RING_DOORBELL 1 - #define NES_DRV_OPT_ENABLE_MPA_VER_0 0x00000001 #define NES_DRV_OPT_DISABLE_MPA_CRC 0x00000002 #define NES_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004 @@ -542,7 +539,7 @@ void nes_free_cqp_request(struct nes_device *nesdev, struct nes_cqp_request *cqp_request); void nes_put_cqp_request(struct nes_device *nesdev, struct nes_cqp_request *cqp_request); -void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *, int); +void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *); int nes_arp_table(struct nes_device *, u32, u8 *, u32); void nes_mh_fix(unsigned long); void nes_clc(unsigned long); diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 80e486653ec9..902b1375a5d8 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -3147,7 +3147,7 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port, nes_debug(NES_DBG_QP, "Waiting for CQP completion for APBVT.\n"); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); if (add_port == NES_MANAGE_APBVT_ADD) ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), @@ -3217,7 +3217,7 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr, nesdev->cqp.sq_head, nesdev->cqp.sq_tail); atomic_set(&cqp_request->refcount, 1); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); } @@ -3249,7 +3249,7 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp, cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq); cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); if (wait_completion) { /* Wait for CQP */ diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 745bf94f3f07..7b81e0ae0076 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -1172,7 +1172,7 @@ struct nes_vnic { u32 mcrq_qp_id; struct nes_ucontext *mcrq_ucontext; struct nes_cqp_request* (*get_cqp_request)(struct nes_device *nesdev); - void (*post_cqp_request)(struct nes_device*, struct nes_cqp_request *, int); + void (*post_cqp_request)(struct nes_device*, struct nes_cqp_request *); int (*mcrq_mcast_filter)( struct nes_vnic* nesvnic, __u8* dmi_addr ); struct net_device_stats netstats; /* used to put the netdev on the adapters logical port list */ diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index 8f519c860872..fb8cbd71a2ef 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -596,7 +596,7 @@ void nes_put_cqp_request(struct nes_device *nesdev, * nes_post_cqp_request */ void nes_post_cqp_request(struct nes_device *nesdev, - struct nes_cqp_request *cqp_request, int ring_doorbell) + struct nes_cqp_request *cqp_request) { struct nes_hw_cqp_wqe *cqp_wqe; unsigned long flags; @@ -624,10 +624,9 @@ void nes_post_cqp_request(struct nes_device *nesdev, nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size, cqp_request->waiting, atomic_read(&cqp_request->refcount)); barrier(); - if (ring_doorbell) { - /* Ring doorbell (1 WQEs) */ - nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id); - } + + /* Ring doorbell (1 WQEs) */ + nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id); barrier(); } else { diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 464a98a6e119..e3939d13484e 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -118,7 +118,7 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), @@ -175,7 +175,7 @@ static int nes_dealloc_mw(struct ib_mw *ibmw) set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ibmw->rkey); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X to complete.\n", @@ -477,7 +477,7 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, (nesfmr->nesmr.pbls_used-1) : nesfmr->nesmr.pbls_used); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), @@ -1417,7 +1417,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ nes_debug(NES_DBG_QP, "Waiting for create iWARP QP%u to complete.\n", @@ -1744,7 +1744,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ nes_debug(NES_DBG_CQ, "Waiting for create iWARP CQ%u to complete.\n", @@ -1841,7 +1841,7 @@ static int nes_destroy_cq(struct ib_cq *ib_cq) (nescq->hw_cq.cq_number | ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 16))); nes_free_resource(nesadapter, nesadapter->allocated_cqs, nescq->hw_cq.cq_number); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ nes_debug(NES_DBG_CQ, "Waiting for destroy iWARP CQ%u to complete.\n", @@ -1987,7 +1987,7 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, barrier(); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), @@ -2638,7 +2638,7 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ib_mr->rkey); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X completed\n", ib_mr->rkey); @@ -2809,7 +2809,7 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ if (wait_completion) { From 70fe1796a5ebc5f955be39bba5c42eee9eb89e1f Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 14 Jul 2008 23:48:49 -0700 Subject: [PATCH 34/52] RDMA/cxgb3: Set rkey field for new memory windows in iwch_alloc_mw() Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_provider.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 249d99f4a3cb..c9a3893b38e8 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -748,6 +748,7 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd) mhp->attr.type = TPT_MW; mhp->attr.stag = stag; mmid = (stag) >> 8; + mhp->ibmw.rkey = stag; insert_handle(rhp, &rhp->mmidr, mhp, mmid); PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); return &(mhp->ibmw); From c03d4731b5b6de45b95a10bf1d510dde423d6757 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:50 -0700 Subject: [PATCH 35/52] IPoIB: Remove unused IPOIB_MCAST_STARTED code The IPOIB_MCAST_STARTED flag is not used at all since commit b3e2749b ("IPoIB: Don't drop multicast sends when they can be queued"), so remove it. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 - drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 8 -------- 2 files changed, 9 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index bb19587c5eaf..66a897567ea9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -89,7 +89,6 @@ enum { IPOIB_FLAG_SUBINTERFACE = 5, IPOIB_MCAST_RUN = 6, IPOIB_STOP_REAPER = 7, - IPOIB_MCAST_STARTED = 8, IPOIB_FLAG_ADMIN_CM = 9, IPOIB_FLAG_UMCAST = 10, IPOIB_FLAG_CSUM = 11, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 4a6538b9301a..0b7d129161e1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -592,10 +592,6 @@ int ipoib_mcast_start_thread(struct net_device *dev) queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0); mutex_unlock(&mcast_mutex); - spin_lock_irq(&priv->lock); - set_bit(IPOIB_MCAST_STARTED, &priv->flags); - spin_unlock_irq(&priv->lock); - return 0; } @@ -605,10 +601,6 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush) ipoib_dbg_mcast(priv, "stopping multicast thread\n"); - spin_lock_irq(&priv->lock); - clear_bit(IPOIB_MCAST_STARTED, &priv->flags); - spin_unlock_irq(&priv->lock); - mutex_lock(&mcast_mutex); clear_bit(IPOIB_MCAST_RUN, &priv->flags); cancel_delayed_work(&priv->mcast_task); From 5892eff91ad60ba365ae7f75050ce464036c5396 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:50 -0700 Subject: [PATCH 36/52] IPoIB: Remove priv->mcast_mutex No need for a mutex around calls to ib_attach_mcast/ib_detach_mcast since these operations are synchronized at the HW driver layer. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 - drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 - drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 4 ---- 3 files changed, 6 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 66a897567ea9..b8753222c870 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -277,7 +277,6 @@ struct ipoib_dev_priv { unsigned long flags; - struct mutex mcast_mutex; struct mutex vlan_mutex; struct rb_root path_tree; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index b3fd7e8333cf..8be9ea0436e6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1072,7 +1072,6 @@ static void ipoib_setup(struct net_device *dev) spin_lock_init(&priv->lock); spin_lock_init(&priv->tx_lock); - mutex_init(&priv->mcast_mutex); mutex_init(&priv->vlan_mutex); INIT_LIST_HEAD(&priv->path_list); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 96f9aa79cbbe..f50ebe0643ef 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -61,9 +61,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) } /* attach QP to multicast group */ - mutex_lock(&priv->mcast_mutex); ret = ib_attach_mcast(priv->qp, mgid, mlid); - mutex_unlock(&priv->mcast_mutex); if (ret) ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret); @@ -77,9 +75,7 @@ int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid) struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; - mutex_lock(&priv->mcast_mutex); ret = ib_detach_mcast(priv->qp, mgid, mlid); - mutex_unlock(&priv->mcast_mutex); if (ret) ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret); From d0de13622d5ac658efe7c51521dbdbe0752aa3dd Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:50 -0700 Subject: [PATCH 37/52] IPoIB: Only set Q_Key once: after joining broadcast group The current code will set the Q_Key for any join of a non-sendonly multicast group. The operation involves a modify QP operation, which is fairly heavyweight, and is only really required after the join of the broadcast group. Fix this by adding a parameter to ipoib_mcast_attach() to control when the Q_Key is set. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 +- .../infiniband/ulp/ipoib/ipoib_multicast.c | 4 ++- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 28 ++++++++++--------- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index b8753222c870..7b46e2d7b3c2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -485,7 +485,7 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter, #endif int ipoib_mcast_attach(struct net_device *dev, u16 mlid, - union ib_gid *mgid); + union ib_gid *mgid, int set_qkey); int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 0b7d129161e1..55ebd950bf23 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -186,6 +186,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_ah *ah; int ret; + int set_qkey = 0; mcast->mcmember = *mcmember; @@ -200,6 +201,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); spin_unlock_irq(&priv->lock); priv->tx_wr.wr.ud.remote_qkey = priv->qkey; + set_qkey = 1; } if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { @@ -212,7 +214,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, } ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid), - &mcast->mcmember.mgid); + &mcast->mcmember.mgid, set_qkey); if (ret < 0) { ipoib_warn(priv, "couldn't attach QP to multicast group " IPOIB_GID_FMT "\n", diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index f50ebe0643ef..ba7c8868e6f7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -33,18 +33,13 @@ #include "ipoib.h" -int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) +int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ib_qp_attr *qp_attr; + struct ib_qp_attr *qp_attr = NULL; int ret; u16 pkey_index; - ret = -ENOMEM; - qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); - if (!qp_attr) - goto out; - if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) { clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); ret = -ENXIO; @@ -52,12 +47,19 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) } set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); - /* set correct QKey for QP */ - qp_attr->qkey = priv->qkey; - ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY); - if (ret) { - ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret); - goto out; + if (set_qkey) { + ret = -ENOMEM; + qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); + if (!qp_attr) + goto out; + + /* set correct QKey for QP */ + qp_attr->qkey = priv->qkey; + ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY); + if (ret) { + ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret); + goto out; + } } /* attach QP to multicast group */ From 9eae554c171e086c89ab83da2a2d3c8bf958fcb5 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Jul 2008 23:48:50 -0700 Subject: [PATCH 38/52] IPoIB: Get rid of ipoib_mcast_detach() wrapper ipoib_mcast_detach() does nothing except call ib_detach_mcast(), so just use the core API in the one place that does a multicast group detach. add/remove: 0/1 grow/shrink: 0/1 up/down: 0/-105 (-105) function old new delta ipoib_mcast_leave 357 319 -38 ipoib_mcast_detach 67 - -67 Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 -- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 6 +++--- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 12 ------------ 3 files changed, 3 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 7b46e2d7b3c2..a89b9fbe1ef4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -486,8 +486,6 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter, int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey); -int ipoib_mcast_detach(struct net_device *dev, u16 mlid, - union ib_gid *mgid); int ipoib_init_qp(struct net_device *dev); int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 55ebd950bf23..71add7a8d53c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -627,10 +627,10 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) IPOIB_GID_ARG(mcast->mcmember.mgid)); /* Remove ourselves from the multicast group */ - ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), - &mcast->mcmember.mgid); + ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid, + be16_to_cpu(mcast->mcmember.mlid)); if (ret) - ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret); + ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret); } return 0; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index ba7c8868e6f7..68325119f740 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -72,18 +72,6 @@ out: return ret; } -int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid) -{ - struct ipoib_dev_priv *priv = netdev_priv(dev); - int ret; - - ret = ib_detach_mcast(priv->qp, mgid, mlid); - if (ret) - ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret); - - return ret; -} - int ipoib_init_qp(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); From c8c2afe360b7366f586f6bece1109a72ea334876 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:51 -0700 Subject: [PATCH 39/52] IPoIB: Use rtnl lock/unlock when changing device flags Use of this lock is required to synchronize changes to the netdvice's data structs. Also move the call to ipoib_flush_paths() after the modification of the netdevice flags in set_mode(). Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 8 ++++++-- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 5 ++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 37bf67b2a26f..b4269139135b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1440,7 +1440,9 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, ipoib_warn(priv, "enabling connected mode " "will cause multicast packet drops\n"); + rtnl_lock(); dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO); + rtnl_unlock(); priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; ipoib_flush_paths(dev); @@ -1449,14 +1451,16 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, if (!strcmp(buf, "datagram\n")) { clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); - dev->mtu = min(priv->mcast_mtu, dev->mtu); - ipoib_flush_paths(dev); + rtnl_lock(); if (test_bit(IPOIB_FLAG_CSUM, &priv->flags)) { dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; if (priv->hca_caps & IB_DEVICE_UD_TSO) dev->features |= NETIF_F_TSO; } + dev->mtu = min(priv->mcast_mtu, dev->mtu); + rtnl_unlock(); + ipoib_flush_paths(dev); return count; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 71add7a8d53c..be1ed38cdcfd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -575,8 +575,11 @@ void ipoib_mcast_join_task(struct work_struct *work) priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); - if (!ipoib_cm_admin_enabled(dev)) + if (!ipoib_cm_admin_enabled(dev)) { + rtnl_lock(); dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); + rtnl_unlock(); + } ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); From bd3606715effbf37df986548c43bbed0842b49d5 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:51 -0700 Subject: [PATCH 40/52] IPoIB: Use dev_set_mtu() to change mtu When the driver sets the MTU of the net device outside of its change_mtu method, it should make use of dev_set_mtu() instead of directly setting the mtu field of struct netdevice. Otherwise functions registered to be called upon MTU change will not get called (this is done through call_netdevice_notifiers() in dev_set_mtu()). Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index b4269139135b..87f9f3ef3b2d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1458,7 +1458,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, if (priv->hca_caps & IB_DEVICE_UD_TSO) dev->features |= NETIF_F_TSO; } - dev->mtu = min(priv->mcast_mtu, dev->mtu); + dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu)); rtnl_unlock(); ipoib_flush_paths(dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index be1ed38cdcfd..1fcc9a898d81 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -577,7 +577,7 @@ void ipoib_mcast_join_task(struct work_struct *work) if (!ipoib_cm_admin_enabled(dev)) { rtnl_lock(); - dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); + dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu)); rtnl_unlock(); } From df8666198dd058b9498ebdbc52c61957206d30a5 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Mon, 14 Jul 2008 23:48:52 -0700 Subject: [PATCH 41/52] IB/ipath: Use IEEE OUI for vendor_id reported by ibv_query_device() The IB spe. for SubnGet(NodeInfo) and query HCA says that the vendor ID field should be the IEEE OUI assigned to the vendor. The ipath driver was returning the PCI vendor ID instead. This will affect applications which call ibv_query_device(). The old value was 0x001fc1 or 0x001077, the new value is 0x001175. The vendor ID doesn't appear to be exported via /sys so that should reduce possible compatibility issues. I'm only aware of Open MPI as a major application which depends on this change, and they have made necessary adjustments. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_mad.c | 6 +++--- drivers/infiniband/hw/ipath/ipath_verbs.c | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index 5f9315d77a43..be4fc9ada8e7 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -111,9 +111,9 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp, nip->revision = cpu_to_be32((majrev << 16) | minrev); nip->local_port_num = port; vendor = dd->ipath_vendorid; - nip->vendor_id[0] = 0; - nip->vendor_id[1] = vendor >> 8; - nip->vendor_id[2] = vendor; + nip->vendor_id[0] = IPATH_SRC_OUI_1; + nip->vendor_id[1] = IPATH_SRC_OUI_2; + nip->vendor_id[2] = IPATH_SRC_OUI_3; return reply(smp); } diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 7779165b2c2c..9e23ab0b51a1 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -1497,7 +1497,8 @@ static int ipath_query_device(struct ib_device *ibdev, IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; props->page_size_cap = PAGE_SIZE; - props->vendor_id = dev->dd->ipath_vendorid; + props->vendor_id = + IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3; props->vendor_part_id = dev->dd->ipath_deviceid; props->hw_ver = dev->dd->ipath_pcirev; From e112373fd6aa280bd2cbc0d5cc3809115325a1be Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:52 -0700 Subject: [PATCH 42/52] IPoIB/cm: Reduce connected mode TX object size Since IPoIB connected mode does not NETIF_F_SG, we only have one DMA mapping per send, so we don't need a mapping[] array. Define a new struct with a single u64 mapping member and use it for the CM tx_ring. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 7 ++++++- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 12 ++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index a89b9fbe1ef4..0281c8fecc90 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -157,6 +157,11 @@ struct ipoib_tx_buf { u64 mapping[MAX_SKB_FRAGS + 1]; }; +struct ipoib_cm_tx_buf { + struct sk_buff *skb; + u64 mapping; +}; + struct ib_cm_id; struct ipoib_cm_data { @@ -215,7 +220,7 @@ struct ipoib_cm_tx { struct net_device *dev; struct ipoib_neigh *neigh; struct ipoib_path *path; - struct ipoib_tx_buf *tx_ring; + struct ipoib_cm_tx_buf *tx_ring; unsigned tx_head; unsigned tx_tail; unsigned long flags; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 87f9f3ef3b2d..0f2d3045061a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -703,7 +703,7 @@ static inline int post_send(struct ipoib_dev_priv *priv, void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_tx_buf *tx_req; + struct ipoib_cm_tx_buf *tx_req; u64 addr; if (unlikely(skb->len > tx->mtu)) { @@ -734,7 +734,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ return; } - tx_req->mapping[0] = addr; + tx_req->mapping = addr; if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), addr, skb->len))) { @@ -759,7 +759,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_cm_tx *tx = wc->qp->qp_context; unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM; - struct ipoib_tx_buf *tx_req; + struct ipoib_cm_tx_buf *tx_req; unsigned long flags; ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n", @@ -773,7 +773,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) tx_req = &tx->tx_ring[wr_id]; - ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, DMA_TO_DEVICE); + ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE); /* FIXME: is this right? Shouldn't we only increment on success? */ ++dev->stats.tx_packets; @@ -1143,7 +1143,7 @@ err_tx: static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) { struct ipoib_dev_priv *priv = netdev_priv(p->dev); - struct ipoib_tx_buf *tx_req; + struct ipoib_cm_tx_buf *tx_req; unsigned long flags; unsigned long begin; @@ -1171,7 +1171,7 @@ timeout: while ((int) p->tx_tail - (int) p->tx_head < 0) { tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; - ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, + ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE); dev_kfree_skb_any(tx_req->skb); ++p->tx_tail; From bc3a290b51aaefc6a6af2d6e6d52ed32387c416c Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:52 -0700 Subject: [PATCH 43/52] IPoIB: Double default RX/TX ring sizes Increase IPoIB ring sizes to twice their original sizes (RX: 128->256, TX: 64->128) to act as a shock absorber for high traffic peaks. With the current settings, we have seen cases that there are many calls to netif_stop_queue(), which causes degradation in throughput. Also, larger receive buffer sizes help IPoIB in CM mode to avoid experiencing RNR NAK conditions due to insufficient receive buffers at the SRQ. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 0281c8fecc90..b0ffc9abe8c0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -70,8 +70,8 @@ enum { IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE, - IPOIB_RX_RING_SIZE = 128, - IPOIB_TX_RING_SIZE = 64, + IPOIB_RX_RING_SIZE = 256, + IPOIB_TX_RING_SIZE = 128, IPOIB_MAX_QUEUE_SIZE = 8192, IPOIB_MIN_QUEUE_SIZE = 2, IPOIB_CM_MAX_CONN_QP = 4096, From 4522e08ced48baaf28990e2674e940aae9940310 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Jul 2008 23:48:52 -0700 Subject: [PATCH 44/52] IB/mthca: Remove "stop" flag for catastrophic error polling timer Since we use del_timer_sync() anyway, there's no need for an additional flag to tell the timer not to rearm. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_catas.c | 15 ++------------- drivers/infiniband/hw/mthca/mthca_dev.h | 1 - 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index 40573e48439c..50e2792e60c1 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -126,7 +126,6 @@ static void handle_catas(struct mthca_dev *dev) static void poll_catas(unsigned long dev_ptr) { struct mthca_dev *dev = (struct mthca_dev *) dev_ptr; - unsigned long flags; int i; for (i = 0; i < dev->catas_err.size; ++i) @@ -135,13 +134,8 @@ static void poll_catas(unsigned long dev_ptr) return; } - spin_lock_irqsave(&catas_lock, flags); - if (!dev->catas_err.stop) - mod_timer(&dev->catas_err.timer, - jiffies + MTHCA_CATAS_POLL_INTERVAL); - spin_unlock_irqrestore(&catas_lock, flags); - - return; + mod_timer(&dev->catas_err.timer, + jiffies + MTHCA_CATAS_POLL_INTERVAL); } void mthca_start_catas_poll(struct mthca_dev *dev) @@ -149,7 +143,6 @@ void mthca_start_catas_poll(struct mthca_dev *dev) unsigned long addr; init_timer(&dev->catas_err.timer); - dev->catas_err.stop = 0; dev->catas_err.map = NULL; addr = pci_resource_start(dev->pdev, 0) + @@ -180,10 +173,6 @@ void mthca_start_catas_poll(struct mthca_dev *dev) void mthca_stop_catas_poll(struct mthca_dev *dev) { - spin_lock_irq(&catas_lock); - dev->catas_err.stop = 1; - spin_unlock_irq(&catas_lock); - del_timer_sync(&dev->catas_err.timer); if (dev->catas_err.map) { diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 2997d8d564ea..ee4d073c889f 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -277,7 +277,6 @@ struct mthca_mcg_table { struct mthca_catas_err { u64 addr; u32 __iomem *map; - unsigned long stop; u32 size; struct timer_list timer; struct list_head list; From c036925ac0e940a4e8525b08e89d2c64fe282c51 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Jul 2008 23:48:52 -0700 Subject: [PATCH 45/52] IB/mthca: Use round_jiffies() for catastrophic error polling timer Exactly when the catastrophic error polling timer function runs is not important, so use round_jiffies() to save unnecessary wakeups. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_catas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index 50e2792e60c1..cc440f90000b 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -135,7 +135,7 @@ static void poll_catas(unsigned long dev_ptr) } mod_timer(&dev->catas_err.timer, - jiffies + MTHCA_CATAS_POLL_INTERVAL); + round_jiffies(jiffies + MTHCA_CATAS_POLL_INTERVAL)); } void mthca_start_catas_poll(struct mthca_dev *dev) From aed012279d35e88e29fd55737d8821604433f50a Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Jul 2008 23:48:52 -0700 Subject: [PATCH 46/52] IB/mthca: Fix check of max_send_sge for special QPs The MLX transport requires two extra gather entries for sends (one for the header and one for the checksum at the end, as the comment says). However the code checked that max_recv_sge was not too big, instead of checking max_send_sge as it should have. Fix the code to check the correct condition. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 77b52b3adcc7..f5081bfde6db 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1249,10 +1249,10 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, return -EINVAL; /* - * For MLX transport we need 2 extra S/G entries: + * For MLX transport we need 2 extra send gather entries: * one for the header and one for the checksum at the end */ - if (qp->transport == MLX && cap->max_recv_sge + 2 > dev->limits.max_sg) + if (qp->transport == MLX && cap->max_send_sge + 2 > dev->limits.max_sg) return -EINVAL; if (mthca_is_memfree(dev)) { From 96f15c03532282366364ecfd20f04e49b5d96f3a Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 14 Jul 2008 23:48:53 -0700 Subject: [PATCH 47/52] RDMA/core: Add local DMA L_Key support - Change the IB_DEVICE_ZERO_STAG flag to the transport-neutral name IB_DEVICE_LOCAL_DMA_LKEY, which is used by iWARP RNICs to indicate 0 STag support and IB HCAs to indicate reserved L_Key support. - Add a u32 local_dma_lkey member to struct ib_device. Drivers fill this in with the appropriate local DMA L_Key (if they support it). - Fix up the drivers using this flag. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/amso1100/c2_rnic.c | 2 +- drivers/infiniband/hw/cxgb3/iwch_provider.c | 6 ++++-- drivers/infiniband/hw/nes/nes_hw.c | 2 +- include/rdma/ib_verbs.h | 3 ++- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c index b1441aeb60c2..dd05c4835642 100644 --- a/drivers/infiniband/hw/amso1100/c2_rnic.c +++ b/drivers/infiniband/hw/amso1100/c2_rnic.c @@ -454,7 +454,7 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev) (IB_DEVICE_RESIZE_MAX_WR | IB_DEVICE_CURR_QP_STATE_MOD | IB_DEVICE_SYS_IMAGE_GUID | - IB_DEVICE_ZERO_STAG | + IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW); /* Allocate the qptr_array */ diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index c9a3893b38e8..7ecfd4d638c5 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1325,8 +1325,10 @@ int iwch_register_device(struct iwch_dev *dev) memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); dev->ibdev.owner = THIS_MODULE; - dev->device_cap_flags = IB_DEVICE_ZERO_STAG | - IB_DEVICE_MEM_WINDOW; + dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW; + + /* cxgb3 supports STag 0. */ + dev->ibdev.local_dma_lkey = 0; if (fw_supports_fastreg(dev)) dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 902b1375a5d8..85f26d19a32b 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -398,7 +398,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { nesadapter->base_pd = 1; nesadapter->device_cap_flags = - IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW; + IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW; nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter) [(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 07b41e05565a..90b529f7a154 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -91,7 +91,7 @@ enum ib_device_cap_flags { IB_DEVICE_RC_RNR_NAK_GEN = (1<<12), IB_DEVICE_SRQ_RESIZE = (1<<13), IB_DEVICE_N_NOTIFY_CQ = (1<<14), - IB_DEVICE_ZERO_STAG = (1<<15), + IB_DEVICE_LOCAL_DMA_LKEY = (1<<15), IB_DEVICE_RESERVED = (1<<16), /* old SEND_W_INV */ IB_DEVICE_MEM_WINDOW = (1<<17), /* @@ -1149,6 +1149,7 @@ struct ib_device { char node_desc[64]; __be64 node_guid; + u32 local_dma_lkey; u8 node_type; u8 phys_port_cnt; }; From 4ab928f69208d240d3681336f34589e4b151824f Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 14 Jul 2008 23:48:53 -0700 Subject: [PATCH 48/52] RDMA/cxgb3: Fixes for zero STag Handling the zero STag in receive work request requires some extra logic in the driver: - Only set the QP_PRIV bit for kernel mode QPs. - Add a zero STag build function for recv wrs. The uP needs a PBL allocated and passed down in the recv WR so it can construct a HW PBL for the zero STag S/G entries. Note: we need to place a few restrictions on zero STag usage because of this: 1) all SGEs in a recv WR must either be zero STag or not. No mixing. 2) an individual SGE length cannot exceed 128MB for a zero-stag SGE. This should be OK since it's not really practical to allocate such a large chunk of pinned contiguous DMA mapped memory. - Add an optimized non-zero-STag recv wr format for kernel users. This is needed to optimize both zero and non-zero STag cracking in the recv path for kernel users. - Remove the iwch_ prefix from the static build functions. - Bump required FW version. Signed-off-by: Steve Wise --- drivers/infiniband/hw/cxgb3/cxio_hal.c | 12 +- drivers/infiniband/hw/cxgb3/cxio_wr.h | 13 +- drivers/infiniband/hw/cxgb3/iwch_provider.c | 4 +- drivers/infiniband/hw/cxgb3/iwch_qp.c | 129 +++++++++++++++++--- drivers/net/cxgb3/version.h | 2 +- 5 files changed, 132 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index 340e4181c761..f6d5747153a5 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -278,7 +278,7 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, if (!wq->qpid) return -ENOMEM; - wq->rq = kzalloc(depth * sizeof(u64), GFP_KERNEL); + wq->rq = kzalloc(depth * sizeof(struct t3_swrq), GFP_KERNEL); if (!wq->rq) goto err1; @@ -302,6 +302,7 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, if (!kernel_domain) wq->udb = (u64)rdev_p->rnic_info.udbell_physbase + (wq->qpid << rdev_p->qpshift); + wq->rdev = rdev_p; PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __func__, wq->qpid, wq->doorbell, (unsigned long long) wq->udb); return 0; @@ -1266,13 +1267,16 @@ proc_cqe: wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe); PDBG("%s completing sq idx %ld\n", __func__, Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)); - *cookie = (wq->sq + - Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2))->wr_id; + *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id; wq->sq_rptr++; } else { PDBG("%s completing rq idx %ld\n", __func__, Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); - *cookie = *(wq->rq + Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); + *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id; + if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr) + cxio_hal_pblpool_free(wq->rdev, + wq->rq[Q_PTR2IDX(wq->rq_rptr, + wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE); wq->rq_rptr++; } diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index de760e9f1cc6..04618f7bfbb3 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -39,6 +39,9 @@ #define T3_MAX_SGE 4 #define T3_MAX_INLINE 64 +#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3) +#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024) +#define T3_STAG0_PAGE_SHIFT 15 #define Q_EMPTY(rptr,wptr) ((rptr)==(wptr)) #define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \ @@ -665,6 +668,11 @@ struct t3_swsq { int signaled; }; +struct t3_swrq { + __u64 wr_id; + __u32 pbl_addr; +}; + /* * A T3 WQ implements both the SQ and RQ. */ @@ -681,14 +689,15 @@ struct t3_wq { u32 sq_wptr; /* sq_wptr - sq_rptr == count of */ u32 sq_rptr; /* pending wrs */ u32 sq_size_log2; /* sq size */ - u64 *rq; /* SW RQ (holds consumer wr_ids */ + struct t3_swrq *rq; /* SW RQ (holds consumer wr_ids */ u32 rq_wptr; /* rq_wptr - rq_rptr == count of */ u32 rq_rptr; /* pending wrs */ - u64 *rq_oldest_wr; /* oldest wr on the SW RQ */ + struct t3_swrq *rq_oldest_wr; /* oldest wr on the SW RQ */ u32 rq_size_log2; /* rq size */ u32 rq_addr; /* rq adapter address */ void __iomem *doorbell; /* kernel db */ u64 udb; /* user db if any */ + struct cxio_rdev *rdev; }; struct t3_cq { diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 7ecfd4d638c5..b89640aa6e10 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1007,10 +1007,10 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, qhp->ibqp.qp_num = qhp->wq.qpid; init_timer(&(qhp->timer)); PDBG("%s sq_num_entries %d, rq_num_entries %d " - "qpid 0x%0x qhp %p dma_addr 0x%llx size %d\n", + "qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n", __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr, - 1 << qhp->wq.size_log2); + 1 << qhp->wq.size_log2, qhp->wq.rq_addr); return &qhp->ibqp; } diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 3b44300a3036..9a3be3a9d5dc 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -33,10 +33,11 @@ #include "iwch.h" #include "iwch_cm.h" #include "cxio_hal.h" +#include "cxio_resource.h" #define NO_SUPPORT -1 -static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, +static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, u8 * flit_cnt) { int i; @@ -81,7 +82,7 @@ static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, +static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, u8 *flit_cnt) { int i; @@ -122,7 +123,7 @@ static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, +static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, u8 *flit_cnt) { if (wr->num_sge > 1) @@ -143,7 +144,7 @@ static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static int iwch_build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr, +static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr, u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) { int i; @@ -185,7 +186,7 @@ static int iwch_build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static int iwch_build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr, +static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr, u8 *flit_cnt) { wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey); @@ -244,23 +245,106 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, return 0; } -static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe, +static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe, struct ib_recv_wr *wr) { - int i; - if (wr->num_sge > T3_MAX_SGE) - return -EINVAL; + int i, err = 0; + u32 pbl_addr[T3_MAX_SGE]; + u8 page_size[T3_MAX_SGE]; + + err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr, + page_size); + if (err) + return err; + wqe->recv.pagesz[0] = page_size[0]; + wqe->recv.pagesz[1] = page_size[1]; + wqe->recv.pagesz[2] = page_size[2]; + wqe->recv.pagesz[3] = page_size[3]; wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); for (i = 0; i < wr->num_sge; i++) { wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); - wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); + + /* to in the WQE == the offset into the page */ + wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) % + (1UL << (12 + page_size[i]))); + + /* pbl_addr is the adapters address in the PBL */ + wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]); } for (; i < T3_MAX_SGE; i++) { wqe->recv.sgl[i].stag = 0; wqe->recv.sgl[i].len = 0; wqe->recv.sgl[i].to = 0; + wqe->recv.pbl_addr[i] = 0; } + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].wr_id = wr->wr_id; + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].pbl_addr = 0; + return 0; +} + +static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe, + struct ib_recv_wr *wr) +{ + int i; + u32 pbl_addr; + u32 pbl_offset; + + + /* + * The T3 HW requires the PBL in the HW recv descriptor to reference + * a PBL entry. So we allocate the max needed PBL memory here and pass + * it to the uP in the recv WR. The uP will build the PBL and setup + * the HW recv descriptor. + */ + pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE); + if (!pbl_addr) + return -ENOMEM; + + /* + * Compute the 8B aligned offset. + */ + pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3; + + wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); + + for (i = 0; i < wr->num_sge; i++) { + + /* + * Use a 128MB page size. This and an imposed 128MB + * sge length limit allows us to require only a 2-entry HW + * PBL for each SGE. This restriction is acceptable since + * since it is not possible to allocate 128MB of contiguous + * DMA coherent memory! + */ + if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN) + return -EINVAL; + wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT; + + /* + * T3 restricts a recv to all zero-stag or all non-zero-stag. + */ + if (wr->sg_list[i].lkey != 0) + return -EINVAL; + wqe->recv.sgl[i].stag = 0; + wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); + wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); + wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_offset); + pbl_offset += 2; + } + for (; i < T3_MAX_SGE; i++) { + wqe->recv.pagesz[i] = 0; + wqe->recv.sgl[i].stag = 0; + wqe->recv.sgl[i].len = 0; + wqe->recv.sgl[i].to = 0; + wqe->recv.pbl_addr[i] = 0; + } + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].wr_id = wr->wr_id; + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].pbl_addr = pbl_addr; return 0; } @@ -312,18 +396,18 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, if (wr->send_flags & IB_SEND_FENCE) t3_wr_flags |= T3_READ_FENCE_FLAG; t3_wr_opcode = T3_WR_SEND; - err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt); + err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt); break; case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: t3_wr_opcode = T3_WR_WRITE; - err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt); + err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt); break; case IB_WR_RDMA_READ: case IB_WR_RDMA_READ_WITH_INV: t3_wr_opcode = T3_WR_READ; t3_wr_flags = 0; /* T3 reads are always signaled */ - err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt); + err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt); if (err) break; sqp->read_len = wqe->read.local_len; @@ -332,14 +416,14 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_WR_FAST_REG_MR: t3_wr_opcode = T3_WR_FASTREG; - err = iwch_build_fastreg(wqe, wr, &t3_wr_flit_cnt, + err = build_fastreg(wqe, wr, &t3_wr_flit_cnt, &wr_cnt, &qhp->wq); break; case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) t3_wr_flags |= T3_LOCAL_FENCE_FLAG; t3_wr_opcode = T3_WR_INV_STAG; - err = iwch_build_inv_stag(wqe, wr, &t3_wr_flit_cnt); + err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt); break; default: PDBG("%s post of type=%d TBD!\n", __func__, @@ -398,18 +482,24 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, return -EINVAL; } while (wr) { + if (wr->num_sge > T3_MAX_SGE) { + err = -EINVAL; + *bad_wr = wr; + break; + } idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); wqe = (union t3_wr *) (qhp->wq.queue + idx); if (num_wrs) - err = iwch_build_rdma_recv(qhp->rhp, wqe, wr); + if (wr->sg_list[0].lkey) + err = build_rdma_recv(qhp, wqe, wr); + else + err = build_zero_stag_recv(qhp, wqe, wr); else err = -ENOMEM; if (err) { *bad_wr = wr; break; } - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] = - wr->wr_id; build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG, Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP); @@ -810,7 +900,8 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); init_attr.rqe_count = iwch_rqes_posted(qhp); init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0; - init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0; + if (!qhp->ibqp.uobject) + init_attr.flags |= PRIV_QP; if (peer2peer) { init_attr.rtr_type = RTR_READ; if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator) diff --git a/drivers/net/cxgb3/version.h b/drivers/net/cxgb3/version.h index a0177fc55e28..29db711303b9 100644 --- a/drivers/net/cxgb3/version.h +++ b/drivers/net/cxgb3/version.h @@ -38,7 +38,7 @@ #define DRV_VERSION "1.0-ko" /* Firmware version */ -#define FW_VERSION_MAJOR 6 +#define FW_VERSION_MAJOR 7 #define FW_VERSION_MINOR 0 #define FW_VERSION_MICRO 0 #endif /* __CHELSIO_VERSION_H */ From 64c5e613b9dd34ef1281ed6d22478609667ae36a Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 14 Jul 2008 23:48:53 -0700 Subject: [PATCH 49/52] RDMA/addr: Keep pointer to netdevice in struct rdma_dev_addr Keep a pointer to the local (src) netdevice in struct rdma_dev_addr, and copy it in as part of rdma_copy_addr(). Use rdma_translate_ip() in cma_new_conn_id() to reduce some code duplication and also make sure the src_dev member gets set. In a high-availability configuration the netdevice pointer can be used by the RDMA CM to align RDMA sessions to use the same links as the IP stack does under fail-over and route change cases. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/addr.c | 1 + drivers/infiniband/core/cma.c | 8 +++++--- include/rdma/ib_addr.h | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index e4eb8be3bb0c..09a2bec7fd32 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -105,6 +105,7 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); if (dst_dev_addr) memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); + dev_addr->src_dev = dev; return 0; } EXPORT_SYMBOL(rdma_copy_addr); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 44d190f67810..5fb506a41776 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1002,6 +1002,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, union cma_ip_addr *src, *dst; __be16 port; u8 ip_ver; + int ret; if (cma_get_net_info(ib_event->private_data, listen_id->ps, &ip_ver, &port, &src, &dst)) @@ -1026,10 +1027,11 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, if (rt->num_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; - ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); - ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); - rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA; + ret = rdma_translate_ip(&id->route.addr.src_addr, + &id->route.addr.dev_addr); + if (ret) + goto destroy_id; id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = CMA_CONNECT; diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index b42bdd000419..483057b2f4b4 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -61,6 +61,7 @@ struct rdma_dev_addr { unsigned char dst_dev_addr[MAX_ADDR_LEN]; unsigned char broadcast[MAX_ADDR_LEN]; enum rdma_node_type dev_type; + struct net_device *src_dev; }; /** From de910bd92137005b5e1ecaf2ce68053d7d7d5350 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 14 Jul 2008 23:48:53 -0700 Subject: [PATCH 50/52] RDMA/cma: Simplify locking needed for serialization of callbacks The RDMA CM has some logic in place to make sure that callbacks on a given CM ID are delivered to the consumer in a serialized manner. Specifically it has code to protect against a device removal racing with a running callback function. This patch simplifies this logic by using a mutex per ID instead of a wait queue and atomic variable. This means that cma_disable_remove() now is more properly named to cma_disable_callback(), and cma_enable_remove() can now be removed because it just would become a trivial wrapper around mutex_unlock(). Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 106 ++++++++++++++++------------------ 1 file changed, 50 insertions(+), 56 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 5fb506a41776..ae11d5cc74d0 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -130,8 +130,7 @@ struct rdma_id_private { struct completion comp; atomic_t refcount; - wait_queue_head_t wait_remove; - atomic_t dev_remove; + struct mutex handler_mutex; int backlog; int timeout_ms; @@ -355,26 +354,15 @@ static void cma_deref_id(struct rdma_id_private *id_priv) complete(&id_priv->comp); } -static int cma_disable_remove(struct rdma_id_private *id_priv, +static int cma_disable_callback(struct rdma_id_private *id_priv, enum cma_state state) { - unsigned long flags; - int ret; - - spin_lock_irqsave(&id_priv->lock, flags); - if (id_priv->state == state) { - atomic_inc(&id_priv->dev_remove); - ret = 0; - } else - ret = -EINVAL; - spin_unlock_irqrestore(&id_priv->lock, flags); - return ret; -} - -static void cma_enable_remove(struct rdma_id_private *id_priv) -{ - if (atomic_dec_and_test(&id_priv->dev_remove)) - wake_up(&id_priv->wait_remove); + mutex_lock(&id_priv->handler_mutex); + if (id_priv->state != state) { + mutex_unlock(&id_priv->handler_mutex); + return -EINVAL; + } + return 0; } static int cma_has_cm_dev(struct rdma_id_private *id_priv) @@ -399,8 +387,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, mutex_init(&id_priv->qp_mutex); init_completion(&id_priv->comp); atomic_set(&id_priv->refcount, 1); - init_waitqueue_head(&id_priv->wait_remove); - atomic_set(&id_priv->dev_remove, 0); + mutex_init(&id_priv->handler_mutex); INIT_LIST_HEAD(&id_priv->listen_list); INIT_LIST_HEAD(&id_priv->mc_list); get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); @@ -927,7 +914,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) struct rdma_cm_event event; int ret = 0; - if (cma_disable_remove(id_priv, CMA_CONNECT)) + if (cma_disable_callback(id_priv, CMA_CONNECT)) return 0; memset(&event, 0, sizeof event); @@ -984,12 +971,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; cma_exch(id_priv, CMA_DESTROYING); - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; } out: - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); return ret; } @@ -1101,7 +1088,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) int offset, ret; listen_id = cm_id->context; - if (cma_disable_remove(listen_id, CMA_LISTEN)) + if (cma_disable_callback(listen_id, CMA_LISTEN)) return -ECONNABORTED; memset(&event, 0, sizeof event); @@ -1122,7 +1109,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) goto out; } - atomic_inc(&conn_id->dev_remove); + mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); mutex_lock(&lock); ret = cma_acquire_dev(conn_id); mutex_unlock(&lock); @@ -1144,7 +1131,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) !cma_is_ud_ps(conn_id->id.ps)) ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); mutex_unlock(&lock); - cma_enable_remove(conn_id); + mutex_unlock(&conn_id->handler_mutex); goto out; } @@ -1153,11 +1140,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) release_conn_id: cma_exch(conn_id, CMA_DESTROYING); - cma_enable_remove(conn_id); + mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(&conn_id->id); out: - cma_enable_remove(listen_id); + mutex_unlock(&listen_id->handler_mutex); return ret; } @@ -1223,7 +1210,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) struct sockaddr_in *sin; int ret = 0; - if (cma_disable_remove(id_priv, CMA_CONNECT)) + if (cma_disable_callback(id_priv, CMA_CONNECT)) return 0; memset(&event, 0, sizeof event); @@ -1267,12 +1254,12 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.iw = NULL; cma_exch(id_priv, CMA_DESTROYING); - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; } - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); return ret; } @@ -1288,7 +1275,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, struct ib_device_attr attr; listen_id = cm_id->context; - if (cma_disable_remove(listen_id, CMA_LISTEN)) + if (cma_disable_callback(listen_id, CMA_LISTEN)) return -ECONNABORTED; /* Create a new RDMA id for the new IW CM ID */ @@ -1300,19 +1287,19 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, goto out; } conn_id = container_of(new_cm_id, struct rdma_id_private, id); - atomic_inc(&conn_id->dev_remove); + mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); conn_id->state = CMA_CONNECT; dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr); if (!dev) { ret = -EADDRNOTAVAIL; - cma_enable_remove(conn_id); + mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL); if (ret) { - cma_enable_remove(conn_id); + mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } @@ -1321,7 +1308,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, ret = cma_acquire_dev(conn_id); mutex_unlock(&lock); if (ret) { - cma_enable_remove(conn_id); + mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } @@ -1337,7 +1324,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, ret = ib_query_device(conn_id->id.device, &attr); if (ret) { - cma_enable_remove(conn_id); + mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } @@ -1353,14 +1340,17 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, /* User wants to destroy the CM ID */ conn_id->cm_id.iw = NULL; cma_exch(conn_id, CMA_DESTROYING); - cma_enable_remove(conn_id); + mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(&conn_id->id); + goto out; } + mutex_unlock(&conn_id->handler_mutex); + out: if (dev) dev_put(dev); - cma_enable_remove(listen_id); + mutex_unlock(&listen_id->handler_mutex); return ret; } @@ -1592,7 +1582,7 @@ static void cma_work_handler(struct work_struct *_work) struct rdma_id_private *id_priv = work->id; int destroy = 0; - atomic_inc(&id_priv->dev_remove); + mutex_lock(&id_priv->handler_mutex); if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) goto out; @@ -1601,7 +1591,7 @@ static void cma_work_handler(struct work_struct *_work) destroy = 1; } out: - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); if (destroy) rdma_destroy_id(&id_priv->id); @@ -1764,7 +1754,7 @@ static void addr_handler(int status, struct sockaddr *src_addr, struct rdma_cm_event event; memset(&event, 0, sizeof event); - atomic_inc(&id_priv->dev_remove); + mutex_lock(&id_priv->handler_mutex); /* * Grab mutex to block rdma_destroy_id() from removing the device while @@ -1793,13 +1783,13 @@ static void addr_handler(int status, struct sockaddr *src_addr, if (id_priv->id.event_handler(&id_priv->id, &event)) { cma_exch(id_priv, CMA_DESTROYING); - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); rdma_destroy_id(&id_priv->id); return; } out: - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); } @@ -2126,7 +2116,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; int ret = 0; - if (cma_disable_remove(id_priv, CMA_CONNECT)) + if (cma_disable_callback(id_priv, CMA_CONNECT)) return 0; memset(&event, 0, sizeof event); @@ -2167,12 +2157,12 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; cma_exch(id_priv, CMA_DESTROYING); - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; } out: - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); return ret; } @@ -2570,8 +2560,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) int ret; id_priv = mc->id_priv; - if (cma_disable_remove(id_priv, CMA_ADDR_BOUND) && - cma_disable_remove(id_priv, CMA_ADDR_RESOLVED)) + if (cma_disable_callback(id_priv, CMA_ADDR_BOUND) && + cma_disable_callback(id_priv, CMA_ADDR_RESOLVED)) return 0; mutex_lock(&id_priv->qp_mutex); @@ -2596,12 +2586,12 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { cma_exch(id_priv, CMA_DESTROYING); - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return 0; } - cma_enable_remove(id_priv); + mutex_unlock(&id_priv->handler_mutex); return 0; } @@ -2760,6 +2750,7 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv) { struct rdma_cm_event event; enum cma_state state; + int ret = 0; /* Record that we want to remove the device */ state = cma_exch(id_priv, CMA_DEVICE_REMOVAL); @@ -2767,15 +2758,18 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv) return 0; cma_cancel_operation(id_priv, state); - wait_event(id_priv->wait_remove, !atomic_read(&id_priv->dev_remove)); + mutex_lock(&id_priv->handler_mutex); /* Check for destruction from another callback. */ if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL)) - return 0; + goto out; memset(&event, 0, sizeof event); event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; - return id_priv->id.event_handler(&id_priv->id, &event); + ret = id_priv->id.event_handler(&id_priv->id, &event); +out: + mutex_unlock(&id_priv->handler_mutex); + return ret; } static void cma_process_remove(struct cma_device *cma_dev) From 2d92865158d0e21ef4350703af64bc2a610d81d3 Mon Sep 17 00:00:00 2001 From: Vladimir Sokolovsky Date: Mon, 14 Jul 2008 23:48:53 -0700 Subject: [PATCH 51/52] mlx4_core: Use MOD_STAT_CFG command to get minimal page size There was a bug in some versions of the mlx4 driver in mlx4_alloc_fmr(), which hardcoded the minimum acceptable page_shift to be 12. However, new ConnectX firmware can support a minimum page_shift of 9 (log_pg_sz of 9 returned by QUERY_DEV_LIM) -- so with old drivers, ib_fmr_alloc() would fail for ULPs using the device minimum when creating FMRs. To preserve firmware compatibility with released mlx4 drivers, the firmware will continue to return 12 as before for log_page_sz in QUERY_DEV_CAP for these drivers. However, to enable new drivers to take advantage of the available smaller page size, the mlx4 driver now first sets the log_pg_sz to the device minimum by setting a log_page_sz value to 0 via the MOD_STAT_CFG command and then reading the real minimum via QUERY_DEV_CAP. Signed-off-by: Jack Morgenstein Signed-off-by: Vladimir Sokolovsky Signed-off-by: Roland Dreier --- drivers/net/mlx4/fw.c | 28 ++++++++++++++++++++++++++++ drivers/net/mlx4/fw.h | 6 ++++++ drivers/net/mlx4/main.c | 7 +++++++ 3 files changed, 41 insertions(+) diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index d82f2751d2c7..2b5006b9be67 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -101,6 +101,34 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags) mlx4_dbg(dev, " %s\n", fname[i]); } +int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *inbox; + int err = 0; + +#define MOD_STAT_CFG_IN_SIZE 0x100 + +#define MOD_STAT_CFG_PG_SZ_M_OFFSET 0x002 +#define MOD_STAT_CFG_PG_SZ_OFFSET 0x003 + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; + + memset(inbox, 0, MOD_STAT_CFG_IN_SIZE); + + MLX4_PUT(inbox, cfg->log_pg_sz, MOD_STAT_CFG_PG_SZ_OFFSET); + MLX4_PUT(inbox, cfg->log_pg_sz_m, MOD_STAT_CFG_PG_SZ_M_OFFSET); + + err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_MOD_STAT_CFG, + MLX4_CMD_TIME_CLASS_A); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { struct mlx4_cmd_mailbox *mailbox; diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h index 306cb9b0242d..a0e046c149b7 100644 --- a/drivers/net/mlx4/fw.h +++ b/drivers/net/mlx4/fw.h @@ -38,6 +38,11 @@ #include "mlx4.h" #include "icm.h" +struct mlx4_mod_stat_cfg { + u8 log_pg_sz; + u8 log_pg_sz_m; +}; + struct mlx4_dev_cap { int max_srq_sz; int max_qp_sz; @@ -162,5 +167,6 @@ int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages); int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm); int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev); int mlx4_NOP(struct mlx4_dev *dev); +int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg); #endif /* MLX4_FW_H */ diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index a6aa49fc1d68..d3736013fe9b 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -485,6 +485,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_adapter adapter; struct mlx4_dev_cap dev_cap; + struct mlx4_mod_stat_cfg mlx4_cfg; struct mlx4_profile profile; struct mlx4_init_hca_param init_hca; u64 icm_size; @@ -502,6 +503,12 @@ static int mlx4_init_hca(struct mlx4_dev *dev) return err; } + mlx4_cfg.log_pg_sz_m = 1; + mlx4_cfg.log_pg_sz = 0; + err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg); + if (err) + mlx4_warn(dev, "Failed to override log_pg_sz parameter\n"); + err = mlx4_dev_cap(dev, &dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); From f507d28bff0601f1a8a96b7939fa3855c50d25b6 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:53 -0700 Subject: [PATCH 52/52] IB/mlx4: Use kzalloc() for new QPs so flags are initialized to 0 Current code uses kmalloc() and then just does a bitwise OR operation on qp->flags in create_qp_common(), which means that qp->flags may potentially have some unintended bits set. This patch uses kzalloc() and avoids further explicit clearing of structure members, which also shrinks the code: add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-65 (-65) function old new delta create_qp_common 2024 1959 -65 Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 91590e7fba0c..89eb6cbe592e 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -454,19 +454,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, spin_lock_init(&qp->rq.lock); qp->state = IB_QPS_RESET; - qp->atomic_rd_en = 0; - qp->resp_depth = 0; - - qp->rq.head = 0; - qp->rq.tail = 0; - qp->sq.head = 0; - qp->sq.tail = 0; - qp->sq_next_wqe = 0; - if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - else - qp->sq_signal_bits = 0; err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp); if (err) @@ -704,7 +693,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_UC: case IB_QPT_UD: { - qp = kmalloc(sizeof *qp, GFP_KERNEL); + qp = kzalloc(sizeof *qp, GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); @@ -725,7 +714,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, if (pd->uobject) return ERR_PTR(-EINVAL); - sqp = kmalloc(sizeof *sqp, GFP_KERNEL); + sqp = kzalloc(sizeof *sqp, GFP_KERNEL); if (!sqp) return ERR_PTR(-ENOMEM);