From b4c296f9c96420b8e7e92466ea5960f10ee20aae Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 17 Aug 2018 16:45:51 -0600 Subject: [PATCH 1/3] RDMA/smc: Replace ib_query_gid with rdma_get_gid_attr All RDMA ULPs should be using rdma_get_gid_attr instead of ib_query_gid. Convert SMC to use the new API. In the process correct some confusion with gid_type - if attr->ndev is !NULL then gid_type can never be IB_GID_TYPE_IB by definition. IB_GID_TYPE_ROCE shares the same enum value and is probably what was intended here. Reviewed-by: Parav Pandit Signed-off-by: Jason Gunthorpe --- include/rdma/ib_cache.h | 24 --------------------- net/smc/smc_ib.c | 48 +++++++++++++++++++++-------------------- 2 files changed, 25 insertions(+), 47 deletions(-) diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index 3e11e7cc60b7..62e990b620aa 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -133,28 +133,4 @@ const struct ib_gid_attr *rdma_get_gid_attr(struct ib_device *device, void rdma_put_gid_attr(const struct ib_gid_attr *attr); void rdma_hold_gid_attr(const struct ib_gid_attr *attr); -/* - * This is to be removed. It only exists to make merging rdma and smc simpler. 
- */ -static inline __deprecated int ib_query_gid(struct ib_device *device, - u8 port_num, int index, - union ib_gid *gid, - struct ib_gid_attr *attr_out) -{ - const struct ib_gid_attr *attr; - - memset(attr_out, 0, sizeof(*attr_out)); - attr = rdma_get_gid_attr(device, port_num, index); - if (IS_ERR(attr)) - return PTR_ERR(attr); - - if (attr->ndev) - dev_hold(attr->ndev); - *attr_out = *attr; - - rdma_put_gid_attr(attr); - - return 0; -} - #endif /* _IB_CACHE_H */ diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 9bb5274a244e..e519ef29c0ff 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -145,17 +145,21 @@ out: static int smc_ib_fill_mac(struct smc_ib_device *smcibdev, u8 ibport) { - struct ib_gid_attr gattr; - union ib_gid gid; - int rc; + const struct ib_gid_attr *attr; + int rc = 0; - rc = ib_query_gid(smcibdev->ibdev, ibport, 0, &gid, &gattr); - if (rc || !gattr.ndev) + attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, 0); + if (IS_ERR(attr)) return -ENODEV; - memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN); - dev_put(gattr.ndev); - return 0; + if (attr->ndev) + memcpy(smcibdev->mac[ibport - 1], attr->ndev->dev_addr, + ETH_ALEN); + else + rc = -ENODEV; + + rdma_put_gid_attr(attr); + return rc; } /* Create an identifier unique for this instance of SMC-R. 
@@ -180,29 +184,27 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport) int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, unsigned short vlan_id, u8 gid[], u8 *sgid_index) { - struct ib_gid_attr gattr; - union ib_gid _gid; + const struct ib_gid_attr *attr; int i; for (i = 0; i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) { - memset(&_gid, 0, SMC_GID_SIZE); - memset(&gattr, 0, sizeof(gattr)); - if (ib_query_gid(smcibdev->ibdev, ibport, i, &_gid, &gattr)) + attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, i); + if (IS_ERR(attr)) continue; - if (!gattr.ndev) - continue; - if (((!vlan_id && !is_vlan_dev(gattr.ndev)) || - (vlan_id && is_vlan_dev(gattr.ndev) && - vlan_dev_vlan_id(gattr.ndev) == vlan_id)) && - gattr.gid_type == IB_GID_TYPE_IB) { + + if (attr->ndev && + ((!vlan_id && !is_vlan_dev(attr->ndev)) || + (vlan_id && is_vlan_dev(attr->ndev) && + vlan_dev_vlan_id(attr->ndev) == vlan_id)) && + attr->gid_type == IB_GID_TYPE_ROCE) { if (gid) - memcpy(gid, &_gid, SMC_GID_SIZE); + memcpy(gid, &attr->gid, SMC_GID_SIZE); if (sgid_index) - *sgid_index = i; - dev_put(gattr.ndev); + *sgid_index = attr->index; + rdma_put_gid_attr(attr); return 0; } - dev_put(gattr.ndev); + rdma_put_gid_attr(attr); } return -ENODEV; } From c513de490f808d8480346f9a58e6a4a5f3de12e7 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Wed, 15 Aug 2018 22:54:49 -0700 Subject: [PATCH 2/3] IB/hfi1: Invalid NUMA node information can cause a divide by zero If the system BIOS does not supply NUMA node information to the PCI devices, the NUMA node is selected by choosing the current node. 
This can lead to the following crash: divide error: 0000 SMP CPU: 0 PID: 4 Comm: kworker/0:0 Tainted: G IOE ------------ 3.10.0-693.21.1.el7.x86_64 #1 Hardware name: Intel Corporation S2600KP/S2600KP, BIOS SE5C610.86B.01.01.0005.101720141054 10/17/2014 Workqueue: events work_for_cpu_fn task: ffff880174480fd0 ti: ffff880174488000 task.ti: ffff880174488000 RIP: 0010: [] hfi1_dev_affinity_init+0x129/0x6a0 [hfi1] RSP: 0018:ffff88017448bbf8 EFLAGS: 00010246 RAX: 0000000000000011 RBX: ffff88107ffba6c0 RCX: ffff88085c22e130 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff880824ad0000 RBP: ffff88017448bc48 R08: 0000000000000011 R09: 0000000000000002 R10: ffff8808582b6ca0 R11: 0000000000003151 R12: ffff8808582b6ca0 R13: ffff8808582b6518 R14: ffff8808582b6010 R15: 0000000000000012 FS: 0000000000000000(0000) GS:ffff88085ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007efc707404f0 CR3: 0000000001a02000 CR4: 00000000001607f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Call Trace: hfi1_init_dd+0x14b3/0x27a0 [hfi1] ? pcie_capability_write_word+0x46/0x70 ? hfi1_pcie_init+0xc0/0x200 [hfi1] do_init_one+0x153/0x4c0 [hfi1] ? sched_clock_cpu+0x85/0xc0 init_one+0x1b5/0x260 [hfi1] local_pci_probe+0x4a/0xb0 work_for_cpu_fn+0x1a/0x30 process_one_work+0x17f/0x440 worker_thread+0x278/0x3c0 ? manage_workers.isra.24+0x2a0/0x2a0 kthread+0xd1/0xe0 ? insert_kthread_work+0x40/0x40 ret_from_fork+0x77/0xb0 ? insert_kthread_work+0x40/0x40 If the BIOS is not supplying NUMA information: - set the default table count to 1 for all possible nodes - select node 0 (instead of the current NUMA node) to get consistent performance - generate an error indicating that the BIOS should be upgraded Reviewed-by: Gary Leshner Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/affinity.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index fbe7198a715a..bedd5fba33b0 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -198,7 +198,7 @@ int node_affinity_init(void) while ((dev = pci_get_device(ids->vendor, ids->device, dev))) { node = pcibus_to_node(dev->bus); if (node < 0) - node = numa_node_id(); + goto out; hfi1_per_node_cntr[node]++; } @@ -206,6 +206,18 @@ int node_affinity_init(void) } return 0; + +out: + /* + * Invalid PCI NUMA node information found, note it, and populate + * our database 1:1. + */ + pr_err("HFI: Invalid PCI NUMA node. Performance may be affected\n"); + pr_err("HFI: System BIOS may need to be upgraded\n"); + for (node = 0; node < node_affinity.num_possible_nodes; node++) + hfi1_per_node_cntr[node] = 1; + + return 0; } static void node_affinity_destroy(struct hfi1_affinity_node *entry) @@ -622,8 +634,14 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) int curr_cpu, possible, i, ret; bool new_entry = false; - if (node < 0) - node = numa_node_id(); + /* + * If the BIOS does not have the NUMA node information set, select + * NUMA 0 so we get consistent performance. + */ + if (node < 0) { + dd_dev_err(dd, "Invalid PCI NUMA node. 
Performance may be affected\n"); + node = 0; + } dd->node = node; local_mask = cpumask_of_node(dd->node); From 845b397a7771f2d3504beff5521f452be0d22eec Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 21 Aug 2018 16:20:44 +0200 Subject: [PATCH 3/3] IB/ucm: fix UCM link error Building UCM with CONFIG_INFINIBAND_USER_ACCESS=m results in a set of link errors including: drivers/infiniband/core/ucm.o: In function `ib_ucm_event_handler': ucm.c:(.text+0x6dc): undefined reference to `ib_copy_path_rec_to_user' drivers/infiniband/core/ucma.o: In function `ucma_event_handler': ucma.c:(.text+0xdc0): undefined reference to `ib_copy_ah_attr_to_user' To get it to build-test again, this makes the option itself a tristate, which lets Kconfig figure out the dependency correctly. Fixes: 486edfb1039d ("IB/ucm: Fix compiling ucm.c") Signed-off-by: Arnd Bergmann Signed-off-by: Jason Gunthorpe --- drivers/infiniband/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index d160d2d1f3a3..abb6660c099c 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -36,7 +36,7 @@ config INFINIBAND_USER_ACCESS rdma-core . config INFINIBAND_USER_ACCESS_UCM - bool "Userspace CM (UCM, DEPRECATED)" + tristate "Userspace CM (UCM, DEPRECATED)" depends on BROKEN || COMPILE_TEST depends on INFINIBAND_USER_ACCESS help