From 5701659004d68085182d2fd4199c79172165fa65 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 27 Dec 2016 18:23:06 -0800 Subject: [PATCH 01/52] net: stmmac: Fix race between stmmac_drv_probe and stmmac_open There is currently a small window during which the network device registered by stmmac can be made visible, yet all resources, including and clock and MDIO bus have not had a chance to be set up, this can lead to the following error to occur: [ 473.919358] stmmaceth 0000:01:00.0 (unnamed net_device) (uninitialized): stmmac_dvr_probe: warning: cannot get CSR clock [ 473.919382] stmmaceth 0000:01:00.0: no reset control found [ 473.919412] stmmac - user ID: 0x10, Synopsys ID: 0x42 [ 473.919429] stmmaceth 0000:01:00.0: DMA HW capability register supported [ 473.919436] stmmaceth 0000:01:00.0: RX Checksum Offload Engine supported [ 473.919443] stmmaceth 0000:01:00.0: TX Checksum insertion supported [ 473.919451] stmmaceth 0000:01:00.0 (unnamed net_device) (uninitialized): Enable RX Mitigation via HW Watchdog Timer [ 473.921395] libphy: PHY stmmac-1:00 not found [ 473.921417] stmmaceth 0000:01:00.0 eth0: Could not attach to PHY [ 473.921427] stmmaceth 0000:01:00.0 eth0: stmmac_open: Cannot attach to PHY (error: -19) [ 473.959710] libphy: stmmac: probed [ 473.959724] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 0 IRQ POLL (stmmac-1:00) active [ 473.959728] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 1 IRQ POLL (stmmac-1:01) [ 473.959731] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 2 IRQ POLL (stmmac-1:02) [ 473.959734] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 3 IRQ POLL (stmmac-1:03) Fix this by making sure that register_netdev() is the last thing being done, which guarantees that the clock and the MDIO bus are available. Fixes: 4bfcbd7abce2 ("stmmac: Move the mdio_register/_unregister in probe/remove") Reported-by: Kweh, Hock Leong Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index bb40382e205d..5910ea51f8f6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3339,13 +3339,6 @@ int stmmac_dvr_probe(struct device *device, spin_lock_init(&priv->lock); - ret = register_netdev(ndev); - if (ret) { - netdev_err(priv->dev, "%s: ERROR %i registering the device\n", - __func__, ret); - goto error_netdev_register; - } - /* If a specific clk_csr value is passed from the platform * this means that the CSR Clock Range selection cannot be * changed at run-time and it is fixed. Viceversa the driver'll try to @@ -3372,11 +3365,14 @@ int stmmac_dvr_probe(struct device *device, } } - return 0; + ret = register_netdev(ndev); + if (ret) + netdev_err(priv->dev, "%s: ERROR %i registering the device\n", + __func__, ret); + + return ret; error_mdio_register: - unregister_netdev(ndev); -error_netdev_register: netif_napi_del(&priv->napi); error_hw_init: clk_disable_unprepare(priv->pclk); From 0df0f207aab4f42e5c96a807adf9a6845b69e984 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 28 Dec 2016 14:54:47 +0200 Subject: [PATCH 02/52] net/sched: cls_flower: Fix missing addr_type in classify Since we now use a non zero mask on addr_type, we are matching on its value (IPV4/IPV6). So before this fix, matching on enc_src_ip/enc_dst_ip failed in SW/classify path since its value was zero. This patch sets the proper value of addr_type for encapsulated packets. Fixes: 970bfcd09791 ('net/sched: cls_flower: Use mask for addr_type') Signed-off-by: Paul Blakey Reviewed-by: Hadar Hen Zion Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 333f8e268431..970db7a41684 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -153,10 +153,14 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, switch (ip_tunnel_info_af(info)) { case AF_INET: + skb_key.enc_control.addr_type = + FLOW_DISSECTOR_KEY_IPV4_ADDRS; skb_key.enc_ipv4.src = key->u.ipv4.src; skb_key.enc_ipv4.dst = key->u.ipv4.dst; break; case AF_INET6: + skb_key.enc_control.addr_type = + FLOW_DISSECTOR_KEY_IPV6_ADDRS; skb_key.enc_ipv6.src = key->u.ipv6.src; skb_key.enc_ipv6.dst = key->u.ipv6.dst; break; From 9da34cd34e85aacc55af8774b81b1f23e86014f9 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 28 Dec 2016 14:58:31 +0200 Subject: [PATCH 03/52] net/mlx5: Disable RoCE on the e-switch management port under switchdev mode Under the switchdev/offloads mode, packets that don't match any e-switch steering rule are sent towards the e-switch management port. We use a NIC HW steering rule set per vport (uplink and VFs) to make them be received into the host OS through the respective vport representor netdevice. Currnetly such missed RoCE packets will not get to this NIC steering rule, and hence VF RoCE will not work over the slow path of the offloads mode. This is b/c these packets will be matched by a steering rule added by the firmware that serves RoCE traffic set on the PF NIC vport which is also the e-switch management port under SRIOV. Disabling RoCE on the e-switch management vport when we are in the offloads mode, will signal to the firmware to remove their RoCE rule, and then the missed RoCE packets will be matched by the representor NIC steering rule as any other missed packets. To achieve that, we disable RoCE on the PF vport. We do that by removing (hot-unplugging) the IB device instance associated with the PF. This is also required by our current model where the PF serves as the uplink representor and hence only SW switching (TC, bridge, OVS) applications and slow path vport mlx5e net-device should be running over that vport. Fixes: c930a3ad7453 ('net/mlx5e: Add devlink based SRIOV mode changes') Signed-off-by: Or Gerlitz Reviewed-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 466e161010f7..03293ed1cc22 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -695,6 +695,12 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) if (err) goto err_reps; } + + /* disable PF RoCE so missed packets don't go through RoCE steering */ + mlx5_dev_list_lock(); + mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_dev_list_unlock(); + return 0; err_reps: @@ -718,6 +724,11 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw) { int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; + /* enable back PF RoCE */ + mlx5_dev_list_lock(); + mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_dev_list_unlock(); + mlx5_eswitch_disable_sriov(esw); err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); if (err) { From 883371c453b937f9eb581fb4915210865982736f Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Wed, 28 Dec 2016 14:58:32 +0200 Subject: [PATCH 04/52] net/mlx5: Check FW limitations on log_max_qp before setting it When setting HCA capabilities, set log_max_qp to be the minimum between the selected profile's value and the HCA limitation. Fixes: 938fe83c8dcb ('net/mlx5_core: New device capabilities...') Signed-off-by: Noa Osherovich Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 54e5a786f191..23c12f1aaa39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -503,6 +503,13 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size, to_fw_pkey_sz(dev, 128)); + /* Check log_max_qp from HCA caps to set in current profile */ + if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) { + mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", + profile[prof_sel].log_max_qp, + MLX5_CAP_GEN_MAX(dev, log_max_qp)); + profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); + } if (prof->mask & MLX5_PROF_MASK_QP_SIZE) MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, prof->log_max_qp); From 689a248df83b6032edc57e86267b4e5cc8d7174e Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Wed, 28 Dec 2016 14:58:33 +0200 Subject: [PATCH 05/52] net/mlx5: Cancel recovery work in remove flow If there is pending delayed work for health recovery it must be canceled if the device is being unloaded. Fixes: 05ac2c0b7438 ("net/mlx5: Fix race between PCI error handlers and health work") Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 23c12f1aaa39..0b49739eadd3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1196,6 +1196,8 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, { int err = 0; + mlx5_drain_health_wq(dev); + mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", @@ -1358,10 +1360,9 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv, false); - /* In case of kernel call save the pci state and drain health wq */ + /* In case of kernel call save the pci state */ if (state) { pci_save_state(pdev); - mlx5_drain_health_wq(dev); mlx5_pci_disable_device(dev); } From d151d73dcc99de87c63bdefebcc4cb69de1cdc40 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 28 Dec 2016 14:58:34 +0200 Subject: [PATCH 06/52] net/mlx5: Avoid shadowing numa_node Avoid using a local variable named numa_node to avoid shadowing a public one. Fixes: db058a186f98 ('net/mlx5_core: Set irq affinity hints') Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 0b49739eadd3..6547f22e6b9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -582,7 +582,6 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) struct mlx5_priv *priv = &mdev->priv; struct msix_entry *msix = priv->msix_arr; int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; - int numa_node = priv->numa_node; int err; if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { @@ -590,7 +589,7 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) return -ENOMEM; } - cpumask_set_cpu(cpumask_local_spread(i, numa_node), + cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), priv->irq_info[i].mask); err = irq_set_affinity_hint(irq, priv->irq_info[i].mask); From 077b1e8069b9b74477b01d28f6b83774dc19a142 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 28 Dec 2016 14:58:35 +0200 Subject: [PATCH 07/52] net/mlx5: Mask destination mac value in ethtool steering rules We need to mask the destination mac value with the destination mac mask when adding steering rule via ethtool. Fixes: 1174fce8d1410 ('net/mlx5e: Support l3/l4 flow type specs in ethtool flow steering') Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 3691451c728c..d088effd7160 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -247,6 +247,7 @@ static int set_flow_attrs(u32 *match_c, u32 *match_v, } if (fs->flow_type & FLOW_MAC_EXT && !is_zero_ether_addr(fs->m_ext.h_dest)) { + mask_spec(fs->m_ext.h_dest, fs->h_ext.h_dest, ETH_ALEN); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, dmac_47_16), fs->m_ext.h_dest); From 9b8c514291a83e53c073b473bdca6267f17a02c2 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 28 Dec 2016 14:58:36 +0200 Subject: [PATCH 08/52] net/mlx5: Release FTE lock in error flow Release the FTE lock when adding rule to the FTE has failed. Fixes: 0fd758d6112f ('net/mlx5: Don't unlock fte while still using it') Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index a263d8904a4c..0ac7a2fc916c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1263,6 +1263,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); handle = add_rule_fte(fte, fg, dest, dest_num, false); if (IS_ERR(handle)) { + unlock_ref_node(&fte->node); kfree(fte); goto unlock_fg; } From ccce1700263d8b5b219359d04180492a726cea16 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Wed, 28 Dec 2016 14:58:37 +0200 Subject: [PATCH 09/52] net/mlx5: Prevent setting multicast macs for VFs Need to check that VF mac address entered by the admin user is either zero or unicast mac. Multicast mac addresses are prohibited. Fixes: 77256579c6b4 ('net/mlx5: E-Switch, Introduce Vport administration functions') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d6807c3cc461..f14d9c9ba773 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1860,7 +1860,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, if (!ESW_ALLOWED(esw)) return -EPERM; - if (!LEGAL_VPORT(esw, vport)) + if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac)) return -EINVAL; mutex_lock(&esw->state_lock); From 465db5dab86d6688fa5132edd1237102f4a20e84 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 28 Dec 2016 14:58:38 +0200 Subject: [PATCH 10/52] Revert "net/mlx5e: Expose PCIe statistics to ethtool" This reverts commit 9c7262399ba12825f3ca4b00a76d8d5e77c720f5. PCIe counters were introduced in a new firmware version, as a result users with old firmware encountered a syndrome every 200ms due to update stats work. This feature will be re-introduced later with appropriate capabilities infrastructure. Fixes: 9c7262399ba1 ("net/mlx5e: Expose PCIe statistics to ethtool") Signed-off-by: Gal Pressman Reported-by: Jesper Dangaard Brouer Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 17 ---------- .../net/ethernet/mellanox/mlx5/core/en_main.c | 24 -------------- .../ethernet/mellanox/mlx5/core/en_stats.h | 32 +------------------ 3 files changed, 1 insertion(+), 72 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 352462af8d51..33a399a8b5d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -171,7 +171,6 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) return NUM_SW_COUNTERS + MLX5E_NUM_Q_CNTRS(priv) + NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS + - NUM_PCIE_COUNTERS + MLX5E_NUM_RQ_STATS(priv) + MLX5E_NUM_SQ_STATS(priv) + MLX5E_NUM_PFC_COUNTERS(priv) + @@ -219,14 +218,6 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2819_stats_desc[i].format); - for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pcie_perf_stats_desc[i].format); - - for (i = 0; i < NUM_PCIE_TAS_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pcie_tas_stats_desc[i].format); - for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, @@ -339,14 +330,6 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters, pport_2819_stats_desc, i); - for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, - pcie_perf_stats_desc, i); - - for (i = 0; i < NUM_PCIE_TAS_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_tas_counters, - pcie_tas_stats_desc, i); - for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index cbfa38fc72c0..be5ef036401d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -291,36 +291,12 @@ static void mlx5e_update_q_counter(struct mlx5e_priv *priv) &qcnt->rx_out_of_buffer); } -static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv) -{ - struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie; - struct mlx5_core_dev *mdev = priv->mdev; - int sz = MLX5_ST_SZ_BYTES(mpcnt_reg); - void *out; - u32 *in; - - in = mlx5_vzalloc(sz); - if (!in) - return; - - out = pcie_stats->pcie_perf_counters; - MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); - - out = pcie_stats->pcie_tas_counters; - MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); - - kvfree(in); -} - void mlx5e_update_stats(struct mlx5e_priv *priv) { mlx5e_update_q_counter(priv); mlx5e_update_vport_counters(priv); mlx5e_update_pport_counters(priv); mlx5e_update_sw_counters(priv); - mlx5e_update_pcie_counters(priv); } void mlx5e_update_stats_work(struct work_struct *work) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index f202f872f57f..ba5db1dd23a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -39,7 +39,7 @@ #define MLX5E_READ_CTR32_CPU(ptr, dsc, i) \ (*(u32 *)((char *)ptr + dsc[i].offset)) #define MLX5E_READ_CTR32_BE(ptr, dsc, i) \ - be32_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset)) + be64_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset)) #define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld) #define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld) @@ -276,32 +276,6 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, }; -#define PCIE_PERF_OFF(c) \ - MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c) -#define PCIE_PERF_GET(pcie_stats, c) \ - MLX5_GET(mpcnt_reg, pcie_stats->pcie_perf_counters, \ - counter_set.pcie_perf_cntrs_grp_data_layout.c) -#define PCIE_TAS_OFF(c) \ - MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_tas_cntrs_grp_data_layout.c) -#define PCIE_TAS_GET(pcie_stats, c) \ - MLX5_GET(mpcnt_reg, pcie_stats->pcie_tas_counters, \ - counter_set.pcie_tas_cntrs_grp_data_layout.c) - -struct mlx5e_pcie_stats { - __be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)]; - __be64 pcie_tas_counters[MLX5_ST_SZ_QW(mpcnt_reg)]; -}; - -static const struct counter_desc pcie_perf_stats_desc[] = { - { "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) }, - { "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) }, -}; - -static const struct counter_desc pcie_tas_stats_desc[] = { - { "tx_pci_transport_nonfatal_msg", PCIE_TAS_OFF(non_fatal_err_msg_sent) }, - { "tx_pci_transport_fatal_msg", PCIE_TAS_OFF(fatal_err_msg_sent) }, -}; - struct mlx5e_rq_stats { u64 packets; u64 bytes; @@ -386,8 +360,6 @@ static const struct counter_desc sq_stats_desc[] = { #define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) #define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) #define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) -#define NUM_PCIE_PERF_COUNTERS ARRAY_SIZE(pcie_perf_stats_desc) -#define NUM_PCIE_TAS_COUNTERS ARRAY_SIZE(pcie_tas_stats_desc) #define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS \ ARRAY_SIZE(pport_per_prio_traffic_stats_desc) #define NUM_PPORT_PER_PRIO_PFC_COUNTERS \ @@ -397,7 +369,6 @@ static const struct counter_desc sq_stats_desc[] = { NUM_PPORT_2819_COUNTERS + \ NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ NUM_PPORT_PRIO) -#define NUM_PCIE_COUNTERS (NUM_PCIE_PERF_COUNTERS + NUM_PCIE_TAS_COUNTERS) #define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) @@ -406,7 +377,6 @@ struct mlx5e_stats { struct mlx5e_qcounter_stats qcnt; struct mlx5e_vport_stats vport; struct mlx5e_pport_stats pport; - struct mlx5e_pcie_stats pcie; struct rtnl_link_stats64 vf_vport; }; From 1efbd205b3cc5882a8c386c58a57134044e9d5ba Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 28 Dec 2016 14:58:39 +0200 Subject: [PATCH 11/52] Revert "net/mlx5: Add MPCNT register infrastructure" This reverts commit 7f503169cabd70c1f13b9279c50eca7dfb9a7d51. Fixes: 7f503169cabd ("net/mlx5: Add MPCNT register infrastructure") Signed-off-by: Gal Pressman Reported-by: Jesper Dangaard Brouer Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 5 -- include/linux/mlx5/driver.h | 1 - include/linux/mlx5/mlx5_ifc.h | 93 ----------------------------------- 3 files changed, 99 deletions(-) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 9f489365b3d3..52b437431c6a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1071,11 +1071,6 @@ enum { MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, }; -enum { - MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP = 0x0, - MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP = 0x2, -}; - static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) { if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0ae55361e674..735b36335f29 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -123,7 +123,6 @@ enum { MLX5_REG_HOST_ENDIANNESS = 0x7004, MLX5_REG_MCIA = 0x9014, MLX5_REG_MLCR = 0x902b, - MLX5_REG_MPCNT = 0x9051, }; enum mlx5_dcbx_oper_mode { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 57bec544e20a..a852e9db6f0d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1757,80 +1757,6 @@ struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits { u8 reserved_at_4c0[0x300]; }; -struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits { - u8 life_time_counter_high[0x20]; - - u8 life_time_counter_low[0x20]; - - u8 rx_errors[0x20]; - - u8 tx_errors[0x20]; - - u8 l0_to_recovery_eieos[0x20]; - - u8 l0_to_recovery_ts[0x20]; - - u8 l0_to_recovery_framing[0x20]; - - u8 l0_to_recovery_retrain[0x20]; - - u8 crc_error_dllp[0x20]; - - u8 crc_error_tlp[0x20]; - - u8 reserved_at_140[0x680]; -}; - -struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits { - u8 life_time_counter_high[0x20]; - - u8 life_time_counter_low[0x20]; - - u8 time_to_boot_image_start[0x20]; - - u8 time_to_link_image[0x20]; - - u8 calibration_time[0x20]; - - u8 time_to_first_perst[0x20]; - - u8 time_to_detect_state[0x20]; - - u8 time_to_l0[0x20]; - - u8 time_to_crs_en[0x20]; - - u8 time_to_plastic_image_start[0x20]; - - u8 time_to_iron_image_start[0x20]; - - u8 perst_handler[0x20]; - - u8 times_in_l1[0x20]; - - u8 times_in_l23[0x20]; - - u8 dl_down[0x20]; - - u8 config_cycle1usec[0x20]; - - u8 config_cycle2to7usec[0x20]; - - u8 config_cycle_8to15usec[0x20]; - - u8 config_cycle_16_to_63usec[0x20]; - - u8 config_cycle_64usec[0x20]; - - u8 correctable_err_msg_sent[0x20]; - - u8 non_fatal_err_msg_sent[0x20]; - - u8 fatal_err_msg_sent[0x20]; - - u8 reserved_at_2e0[0x4e0]; -}; - struct mlx5_ifc_cmd_inter_comp_event_bits { u8 command_completion_vector[0x20]; @@ -2995,12 +2921,6 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { u8 reserved_at_0[0x7c0]; }; -union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits { - struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits pcie_perf_cntrs_grp_data_layout; - struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits pcie_tas_cntrs_grp_data_layout; - u8 reserved_at_0[0x7c0]; -}; - union mlx5_ifc_event_auto_bits { struct mlx5_ifc_comp_event_bits comp_event; struct mlx5_ifc_dct_events_bits dct_events; @@ -7320,18 +7240,6 @@ struct mlx5_ifc_ppcnt_reg_bits { union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set; }; -struct mlx5_ifc_mpcnt_reg_bits { - u8 reserved_at_0[0x8]; - u8 pcie_index[0x8]; - u8 reserved_at_10[0xa]; - u8 grp[0x6]; - - u8 clr[0x1]; - u8 reserved_at_21[0x1f]; - - union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits counter_set; -}; - struct mlx5_ifc_ppad_reg_bits { u8 reserved_at_0[0x3]; u8 single_mac[0x1]; @@ -7937,7 +7845,6 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_pmtu_reg_bits pmtu_reg; struct mlx5_ifc_ppad_reg_bits ppad_reg; struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg; - struct mlx5_ifc_mpcnt_reg_bits mpcnt_reg; struct mlx5_ifc_pplm_reg_bits pplm_reg; struct mlx5_ifc_pplr_reg_bits pplr_reg; struct mlx5_ifc_ppsc_reg_bits ppsc_reg; From 4525a45bfad55a00ef218c5fbe5d98a3d8170bf5 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Wed, 28 Dec 2016 14:58:40 +0200 Subject: [PATCH 12/52] net/mlx5e: Check ets capability before initializing ets settings During the initial setup, the ets command is sent to firmware without checking if the HCA supports ets. This causes the invalid command error. Add the ets capiblity check before sending firmware command to initialize ets settings. Fixes: e207b7e99176 ("net/mlx5e: ConnectX-4 firmware support for DCBX") Signed-off-by: Huy Nguyen Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 7f6c225666c1..f0b460f47f29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -723,6 +723,9 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv) int i; struct ieee_ets ets; + if (!MLX5_CAP_GEN(priv->mdev, ets)) + return; + memset(&ets, 0, sizeof(ets)); ets.ets_cap = mlx5_max_tc(priv->mdev) + 1; for (i = 0; i < ets.ets_cap; i++) { From 610e89e05c3f28a7394935aa6b91f99548c4fd3c Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 28 Dec 2016 14:58:41 +0200 Subject: [PATCH 13/52] net/mlx5e: Don't sync netdev state when not registered Skip setting netdev vxlan ports and netdev rx_mode on driver load when netdev is not yet registered. Synchronizing with netdev state is needed only on reset flow where the netdev remains registered for the whole reset period. This also fixes an access before initialization of net_device.addr_list_lock - which for some reason initialized on register_netdev - where we queued set_rx_mode work on driver load before netdev registration. Fixes: 26e59d8077a3 ("net/mlx5e: Implement mlx5e interface attach/detach callbacks") Signed-off-by: Saeed Mahameed Reported-by: Sebastian Ott Reviewed-by: Mohamad Haj Yahia Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index be5ef036401d..cf270f6c90e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3781,14 +3781,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5_lag_add(mdev, netdev); - if (mlx5e_vxlan_allowed(mdev)) { - rtnl_lock(); - udp_tunnel_get_rx_info(netdev); - rtnl_unlock(); - } - mlx5e_enable_async_events(priv); - queue_work(priv->wq, &priv->set_rx_mode_work); if (MLX5_CAP_GEN(mdev, vport_group_manager)) { mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); @@ -3798,6 +3791,18 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) rep.netdev = netdev; mlx5_eswitch_register_vport_rep(esw, 0, &rep); } + + if (netdev->reg_state != NETREG_REGISTERED) + return; + + /* Device already registered: sync netdev system state */ + if (mlx5e_vxlan_allowed(mdev)) { + rtnl_lock(); + udp_tunnel_get_rx_info(netdev); + rtnl_unlock(); + } + + queue_work(priv->wq, &priv->set_rx_mode_work); } static void mlx5e_nic_disable(struct mlx5e_priv *priv) From 37f304d10030bb425c19099e7b955d9c3ec4cba3 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 28 Dec 2016 14:58:42 +0200 Subject: [PATCH 14/52] net/mlx5e: Disable netdev after close Disable netdev should come after it was closed, although no harm of doing it before -hence the MLX5E_STATE_DESTROYING bit- but it is more natural this way. Fixes: 26e59d8077a3 ("net/mlx5e: Implement mlx5e interface attach/detach callbacks") Signed-off-by: Saeed Mahameed Reviewed-by: Mohamad Haj Yahia Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index cf270f6c90e8..1236b27b1493 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3947,10 +3947,6 @@ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) const struct mlx5e_profile *profile = priv->profile; set_bit(MLX5E_STATE_DESTROYING, &priv->state); - if (profile->disable) - profile->disable(priv); - - flush_workqueue(priv->wq); rtnl_lock(); if (netif_running(netdev)) @@ -3958,6 +3954,10 @@ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) netif_device_detach(netdev); rtnl_unlock(); + if (profile->disable) + profile->disable(priv); + flush_workqueue(priv->wq); + mlx5e_destroy_q_counter(priv); profile->cleanup_rx(priv); mlx5e_close_drop_rq(priv); From 9dd0f896d2cc5815d859e945db90915071cd44b3 Mon Sep 17 00:00:00 2001 From: Augusto Mecking Caringi Date: Wed, 28 Dec 2016 16:02:05 +0000 Subject: [PATCH 15/52] net: atm: Fix warnings in net/atm/lec.c when !CONFIG_PROC_FS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes the following warnings when CONFIG_PROC_FS is not set: linux/net/atm/lec.c: In function ‘lane_module_cleanup’: linux/net/atm/lec.c:1062:27: error: ‘atm_proc_root’ undeclared (first use in this function) remove_proc_entry("lec", atm_proc_root); ^ linux/net/atm/lec.c:1062:27: note: each undeclared identifier is reported only once for each function it appears in Signed-off-by: Augusto Mecking Caringi Signed-off-by: David S. Miller --- net/atm/lec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/atm/lec.c b/net/atm/lec.c index 019557d0a11d..09cfe87f0a44 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1059,7 +1059,9 @@ static void __exit lane_module_cleanup(void) { int i; +#ifdef CONFIG_PROC_FS remove_proc_entry("lec", atm_proc_root); +#endif deregister_atm_ioctl(&lane_ioctl_ops); From 60133867f1f111aaf3a8c00375b8026142a9a591 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 28 Dec 2016 16:44:23 +0000 Subject: [PATCH 16/52] net: wan: slic_ds26522: fix spelling mistake: "configurated" -> "configured" trivial fix to spelling mistake in pr_info message Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/wan/slic_ds26522.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/slic_ds26522.c b/drivers/net/wan/slic_ds26522.c index b776a0ab106c..9d9b4e0def2a 100644 --- a/drivers/net/wan/slic_ds26522.c +++ b/drivers/net/wan/slic_ds26522.c @@ -218,7 +218,7 @@ static int slic_ds26522_probe(struct spi_device *spi) ret = slic_ds26522_init_configure(spi); if (ret == 0) - pr_info("DS26522 cs%d configurated\n", spi->chip_select); + pr_info("DS26522 cs%d configured\n", spi->chip_select); return ret; } From e4c5e13aa45c23692e4acf56f0b3533f328199b2 Mon Sep 17 00:00:00 2001 From: Zheng Li Date: Wed, 28 Dec 2016 23:23:46 +0800 Subject: [PATCH 17/52] ipv6: Should use consistent conditional judgement for ip6 fragment between __ip6_append_data and ip6_finish_output There is an inconsistent conditional judgement between __ip6_append_data and ip6_finish_output functions, the variable length in __ip6_append_data just include the length of application's payload and udp6 header, don't include the length of ipv6 header, but in ip6_finish_output use (skb->len > ip6_skb_dst_mtu(skb)) as judgement, and skb->len include the length of ipv6 header. That causes some particular application's udp6 payloads whose length are between (MTU - IPv6 Header) and MTU were fragmented by ip6_fragment even though the rst->dev support UFO feature. Add the length of ipv6 header to length in __ip6_append_data to keep consistent conditional judgement as ip6_finish_output for ip6 fragment. Signed-off-by: Zheng Li Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 70d0de404197..38122d04fadc 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1373,7 +1373,7 @@ emsgsize: */ cork->length += length; - if (((length > mtu) || + if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && From b2eb09af7370fedc6b9d9f05762f01625438467a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 28 Dec 2016 15:44:41 -0800 Subject: [PATCH 18/52] net: stmmac: Fix error path after register_netdev move Commit 5701659004d6 ("net: stmmac: Fix race between stmmac_drv_probe and stmmac_open") re-ordered how the MDIO bus registration and the network device are registered, but missed to unwind the MDIO bus registration in case we fail to register the network device. Fixes: 5701659004d6 ("net: stmmac: Fix race between stmmac_drv_probe and stmmac_open") Signed-off-by: Florian Fainelli Acked-by: Kweh, Hock Leong Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5910ea51f8f6..39eb7a65bb9f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3366,12 +3366,19 @@ int stmmac_dvr_probe(struct device *device, } ret = register_netdev(ndev); - if (ret) + if (ret) { netdev_err(priv->dev, "%s: ERROR %i registering the device\n", __func__, ret); + goto error_netdev_register; + } return ret; +error_netdev_register: + if (priv->hw->pcs != STMMAC_PCS_RGMII && + priv->hw->pcs != STMMAC_PCS_TBI && + priv->hw->pcs != STMMAC_PCS_RTBI) + stmmac_mdio_unregister(ndev); error_mdio_register: netif_napi_del(&priv->napi); error_hw_init: From 4775cc1f2d5abca894ac32774eefc22c45347d1c Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 28 Dec 2016 17:52:15 +0100 Subject: [PATCH 19/52] rtnl: stats - add missing netlink message size checks We miss to check if the netlink message is actually big enough to contain a struct if_stats_msg. Add a check to prevent userland from sending us short messages that would make us access memory beyond the end of the message. Fixes: 10c9ead9f3c6 ("rtnetlink: add new RTM_GETSTATS message to dump...") Signed-off-by: Mathias Krause Cc: Roopa Prabhu Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 18b5aae99bec..75e3ea7bda08 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3898,6 +3898,9 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh) u32 filter_mask; int err; + if (nlmsg_len(nlh) < sizeof(*ifsm)) + return -EINVAL; + ifsm = nlmsg_data(nlh); if (ifsm->ifindex > 0) dev = __dev_get_by_index(net, ifsm->ifindex); @@ -3947,6 +3950,9 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->seq = net->dev_base_seq; + if (nlmsg_len(cb->nlh) < sizeof(*ifsm)) + return -EINVAL; + ifsm = nlmsg_data(cb->nlh); filter_mask = ifsm->filter_mask; if (!filter_mask) From f0c16ba8933ed217c2688b277410b2a37ba81591 Mon Sep 17 00:00:00 2001 From: Wei Zhang Date: Thu, 29 Dec 2016 16:45:04 +0800 Subject: [PATCH 20/52] net: fix incorrect original ingress device index in PKTINFO When we send a packet for our own local address on a non-loopback interface (e.g. eth0), due to the change had been introduced from commit 0b922b7a829c ("net: original ingress device index in PKTINFO"), the original ingress device index would be set as the loopback interface. However, the packet should be considered as if it is being arrived via the sending interface (eth0), otherwise it would break the expectation of the userspace application (e.g. the DHCPRELEASE message from dhcp_release binary would be ignored by the dnsmasq daemon, since it come from lo which is not the interface dnsmasq bind to) Fixes: 0b922b7a829c ("net: original ingress device index in PKTINFO") Acked-by: David Ahern Signed-off-by: Wei Zhang Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 57e1405e8282..53ae0c6315ad 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1225,8 +1225,14 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) * which has interface index (iif) as the first member of the * underlying inet{6}_skb_parm struct. This code then overlays * PKTINFO_SKB_CB and in_pktinfo also has iif as the first - * element so the iif is picked up from the prior IPCB + * element so the iif is picked up from the prior IPCB. If iif + * is the loopback interface, then return the sending interface + * (e.g., process binds socket to eth0 for Tx which is + * redirected to loopback in the rtable/dst). */ + if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX) + pktinfo->ipi_ifindex = inet_iif(skb); + pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); } else { pktinfo->ipi_ifindex = 0; From 3b01fe7f91c8e4f9afc4fae3c5af72c14958d2d8 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 29 Dec 2016 18:37:09 +0200 Subject: [PATCH 21/52] net/mlx4_core: Use-after-free causes a resource leak in flow-steering detach mlx4_QP_FLOW_STEERING_DETACH_wrapper first removes the steering rule (which results in freeing the rule structure), and then references a field in this struct (the qp number) when releasing the busy-status on the rule's qp. Since this memory was freed, it could reallocated and changed. Therefore, the qp number in the struct may be incorrect, so that we are releasing the incorrect qp. This leaves the rule's qp in the busy state (and could possibly release an incorrect qp as well). Fix this by saving the qp number in a local variable, for use after removing the steering rule. Fixes: 2c473ae7e582 ("net/mlx4_core: Disallow releasing VF QPs which have steering rules") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index c548beaaf910..4b3e139e9c82 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -4473,6 +4473,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct res_qp *rqp; struct res_fs_rule *rrule; u64 mirr_reg_id; + int qpn; if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) @@ -4489,10 +4490,11 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, } mirr_reg_id = rrule->mirr_rule_id; kfree(rrule->mirr_mbox); + qpn = rrule->qpn; /* Release the rule form busy state before removal */ put_res(dev, slave, vhcr->in_param, RES_FS_RULE); - err = get_res(dev, slave, rrule->qpn, RES_QP, &rqp); + err = get_res(dev, slave, qpn, RES_QP, &rqp); if (err) return err; @@ -4517,7 +4519,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, if (!err) atomic_dec(&rqp->ref_count); out: - put_res(dev, slave, rrule->qpn, RES_QP); + put_res(dev, slave, qpn, RES_QP); return err; } From 6496bbf0ec481966ef9ffe5b6660d8d1b55c60cc Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 29 Dec 2016 18:37:10 +0200 Subject: [PATCH 22/52] net/mlx4_en: Fix bad WQE issue Single send WQE in RX buffer should be stamped with software ownership in order to prevent the flow of QP in error in FW once UPDATE_QP is called. Fixes: 9f519f68cfff ('mlx4_en: Not using Shared Receive Queues') Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 3c37e216bbf3..eac527e25ec9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -445,8 +445,14 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn; ring->stride = stride; - if (ring->stride <= TXBB_SIZE) + if (ring->stride <= TXBB_SIZE) { + /* Stamp first unused send wqe */ + __be32 *ptr = (__be32 *)ring->buf; + __be32 stamp = cpu_to_be32(1 << STAMP_SHIFT); + *ptr = stamp; + /* Move pointer to start of rx section */ ring->buf += TXBB_SIZE; + } ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride; From c1d5f8ff80ea84768f5fae1ca9d1abfbb5e6bbaa Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 29 Dec 2016 18:37:11 +0200 Subject: [PATCH 23/52] net/mlx4: Remove BUG_ON from ICM allocation routine This patch removes BUG_ON() macro from mlx4_alloc_icm_coherent() by checking DMA address alignment in advance and performing proper folding in case of error. Fixes: 5b0bf5e25efe ("mlx4_core: Support ICM tables in coherent memory") Reported-by: Ozgur Karatas Signed-off-by: Leon Romanovsky Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/icm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 2a9dd460a95f..e1f9e7cebf8f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -118,8 +118,13 @@ static int mlx4_alloc_icm_coherent(struct device *dev, struct scatterlist *mem, if (!buf) return -ENOMEM; + if (offset_in_page(buf)) { + dma_free_coherent(dev, PAGE_SIZE << order, + buf, sg_dma_address(mem)); + return -ENOMEM; + } + sg_set_buf(mem, buf, PAGE_SIZE << order); - BUG_ON(mem->offset); sg_dma_len(mem) = PAGE_SIZE << order; return 0; } From 61b6034c6cfdcb265bb453505c3d688e7567727a Mon Sep 17 00:00:00 2001 From: Slava Shwartsman Date: Thu, 29 Dec 2016 18:37:12 +0200 Subject: [PATCH 24/52] net/mlx4_en: Fix type mismatch for 32-bit systems is_power_of_2 expects unsigned long and we pass u64 max_val_cycles, this will be truncated on 32 bit systems, and the result is not what we were expecting. div_u64 expects u32 as a second argument and we pass max_val_cycles_rounded which is u64 hence it will always be truncated. Fix was tested on both 64 and 32 bit systems and got same results for max_val_cycles and max_val_cycles_rounded. Fixes: 4850cf458157 ("net/mlx4_en: Resolve dividing by zero in 32-bit system") Signed-off-by: Slava Shwartsman Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_clock.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 015198c14fa8..504461a464c5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -245,13 +245,9 @@ static u32 freq_to_shift(u16 freq) { u32 freq_khz = freq * 1000; u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; - u64 tmp_rounded = - roundup_pow_of_two(max_val_cycles) > max_val_cycles ? - roundup_pow_of_two(max_val_cycles) - 1 : UINT_MAX; - u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ? - max_val_cycles : tmp_rounded; + u64 max_val_cycles_rounded = 1ULL << fls64(max_val_cycles - 1); /* calculate max possible multiplier in order to fit in 64bit */ - u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded); + u64 max_mul = div64_u64(ULLONG_MAX, max_val_cycles_rounded); /* This comes from the reverse of clocksource_khz2mult */ return ilog2(div_u64(max_mul * freq_khz, 1000000)); From 10b1c04e92229ebeb38ccd0dcf2b6d3ec73c0575 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 29 Dec 2016 18:37:13 +0200 Subject: [PATCH 25/52] net/mlx4_core: Fix raw qp flow steering rules under SRIOV Demoting simple flow steering rule priority (for DPDK) was achieved by wrapping FW commands MLX4_QP_FLOW_STEERING_ATTACH/DETACH for the PF as well, and forcing the priority to MLX4_DOMAIN_NIC in the wrapper function for the PF and all VFs. In function mlx4_ib_create_flow(), this change caused the main rule creation for the PF to be wrapped, while it left the associated tunnel steering rule creation unwrapped for the PF. This mismatch caused rule deletion failures in mlx4_ib_destroy_flow() for the PF when the detach wrapper function did not find the associated tunnel-steering rule (since creation of that rule for the PF did not go through the wrapper function). Fix this by setting MLX4_QP_FLOW_STEERING_ATTACH/DETACH to be "native" (so that the PF invocation does not go through the wrapper), and perform the required priority demotion for the PF in the mlx4_ib_create_flow() code path. Fixes: 48564135cba8 ("net/mlx4_core: Demote simple multicast and broadcast flow steering rules") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 14 ++++++++++-- drivers/net/ethernet/mellanox/mlx4/main.c | 18 +++++++++++++++ .../ethernet/mellanox/mlx4/resource_tracker.c | 22 +------------------ include/linux/mlx4/device.h | 2 ++ 4 files changed, 33 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index c8413fc120e6..7031a8dd4d14 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1682,9 +1682,19 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att size += ret; } + if (mlx4_is_master(mdev->dev) && flow_type == MLX4_FS_REGULAR && + flow_attr->num_of_specs == 1) { + struct _rule_hw *rule_header = (struct _rule_hw *)(ctrl + 1); + enum ib_flow_spec_type header_spec = + ((union ib_flow_spec *)(flow_attr + 1))->type; + + if (header_spec == IB_FLOW_SPEC_ETH) + mlx4_handle_eth_header_mcast_prio(ctrl, rule_header); + } + ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, - MLX4_CMD_WRAPPED); + MLX4_CMD_NATIVE); if (ret == -ENOMEM) pr_err("mcg table is full. Fail to register network rule.\n"); else if (ret == -ENXIO) @@ -1701,7 +1711,7 @@ static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id) int err; err = mlx4_cmd(dev, reg_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, - MLX4_CMD_WRAPPED); + MLX4_CMD_NATIVE); if (err) pr_err("Fail to detach network rule. registration id = 0x%llx\n", reg_id); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 5e7840a7a33b..bffa6f345f2f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -782,6 +783,23 @@ int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) } EXPORT_SYMBOL(mlx4_is_slave_active); +void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, + struct _rule_hw *eth_header) +{ + if (is_multicast_ether_addr(eth_header->eth.dst_mac) || + is_broadcast_ether_addr(eth_header->eth.dst_mac)) { + struct mlx4_net_trans_rule_hw_eth *eth = + (struct mlx4_net_trans_rule_hw_eth *)eth_header; + struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1); + bool last_rule = next_rule->size == 0 && next_rule->id == 0 && + next_rule->rsvd == 0; + + if (last_rule) + ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC); + } +} +EXPORT_SYMBOL(mlx4_handle_eth_header_mcast_prio); + static void slave_adjust_steering_mode(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, struct mlx4_init_hca_param *hca_param) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 4b3e139e9c82..56185a0b827d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -4164,22 +4164,6 @@ static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header, return 0; } -static void handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, - struct _rule_hw *eth_header) -{ - if (is_multicast_ether_addr(eth_header->eth.dst_mac) || - is_broadcast_ether_addr(eth_header->eth.dst_mac)) { - struct mlx4_net_trans_rule_hw_eth *eth = - (struct mlx4_net_trans_rule_hw_eth *)eth_header; - struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1); - bool last_rule = next_rule->size == 0 && next_rule->id == 0 && - next_rule->rsvd == 0; - - if (last_rule) - ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC); - } -} - /* * In case of missing eth header, append eth header with a MAC address * assigned to the VF. @@ -4363,10 +4347,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id)); if (header_id == MLX4_NET_TRANS_RULE_ID_ETH) - handle_eth_header_mcast_prio(ctrl, rule_header); - - if (slave == dev->caps.function) - goto execute; + mlx4_handle_eth_header_mcast_prio(ctrl, rule_header); switch (header_id) { case MLX4_NET_TRANS_RULE_ID_ETH: @@ -4394,7 +4375,6 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, goto err_put_qp; } -execute: err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, vhcr->in_modifier, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 93bdb3485192..6533c16e27ad 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1384,6 +1384,8 @@ int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val); int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv); int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, bool *vlan_offload_disabled); +void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, + struct _rule_hw *eth_header); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); From 2344ef3c86a7fe41f97bf66c7936001b6132860b Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Fri, 30 Dec 2016 00:07:38 +0300 Subject: [PATCH 26/52] sh_eth: fix branch prediction in sh_eth_interrupt() IIUC, likely()/unlikely() should apply to the whole *if* statement's expression, not a part of it -- fix such expression in sh_eth_interrupt() accordingly... Fixes: 283e38db65e7 ("sh_eth: Fix serialisation of interrupt disable with interrupt & NAPI handlers") Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index f341c1bc7001..0af7fc279c85 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1656,7 +1656,7 @@ static irqreturn_t sh_eth_interrupt(int irq, void *netdev) else goto out; - if (!likely(mdp->irq_enabled)) { + if (unlikely(!mdp->irq_enabled)) { sh_eth_write(ndev, 0, EESIPR); goto out; } From f5a0aab84b74de68523599817569c057c7ac1622 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 29 Dec 2016 15:29:03 -0800 Subject: [PATCH 27/52] net: ipv4: dst for local input routes should use l3mdev if relevant IPv4 output routes already use l3mdev device instead of loopback for dst's if it is applicable. Change local input routes to do the same. This fixes icmp responses for unreachable UDP ports which are directed to the wrong table after commit 9d1a6c4ea43e4 because local_input routes use the loopback device. Moving from ingress device to loopback loses the L3 domain causing responses based on the dst to get to lost. Fixes: 9d1a6c4ea43e4 ("net: icmp_route_lookup should use rt dev to determine L3 domain") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a82a11747b3f..0fcac8e7a2b2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1914,7 +1914,8 @@ local_input: } } - rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type, + rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, + flags | RTCF_LOCAL, res.type, IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); if (!rth) goto e_nobufs; From e1a3a60a2ebe991605acb14cd58e39c0545e174e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 30 Dec 2016 17:42:32 -0600 Subject: [PATCH 28/52] net: socket: don't set sk_uid to garbage value in ->setattr() ->setattr() was recently implemented for socket files to sync the socket inode's uid to the new 'sk_uid' member of struct sock. It does this by copying over the ia_uid member of struct iattr. However, ia_uid is actually only valid when ATTR_UID is set in ia_valid, indicating that the uid is being changed, e.g. by chown. Other metadata operations such as chmod or utimes leave ia_uid uninitialized. Therefore, sk_uid could be set to a "garbage" value from the stack. Fix this by only copying the uid over when ATTR_UID is set. Fixes: 86741ec25462 ("net: core: Add a UID field to struct sock.") Signed-off-by: Eric Biggers Tested-by: Lorenzo Colitti Acked-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/socket.c b/net/socket.c index 8487bf136e5c..a8c2307590b8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -537,7 +537,7 @@ int sockfs_setattr(struct dentry *dentry, struct iattr *iattr) { int err = simple_setattr(dentry, iattr); - if (!err) { + if (!err && (iattr->ia_valid & ATTR_UID)) { struct socket *sock = SOCKET_I(d_inode(dentry)); sock->sk->sk_uid = iattr->ia_uid; From 4200462d88f47f3759bdf4705f87e207b0f5b2e4 Mon Sep 17 00:00:00 2001 From: Reiter Wolfgang Date: Sat, 31 Dec 2016 21:11:57 +0100 Subject: [PATCH 29/52] drop_monitor: add missing call to genlmsg_end Update nlmsg_len field with genlmsg_end to enable userspace processing using nlmsg_next helper. Also adds error handling. Signed-off-by: Reiter Wolfgang Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/drop_monitor.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 8e0c0635ee97..f465bad2ef2c 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -75,6 +75,7 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) struct nlattr *nla; struct sk_buff *skb; unsigned long flags; + void *msg_header; al = sizeof(struct net_dm_alert_msg); al += dm_hit_limit * sizeof(struct net_dm_drop_point); @@ -82,17 +83,31 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) skb = genlmsg_new(al, GFP_KERNEL); - if (skb) { - genlmsg_put(skb, 0, 0, &net_drop_monitor_family, - 0, NET_DM_CMD_ALERT); - nla = nla_reserve(skb, NLA_UNSPEC, - sizeof(struct net_dm_alert_msg)); - msg = nla_data(nla); - memset(msg, 0, al); - } else { - mod_timer(&data->send_timer, jiffies + HZ / 10); - } + if (!skb) + goto err; + msg_header = genlmsg_put(skb, 0, 0, &net_drop_monitor_family, + 0, NET_DM_CMD_ALERT); + if (!msg_header) { + nlmsg_free(skb); + skb = NULL; + goto err; + } + nla = nla_reserve(skb, NLA_UNSPEC, + sizeof(struct net_dm_alert_msg)); + if (!nla) { + nlmsg_free(skb); + skb = NULL; + goto err; + } + msg = nla_data(nla); + memset(msg, 0, al); + genlmsg_end(skb, msg_header); + goto out; + +err: + mod_timer(&data->send_timer, jiffies + HZ / 10); +out: spin_lock_irqsave(&data->lock, flags); swap(data->skb, skb); spin_unlock_irqrestore(&data->lock, flags); From 97b84fd6d91766ea57dcc350d78f42639e011c30 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 30 Dec 2016 19:48:19 +0100 Subject: [PATCH 30/52] l2tp: consider '::' as wildcard address in l2tp_ip6 socket lookup An L2TP socket bound to the unspecified address should match with any address. If not, it can't receive any packet and __l2tp_ip6_bind_lookup() can't prevent another socket from binding on the same device/tunnel ID. While there, rename the 'addr' variable to 'sk_laddr' (local addr), to make following patch clearer. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index f092ac441fdd..3135b9d55df5 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -64,7 +64,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net, struct sock *sk; sk_for_each_bound(sk, &l2tp_ip6_bind_table) { - const struct in6_addr *addr = inet6_rcv_saddr(sk); + const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk); struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk); if (l2tp == NULL) @@ -72,7 +72,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net, if ((l2tp->conn_id == tunnel_id) && net_eq(sock_net(sk), net) && - (!addr || ipv6_addr_equal(addr, laddr)) && + (!sk_laddr || ipv6_addr_any(sk_laddr) || ipv6_addr_equal(sk_laddr, laddr)) && (!sk->sk_bound_dev_if || !dif || sk->sk_bound_dev_if == dif)) goto found; From a9b2dff80be979432484afaf7f8d8e73f9e8838a Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 30 Dec 2016 19:48:20 +0100 Subject: [PATCH 31/52] l2tp: take remote address into account in l2tp_ip and l2tp_ip6 socket lookups For connected sockets, __l2tp_ip{,6}_bind_lookup() needs to check the remote IP when looking for a matching socket. Otherwise a connected socket can receive traffic not originating from its peer. Drop l2tp_ip_bind_lookup() and l2tp_ip6_bind_lookup() instead of updating their prototype, as these functions aren't used. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 19 ++++++------------- net/l2tp/l2tp_ip6.c | 20 ++++++-------------- 2 files changed, 12 insertions(+), 27 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 8938b6ba57a0..3d73278b86ca 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -47,7 +47,8 @@ static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk) return (struct l2tp_ip_sock *)sk; } -static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id) +static struct sock *__l2tp_ip_bind_lookup(const struct net *net, __be32 laddr, + __be32 raddr, int dif, u32 tunnel_id) { struct sock *sk; @@ -61,6 +62,7 @@ static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif if ((l2tp->conn_id == tunnel_id) && net_eq(sock_net(sk), net) && !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && + (!inet->inet_daddr || !raddr || inet->inet_daddr == raddr) && (!sk->sk_bound_dev_if || !dif || sk->sk_bound_dev_if == dif)) goto found; @@ -71,15 +73,6 @@ found: return sk; } -static inline struct sock *l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id) -{ - struct sock *sk = __l2tp_ip_bind_lookup(net, laddr, dif, tunnel_id); - if (sk) - sock_hold(sk); - - return sk; -} - /* When processing receive frames, there are two cases to * consider. Data frames consist of a non-zero session-id and an * optional cookie. Control frames consist of a regular L2TP header @@ -183,8 +176,8 @@ pass_up: struct iphdr *iph = (struct iphdr *) skb_network_header(skb); read_lock_bh(&l2tp_ip_lock); - sk = __l2tp_ip_bind_lookup(net, iph->daddr, inet_iif(skb), - tunnel_id); + sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, + inet_iif(skb), tunnel_id); if (!sk) { read_unlock_bh(&l2tp_ip_lock); goto discard; @@ -280,7 +273,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_saddr = 0; /* Use device */ write_lock_bh(&l2tp_ip_lock); - if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, + if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, 0, sk->sk_bound_dev_if, addr->l2tp_conn_id)) { write_unlock_bh(&l2tp_ip_lock); ret = -EADDRINUSE; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 3135b9d55df5..331ccf5a7bad 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -59,12 +59,14 @@ static inline struct l2tp_ip6_sock *l2tp_ip6_sk(const struct sock *sk) static struct sock *__l2tp_ip6_bind_lookup(struct net *net, struct in6_addr *laddr, + const struct in6_addr *raddr, int dif, u32 tunnel_id) { struct sock *sk; sk_for_each_bound(sk, &l2tp_ip6_bind_table) { const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk); + const struct in6_addr *sk_raddr = &sk->sk_v6_daddr; struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk); if (l2tp == NULL) @@ -73,6 +75,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net, if ((l2tp->conn_id == tunnel_id) && net_eq(sock_net(sk), net) && (!sk_laddr || ipv6_addr_any(sk_laddr) || ipv6_addr_equal(sk_laddr, laddr)) && + (!raddr || ipv6_addr_any(sk_raddr) || ipv6_addr_equal(sk_raddr, raddr)) && (!sk->sk_bound_dev_if || !dif || sk->sk_bound_dev_if == dif)) goto found; @@ -83,17 +86,6 @@ found: return sk; } -static inline struct sock *l2tp_ip6_bind_lookup(struct net *net, - struct in6_addr *laddr, - int dif, u32 tunnel_id) -{ - struct sock *sk = __l2tp_ip6_bind_lookup(net, laddr, dif, tunnel_id); - if (sk) - sock_hold(sk); - - return sk; -} - /* When processing receive frames, there are two cases to * consider. Data frames consist of a non-zero session-id and an * optional cookie. Control frames consist of a regular L2TP header @@ -197,8 +189,8 @@ pass_up: struct ipv6hdr *iph = ipv6_hdr(skb); read_lock_bh(&l2tp_ip6_lock); - sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, inet6_iif(skb), - tunnel_id); + sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr, + inet6_iif(skb), tunnel_id); if (!sk) { read_unlock_bh(&l2tp_ip6_lock); goto discard; @@ -330,7 +322,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) rcu_read_unlock(); write_lock_bh(&l2tp_ip6_lock); - if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, bound_dev_if, + if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, NULL, bound_dev_if, addr->l2tp_conn_id)) { write_unlock_bh(&l2tp_ip6_lock); err = -EADDRINUSE; From 35f432a03e41d3bf08c51ede917f94e2288fbe8c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 2 Jan 2017 11:19:29 +0100 Subject: [PATCH 32/52] mac80211: initialize fast-xmit 'info' later In ieee80211_xmit_fast(), 'info' is initialized to point to the skb that's passed in, but that skb may later be replaced by a clone (if it was shared), leading to an invalid pointer. This can lead to use-after-free and also later crashes since the real SKB's info->hw_queue doesn't get initialized properly. Fix this by assigning info only later, when it's needed, after the skb replacement (may have) happened. Cc: stable@vger.kernel.org Reported-by: Ben Greear Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2c21b7039136..0d8b716e509e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3287,7 +3287,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, int extra_head = fast_tx->hdr_len - (ETH_HLEN - 2); int hw_headroom = sdata->local->hw.extra_tx_headroom; struct ethhdr eth; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_info *info; struct ieee80211_hdr *hdr = (void *)fast_tx->hdr; struct ieee80211_tx_data tx; ieee80211_tx_result r; @@ -3351,6 +3351,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, memcpy(skb->data + fast_tx->da_offs, eth.h_dest, ETH_ALEN); memcpy(skb->data + fast_tx->sa_offs, eth.h_source, ETH_ALEN); + info = IEEE80211_SKB_CB(skb); memset(info, 0, sizeof(*info)); info->band = fast_tx->band; info->control.vif = &sdata->vif; From d0af683407a26a4437d8fa6e283ea201f2ae8146 Mon Sep 17 00:00:00 2001 From: Ian Kumlien Date: Mon, 2 Jan 2017 09:18:35 +0100 Subject: [PATCH 33/52] flow_dissector: Update pptp handling to avoid null pointer deref. __skb_flow_dissect can be called with a skb or a data packet, either can be NULL. All calls seems to have been moved to __skb_header_pointer except the pptp handling which is still calling skb_header_pointer. skb_header_pointer will use skb->data and thus: [ 109.556866] BUG: unable to handle kernel NULL pointer dereference at 0000000000000080 [ 109.557102] IP: [] __skb_flow_dissect+0xa88/0xce0 [ 109.557263] PGD 0 [ 109.557338] [ 109.557484] Oops: 0000 [#1] SMP [ 109.557562] Modules linked in: chaoskey [ 109.557783] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.9.0 #79 [ 109.557867] Hardware name: Supermicro A1SRM-LN7F/LN5F/A1SRM-LN7F-2758, BIOS 1.0c 11/04/2015 [ 109.557957] task: ffff94085c27bc00 task.stack: ffffb745c0068000 [ 109.558041] RIP: 0010:[] [] __skb_flow_dissect+0xa88/0xce0 [ 109.558203] RSP: 0018:ffff94087fc83d40 EFLAGS: 00010206 [ 109.558286] RAX: 0000000000000130 RBX: ffffffff8975bf80 RCX: ffff94084fab6800 [ 109.558373] RDX: 0000000000000010 RSI: 000000000000000c RDI: 0000000000000000 [ 109.558460] RBP: 0000000000000b88 R08: 0000000000000000 R09: 0000000000000022 [ 109.558547] R10: 0000000000000008 R11: ffff94087fc83e04 R12: 0000000000000000 [ 109.558763] R13: ffff94084fab6800 R14: ffff94087fc83e04 R15: 000000000000002f [ 109.558979] FS: 0000000000000000(0000) GS:ffff94087fc80000(0000) knlGS:0000000000000000 [ 109.559326] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 109.559539] CR2: 0000000000000080 CR3: 0000000281809000 CR4: 00000000001026e0 [ 109.559753] Stack: [ 109.559957] 000000000000000c ffff94084fab6822 0000000000000001 ffff94085c2b5fc0 [ 109.560578] 0000000000000001 0000000000002000 0000000000000000 0000000000000000 [ 109.561200] 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 109.561820] Call Trace: [ 109.562027] [ 109.562108] [] ? eth_get_headlen+0x7a/0xf0 [ 109.562522] [] ? igb_poll+0x96a/0xe80 [ 109.562737] [] ? net_rx_action+0x20b/0x350 [ 109.562953] [] ? __do_softirq+0xe8/0x280 [ 109.563169] [] ? irq_exit+0xaa/0xb0 [ 109.563382] [] ? do_IRQ+0x4b/0xc0 [ 109.563597] [] ? common_interrupt+0x7f/0x7f [ 109.563810] [ 109.563890] [] ? cpuidle_enter_state+0x130/0x2c0 [ 109.564304] [] ? cpuidle_enter_state+0x120/0x2c0 [ 109.564520] [] ? cpu_startup_entry+0x19f/0x1f0 [ 109.564737] [] ? start_secondary+0x12a/0x140 [ 109.564950] Code: 83 e2 20 a8 80 0f 84 60 01 00 00 c7 04 24 08 00 00 00 66 85 d2 0f 84 be fe ff ff e9 69 fe ff ff 8b 34 24 89 f2 83 c2 04 66 85 c0 <41> 8b 84 24 80 00 00 00 0f 49 d6 41 8d 31 01 d6 41 2b 84 24 84 [ 109.569959] RIP [] __skb_flow_dissect+0xa88/0xce0 [ 109.570245] RSP [ 109.570453] CR2: 0000000000000080 Fixes: ab10dccb1160 ("rps: Inspect PPTP encapsulated by GRE to get flow hash") Signed-off-by: Ian Kumlien Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d6447dc10371..fe4e1531976c 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -468,8 +468,9 @@ ip_proto_again: if (hdr->flags & GRE_ACK) offset += sizeof(((struct pptp_gre_header *)0)->ack); - ppp_hdr = skb_header_pointer(skb, nhoff + offset, - sizeof(_ppp_hdr), _ppp_hdr); + ppp_hdr = __skb_header_pointer(skb, nhoff + offset, + sizeof(_ppp_hdr), + data, hlen, _ppp_hdr); if (!ppp_hdr) goto out_bad; From 7ababb782690e03b78657e27bd051e20163af2d6 Mon Sep 17 00:00:00 2001 From: Michal Tesar Date: Mon, 2 Jan 2017 14:38:36 +0100 Subject: [PATCH 34/52] igmp: Make igmp group member RFC 3376 compliant 5.2. Action on Reception of a Query When a system receives a Query, it does not respond immediately. Instead, it delays its response by a random amount of time, bounded by the Max Resp Time value derived from the Max Resp Code in the received Query message. A system may receive a variety of Queries on different interfaces and of different kinds (e.g., General Queries, Group-Specific Queries, and Group-and-Source-Specific Queries), each of which may require its own delayed response. Before scheduling a response to a Query, the system must first consider previously scheduled pending responses and in many cases schedule a combined response. Therefore, the system must be able to maintain the following state: o A timer per interface for scheduling responses to General Queries. o A per-group and interface timer for scheduling responses to Group- Specific and Group-and-Source-Specific Queries. o A per-group and interface list of sources to be reported in the response to a Group-and-Source-Specific Query. When a new Query with the Router-Alert option arrives on an interface, provided the system has state to report, a delay for a response is randomly selected in the range (0, [Max Resp Time]) where Max Resp Time is derived from Max Resp Code in the received Query message. The following rules are then used to determine if a Report needs to be scheduled and the type of Report to schedule. The rules are considered in order and only the first matching rule is applied. 1. If there is a pending response to a previous General Query scheduled sooner than the selected delay, no additional response needs to be scheduled. 2. If the received Query is a General Query, the interface timer is used to schedule a response to the General Query after the selected delay. Any previously pending response to a General Query is canceled. --8<-- Currently the timer is rearmed with new random expiration time for every incoming query regardless of possibly already pending report. Which is not aligned with the above RFE. It also might happen that higher rate of incoming queries can postpone the report after the expiration time of the first query causing group membership loss. Now the per interface general query timer is rearmed only when there is no pending report already scheduled on that interface or the newly selected expiration time is before the already pending scheduled report. Signed-off-by: Michal Tesar Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 68d622133f53..5b15459955f8 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -219,9 +219,14 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) static void igmp_gq_start_timer(struct in_device *in_dev) { int tv = prandom_u32() % in_dev->mr_maxdelay; + unsigned long exp = jiffies + tv + 2; + + if (in_dev->mr_gq_running && + time_after_eq(exp, (in_dev->mr_gq_timer).expires)) + return; in_dev->mr_gq_running = 1; - if (!mod_timer(&in_dev->mr_gq_timer, jiffies+tv+2)) + if (!mod_timer(&in_dev->mr_gq_timer, exp)) in_dev_hold(in_dev); } From 4e5da369df64628358e25ffedcf80ac43af3793d Mon Sep 17 00:00:00 2001 From: Alexander Alemayhu Date: Mon, 2 Jan 2017 18:52:24 +0100 Subject: [PATCH 35/52] Documentation/networking: fix typo in mpls-sysctl s/utliziation/utilization Signed-off-by: Alexander Alemayhu Signed-off-by: David S. Miller --- Documentation/networking/mpls-sysctl.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt index 9ed15f86c17c..15d8d16934fd 100644 --- a/Documentation/networking/mpls-sysctl.txt +++ b/Documentation/networking/mpls-sysctl.txt @@ -5,8 +5,8 @@ platform_labels - INTEGER possible to configure forwarding for label values equal to or greater than the number of platform labels. - A dense utliziation of the entries in the platform label table - is possible and expected aas the platform labels are locally + A dense utilization of the entries in the platform label table + is possible and expected as the platform labels are locally allocated. If the number of platform label table entries is set to 0 no From 8f87e626b059f1b82b017f53c5ee91fbc4486e36 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 2 Jan 2017 12:56:02 +0100 Subject: [PATCH 36/52] net: stmmac: dwmac-oxnas: fix of-node leak Use the syscon lookup-by-phandle helper so that the reference taken by of_parse_phandle() is released when done with the node. Fixes: 5ed7414062e7 ("net: stmmac: Add OXNAS Glue Driver") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c index c35597586121..fcc237e0aae1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c @@ -109,16 +109,9 @@ static int oxnas_dwmac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; struct stmmac_resources stmmac_res; - struct device_node *sysctrl; struct oxnas_dwmac *dwmac; int ret; - sysctrl = of_parse_phandle(pdev->dev.of_node, "oxsemi,sys-ctrl", 0); - if (!sysctrl) { - dev_err(&pdev->dev, "failed to get sys-ctrl node\n"); - return -EINVAL; - } - ret = stmmac_get_platform_resources(pdev, &stmmac_res); if (ret) return ret; @@ -134,7 +127,8 @@ static int oxnas_dwmac_probe(struct platform_device *pdev) dwmac->dev = &pdev->dev; plat_dat->bsp_priv = dwmac; - dwmac->regmap = syscon_node_to_regmap(sysctrl); + dwmac->regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "oxsemi,sys-ctrl"); if (IS_ERR(dwmac->regmap)) { dev_err(&pdev->dev, "failed to have sysctrl regmap\n"); return PTR_ERR(dwmac->regmap); From 6b4c212b95ce6a586473a772fb2d28ab22a38f0e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 2 Jan 2017 12:56:03 +0100 Subject: [PATCH 37/52] net: stmmac: dwmac-oxnas: fix fixed-link-phydev leaks Make sure to deregister and free any fixed-link phy registered during probe on probe errors and on driver unbind by calling the new glue helper function. For driver unbind, use the generic stmmac-platform remove implementation and add an exit callback to disable the clock. Fixes: 5ed7414062e7 ("net: stmmac: Add OXNAS Glue Driver") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/dwmac-oxnas.c | 41 ++++++++++++------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c index fcc237e0aae1..3efd110613df 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c @@ -105,6 +105,13 @@ static int oxnas_dwmac_init(struct oxnas_dwmac *dwmac) return 0; } +static void oxnas_dwmac_exit(struct platform_device *pdev, void *priv) +{ + struct oxnas_dwmac *dwmac = priv; + + clk_disable_unprepare(dwmac->clk); +} + static int oxnas_dwmac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; @@ -121,40 +128,44 @@ static int oxnas_dwmac_probe(struct platform_device *pdev) return PTR_ERR(plat_dat); dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); - if (!dwmac) - return -ENOMEM; + if (!dwmac) { + ret = -ENOMEM; + goto err_remove_config_dt; + } dwmac->dev = &pdev->dev; plat_dat->bsp_priv = dwmac; + plat_dat->exit = oxnas_dwmac_exit; dwmac->regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "oxsemi,sys-ctrl"); if (IS_ERR(dwmac->regmap)) { dev_err(&pdev->dev, "failed to have sysctrl regmap\n"); - return PTR_ERR(dwmac->regmap); + ret = PTR_ERR(dwmac->regmap); + goto err_remove_config_dt; } dwmac->clk = devm_clk_get(&pdev->dev, "gmac"); - if (IS_ERR(dwmac->clk)) - return PTR_ERR(dwmac->clk); + if (IS_ERR(dwmac->clk)) { + ret = PTR_ERR(dwmac->clk); + goto err_remove_config_dt; + } ret = oxnas_dwmac_init(dwmac); if (ret) - return ret; + goto err_remove_config_dt; ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) - clk_disable_unprepare(dwmac->clk); + goto err_dwmac_exit; - return ret; -} -static int oxnas_dwmac_remove(struct platform_device *pdev) -{ - struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev); - int ret = stmmac_dvr_remove(&pdev->dev); + return 0; - clk_disable_unprepare(dwmac->clk); +err_dwmac_exit: + oxnas_dwmac_exit(pdev, plat_dat->bsp_priv); +err_remove_config_dt: + stmmac_remove_config_dt(pdev, plat_dat); return ret; } @@ -197,7 +208,7 @@ MODULE_DEVICE_TABLE(of, oxnas_dwmac_match); static struct platform_driver oxnas_dwmac_driver = { .probe = oxnas_dwmac_probe, - .remove = oxnas_dwmac_remove, + .remove = stmmac_pltfr_remove, .driver = { .name = "oxnas-dwmac", .pm = &oxnas_dwmac_pm_ops, From a8de4d719dfc12bc22192d7daef7c7ae6cfb8b80 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 2 Jan 2017 12:56:04 +0100 Subject: [PATCH 38/52] net: stmmac: dwmac-oxnas: use generic pm implementation Now that we have an exit callback in place, add init as well and get rid of the custom PM callbacks in favour of the generic ones. Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/dwmac-oxnas.c | 38 +++---------------- 1 file changed, 5 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c index 3efd110613df..3dc7d279f805 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c @@ -60,8 +60,9 @@ struct oxnas_dwmac { struct regmap *regmap; }; -static int oxnas_dwmac_init(struct oxnas_dwmac *dwmac) +static int oxnas_dwmac_init(struct platform_device *pdev, void *priv) { + struct oxnas_dwmac *dwmac = priv; unsigned int value; int ret; @@ -135,6 +136,7 @@ static int oxnas_dwmac_probe(struct platform_device *pdev) dwmac->dev = &pdev->dev; plat_dat->bsp_priv = dwmac; + plat_dat->init = oxnas_dwmac_init; plat_dat->exit = oxnas_dwmac_exit; dwmac->regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, @@ -151,7 +153,7 @@ static int oxnas_dwmac_probe(struct platform_device *pdev) goto err_remove_config_dt; } - ret = oxnas_dwmac_init(dwmac); + ret = oxnas_dwmac_init(pdev, plat_dat->bsp_priv); if (ret) goto err_remove_config_dt; @@ -170,36 +172,6 @@ err_remove_config_dt: return ret; } -#ifdef CONFIG_PM_SLEEP -static int oxnas_dwmac_suspend(struct device *dev) -{ - struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(dev); - int ret; - - ret = stmmac_suspend(dev); - clk_disable_unprepare(dwmac->clk); - - return ret; -} - -static int oxnas_dwmac_resume(struct device *dev) -{ - struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(dev); - int ret; - - ret = oxnas_dwmac_init(dwmac); - if (ret) - return ret; - - ret = stmmac_resume(dev); - - return ret; -} -#endif /* CONFIG_PM_SLEEP */ - -static SIMPLE_DEV_PM_OPS(oxnas_dwmac_pm_ops, - oxnas_dwmac_suspend, oxnas_dwmac_resume); - static const struct of_device_id oxnas_dwmac_match[] = { { .compatible = "oxsemi,ox820-dwmac" }, { } @@ -211,7 +183,7 @@ static struct platform_driver oxnas_dwmac_driver = { .remove = stmmac_pltfr_remove, .driver = { .name = "oxnas-dwmac", - .pm = &oxnas_dwmac_pm_ops, + .pm = &stmmac_pltfr_pm_ops, .of_match_table = oxnas_dwmac_match, }, }; From 515028fe29d84a15f77d071a13b2d34eb3d137af Mon Sep 17 00:00:00 2001 From: Bartosz Folta Date: Mon, 2 Jan 2017 12:41:50 +0000 Subject: [PATCH 39/52] net: macb: Updated resource allocation function calls to new version of API. Changed function calls of resource allocation to new API. Changed way of setting DMA mask. Removed unnecessary sanity check. This patch is sent in regard to recently applied patch Commit 83a77e9ec4150ee4acc635638f7dedd9da523a26 net: macb: Added PCI wrapper for Platform Driver. Signed-off-by: Bartosz Folta Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_pci.c | 27 +++++++++---------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c index 92be2cd8f817..9906fda76087 100644 --- a/drivers/net/ethernet/cadence/macb_pci.c +++ b/drivers/net/ethernet/cadence/macb_pci.c @@ -1,5 +1,5 @@ /** - * macb_pci.c - Cadence GEM PCI wrapper. + * Cadence GEM PCI wrapper. * * Copyright (C) 2016 Cadence Design Systems - http://www.cadence.com * @@ -45,32 +45,27 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct macb_platform_data plat_data; struct resource res[2]; - /* sanity check */ - if (!id) - return -EINVAL; - /* enable pci device */ - err = pci_enable_device(pdev); + err = pcim_enable_device(pdev); if (err < 0) { - dev_err(&pdev->dev, "Enabling PCI device has failed: 0x%04X", - err); - return -EACCES; + dev_err(&pdev->dev, "Enabling PCI device has failed: %d", err); + return err; } pci_set_master(pdev); /* set up resources */ memset(res, 0x00, sizeof(struct resource) * ARRAY_SIZE(res)); - res[0].start = pdev->resource[0].start; - res[0].end = pdev->resource[0].end; + res[0].start = pci_resource_start(pdev, 0); + res[0].end = pci_resource_end(pdev, 0); res[0].name = PCI_DRIVER_NAME; res[0].flags = IORESOURCE_MEM; - res[1].start = pdev->irq; + res[1].start = pci_irq_vector(pdev, 0); res[1].name = PCI_DRIVER_NAME; res[1].flags = IORESOURCE_IRQ; - dev_info(&pdev->dev, "EMAC physical base addr = 0x%p\n", - (void *)(uintptr_t)pci_resource_start(pdev, 0)); + dev_info(&pdev->dev, "EMAC physical base addr: %pa\n", + &res[0].start); /* set up macb platform data */ memset(&plat_data, 0, sizeof(plat_data)); @@ -100,7 +95,7 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id) plat_info.num_res = ARRAY_SIZE(res); plat_info.data = &plat_data; plat_info.size_data = sizeof(plat_data); - plat_info.dma_mask = DMA_BIT_MASK(32); + plat_info.dma_mask = pdev->dma_mask; /* register platform device */ plat_dev = platform_device_register_full(&plat_info); @@ -120,7 +115,6 @@ err_hclk_register: clk_unregister(plat_data.pclk); err_pclk_register: - pci_disable_device(pdev); return err; } @@ -130,7 +124,6 @@ static void macb_remove(struct pci_dev *pdev) struct macb_platform_data *plat_data = dev_get_platdata(&plat_dev->dev); platform_device_unregister(plat_dev); - pci_disable_device(pdev); clk_unregister(plat_data->pclk); clk_unregister(plat_data->hclk); } From 5350d54f6cd12eaff623e890744c79b700bd3f17 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 2 Jan 2017 13:32:54 -0800 Subject: [PATCH 40/52] ipv4: Do not allow MAIN to be alias for new LOCAL w/ custom rules In the case of custom rules being present we need to handle the case of the LOCAL table being intialized after the new rule has been added. To address that I am adding a new check so that we can make certain we don't use an alias of MAIN for LOCAL when allocating a new table. Fixes: 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse") Reported-by: Oliver Brunel Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3ff8938893ec..eae0332b0e8c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -85,7 +85,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) if (tb) return tb; - if (id == RT_TABLE_LOCAL) + if (id == RT_TABLE_LOCAL && !net->ipv4.fib_has_custom_rules) alias = fib_new_table(net, RT_TABLE_MAIN); tb = fib_trie_table(id, alias); From 096de2f83ebc8e0404c5b7e847a4abd27b9739da Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 3 Jan 2017 16:26:04 +0100 Subject: [PATCH 41/52] benet: stricter vxlan offloading check in be_features_check When VXLAN offloading is enabled, be_features_check() tries to check if an encapsulated packet is indeed a VXLAN packet. The check is not strict enough, and considers any UDP-encapsulated ethernet frame with a 8-byte tunnel header as being VXLAN. Unfortunately, both GENEVE and VXLAN-GPE have a 8-byte header, so they get through this check. Force the UDP destination port to be the one that has been offloaded to hardware. Without this, GENEVE-encapsulated packets can end up having an incorrect checksum when both a GENEVE and a VXLAN (offloaded) tunnel are configured. This is similar to commit a547224dceed ("mlx4e: Do not attempt to offload VXLAN ports that are unrecognized"). Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 7e1633bf5a22..225e9a4877d7 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5155,7 +5155,9 @@ static netdev_features_t be_features_check(struct sk_buff *skb, skb->inner_protocol_type != ENCAP_TYPE_ETHER || skb->inner_protocol != htons(ETH_P_TEB) || skb_inner_mac_header(skb) - skb_transport_header(skb) != - sizeof(struct udphdr) + sizeof(struct vxlanhdr)) + sizeof(struct udphdr) + sizeof(struct vxlanhdr) || + !adapter->vxlan_port || + udp_hdr(skb)->dest != adapter->vxlan_port) return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; From 3b48ab2248e61408910e792fe84d6ec466084c1a Mon Sep 17 00:00:00 2001 From: Reiter Wolfgang Date: Tue, 3 Jan 2017 01:39:10 +0100 Subject: [PATCH 42/52] drop_monitor: consider inserted data in genlmsg_end Final nlmsg_len field update must reflect inserted net_dm_drop_point data. This patch depends on previous patch: "drop_monitor: add missing call to genlmsg_end" Signed-off-by: Reiter Wolfgang Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/drop_monitor.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index f465bad2ef2c..fb55327dcfea 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -102,7 +102,6 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) } msg = nla_data(nla); memset(msg, 0, al); - genlmsg_end(skb, msg_header); goto out; err: @@ -112,6 +111,13 @@ out: swap(data->skb, skb); spin_unlock_irqrestore(&data->lock, flags); + if (skb) { + struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data; + struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlh); + + genlmsg_end(skb, genlmsg_data(gnlh)); + } + return skb; } From 926d93a33e59b2729afdbad357233c17184de9d2 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 3 Jan 2017 09:37:55 -0800 Subject: [PATCH 43/52] net: vrf: Add missing Rx counters The move from rx-handler to L3 receive handler inadvertantly dropped the rx counters. Restore them. Fixes: 74b20582ac38 ("net: l3mdev: Add hook in ip and ipv6") Reported-by: Dinesh Dutt Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 7532646c3b7b..23dfb0eac098 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -967,6 +967,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, */ need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); if (!ipv6_ndisc_frame(skb) && !need_strict) { + vrf_rx_stats(vrf_dev, skb->len); skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; @@ -1011,6 +1012,8 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, goto out; } + vrf_rx_stats(vrf_dev, skb->len); + skb_push(skb, skb->mac_len); dev_queue_xmit_nit(skb, vrf_dev); skb_pull(skb, skb->mac_len); From 63dfb0dac9055145db85ce764355aef2f563739a Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 3 Jan 2017 17:22:20 +0800 Subject: [PATCH 44/52] net: usb: asix_devices: add .reset_resume for USB PM The USB core may call reset_resume when it fails to resume asix device. And USB core can recovery this abnormal resume at low level driver, the same .resume at asix driver can work too. Add .reset_resume can avoid disconnecting after backing from system resume, and NFS can still be mounted after this commit. Signed-off-by: Peter Chen Signed-off-by: David S. Miller --- drivers/net/usb/asix_devices.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 6c646e228833..6e98ede997d3 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -1367,6 +1367,7 @@ static struct usb_driver asix_driver = { .probe = usbnet_probe, .suspend = asix_suspend, .resume = asix_resume, + .reset_resume = asix_resume, .disconnect = usbnet_disconnect, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, From a9a8cdb368d99bb655b5cdabea560446db0527cc Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 3 Jan 2017 21:25:48 +0530 Subject: [PATCH 45/52] libcxgb: fix error check for ip6_route_output() ip6_route_output() never returns NULL so check dst->error instead of !dst. Signed-off-by: Varun Prakash Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c index 0f0de5b63622..d04a6c163445 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c @@ -133,17 +133,15 @@ cxgb_find_route6(struct cxgb4_lld_info *lldi, if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) fl6.flowi6_oif = sin6_scope_id; dst = ip6_route_output(&init_net, NULL, &fl6); - if (!dst) - goto out; - if (!cxgb_our_interface(lldi, get_real_dev, - ip6_dst_idev(dst)->dev) && - !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { + if (dst->error || + (!cxgb_our_interface(lldi, get_real_dev, + ip6_dst_idev(dst)->dev) && + !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK))) { dst_release(dst); - dst = NULL; + return NULL; } } -out: return dst; } EXPORT_SYMBOL(cxgb_find_route6); From cd7aeb1f9706b665ad8659df8ff036e7bc0097f4 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 3 Jan 2017 13:57:00 -0500 Subject: [PATCH 46/52] LiquidIO VF: s/select/imply/ for PTP_1588_CLOCK Fix a minor fallout from the merge of the timers and the networking trees. The following error may result if the PTP_1588_CLOCK prerequisites are not available: drivers/built-in.o: In function `ptp_clock_unregister': (.text+0x40e0a5): undefined reference to `pps_unregister_source' drivers/built-in.o: In function `ptp_clock_unregister': (.text+0x40e0cc): undefined reference to `posix_clock_unregister' drivers/built-in.o: In function `ptp_clock_event': (.text+0x40e249): undefined reference to `pps_event' drivers/built-in.o: In function `ptp_clock_register': (.text+0x40e5e1): undefined reference to `pps_register_source' drivers/built-in.o: In function `ptp_clock_register': (.text+0x40e62c): undefined reference to `posix_clock_register' drivers/built-in.o: In function `ptp_clock_register': (.text+0x40e68d): undefined reference to `pps_unregister_source' Signed-off-by: Nicolas Pitre Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig index bbc8bd16cb97..dcbce6cac63e 100644 --- a/drivers/net/ethernet/cavium/Kconfig +++ b/drivers/net/ethernet/cavium/Kconfig @@ -77,7 +77,7 @@ config OCTEON_MGMT_ETHERNET config LIQUIDIO_VF tristate "Cavium LiquidIO VF support" depends on 64BIT && PCI_MSI - select PTP_1588_CLOCK + imply PTP_1588_CLOCK ---help--- This driver supports Cavium LiquidIO Intelligent Server Adapter based on CN23XX chips. From bb7da333d0a9f3bddc08f84187b7579a3f68fd24 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 3 Jan 2017 16:34:48 -0800 Subject: [PATCH 47/52] net: systemport: Utilize skb_put_padto() Since we need to pad our packets, utilize skb_put_padto() which increases skb->len by how much we need to pad, allowing us to eliminate the test on skb->len right below. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 25d1eb4933d0..e67908b5edfe 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1028,13 +1028,12 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, * (including FCS and tag) because the length verification is done after * the Broadcom tag is stripped off the ingress packet. */ - if (skb_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) { + if (skb_put_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) { ret = NETDEV_TX_OK; goto out; } - skb_len = skb->len < ETH_ZLEN + ENET_BRCM_TAG_LEN ? - ETH_ZLEN + ENET_BRCM_TAG_LEN : skb->len; + skb_len = skb->len; mapping = dma_map_single(kdev, skb->data, skb_len, DMA_TO_DEVICE); if (dma_mapping_error(kdev, mapping)) { From 38e5a85562a6cd911fc26d951d576551a688574c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 3 Jan 2017 16:34:49 -0800 Subject: [PATCH 48/52] net: systemport: Pad packet before inserting TSB Inserting the TSB means adding an extra 8 bytes in front the of packet that is going to be used as metadata information by the TDMA engine, but stripped off, so it does not really help with the packet padding. For some odd packet sizes that fall below the 60 bytes payload (e.g: ARP) we can end-up padding them after the TSB insertion, thus making them 64 bytes, but with the TDMA stripping off the first 8 bytes, they could still be smaller than 64 bytes which is required to ingress the switch. Fix this by swapping the padding and TSB insertion, guaranteeing that the packets have the right sizes. Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index e67908b5edfe..7e8cf213fd81 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1012,15 +1012,6 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, goto out; } - /* Insert TSB and checksum infos */ - if (priv->tsb_en) { - skb = bcm_sysport_insert_tsb(skb, dev); - if (!skb) { - ret = NETDEV_TX_OK; - goto out; - } - } - /* The Ethernet switch we are interfaced with needs packets to be at * least 64 bytes (including FCS) otherwise they will be discarded when * they enter the switch port logic. When Broadcom tags are enabled, we @@ -1033,6 +1024,15 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, goto out; } + /* Insert TSB and checksum infos */ + if (priv->tsb_en) { + skb = bcm_sysport_insert_tsb(skb, dev); + if (!skb) { + ret = NETDEV_TX_OK; + goto out; + } + } + skb_len = skb->len; mapping = dma_map_single(kdev, skb->data, skb_len, DMA_TO_DEVICE); From 3fe61f0940d9c7892462c893602fdccfe8b24e8c Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Wed, 4 Jan 2017 13:21:29 +0200 Subject: [PATCH 49/52] dpaa_eth: cleanup after init_phy() failure Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 624ba9058dc4..77517aa3e8d9 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2291,7 +2291,8 @@ static int dpaa_open(struct net_device *net_dev) net_dev->phydev = mac_dev->init_phy(net_dev, priv->mac_dev); if (!net_dev->phydev) { netif_err(priv, ifup, net_dev, "init_phy() failed\n"); - return -ENODEV; + err = -ENODEV; + goto phy_init_failed; } for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) { @@ -2314,6 +2315,7 @@ mac_start_failed: for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) fman_port_disable(mac_dev->port[i]); +phy_init_failed: dpaa_eth_napi_disable(priv); return err; From 0fbb0f24dde8759925fc56e9dbc6a5b2cbba99c4 Mon Sep 17 00:00:00 2001 From: Roy Pledge Date: Wed, 4 Jan 2017 13:21:30 +0200 Subject: [PATCH 50/52] dpaa_eth: Initialize CGR structure before init The QBMan CGR options needs to be zeroed before calling the init function Signed-off-by: Roy Pledge Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 77517aa3e8d9..c9b7ad65e563 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -733,6 +733,7 @@ static int dpaa_eth_cgr_init(struct dpaa_priv *priv) priv->cgr_data.cgr.cb = dpaa_eth_cgscn; /* Enable Congestion State Change Notifications and CS taildrop */ + memset(&initcgr, 0, sizeof(initcgr)); initcgr.we_mask = cpu_to_be16(QM_CGR_WE_CSCN_EN | QM_CGR_WE_CS_THRES); initcgr.cgr.cscn_en = QM_CGR_EN; @@ -2422,6 +2423,7 @@ static int dpaa_ingress_cgr_init(struct dpaa_priv *priv) } /* Enable CS TD, but disable Congestion State Change Notifications. */ + memset(&initcgr, 0, sizeof(initcgr)); initcgr.we_mask = cpu_to_be16(QM_CGR_WE_CS_THRES); initcgr.cgr.cscn_en = QM_CGR_EN; cs_th = DPAA_INGRESS_CS_THRESHOLD; From 4fdda95893de776a8efdf661bbf0e338f2f13dcb Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 4 Jan 2017 15:10:56 +0000 Subject: [PATCH 51/52] sfc: don't report RX hash keys to ethtool when RSS wasn't enabled If we failed to set up RSS on EF10 (e.g. because firmware declared RX_RSS_LIMITED), ethtool --show-nfc $dev rx-flow-hash ... should report no fields, rather than confusingly reporting what fields we _would_ be hashing on if RSS was working. Fixes: dcb4123cbec0 ("sfc: disable RSS when unsupported") Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 3 ++- drivers/net/ethernet/sfc/ethtool.c | 2 ++ drivers/net/ethernet/sfc/net_driver.h | 2 ++ drivers/net/ethernet/sfc/siena.c | 1 + 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index de2947ccc5ad..5eb0e684fd76 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1323,7 +1323,8 @@ static int efx_ef10_init_nic(struct efx_nic *efx) } /* don't fail init if RSS setup doesn't work */ - efx->type->rx_push_rss_config(efx, false, efx->rx_indir_table); + rc = efx->type->rx_push_rss_config(efx, false, efx->rx_indir_table); + efx->rss_active = (rc == 0); return 0; } diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 87bdc56b4e3a..18ebaea44e82 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -975,6 +975,8 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev, case ETHTOOL_GRXFH: { info->data = 0; + if (!efx->rss_active) /* No RSS */ + return 0; switch (info->flow_type) { case UDP_V4_FLOW: if (efx->rx_hash_udp_4tuple) diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 1a635ced62d0..1c62c1a00fca 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -860,6 +860,7 @@ struct vfdi_status; * @rx_hash_key: Toeplitz hash key for RSS * @rx_indir_table: Indirection table for RSS * @rx_scatter: Scatter mode enabled for receives + * @rss_active: RSS enabled on hardware * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled * @int_error_count: Number of internal errors seen recently * @int_error_expire: Time at which error count will be expired @@ -998,6 +999,7 @@ struct efx_nic { u8 rx_hash_key[40]; u32 rx_indir_table[128]; bool rx_scatter; + bool rss_active; bool rx_hash_udp_4tuple; unsigned int_error_count; diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index a3901bc96586..4e54e5dc9fcb 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -403,6 +403,7 @@ static int siena_init_nic(struct efx_nic *efx) efx_writeo(efx, &temp, FR_AZ_RX_CFG); siena_rx_push_rss_config(efx, false, efx->rx_indir_table); + efx->rss_active = true; /* Enable event logging */ rc = efx_mcdi_log_ctrl(efx, true, false, 0); From 71eae1ca77fd6be218d8a952d97bba827e56516d Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 4 Jan 2017 23:10:23 +0300 Subject: [PATCH 52/52] sh_eth: enable RX descriptor word 0 shift on SH7734 The RX descriptor word 0 on SH7734 has the RFS[9:0] field in bits 16-25 (bits 0-15 usually used for that are occupied by the packet checksum). Thus we need to set the 'shift_rd0' field in the SH7734 SoC data... Fixes: f0e81fecd4f8 ("net: sh_eth: Add support SH7734") Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 0af7fc279c85..00fafabab1d0 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -819,6 +819,7 @@ static struct sh_eth_cpu_data sh7734_data = { .tsu = 1, .hw_crc = 1, .select_mii = 1, + .shift_rd0 = 1, }; /* SH7763 */