From f7418bc10d8402798ee3add5ef0ed5f33266a2bb Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 24 Sep 2015 14:59:49 +0200 Subject: [PATCH 01/92] mac80211: fix handling of PS filtering with fast-xmit Fixes dropped packets in the tx path in case a non-PS station triggers the tx filter. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/status.c | 1 + net/mac80211/tx.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 8ba583243509..3ed7ddfbf8e8 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -101,6 +101,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, * when it wakes up for the next time. */ set_sta_flag(sta, WLAN_STA_CLEAR_PS_FILT); + ieee80211_clear_fast_xmit(sta); /* * This code races in the following way: diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 84e0e8c7fb23..7892eb8ed4c8 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1218,8 +1218,10 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, if (!tx->sta) info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; - else if (test_and_clear_sta_flag(tx->sta, WLAN_STA_CLEAR_PS_FILT)) + else if (test_and_clear_sta_flag(tx->sta, WLAN_STA_CLEAR_PS_FILT)) { info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; + ieee80211_check_fast_xmit(tx->sta); + } info->flags |= IEEE80211_TX_CTL_FIRST_FRAGMENT; @@ -2451,7 +2453,8 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) if (test_sta_flag(sta, WLAN_STA_PS_STA) || test_sta_flag(sta, WLAN_STA_PS_DRIVER) || - test_sta_flag(sta, WLAN_STA_PS_DELIVER)) + test_sta_flag(sta, WLAN_STA_PS_DELIVER) || + test_sta_flag(sta, WLAN_STA_CLEAR_PS_FILT)) goto out; if (sdata->noack_map) From 029cd0370241641eb70235d205aa0b90c84dce44 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 24 Sep 2015 16:59:46 +0200 Subject: [PATCH 02/92] ath9k: declare required extra tx headroom ath9k inserts padding between the 802.11 header 
and the data area (to align it). Since it didn't declare this extra required headroom, this led to some nasty issues like randomly dropped packets in some setups. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index 57f95f2dca5b..90eb75012e4f 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -880,6 +880,7 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw) hw->max_rate_tries = 10; hw->sta_data_size = sizeof(struct ath_node); hw->vif_data_size = sizeof(struct ath_vif); + hw->extra_tx_headroom = 4; hw->wiphy->available_antennas_rx = BIT(ah->caps.max_rxchains) - 1; hw->wiphy->available_antennas_tx = BIT(ah->caps.max_txchains) - 1; From 910367e374b97fad6ae31f74198ca537e2f3cfe5 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 31 Aug 2015 10:55:05 +0200 Subject: [PATCH 03/92] rt2800usb: add usb ID 1b75:3070 for Airlive WT-2000USB Brand: Airlive (Ovislink Corp.) 
Name: Turbo-G USB Adaptor Model: WT-2000USB USB ID: 1b75:3070 Reported-and-tested-by: Michael Coates Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/rt2x00/rt2800usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c index 5932306084fd..bf9afbf46c1b 100644 --- a/drivers/net/wireless/rt2x00/rt2800usb.c +++ b/drivers/net/wireless/rt2x00/rt2800usb.c @@ -1114,6 +1114,7 @@ static struct usb_device_id rt2800usb_device_table[] = { { USB_DEVICE(0x0db0, 0x871c) }, { USB_DEVICE(0x0db0, 0x899a) }, /* Ovislink */ + { USB_DEVICE(0x1b75, 0x3070) }, { USB_DEVICE(0x1b75, 0x3071) }, { USB_DEVICE(0x1b75, 0x3072) }, { USB_DEVICE(0x1b75, 0xa200) }, From c6ea46c6d3df06599daddbf3311744f3130e186c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 9 Sep 2015 07:16:42 +0200 Subject: [PATCH 04/92] b43: probe bcma core (device) rev 0x15 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenWrt user reported b43 doesn't probe wireless core on SoC BCM5356A1: [ 0.000000] bcma: bus0: Found chip with id 0x5356, rev 0x01 and package 0x04 it is because this chip uses different 802.11 core revision than others: [ 0.000000] bcma: bus0: Core 1 found: IEEE 802.11 (manuf 0x4BF, id 0x812, rev 0x15, class 0x0) Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/net/wireless/b43/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 28490702124a..71d3e9adbf3c 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -120,6 +120,7 @@ MODULE_PARM_DESC(allhwsupport, "Enable support for all hardware (even it if over #ifdef CONFIG_B43_BCMA static const struct bcma_device_id b43_bcma_tbl[] = { BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_80211, 0x11, BCMA_ANY_CLASS), + BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_80211, 0x15, 
BCMA_ANY_CLASS), BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_80211, 0x17, BCMA_ANY_CLASS), BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_80211, 0x18, BCMA_ANY_CLASS), BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_80211, 0x1C, BCMA_ANY_CLASS), From 76d164f582150fd0259ec0fcbc485470bcd8033e Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 17 Sep 2015 14:29:07 +0300 Subject: [PATCH 05/92] ath10k: fix DMA related firmware crashes on multiple devices Some platforms really don't like DMA bursts of 256 bytes, and this causes the firmware to crash when sending beacons. Also, changing this based on the firmware version does not seem to make much sense, so use 128 bytes for all versions. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/hw.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index 23afcda2de96..678d72af4a9d 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -337,7 +337,7 @@ enum ath10k_hw_rate_cck { #define TARGET_10X_MAX_FRAG_ENTRIES 0 /* 10.2 parameters */ -#define TARGET_10_2_DMA_BURST_SIZE 1 +#define TARGET_10_2_DMA_BURST_SIZE 0 /* Target specific defines for WMI-TLV firmware */ #define TARGET_TLV_NUM_VDEVS 4 @@ -391,7 +391,7 @@ enum ath10k_hw_rate_cck { #define TARGET_10_4_TX_DBG_LOG_SIZE 1024 #define TARGET_10_4_NUM_WDS_ENTRIES 32 -#define TARGET_10_4_DMA_BURST_SIZE 1 +#define TARGET_10_4_DMA_BURST_SIZE 0 #define TARGET_10_4_MAC_AGGR_DELIM 0 #define TARGET_10_4_RX_SKIP_DEFRAG_TIMEOUT_DUP_DETECTION_CHECK 1 #define TARGET_10_4_VOW_CONFIG 0 From 7253054e5d05233063c48f57ac02283bd35753d8 Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Wed, 30 Sep 2015 12:42:35 +0200 Subject: [PATCH 06/92] can: peak_pci: add unused device id. 
in devices table While new PEAK_PCIE_OEM_ID has been defined since 3.17, no corresponding entry has been added in the peak_pci_tbl[] of the peak_pci CAN driver. This patch enables now users of the PCAN-PCI Express OEM card to run the peak_pci driver too. Signed-off-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sja1000/peak_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/sja1000/peak_pci.c b/drivers/net/can/sja1000/peak_pci.c index e5fac368068a..131026fbc2d7 100644 --- a/drivers/net/can/sja1000/peak_pci.c +++ b/drivers/net/can/sja1000/peak_pci.c @@ -87,6 +87,7 @@ static const struct pci_device_id peak_pci_tbl[] = { {PEAK_PCI_VENDOR_ID, PEAK_PC_104P_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, {PEAK_PCI_VENDOR_ID, PEAK_PCI_104E_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, {PEAK_PCI_VENDOR_ID, PEAK_CPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, + {PEAK_PCI_VENDOR_ID, PEAK_PCIE_OEM_ID, PCI_ANY_ID, PCI_ANY_ID,}, #ifdef CONFIG_CAN_PEAK_PCIEC {PEAK_PCI_VENDOR_ID, PEAK_PCIEC_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, {PEAK_PCI_VENDOR_ID, PEAK_PCIEC34_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, From 54328e64047a54b8fc2362c2e1f0fa16c90f739f Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Fri, 2 Oct 2015 11:44:30 -0500 Subject: [PATCH 07/92] rtlwifi: rtl8821ae: Fix system lockups on boot In commit 1277fa2ab2f9 ("rtlwifi: Remove the clear interrupt routine from all drivers"), the code that cleared all interrupt enable bits before setting them was removed for all PCI drivers. This fixed an issue that caused TX to be blocked for 3-5 seconds. On some RTL8821AE units, this change causes soft lockups to occur on boot. For that reason, the portion of the earlier commit that applied to rtl8821ae is reverted. Kernels 4.1 and newer are affected. See http://marc.info/?l=linux-wireless&m=144373370103285&w=2 and https://bugzilla.opensuse.org/show_bug.cgi?id=944978 for two cases where this regression affected user systems. 
Note that this bug does not appear on any of the developer's setups. For those users whose systems are affected by the TX blockage, but do not lock up on boot, a module parameter is added to disable the interrupt clear Fixes: 1277fa2ab2f9 ("rtlwifi: Remove the clear interrupt routine from all drivers") Signed-off-by: Larry Finger Cc: Stable [V4.1+] Signed-off-by: Kalle Valo --- drivers/net/wireless/rtlwifi/pci.h | 2 ++ drivers/net/wireless/rtlwifi/rtl8821ae/hw.c | 17 +++++++++++++++++ drivers/net/wireless/rtlwifi/rtl8821ae/sw.c | 5 +++++ drivers/net/wireless/rtlwifi/wifi.h | 3 +++ 4 files changed, 27 insertions(+) diff --git a/drivers/net/wireless/rtlwifi/pci.h b/drivers/net/wireless/rtlwifi/pci.h index d4567d12e07e..5da6703942d9 100644 --- a/drivers/net/wireless/rtlwifi/pci.h +++ b/drivers/net/wireless/rtlwifi/pci.h @@ -247,6 +247,8 @@ struct rtl_pci { /* MSI support */ bool msi_support; bool using_msi; + /* interrupt clear before set */ + bool int_clear; }; struct mp_adapter { diff --git a/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c index b7f18e2155eb..6e9418ed90c2 100644 --- a/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c @@ -2253,11 +2253,28 @@ void rtl8821ae_set_qos(struct ieee80211_hw *hw, int aci) } } +static void rtl8821ae_clear_interrupt(struct ieee80211_hw *hw) +{ + struct rtl_priv *rtlpriv = rtl_priv(hw); + u32 tmp = rtl_read_dword(rtlpriv, REG_HISR); + + rtl_write_dword(rtlpriv, REG_HISR, tmp); + + tmp = rtl_read_dword(rtlpriv, REG_HISRE); + rtl_write_dword(rtlpriv, REG_HISRE, tmp); + + tmp = rtl_read_dword(rtlpriv, REG_HSISR); + rtl_write_dword(rtlpriv, REG_HSISR, tmp); +} + void rtl8821ae_enable_interrupt(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); + if (rtlpci->int_clear) + rtl8821ae_clear_interrupt(hw);/*clear it here first*/ + rtl_write_dword(rtlpriv, REG_HIMR, 
rtlpci->irq_mask[0] & 0xFFFFFFFF); rtl_write_dword(rtlpriv, REG_HIMRE, rtlpci->irq_mask[1] & 0xFFFFFFFF); rtlpci->irq_enabled = true; diff --git a/drivers/net/wireless/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/rtlwifi/rtl8821ae/sw.c index a4988121e1ab..8ee141a55bc5 100644 --- a/drivers/net/wireless/rtlwifi/rtl8821ae/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8821ae/sw.c @@ -96,6 +96,7 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw) rtl8821ae_bt_reg_init(hw); rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support; + rtlpci->int_clear = rtlpriv->cfg->mod_params->int_clear; rtlpriv->btcoexist.btc_ops = rtl_btc_get_ops_pointer(); rtlpriv->dm.dm_initialgain_enable = 1; @@ -167,6 +168,7 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->psc.swctrl_lps = rtlpriv->cfg->mod_params->swctrl_lps; rtlpriv->psc.fwctrl_lps = rtlpriv->cfg->mod_params->fwctrl_lps; rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support; + rtlpci->int_clear = rtlpriv->cfg->mod_params->int_clear; if (rtlpriv->cfg->mod_params->disable_watchdog) pr_info("watchdog disabled\n"); rtlpriv->psc.reg_fwctrl_lps = 3; @@ -308,6 +310,7 @@ static struct rtl_mod_params rtl8821ae_mod_params = { .swctrl_lps = false, .fwctrl_lps = true, .msi_support = true, + .int_clear = true, .debug = DBG_EMERG, .disable_watchdog = 0, }; @@ -437,6 +440,7 @@ module_param_named(fwlps, rtl8821ae_mod_params.fwctrl_lps, bool, 0444); module_param_named(msi, rtl8821ae_mod_params.msi_support, bool, 0444); module_param_named(disable_watchdog, rtl8821ae_mod_params.disable_watchdog, bool, 0444); +module_param_named(int_clear, rtl8821ae_mod_params.int_clear, bool, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n"); MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n"); @@ -444,6 +448,7 @@ MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); 
MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 1)\n"); MODULE_PARM_DESC(debug, "Set debug level (0-5) (default 0)"); MODULE_PARM_DESC(disable_watchdog, "Set to 1 to disable the watchdog (default 0)\n"); +MODULE_PARM_DESC(int_clear, "Set 0 to disable interrupt clear before set (default 1)\n"); static SIMPLE_DEV_PM_OPS(rtlwifi_pm_ops, rtl_pci_suspend, rtl_pci_resume); diff --git a/drivers/net/wireless/rtlwifi/wifi.h b/drivers/net/wireless/rtlwifi/wifi.h index b90ca618b123..4544752a2ba8 100644 --- a/drivers/net/wireless/rtlwifi/wifi.h +++ b/drivers/net/wireless/rtlwifi/wifi.h @@ -2249,6 +2249,9 @@ struct rtl_mod_params { /* default 0: 1 means disable */ bool disable_watchdog; + + /* default 1: 0 means skip the interrupt clear before set */ + bool int_clear; }; struct rtl_hal_usbint_cfg { From e6740165b8f7f06d8caee0fceab3fb9d790a6fed Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 30 Sep 2015 11:45:33 +0200 Subject: [PATCH 08/92] ppp: don't override sk->sk_state in pppoe_flush_dev() Since commit 2b018d57ff18 ("pppoe: drop PPPOX_ZOMBIEs in pppoe_release"), pppoe_release() calls dev_put(po->pppoe_dev) if sk is in the PPPOX_ZOMBIE state. But pppoe_flush_dev() can set sk->sk_state to PPPOX_ZOMBIE _and_ reset po->pppoe_dev to NULL. 
This leads to the following oops: [ 570.140800] BUG: unable to handle kernel NULL pointer dereference at 00000000000004e0 [ 570.142931] IP: [] pppoe_release+0x50/0x101 [pppoe] [ 570.144601] PGD 3d119067 PUD 3dbc1067 PMD 0 [ 570.144601] Oops: 0000 [#1] SMP [ 570.144601] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core ip6_udp_tunnel udp_tunnel pppoe pppox ppp_generic slhc loop crc32c_intel ghash_clmulni_intel jitterentropy_rng sha256_generic hmac drbg ansi_cprng aesni_intel aes_x86_64 ablk_helper cryptd lrw gf128mul glue_helper acpi_cpufreq evdev serio_raw processor button ext4 crc16 mbcache jbd2 virtio_net virtio_blk virtio_pci virtio_ring virtio [ 570.144601] CPU: 1 PID: 15738 Comm: ppp-apitest Not tainted 4.2.0 #1 [ 570.144601] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Debian-1.8.2-1 04/01/2014 [ 570.144601] task: ffff88003d30d600 ti: ffff880036b60000 task.ti: ffff880036b60000 [ 570.144601] RIP: 0010:[] [] pppoe_release+0x50/0x101 [pppoe] [ 570.144601] RSP: 0018:ffff880036b63e08 EFLAGS: 00010202 [ 570.144601] RAX: 0000000000000000 RBX: ffff880034340000 RCX: 0000000000000206 [ 570.144601] RDX: 0000000000000006 RSI: ffff88003d30dd20 RDI: ffff88003d30dd20 [ 570.144601] RBP: ffff880036b63e28 R08: 0000000000000001 R09: 0000000000000000 [ 570.144601] R10: 00007ffee9b50420 R11: ffff880034340078 R12: ffff8800387ec780 [ 570.144601] R13: ffff8800387ec7b0 R14: ffff88003e222aa0 R15: ffff8800387ec7b0 [ 570.144601] FS: 00007f5672f48700(0000) GS:ffff88003fc80000(0000) knlGS:0000000000000000 [ 570.144601] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 570.144601] CR2: 00000000000004e0 CR3: 0000000037f7e000 CR4: 00000000000406a0 [ 570.144601] Stack: [ 570.144601] ffffffffa018f240 ffff8800387ec780 ffffffffa018f240 ffff8800387ec7b0 [ 570.144601] ffff880036b63e48 ffffffff812caabe ffff880039e4e000 0000000000000008 [ 570.144601] ffff880036b63e58 ffffffff812cabad ffff880036b63ea8 ffffffff811347f5 [ 570.144601] Call Trace: [ 570.144601] [] sock_release+0x1a/0x75 
[ 570.144601] [] sock_close+0xd/0x11 [ 570.144601] [] __fput+0xff/0x1a5 [ 570.144601] [] ____fput+0x9/0xb [ 570.144601] [] task_work_run+0x66/0x90 [ 570.144601] [] prepare_exit_to_usermode+0x8c/0xa7 [ 570.144601] [] syscall_return_slowpath+0x16d/0x19b [ 570.144601] [] int_ret_from_sys_call+0x25/0x9f [ 570.144601] Code: 48 8b 83 c8 01 00 00 a8 01 74 12 48 89 df e8 8b 27 14 e1 b8 f7 ff ff ff e9 b7 00 00 00 8a 43 12 a8 0b 74 1c 48 8b 83 a8 04 00 00 <48> 8b 80 e0 04 00 00 65 ff 08 48 c7 83 a8 04 00 00 00 00 00 00 [ 570.144601] RIP [] pppoe_release+0x50/0x101 [pppoe] [ 570.144601] RSP [ 570.144601] CR2: 00000000000004e0 [ 570.200518] ---[ end trace 46956baf17349563 ]--- pppoe_flush_dev() has no reason to override sk->sk_state with PPPOX_ZOMBIE. pppox_unbind_sock() already sets sk->sk_state to PPPOX_DEAD, which is the correct state given that sk is unbound and po->pppoe_dev is NULL. Fixes: 2b018d57ff18 ("pppoe: drop PPPOX_ZOMBIEs in pppoe_release") Tested-by: Oleksii Berezhniak Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/ppp/pppoe.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 3837ae344f63..2ed75060da50 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -313,7 +313,6 @@ static void pppoe_flush_dev(struct net_device *dev) if (po->pppoe_dev == dev && sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND | PPPOX_ZOMBIE)) { pppox_unbind_sock(sk); - sk->sk_state = PPPOX_ZOMBIE; sk->sk_state_change(sk); po->pppoe_dev = NULL; dev_put(dev); From 9dc80a74313d2741e053a650942818a86f234fc9 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 30 Sep 2015 03:48:50 -0700 Subject: [PATCH 09/92] amd-xgbe: fix potential memory leak in xgbe-debugfs Added kfree() to avoid the memory leak when debugfs_create_dir() fails. Signed-off-by: Geliang Tang Acked-by: Tom Lendacky Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c index 2c063b60db4b..66137ffea3f8 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c @@ -330,6 +330,7 @@ void xgbe_debugfs_init(struct xgbe_prv_data *pdata) pdata->xgbe_debugfs = debugfs_create_dir(buf, NULL); if (!pdata->xgbe_debugfs) { netdev_err(pdata->netdev, "debugfs_create_dir failed\n"); + kfree(buf); return; } From f6194bcf03e40bc6b6094f11289d87b605fb326d Mon Sep 17 00:00:00 2001 From: "David B. Robins" Date: Wed, 30 Sep 2015 16:20:04 -0400 Subject: [PATCH 10/92] net: usb: asix: Fix crash on skb alloc failure If asix_rx_fixup_internal() fails to allocate rx->ax_skb, it will return but not clear rx->size. rx points to driver private data. A later call assumes that nonzero size means ax_skb was allocated and passes a null ax_skb to skb_put. Changed allocation failure return to clear size first. Found testing board with AX88772B devices. Signed-off-by: David B. Robins Signed-off-by: David S. 
Miller --- drivers/net/usb/asix_common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 75d6f26729a3..079069a060a6 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -91,8 +91,10 @@ int asix_rx_fixup_internal(struct usbnet *dev, struct sk_buff *skb, } rx->ax_skb = netdev_alloc_skb_ip_align(dev->net, rx->size); - if (!rx->ax_skb) + if (!rx->ax_skb) { + rx->size = 0; return 0; + } } if (rx->size > dev->net->mtu + ETH_HLEN + VLAN_HLEN) { From 33a8316d9cda4368e7cb477fff36c9ee35d4164e Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Thu, 1 Oct 2015 00:37:43 +0300 Subject: [PATCH 11/92] net: lpc_eth: fix warnings caused by enabling unprepared clock If common clock framework is configured, the driver generates warnings, which are fixed by this change: WARNING: CPU: 0 PID: 1 at linux/drivers/clk/clk.c:727 clk_core_enable+0x2c/0xa4() Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 4.3.0-rc2+ #141 Hardware name: LPC32XX SoC (Flattened Device Tree) Backtrace: [<>] (dump_backtrace) from [<>] (show_stack+0x18/0x1c) [<>] (show_stack) from [<>] (dump_stack+0x20/0x28) [<>] (dump_stack) from [<>] (warn_slowpath_common+0x90/0xb8) [<>] (warn_slowpath_common) from [<>] (warn_slowpath_null+0x24/0x2c) [<>] (warn_slowpath_null) from [<>] (clk_core_enable+0x2c/0xa4) [<>] (clk_core_enable) from [<>] (clk_enable+0x24/0x38) [<>] (clk_enable) from [<>] (lpc_eth_drv_probe+0xfc/0x99c) [<>] (lpc_eth_drv_probe) from [<>] (platform_drv_probe+0x50/0xa0) [<>] (platform_drv_probe) from [<>] (driver_probe_device+0x18c/0x408) [<>] (driver_probe_device) from [<>] (__driver_attach+0x70/0x94) [<>] (__driver_attach) from [<>] (bus_for_each_dev+0x74/0x98) [<>] (bus_for_each_dev) from [<>] (driver_attach+0x20/0x28) [<>] (driver_attach) from [<>] (bus_add_driver+0x11c/0x248) [<>] (bus_add_driver) from [<>] (driver_register+0xa4/0xe8) [<>] (driver_register) from [<>] 
(__platform_driver_register+0x50/0x64) [<>] (__platform_driver_register) from [<>] (lpc_eth_driver_init+0x18/0x20) [<>] (lpc_eth_driver_init) from [<>] (do_one_initcall+0x11c/0x1dc) [<>] (do_one_initcall) from [<>] (kernel_init_freeable+0x10c/0x1d4) [<>] (kernel_init_freeable) from [<>] (kernel_init+0x10/0xec) [<>] (kernel_init) from [<>] (ret_from_fork+0x14/0x24) Signed-off-by: Vladimir Zapolskiy Signed-off-by: David S. Miller --- drivers/net/ethernet/nxp/lpc_eth.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 66fd868152e5..b159ef8303cc 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -476,13 +476,12 @@ static void __lpc_get_mac(struct netdata_local *pldat, u8 *mac) mac[5] = tmp >> 8; } -static void __lpc_eth_clock_enable(struct netdata_local *pldat, - bool enable) +static void __lpc_eth_clock_enable(struct netdata_local *pldat, bool enable) { if (enable) - clk_enable(pldat->clk); + clk_prepare_enable(pldat->clk); else - clk_disable(pldat->clk); + clk_disable_unprepare(pldat->clk); } static void __lpc_params_setup(struct netdata_local *pldat) @@ -1494,7 +1493,7 @@ err_out_free_irq: err_out_iounmap: iounmap(pldat->net_base); err_out_disable_clocks: - clk_disable(pldat->clk); + clk_disable_unprepare(pldat->clk); clk_put(pldat->clk); err_out_free_dev: free_netdev(ndev); @@ -1519,7 +1518,7 @@ static int lpc_eth_drv_remove(struct platform_device *pdev) iounmap(pldat->net_base); mdiobus_unregister(pldat->mii_bus); mdiobus_free(pldat->mii_bus); - clk_disable(pldat->clk); + clk_disable_unprepare(pldat->clk); clk_put(pldat->clk); free_netdev(ndev); @@ -1540,7 +1539,7 @@ static int lpc_eth_drv_suspend(struct platform_device *pdev, if (netif_running(ndev)) { netif_device_detach(ndev); __lpc_eth_shutdown(pldat); - clk_disable(pldat->clk); + clk_disable_unprepare(pldat->clk); /* * Reset again now clock is disable to be sure 
From 6645d5e441db9121793421d477255f4242b3dbf3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 15 Sep 2015 09:58:53 +0200 Subject: [PATCH 12/92] iwlwifi: mvm: fix D3 CCMP TX PN assignment When going into/coming out of D3, the TX PN must be programmed into and restored from the firmware respectively. The restore was broken due to my previous commit to move PN assignment into the driver. Sending the PN to the firmware still worked since we now use the counter that's shared with mac80211, but accessing it through the mac80211 API makes no sense now. Fix this by reading/writing the counter directly. This actually simplifies the code since we don't need to round-trip through the key_seq structure. Fixes: ca8c0f4bede6 ("iwlwifi: mvm: move TX PN assignment for CCMP to the driver") Cc: [4.1+] Reported-by: Luca Coelho Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/iwlwifi/mvm/d3.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/d3.c b/drivers/net/wireless/iwlwifi/mvm/d3.c index 04264e417c1c..9578b9d663dc 100644 --- a/drivers/net/wireless/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/iwlwifi/mvm/d3.c @@ -274,18 +274,13 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw, break; case WLAN_CIPHER_SUITE_CCMP: if (sta) { - u8 *pn = seq.ccmp.pn; + u64 pn64; aes_sc = data->rsc_tsc->all_tsc_rsc.aes.unicast_rsc; aes_tx_sc = &data->rsc_tsc->all_tsc_rsc.aes.tsc; - ieee80211_get_key_tx_seq(key, &seq); - aes_tx_sc->pn = cpu_to_le64((u64)pn[5] | - ((u64)pn[4] << 8) | - ((u64)pn[3] << 16) | - ((u64)pn[2] << 24) | - ((u64)pn[1] << 32) | - ((u64)pn[0] << 40)); + pn64 = atomic64_read(&key->tx_pn); + aes_tx_sc->pn = cpu_to_le64(pn64); } else { aes_sc = data->rsc_tsc->all_tsc_rsc.aes.multicast_rsc; } @@ -1453,15 +1448,15 @@ static void iwl_mvm_d3_update_gtks(struct ieee80211_hw *hw, switch (key->cipher) { case WLAN_CIPHER_SUITE_CCMP: - iwl_mvm_aes_sc_to_seq(&sc->aes.tsc, 
&seq); iwl_mvm_set_aes_rx_seq(sc->aes.unicast_rsc, key); + atomic64_set(&key->tx_pn, le64_to_cpu(sc->aes.tsc.pn)); break; case WLAN_CIPHER_SUITE_TKIP: iwl_mvm_tkip_sc_to_seq(&sc->tkip.tsc, &seq); iwl_mvm_set_tkip_rx_seq(sc->tkip.unicast_rsc, key); + ieee80211_set_key_tx_seq(key, &seq); break; } - ieee80211_set_key_tx_seq(key, &seq); /* that's it for this key */ return; From 5bd166872d8f99f156fac191299d24f828bb2348 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 15 Sep 2015 14:36:09 +0200 Subject: [PATCH 13/92] iwlwifi: dvm: fix D3 firmware PN programming The code to send the RX PN data (for each TID) to the firmware has a devastating bug: it overwrites the data for TID 0 with all the TID data, leaving the remaining TIDs zeroed. This will allow replays to actually be accepted by the firmware, which could allow waking up the system. Cc: [3.1+] Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/iwlwifi/dvm/lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/dvm/lib.c b/drivers/net/wireless/iwlwifi/dvm/lib.c index ab45819c1fbb..e18629a16fb0 100644 --- a/drivers/net/wireless/iwlwifi/dvm/lib.c +++ b/drivers/net/wireless/iwlwifi/dvm/lib.c @@ -1020,7 +1020,7 @@ static void iwlagn_wowlan_program_keys(struct ieee80211_hw *hw, u8 *pn = seq.ccmp.pn; ieee80211_get_key_rx_seq(key, i, &seq); - aes_sc->pn = cpu_to_le64( + aes_sc[i].pn = cpu_to_le64( (u64)pn[5] | ((u64)pn[4] << 8) | ((u64)pn[3] << 16) | From 2cf5eb3ab7bb7f2e3a70edcef236cd62c87db030 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 15 Sep 2015 14:36:09 +0200 Subject: [PATCH 14/92] iwlwifi: mvm: fix D3 firmware PN programming The code to send the RX PN data (for each TID) to the firmware has a devastating bug: it overwrites the data for TID 0 with all the TID data, leaving the remaining TIDs zeroed. This will allow replays to actually be accepted by the firmware, which could allow waking up the system. 
Cc: [3.1+] Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/iwlwifi/mvm/d3.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/d3.c b/drivers/net/wireless/iwlwifi/mvm/d3.c index 9578b9d663dc..576187611e61 100644 --- a/drivers/net/wireless/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/iwlwifi/mvm/d3.c @@ -293,12 +293,12 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw, u8 *pn = seq.ccmp.pn; ieee80211_get_key_rx_seq(key, i, &seq); - aes_sc->pn = cpu_to_le64((u64)pn[5] | - ((u64)pn[4] << 8) | - ((u64)pn[3] << 16) | - ((u64)pn[2] << 24) | - ((u64)pn[1] << 32) | - ((u64)pn[0] << 40)); + aes_sc[i].pn = cpu_to_le64((u64)pn[5] | + ((u64)pn[4] << 8) | + ((u64)pn[3] << 16) | + ((u64)pn[2] << 24) | + ((u64)pn[1] << 32) | + ((u64)pn[0] << 40)); } data->use_rsc_tsc = true; break; From e9cb0327b26dd7ba43a3b7a05b4b62219decf42d Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Mon, 31 Aug 2015 11:08:27 +0300 Subject: [PATCH 15/92] iwlwifi: mvm: clear csa countdown when AP is stopped The csa_countdown flag was not cleared when the AP is stopped. As a result, if the AP was stopped after csa_countdown had started, all the following channel switch commands would fail. Fix that by clearing the csa_countdown flag when the AP is stopped. 
Cc: [3.17+] Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index aa8c2b7f23c7..7c2944a72470 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -2388,6 +2388,7 @@ static void iwl_mvm_stop_ap_ibss(struct ieee80211_hw *hw, iwl_mvm_remove_time_event(mvm, mvmvif, &mvmvif->time_event_data); RCU_INIT_POINTER(mvm->csa_vif, NULL); + mvmvif->csa_countdown = false; } if (rcu_access_pointer(mvm->csa_tx_blocked_vif) == vif) { From b5a48134f8af08f5243328f8a0b05fc5ae7cf343 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 22 Sep 2015 10:47:27 +0200 Subject: [PATCH 16/92] iwlwifi: fix firmware filename for 3160 The MODULE_FIRMWARE() for 3160 should be using the 7260 version as it's done in the device configuration struct instead of referencing IWL3160_UCODE_API_OK which doesn't even exist. 
Cc: [3.8+] Reported-by: Hauke Mehrtens Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/iwlwifi/iwl-7000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-7000.c b/drivers/net/wireless/iwlwifi/iwl-7000.c index 6951aba620eb..3fb327d5a911 100644 --- a/drivers/net/wireless/iwlwifi/iwl-7000.c +++ b/drivers/net/wireless/iwlwifi/iwl-7000.c @@ -348,6 +348,6 @@ const struct iwl_cfg iwl7265d_n_cfg = { }; MODULE_FIRMWARE(IWL7260_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); -MODULE_FIRMWARE(IWL3160_MODULE_FIRMWARE(IWL3160_UCODE_API_OK)); +MODULE_FIRMWARE(IWL3160_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); MODULE_FIRMWARE(IWL7265_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); MODULE_FIRMWARE(IWL7265D_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); From f08f625876476b6c4a87834dc86e3b927f4697d2 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 22 Sep 2015 09:44:39 +0300 Subject: [PATCH 17/92] iwlwifi: pci: add a few more PCI subvendor IDs for the 7265 series Add 3 new subdevice IDs for the 0x095A device ID and 2 for the 0x095B device ID. 
Cc: [3.13+] Reported-by: Jeremy Signed-off-by: Luca Coelho --- drivers/net/wireless/iwlwifi/pcie/drv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c index b0825c402c73..644b58bc5226 100644 --- a/drivers/net/wireless/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/iwlwifi/pcie/drv.c @@ -414,6 +414,11 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x095A, 0x5590, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095B, 0x5290, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5490, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095A, 0x5F10, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095B, 0x5212, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095B, 0x520A, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095A, 0x9000, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095A, 0x9400, iwl7265_2ac_cfg)}, /* 8000 Series */ {IWL_PCI_DEVICE(0x24F3, 0x0010, iwl8260_2ac_cfg)}, From 1a3fe0b2b6778b7866e2b3f5c9a299d5e9bbd89c Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Wed, 30 Sep 2015 11:19:55 +0300 Subject: [PATCH 18/92] iwlwifi: mvm: init card correctly on ctkill exit check During the CT-kill exit flow, the card is powered up and partially initialized to check if the temperature is already low enough. Unfortunately the init bails early because the CT-kill flag is set. Make the code bail early only for HW RF-kill, as was intended by the author. CT-kill is self-imposed and is not really RF-kill. 
Fixes: 31b8b343e019 ("iwlwifi: fix RFkill while calibrating") Cc: [3.18+] Signed-off-by: Arik Nemtsov Signed-off-by: Luca Coelho --- drivers/net/wireless/iwlwifi/mvm/fw.c | 4 ++-- drivers/net/wireless/iwlwifi/mvm/mvm.h | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/fw.c b/drivers/net/wireless/iwlwifi/mvm/fw.c index 4a0ce83315bd..5c7f7cc9ffcc 100644 --- a/drivers/net/wireless/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/iwlwifi/mvm/fw.c @@ -703,7 +703,7 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm) * abort after reading the nvm in case RF Kill is on, we will complete * the init seq later when RF kill will switch to off */ - if (iwl_mvm_is_radio_killed(mvm)) { + if (iwl_mvm_is_radio_hw_killed(mvm)) { IWL_DEBUG_RF_KILL(mvm, "jump over all phy activities due to RF kill\n"); iwl_remove_notification(&mvm->notif_wait, &calib_wait); @@ -736,7 +736,7 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm) ret = iwl_wait_notification(&mvm->notif_wait, &calib_wait, MVM_UCODE_CALIB_TIMEOUT); - if (ret && iwl_mvm_is_radio_killed(mvm)) { + if (ret && iwl_mvm_is_radio_hw_killed(mvm)) { IWL_DEBUG_RF_KILL(mvm, "RFKILL while calibrating.\n"); ret = 1; } diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h index b95a07ec9e36..c754051a4cea 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h @@ -860,6 +860,11 @@ static inline bool iwl_mvm_is_radio_killed(struct iwl_mvm *mvm) test_bit(IWL_MVM_STATUS_HW_CTKILL, &mvm->status); } +static inline bool iwl_mvm_is_radio_hw_killed(struct iwl_mvm *mvm) +{ + return test_bit(IWL_MVM_STATUS_HW_RFKILL, &mvm->status); +} + /* Must be called with rcu_read_lock() held and it can only be * released when mvmsta is not needed anymore. 
*/ From 8695a144da9e500a5a60fa34c06694346ec1048f Mon Sep 17 00:00:00 2001 From: Raanan Avargil Date: Thu, 1 Oct 2015 04:48:53 -0700 Subject: [PATCH 19/92] tcp/dccp: fix old style declarations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I’m using the compilation flag -Werror=old-style-declaration, which requires that the “inline” word would come at the beginning of the code line. $ make drivers/net/ethernet/intel/e1000e/e1000e.ko ... include/net/inet_timewait_sock.h:116:1: error: ‘inline’ is not at beginning of declaration [-Werror=old-style-declaration] static void inline inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) include/net/inet_timewait_sock.h:121:1: error: ‘inline’ is not at beginning of declaration [-Werror=old-style-declaration] static void inline inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo) Fixes: ed2e92394589 ("tcp/dccp: fix timewait races in timer handling") Signed-off-by: Raanan Avargil Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/inet_timewait_sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 186f3a1e1b1f..fc1937698625 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -113,12 +113,12 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm); -static void inline inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) +static inline void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) { __inet_twsk_schedule(tw, timeo, false); } -static void inline inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo) +static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo) { __inet_twsk_schedule(tw, timeo, true); } From 2306c704ce280c97a60d1f45333b822b40281dea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 1 Oct 2015 05:39:26 -0700 Subject: [PATCH 20/92] inet: fix race in reqsk_queue_unlink() reqsk_timer_handler() tests if icsk_accept_queue.listen_opt is NULL at its beginning. By the time it calls inet_csk_reqsk_queue_drop() and reqsk_queue_unlink(), listener might have been closed and inet_csk_listen_stop() had called reqsk_queue_yank_acceptq() which sets icsk_accept_queue.listen_opt to NULL We therefore need to correctly check listen_opt being NULL after holding syn_wait_lock for proper synchronization. Fixes: fa76ce7328b2 ("inet: get rid of central tcp/dccp listener timer") Fixes: b357a364c57c ("inet: fix possible panic in reqsk_queue_unlink()") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Signed-off-by: David S. 
Miller --- net/ipv4/inet_connection_sock.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7bb9c39e0a4d..61b45a17fc73 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -577,21 +577,22 @@ EXPORT_SYMBOL(inet_rtx_syn_ack); static bool reqsk_queue_unlink(struct request_sock_queue *queue, struct request_sock *req) { - struct listen_sock *lopt = queue->listen_opt; struct request_sock **prev; + struct listen_sock *lopt; bool found = false; spin_lock(&queue->syn_wait_lock); - - for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL; - prev = &(*prev)->dl_next) { - if (*prev == req) { - *prev = req->dl_next; - found = true; - break; + lopt = queue->listen_opt; + if (lopt) { + for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL; + prev = &(*prev)->dl_next) { + if (*prev == req) { + *prev = req->dl_next; + found = true; + break; + } } } - spin_unlock(&queue->syn_wait_lock); if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) reqsk_put(req); From dbf73d4a8bb8f4e1d1f3edd3be825692279e2ef3 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 30 Sep 2015 12:26:23 +0200 Subject: [PATCH 21/92] iwlwifi: mvm: flush fw_dump_wk when mvm fails to start FW dump may be triggered when running init ucode, for example due to a sysassert. In this case fw_dump_wk may run after mvm is freed, resulting in a kernel panic. Fix it by flushing the work. 
Fixes: 01b988a708af ("iwlwifi: mvm: allow to collect debug data when restart is disabled") Cc: [3.18+] Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho --- drivers/net/wireless/iwlwifi/mvm/ops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c index a37de3f410a0..f0cb092f980e 100644 --- a/drivers/net/wireless/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/iwlwifi/mvm/ops.c @@ -590,6 +590,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, ieee80211_unregister_hw(mvm->hw); iwl_mvm_leds_exit(mvm); out_free: + flush_delayed_work(&mvm->fw_dump_wk); iwl_phy_db_free(mvm->phy_db); kfree(mvm->scan_cmd); if (!cfg->no_power_up_nic_in_init || !mvm->nvm_file_name) From 181a4224acdfb993a21f987f8617b5c8d7bc654e Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 1 Oct 2015 16:25:43 +0200 Subject: [PATCH 22/92] ipv4: fix reply_dst leakage on arp reply There are cases when the created metadata reply is not used. Ensure the allocated memory is freed also in such cases. Fixes: 63d008a4e9ee ("ipv4: send arp replies to the correct tunnel") Reported-by: Hannes Frederic Sowa Signed-off-by: Jiri Benc Signed-off-by: David S. 
Miller --- net/ipv4/arp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index f03db8b7abee..0c9c3482e419 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -312,7 +312,7 @@ static void arp_send_dst(int type, int ptype, __be32 dest_ip, if (!skb) return; - skb_dst_set(skb, dst); + skb_dst_set(skb, dst_clone(dst)); arp_xmit(skb); } @@ -384,7 +384,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) } if (skb && !(dev->priv_flags & IFF_XMIT_DST_RELEASE)) - dst = dst_clone(skb_dst(skb)); + dst = skb_dst(skb); arp_send_dst(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, dst_hw, dev->dev_addr, NULL, dst); } @@ -811,7 +811,7 @@ static int arp_process(struct sock *sk, struct sk_buff *skb) } else { pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb); - return 0; + goto out_free_dst; } goto out; } @@ -865,6 +865,8 @@ static int arp_process(struct sock *sk, struct sk_buff *skb) out: consume_skb(skb); +out_free_dst: + dst_release(reply_dst); return 0; } From fec31ffffa6e05845ab13908d0ac0d5a10816836 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Thu, 1 Oct 2015 18:25:42 -0700 Subject: [PATCH 23/92] i40e: fix offload of GRE tunnels The driver still was not offloading TSO on GRE tunnels because it forgot to set the GSO_GRE flag, causing lots of retransmits. This fixes generic GRE traffic (like a tunnel added like below) whereas before it would get 1Gb/s or less, now on a 10G adapter it gets 8.7Gb/s. ip ad ad 11.1.0.2/24 dev ens2f0 ip l set ens2f0 up ip link add gre2 type gretap remote 11.1.0.1 local 11.1.0.2 dev ens2f0 ip l set gre2 up ip ad ad 192.168.124.2/24 dev gre2 ping 192.168.124.1 netperf -H 192.168.124.1 Signed-off-by: Jesse Brandeburg Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 2fdf978ae6a5..dd44fafd8798 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8389,6 +8389,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_GRE | NETIF_F_TSO; netdev->features = NETIF_F_SG | @@ -8396,6 +8397,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) NETIF_F_SCTP_CSUM | NETIF_F_HIGHDMA | NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_GRE | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER | From 6bd00b850635abb0044e06101761533c8beba79c Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 1 Oct 2015 11:37:42 -0700 Subject: [PATCH 24/92] act_mirred: fix a race condition on mirred_list After commit 1ce87720d456 ("net: sched: make cls_u32 lockless") we began to release tc actions in a RCU callback. However, mirred action relies on RTNL lock to protect the global mirred_list, therefore we could have a race condition between RCU callback and netdevice event, which caused a list corruption as reported by Vinson. Instead of relying on RTNL lock, introduce a spinlock to protect this list. Note, in non-bind case, it is still called with RTNL lock, therefore should disable BH too. Reported-by: Vinson Lee Cc: John Fastabend Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- net/sched/act_mirred.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 2d1be4a760fd..3e7c51a8ca38 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -31,13 +31,17 @@ #define MIRRED_TAB_MASK 7 static LIST_HEAD(mirred_list); +static DEFINE_SPINLOCK(mirred_list_lock); static void tcf_mirred_release(struct tc_action *a, int bind) { struct tcf_mirred *m = to_mirred(a); struct net_device *dev = rcu_dereference_protected(m->tcfm_dev, 1); + /* We could be called either in a RCU callback or with RTNL lock held. */ + spin_lock_bh(&mirred_list_lock); list_del(&m->tcfm_list); + spin_unlock_bh(&mirred_list_lock); if (dev) dev_put(dev); } @@ -123,7 +127,9 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, } if (ret == ACT_P_CREATED) { + spin_lock_bh(&mirred_list_lock); list_add(&m->tcfm_list, &mirred_list); + spin_unlock_bh(&mirred_list_lock); tcf_hash_insert(a); } @@ -221,7 +227,8 @@ static int mirred_device_event(struct notifier_block *unused, struct tcf_mirred *m; ASSERT_RTNL(); - if (event == NETDEV_UNREGISTER) + if (event == NETDEV_UNREGISTER) { + spin_lock_bh(&mirred_list_lock); list_for_each_entry(m, &mirred_list, tcfm_list) { if (rcu_access_pointer(m->tcfm_dev) == dev) { dev_put(dev); @@ -231,6 +238,8 @@ static int mirred_device_event(struct notifier_block *unused, RCU_INIT_POINTER(m->tcfm_dev, NULL); } } + spin_unlock_bh(&mirred_list_lock); + } return NOTIFY_DONE; } From 215c90afb9ea633026273d81ac9c9ece2b1acd58 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 1 Oct 2015 11:37:43 -0700 Subject: [PATCH 25/92] act_mirred: always release tcf hash Align with other tc actions. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- net/sched/act_mirred.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 3e7c51a8ca38..2efaf4ee6040 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -107,10 +107,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, } else { if (bind) return 0; - if (!ovr) { - tcf_hash_release(a, bind); + + tcf_hash_release(a, bind); + if (!ovr) return -EEXIST; - } } m = to_mirred(a); From e9193d60d363e4dff75ff6d43a48f22be26d59c7 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Fri, 2 Oct 2015 00:05:36 +0300 Subject: [PATCH 26/92] net/unix: fix logic about sk_peek_offset Now recv with MSG_PEEK can return data from multiple SKBs. Unfortunately we take into account the peek offset for each skb, which is wrong. We need to apply the peek offset only once. In addition, the peek offset should be used only if MSG_PEEK is set. Cc: "David S. Miller" (maintainer:NETWORKING Cc: Eric Dumazet (commit_signer:1/14=7%) Cc: Aaron Conole Fixes: 9f389e35674f ("af_unix: return data from multiple SKBs on recv() with MSG_PEEK flag") Signed-off-by: Andrey Vagin Tested-by: Aaron Conole Signed-off-by: David S. 
Miller --- net/unix/af_unix.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ef31b40ad550..94f658235fb4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2064,6 +2064,11 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state) goto out; } + if (flags & MSG_PEEK) + skip = sk_peek_offset(sk, flags); + else + skip = 0; + do { int chunk; struct sk_buff *skb, *last; @@ -2112,7 +2117,6 @@ unlock: break; } - skip = sk_peek_offset(sk, flags); while (skip >= unix_skb_len(skb)) { skip -= unix_skb_len(skb); last = skb; @@ -2179,14 +2183,12 @@ unlock: if (UNIXCB(skb).fp) scm.fp = scm_fp_dup(UNIXCB(skb).fp); - if (skip) { - sk_peek_offset_fwd(sk, chunk); - skip -= chunk; - } + sk_peek_offset_fwd(sk, chunk); if (UNIXCB(skb).fp) break; + skip = 0; last = skb; last_len = skb->len; unix_state_lock(sk); From 33db4125ec745426c3483d6817d8f7ea5324cd05 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 1 Oct 2015 15:00:37 -0700 Subject: [PATCH 27/92] openvswitch: Rename LABEL->LABELS Conntrack LABELS (plural) are exposed by conntrack; rename the OVS name for these to be consistent with conntrack. Fixes: c2ac667 "openvswitch: Allow matching on conntrack label" Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- include/uapi/linux/openvswitch.h | 12 +++--- net/openvswitch/actions.c | 2 +- net/openvswitch/conntrack.c | 74 ++++++++++++++++---------------- net/openvswitch/conntrack.h | 2 +- net/openvswitch/flow.h | 2 +- net/openvswitch/flow_netlink.c | 18 ++++---- 6 files changed, 55 insertions(+), 55 deletions(-) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 32e07d8cbaf4..c736344afed4 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -326,7 +326,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_CT_STATE, /* u8 bitmask of OVS_CS_F_* */ OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */ OVS_KEY_ATTR_CT_MARK, /* u32 connection tracking mark */ - OVS_KEY_ATTR_CT_LABEL, /* 16-octet connection tracking label */ + OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking label */ #ifdef __KERNEL__ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ @@ -439,9 +439,9 @@ struct ovs_key_nd { __u8 nd_tll[ETH_ALEN]; }; -#define OVS_CT_LABEL_LEN 16 -struct ovs_key_ct_label { - __u8 ct_label[OVS_CT_LABEL_LEN]; +#define OVS_CT_LABELS_LEN 16 +struct ovs_key_ct_labels { + __u8 ct_labels[OVS_CT_LABELS_LEN]; }; /* OVS_KEY_ATTR_CT_STATE flags */ @@ -623,7 +623,7 @@ struct ovs_action_hash { * @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the * mask, the corresponding bit in the value is copied to the connection * tracking mark field in the connection. - * @OVS_CT_ATTR_LABEL: %OVS_CT_LABEL_LEN value followed by %OVS_CT_LABEL_LEN + * @OVS_CT_ATTR_LABEL: %OVS_CT_LABELS_LEN value followed by %OVS_CT_LABELS_LEN * mask. For each bit set in the mask, the corresponding bit in the value is * copied to the connection tracking label field in the connection. * @OVS_CT_ATTR_HELPER: variable length string defining conntrack ALG. @@ -633,7 +633,7 @@ enum ovs_ct_attr { OVS_CT_ATTR_FLAGS, /* u8 bitmask of OVS_CT_F_*. */ OVS_CT_ATTR_ZONE, /* u16 zone id. 
*/ OVS_CT_ATTR_MARK, /* mark to associate with this connection. */ - OVS_CT_ATTR_LABEL, /* label to associate with this connection. */ + OVS_CT_ATTR_LABELS, /* labels to associate with this connection. */ OVS_CT_ATTR_HELPER, /* netlink helper to assist detection of related connections. */ __OVS_CT_ATTR_MAX diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 315f5330b6e5..e23a61cc3d5c 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -968,7 +968,7 @@ static int execute_masked_set_action(struct sk_buff *skb, case OVS_KEY_ATTR_CT_STATE: case OVS_KEY_ATTR_CT_ZONE: case OVS_KEY_ATTR_CT_MARK: - case OVS_KEY_ATTR_CT_LABEL: + case OVS_KEY_ATTR_CT_LABELS: err = -EINVAL; break; } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 002a755fa07e..7d80acfb80d0 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -37,9 +37,9 @@ struct md_mark { }; /* Metadata label for masked write to conntrack label. */ -struct md_label { - struct ovs_key_ct_label value; - struct ovs_key_ct_label mask; +struct md_labels { + struct ovs_key_ct_labels value; + struct ovs_key_ct_labels mask; }; /* Conntrack action context for execution. */ @@ -50,7 +50,7 @@ struct ovs_conntrack_info { u32 flags; u16 family; struct md_mark mark; - struct md_label label; + struct md_labels labels; }; static u16 key_to_nfproto(const struct sw_flow_key *key) @@ -109,21 +109,21 @@ static u32 ovs_ct_get_mark(const struct nf_conn *ct) #endif } -static void ovs_ct_get_label(const struct nf_conn *ct, - struct ovs_key_ct_label *label) +static void ovs_ct_get_labels(const struct nf_conn *ct, + struct ovs_key_ct_labels *labels) { struct nf_conn_labels *cl = ct ? 
nf_ct_labels_find(ct) : NULL; if (cl) { size_t len = cl->words * sizeof(long); - if (len > OVS_CT_LABEL_LEN) - len = OVS_CT_LABEL_LEN; - else if (len < OVS_CT_LABEL_LEN) - memset(label, 0, OVS_CT_LABEL_LEN); - memcpy(label, cl->bits, len); + if (len > OVS_CT_LABELS_LEN) + len = OVS_CT_LABELS_LEN; + else if (len < OVS_CT_LABELS_LEN) + memset(labels, 0, OVS_CT_LABELS_LEN); + memcpy(labels, cl->bits, len); } else { - memset(label, 0, OVS_CT_LABEL_LEN); + memset(labels, 0, OVS_CT_LABELS_LEN); } } @@ -134,7 +134,7 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, key->ct.state = state; key->ct.zone = zone->id; key->ct.mark = ovs_ct_get_mark(ct); - ovs_ct_get_label(ct, &key->ct.label); + ovs_ct_get_labels(ct, &key->ct.labels); } /* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has @@ -179,8 +179,8 @@ int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && - nla_put(skb, OVS_KEY_ATTR_CT_LABEL, sizeof(key->ct.label), - &key->ct.label)) + nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(key->ct.labels), + &key->ct.labels)) return -EMSGSIZE; return 0; @@ -213,9 +213,9 @@ static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key, #endif } -static int ovs_ct_set_label(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ct_label *label, - const struct ovs_key_ct_label *mask) +static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key, + const struct ovs_key_ct_labels *labels, + const struct ovs_key_ct_labels *mask) { enum ip_conntrack_info ctinfo; struct nf_conn_labels *cl; @@ -235,15 +235,15 @@ static int ovs_ct_set_label(struct sk_buff *skb, struct sw_flow_key *key, nf_ct_labels_ext_add(ct); cl = nf_ct_labels_find(ct); } - if (!cl || cl->words * sizeof(long) < OVS_CT_LABEL_LEN) + if (!cl || cl->words * sizeof(long) < OVS_CT_LABELS_LEN) return -ENOSPC; - err = nf_connlabels_replace(ct, (u32 *)label, (u32 *)mask, - 
OVS_CT_LABEL_LEN / sizeof(u32)); + err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask, + OVS_CT_LABELS_LEN / sizeof(u32)); if (err) return err; - ovs_ct_get_label(ct, &key->ct.label); + ovs_ct_get_labels(ct, &key->ct.labels); return 0; } @@ -465,12 +465,12 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, return 0; } -static bool label_nonzero(const struct ovs_key_ct_label *label) +static bool labels_nonzero(const struct ovs_key_ct_labels *labels) { size_t i; - for (i = 0; i < sizeof(*label); i++) - if (label->ct_label[i]) + for (i = 0; i < sizeof(*labels); i++) + if (labels->ct_labels[i]) return true; return false; @@ -506,9 +506,9 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, if (err) goto err; } - if (label_nonzero(&info->label.mask)) - err = ovs_ct_set_label(skb, key, &info->label.value, - &info->label.mask); + if (labels_nonzero(&info->labels.mask)) + err = ovs_ct_set_labels(skb, key, &info->labels.value, + &info->labels.mask); err: skb_push(skb, nh_ofs); return err; @@ -545,8 +545,8 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { .maxlen = sizeof(u16) }, [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark), .maxlen = sizeof(struct md_mark) }, - [OVS_CT_ATTR_LABEL] = { .minlen = sizeof(struct md_label), - .maxlen = sizeof(struct md_label) }, + [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels), + .maxlen = sizeof(struct md_labels) }, [OVS_CT_ATTR_HELPER] = { .minlen = 1, .maxlen = NF_CT_HELPER_NAME_LEN } }; @@ -593,10 +593,10 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, } #endif #ifdef CONFIG_NF_CONNTRACK_LABELS - case OVS_CT_ATTR_LABEL: { - struct md_label *label = nla_data(a); + case OVS_CT_ATTR_LABELS: { + struct md_labels *labels = nla_data(a); - info->label = *label; + info->labels = *labels; break; } #endif @@ -633,7 +633,7 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr) attr == OVS_KEY_ATTR_CT_MARK) return true; if 
(IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && - attr == OVS_KEY_ATTR_CT_LABEL) { + attr == OVS_KEY_ATTR_CT_LABELS) { struct ovs_net *ovs_net = net_generic(net, ovs_net_id); return ovs_net->xt_label; @@ -711,8 +711,8 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, &ct_info->mark)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && - nla_put(skb, OVS_CT_ATTR_LABEL, sizeof(ct_info->label), - &ct_info->label)) + nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels), + &ct_info->labels)) return -EMSGSIZE; if (ct_info->helper) { if (nla_put_string(skb, OVS_CT_ATTR_HELPER, @@ -737,7 +737,7 @@ void ovs_ct_free_action(const struct nlattr *a) void ovs_ct_init(struct net *net) { - unsigned int n_bits = sizeof(struct ovs_key_ct_label) * BITS_PER_BYTE; + unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE; struct ovs_net *ovs_net = net_generic(net, ovs_net_id); if (nf_connlabels_get(net, n_bits)) { diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index 43f5dd7a5577..6bd603c6a031 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -72,7 +72,7 @@ static inline void ovs_ct_fill_key(const struct sk_buff *skb, key->ct.state = 0; key->ct.zone = 0; key->ct.mark = 0; - memset(&key->ct.label, 0, sizeof(key->ct.label)); + memset(&key->ct.labels, 0, sizeof(key->ct.labels)); } static inline int ovs_ct_put_key(const struct sw_flow_key *key, diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index fe527d2dd4b7..8cfa15a08668 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -116,7 +116,7 @@ struct sw_flow_key { u16 zone; u32 mark; u8 state; - struct ovs_key_ct_label label; + struct ovs_key_ct_labels labels; } ct; } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. 
*/ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 5c030a4d7338..a60e3b7684bc 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -294,7 +294,7 @@ size_t ovs_key_attr_size(void) + nla_total_size(1) /* OVS_KEY_ATTR_CT_STATE */ + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */ + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ - + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABEL */ + + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ @@ -352,7 +352,7 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u8) }, [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, - [OVS_KEY_ATTR_CT_LABEL] = { .len = sizeof(struct ovs_key_ct_label) }, + [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) }, }; static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) @@ -833,14 +833,14 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK); } - if (*attrs & (1 << OVS_KEY_ATTR_CT_LABEL) && - ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABEL)) { - const struct ovs_key_ct_label *cl; + if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) && + ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) { + const struct ovs_key_ct_labels *cl; - cl = nla_data(a[OVS_KEY_ATTR_CT_LABEL]); - SW_FLOW_KEY_MEMCPY(match, ct.label, cl->ct_label, + cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]); + SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels, sizeof(*cl), is_mask); - *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABEL); + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS); } return 0; } @@ -1973,7 +1973,7 @@ static int validate_set(const struct nlattr *a, case OVS_KEY_ATTR_PRIORITY: case 
OVS_KEY_ATTR_SKB_MARK: case OVS_KEY_ATTR_CT_MARK: - case OVS_KEY_ATTR_CT_LABEL: + case OVS_KEY_ATTR_CT_LABELS: case OVS_KEY_ATTR_ETHERNET: break; From 93d08b6966cf730ea669d4d98f43627597077153 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 2 Oct 2015 12:06:03 +0200 Subject: [PATCH 28/92] bpf: fix panic in SO_GET_FILTER with native ebpf programs When sockets have a native eBPF program attached through setsockopt(sk, SOL_SOCKET, SO_ATTACH_BPF, ...), and then try to dump these over getsockopt(sk, SOL_SOCKET, SO_GET_FILTER, ...), the following panic appears: [49904.178642] BUG: unable to handle kernel NULL pointer dereference at (null) [49904.178762] IP: [] sk_get_filter+0x39/0x90 [49904.182000] PGD 86fc9067 PUD 531a1067 PMD 0 [49904.185196] Oops: 0000 [#1] SMP [...] [49904.224677] Call Trace: [49904.226090] [] sock_getsockopt+0x319/0x740 [49904.227535] [] ? sock_has_perm+0x63/0x70 [49904.228953] [] ? release_sock+0x108/0x150 [49904.230380] [] ? selinux_socket_getsockopt+0x23/0x30 [49904.231788] [] SyS_getsockopt+0xa6/0xc0 [49904.233267] [] entry_SYSCALL_64_fastpath+0x12/0x71 The underlying issue is the very same as in commit b382c0865600 ("sock, diag: fix panic in sock_diag_put_filterinfo"), that is, native eBPF programs don't store an original program since this is only needed in cBPF ones. However, sk_get_filter() wasn't updated to test for this at the time when eBPF could be attached. Just throw an error to the user to indicate that eBPF cannot be dumped over this interface. That way, it can also be known that a program _is_ attached (as opposed to just return 0), and a different (future) method needs to be consulted for a dump. Fixes: 89aa075832b0 ("net: sock: allow eBPF programs to be attached to sockets") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- net/core/filter.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index 05a04ea87172..87b78ef0c3d4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1854,9 +1854,13 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, goto out; /* We're copying the filter that has been originally attached, - * so no conversion/decode needed anymore. + * so no conversion/decode needed anymore. eBPF programs that + * have no original program cannot be dumped through this. */ + ret = -EACCES; fprog = filter->prog->orig_prog; + if (!fprog) + goto out; ret = fprog->len; if (!len) From 598c12d0ba6de9060f04999746eb1e015774044b Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 2 Oct 2015 13:18:22 +0300 Subject: [PATCH 29/92] ovs: do not allocate memory from offline numa node When openvswitch tries to allocate memory from offline numa node 0: stats = kmem_cache_alloc_node(flow_stats_cache, GFP_KERNEL | __GFP_ZERO, 0) It catches VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES || !node_online(nid)) [ replaced with VM_WARN_ON(!node_online(nid)) recently ] in linux/gfp.h. This patch disables numa affinity in this case. Signed-off-by: Konstantin Khlebnikov Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow_table.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index f2ea83ba4763..c7f74aab34b9 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -93,7 +93,8 @@ struct sw_flow *ovs_flow_alloc(void) /* Initialize the default stat node. */ stats = kmem_cache_alloc_node(flow_stats_cache, - GFP_KERNEL | __GFP_ZERO, 0); + GFP_KERNEL | __GFP_ZERO, + node_online(0) ? 
0 : NUMA_NO_NODE); if (!stats) goto err; From 8690f47d6e76d4300eeb316ba4773f7de3da63f7 Mon Sep 17 00:00:00 2001 From: Nicolas Schichan Date: Fri, 2 Oct 2015 15:39:12 +0200 Subject: [PATCH 30/92] ARM: net: make BPF_LD | BPF_IND instruction trigger r_X initialisation to 0. Without this patch, if the only instructions using r_X are of the BPF_LD | BPF_IND type, r_X would not be reset to 0, using whatever value was there when entering the jited code. With this patch, r_X will be correctly marked as used so it will be reset to 0 in the prologue code. This fix also makes the test "LD_IND byte default X" pass in the test_bpf module when the ARM JIT is enabled. Signed-off-by: Nicolas Schichan Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 876060bcceeb..b8efb8cd1f73 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -614,6 +614,7 @@ load_common: case BPF_LD | BPF_B | BPF_IND: load_order = 0; load_ind: + update_on_xread(ctx); OP_IMM3(ARM_ADD, r_off, r_X, k, ctx); goto load_common; case BPF_LDX | BPF_IMM: From 83ffe99f52b8f269b31b21524adcd13b165f7703 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 2 Oct 2015 14:56:34 -0700 Subject: [PATCH 31/92] openvswitch: Fix ovs_vport_get_stats() Not every device has dev->tstats set. So when OVS tries to calculate vport stats it causes kernel panic. Following patch fixes it by using standard API to get net-device stats. 
---8<--- Unable to handle kernel paging request at virtual address 766b4008 Internal error: Oops: 96000005 [#1] PREEMPT SMP Modules linked in: vport_vxlan vxlan ip6_udp_tunnel udp_tunnel tun bridge stp llc openvswitch ipv6 CPU: 7 PID: 1108 Comm: ovs-vswitchd Not tainted 4.3.0-rc3+ #82 PC is at ovs_vport_get_stats+0x150/0x1f8 [openvswitch] Call trace: [] ovs_vport_get_stats+0x150/0x1f8 [openvswitch] [] ovs_vport_cmd_fill_info+0x140/0x1e0 [openvswitch] [] ovs_vport_cmd_dump+0xbc/0x138 [openvswitch] [] netlink_dump+0xb8/0x258 [] __netlink_dump_start+0x120/0x178 [] genl_family_rcv_msg+0x2d4/0x308 [] genl_rcv_msg+0x88/0xc4 [] netlink_rcv_skb+0xd4/0x100 [] genl_rcv+0x30/0x48 [] netlink_unicast+0x154/0x200 [] netlink_sendmsg+0x308/0x364 [] sock_sendmsg+0x14/0x2c [] SyS_sendto+0xbc/0xf0 Code: aa1603e1 f94037a4 aa1303e2 aa1703e0 (f9400465) Reported-by: Tomasz Sawicki Fixes: 8c876639c98 ("openvswitch: Remove vport stats.") Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/vport.c | 38 +++++++++++--------------------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index dc81dc619aa2..fc5c0b9ccfe9 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -280,35 +280,19 @@ void ovs_vport_del(struct vport *vport) */ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) { - struct net_device *dev = vport->dev; - int i; + const struct rtnl_link_stats64 *dev_stats; + struct rtnl_link_stats64 temp; - memset(stats, 0, sizeof(*stats)); - stats->rx_errors = dev->stats.rx_errors; - stats->tx_errors = dev->stats.tx_errors; - stats->tx_dropped = dev->stats.tx_dropped; - stats->rx_dropped = dev->stats.rx_dropped; + dev_stats = dev_get_stats(vport->dev, &temp); + stats->rx_errors = dev_stats->rx_errors; + stats->tx_errors = dev_stats->tx_errors; + stats->tx_dropped = dev_stats->tx_dropped; + stats->rx_dropped = dev_stats->rx_dropped; - stats->rx_dropped 
+= atomic_long_read(&dev->rx_dropped); - stats->tx_dropped += atomic_long_read(&dev->tx_dropped); - - for_each_possible_cpu(i) { - const struct pcpu_sw_netstats *percpu_stats; - struct pcpu_sw_netstats local_stats; - unsigned int start; - - percpu_stats = per_cpu_ptr(dev->tstats, i); - - do { - start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); - local_stats = *percpu_stats; - } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); - - stats->rx_bytes += local_stats.rx_bytes; - stats->rx_packets += local_stats.rx_packets; - stats->tx_bytes += local_stats.tx_bytes; - stats->tx_packets += local_stats.tx_packets; - } + stats->rx_bytes = dev_stats->rx_bytes; + stats->rx_packets = dev_stats->rx_packets; + stats->tx_bytes = dev_stats->tx_bytes; + stats->tx_packets = dev_stats->tx_packets; } /** From 855591d2f7d1ff7eb18b60ce2be494e3bfe27fb8 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 5 Oct 2015 10:51:03 -0500 Subject: [PATCH 32/92] amd-xgbe: Check for successful buffer allocation before use The kasprintf function can return NULL if the allocation fails. Check for successful allocation before attempting to use the returned buffer. Signed-off-by: Tom Lendacky Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c index 66137ffea3f8..96f485ab612e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c @@ -327,6 +327,9 @@ void xgbe_debugfs_init(struct xgbe_prv_data *pdata) pdata->debugfs_xpcs_reg = 0; buf = kasprintf(GFP_KERNEL, "amd-xgbe-%s", pdata->netdev->name); + if (!buf) + return; + pdata->xgbe_debugfs = debugfs_create_dir(buf, NULL); if (!pdata->xgbe_debugfs) { netdev_err(pdata->netdev, "debugfs_create_dir failed\n"); From 90d2c056bd85bbb47104c52e08eecf8408163a54 Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Sat, 3 Oct 2015 17:13:05 -0700 Subject: [PATCH 33/92] i40e/i40evf: set AQ count after memory allocation The standard way to check if the AQ is enabled is to look at the count field. So we should only set this field after we have successfully allocated memory. To do otherwise is to incite panic among the populace. Signed-off-by: Mitch Williams Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_adminq.c | 4 ++-- drivers/net/ethernet/intel/i40evf/i40e_adminq.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 62488a67149d..c0e943aecd13 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -386,7 +386,6 @@ static i40e_status i40e_init_asq(struct i40e_hw *hw) hw->aq.asq.next_to_use = 0; hw->aq.asq.next_to_clean = 0; - hw->aq.asq.count = hw->aq.num_asq_entries; /* allocate the ring memory */ ret_code = i40e_alloc_adminq_asq_ring(hw); @@ -404,6 +403,7 @@ static i40e_status i40e_init_asq(struct i40e_hw *hw) goto init_adminq_free_rings; /* success! 
*/ + hw->aq.asq.count = hw->aq.num_asq_entries; goto init_adminq_exit; init_adminq_free_rings: @@ -445,7 +445,6 @@ static i40e_status i40e_init_arq(struct i40e_hw *hw) hw->aq.arq.next_to_use = 0; hw->aq.arq.next_to_clean = 0; - hw->aq.arq.count = hw->aq.num_arq_entries; /* allocate the ring memory */ ret_code = i40e_alloc_adminq_arq_ring(hw); @@ -463,6 +462,7 @@ static i40e_status i40e_init_arq(struct i40e_hw *hw) goto init_adminq_free_rings; /* success! */ + hw->aq.arq.count = hw->aq.num_arq_entries; goto init_adminq_exit; init_adminq_free_rings: diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c index 929d47152bf2..a23ebfd5cd25 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c @@ -373,7 +373,6 @@ static i40e_status i40e_init_asq(struct i40e_hw *hw) hw->aq.asq.next_to_use = 0; hw->aq.asq.next_to_clean = 0; - hw->aq.asq.count = hw->aq.num_asq_entries; /* allocate the ring memory */ ret_code = i40e_alloc_adminq_asq_ring(hw); @@ -391,6 +390,7 @@ static i40e_status i40e_init_asq(struct i40e_hw *hw) goto init_adminq_free_rings; /* success! */ + hw->aq.asq.count = hw->aq.num_asq_entries; goto init_adminq_exit; init_adminq_free_rings: @@ -432,7 +432,6 @@ static i40e_status i40e_init_arq(struct i40e_hw *hw) hw->aq.arq.next_to_use = 0; hw->aq.arq.next_to_clean = 0; - hw->aq.arq.count = hw->aq.num_arq_entries; /* allocate the ring memory */ ret_code = i40e_alloc_adminq_arq_ring(hw); @@ -450,6 +449,7 @@ static i40e_status i40e_init_arq(struct i40e_hw *hw) goto init_adminq_free_rings; /* success! 
*/ + hw->aq.arq.count = hw->aq.num_arq_entries; goto init_adminq_exit; init_adminq_free_rings: From 6e28b000825d959cb0c0b8fea8c2f132ddc516dc Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 5 Oct 2015 08:32:51 -0600 Subject: [PATCH 34/92] net: Fix vti use case with oif in dst lookups for IPv6 It occurred to me yesterday that 741a11d9e4103 ("net: ipv6: Add RT6_LOOKUP_F_IFACE flag if oif is set") means that xfrm6_dst_lookup needs the FLOWI_FLAG_SKIP_NH_OIF flag set. This latest commit causes the oif to be considered in lookups which is known to break vti. This explains why 58189ca7b274 did not include the IPv6 change at the time it was submitted. Fixes: 42a7b32b73d6 ("xfrm: Add oif to dst lookups") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 30caa289c5db..5cedfda4b241 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -37,6 +37,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; + fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); if (saddr) memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr)); From 1023d2ec1e8bd63ede9ed1d93ebb797f650859b7 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 6 Oct 2015 15:39:53 +0100 Subject: [PATCH 35/92] net: dsa: add missing kfree on remove To prevent memory leakage on unbinding, add missing kfree calls. Includes minor cosmetic change to make patch clean. Signed-off-by: Neil Armstrong Signed-off-by: David S. 
Miller --- net/dsa/dsa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index c59fa5d9c22c..ed9d43fd1fec 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -914,8 +914,10 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) for (i = 0; i < dst->pd->nr_chips; i++) { struct dsa_switch *ds = dst->ds[i]; - if (ds != NULL) + if (ds) { dsa_switch_destroy(ds); + kfree(ds); + } } } @@ -924,6 +926,7 @@ static int dsa_remove(struct platform_device *pdev) struct dsa_switch_tree *dst = platform_get_drvdata(pdev); dsa_remove_dst(dst); + kfree(dst); dsa_of_remove(&pdev->dev); return 0; From e410ddb89ee8e68103ea58938b4972da594e3d2d Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 6 Oct 2015 15:40:25 +0100 Subject: [PATCH 36/92] net: dsa: add missing dsa_switch mdiobus remove To prevent memory leakage on unbinding, add missing mdiobus unregister and unallocation calls. Reviewed-by: Florian Fainelli Signed-off-by: Neil Armstrong Signed-off-by: David S. Miller --- net/dsa/dsa.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index ed9d43fd1fec..14fac4ed9569 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -424,6 +424,8 @@ static void dsa_switch_destroy(struct dsa_switch *ds) if (ds->hwmon_dev) hwmon_device_unregister(ds->hwmon_dev); #endif + mdiobus_unregister(ds->slave_mii_bus); + mdiobus_free(ds->slave_mii_bus); } #ifdef CONFIG_PM_SLEEP From cbc5d90b378cd255ffedeb12f5affe243230d47e Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 6 Oct 2015 15:40:32 +0100 Subject: [PATCH 37/92] net: dsa: complete dsa_switch_destroy When unbinding dsa, complete the dsa_switch_destroy to unregister the fixed link phy then cleanly unregister and destroy the net devices. Reviewed-by: Florian Fainelli Signed-off-by: Neil Armstrong Signed-off-by: David S. 
Miller --- net/dsa/dsa.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 14fac4ed9569..61559232861b 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "dsa_priv.h" char dsa_driver_version[] = "0.1"; @@ -420,10 +421,46 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, static void dsa_switch_destroy(struct dsa_switch *ds) { + struct device_node *port_dn; + struct phy_device *phydev; + struct dsa_chip_data *cd = ds->pd; + int port; + #ifdef CONFIG_NET_DSA_HWMON if (ds->hwmon_dev) hwmon_device_unregister(ds->hwmon_dev); #endif + + /* Disable configuration of the CPU and DSA ports */ + for (port = 0; port < DSA_MAX_PORTS; port++) { + if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) + continue; + + port_dn = cd->port_dn[port]; + if (of_phy_is_fixed_link(port_dn)) { + phydev = of_phy_find_device(port_dn); + if (phydev) { + int addr = phydev->addr; + + phy_device_free(phydev); + of_node_put(port_dn); + fixed_phy_del(addr); + } + } + } + + /* Destroy network devices for physical switch ports. */ + for (port = 0; port < DSA_MAX_PORTS; port++) { + if (!(ds->phys_port_mask & (1 << port))) + continue; + + if (!ds->ports[port]) + continue; + + unregister_netdev(ds->ports[port]); + free_netdev(ds->ports[port]); + } + mdiobus_unregister(ds->slave_mii_bus); mdiobus_free(ds->slave_mii_bus); } From d4ac35d6ed82e6c96ed5c016ea46fad31294fa7a Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 6 Oct 2015 15:40:37 +0100 Subject: [PATCH 38/92] net: dsa: switch to devm_ calls and remove kfree calls Now that the kfree calls exist in the remove functions, remove them in all places except the of_probe functions and replace allocation calls with their devm_ counterparts. Reviewed-by: Florian Fainelli Signed-off-by: Neil Armstrong Signed-off-by: David S. 
Miller --- net/dsa/dsa.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 61559232861b..d5a162cda087 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -306,7 +306,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) if (ret < 0) goto out; - ds->slave_mii_bus = mdiobus_alloc(); + ds->slave_mii_bus = devm_mdiobus_alloc(parent); if (ds->slave_mii_bus == NULL) { ret = -ENOMEM; goto out; @@ -315,7 +315,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) ret = mdiobus_register(ds->slave_mii_bus); if (ret < 0) - goto out_free; + goto out; /* @@ -368,10 +368,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) return ret; -out_free: - mdiobus_free(ds->slave_mii_bus); out: - kfree(ds); return ret; } @@ -401,7 +398,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, /* * Allocate and initialise switch state. */ - ds = kzalloc(sizeof(*ds) + drv->priv_size, GFP_KERNEL); + ds = devm_kzalloc(parent, sizeof(*ds) + drv->priv_size, GFP_KERNEL); if (ds == NULL) return ERR_PTR(-ENOMEM); @@ -462,7 +459,6 @@ static void dsa_switch_destroy(struct dsa_switch *ds) } mdiobus_unregister(ds->slave_mii_bus); - mdiobus_free(ds->slave_mii_bus); } #ifdef CONFIG_PM_SLEEP @@ -922,7 +918,7 @@ static int dsa_probe(struct platform_device *pdev) goto out; } - dst = kzalloc(sizeof(*dst), GFP_KERNEL); + dst = devm_kzalloc(&pdev->dev, sizeof(*dst), GFP_KERNEL); if (dst == NULL) { dev_put(dev); ret = -ENOMEM; @@ -953,10 +949,8 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) for (i = 0; i < dst->pd->nr_chips; i++) { struct dsa_switch *ds = dst->ds[i]; - if (ds) { + if (ds) dsa_switch_destroy(ds); - kfree(ds); - } } } @@ -965,7 +959,6 @@ static int dsa_remove(struct platform_device *pdev) struct dsa_switch_tree *dst = platform_get_drvdata(pdev); dsa_remove_dst(dst); - kfree(dst); dsa_of_remove(&pdev->dev); return 0; From 
4d7f3e757c15051b4521a59791de87ce748c0eb2 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 6 Oct 2015 15:40:43 +0100 Subject: [PATCH 39/92] net: dsa: exit probe if no switch were found If no switch was found in dsa_setup_dst, return -EPROBE_DEFER and exit dsa_probe cleanly. Signed-off-by: Neil Armstrong Signed-off-by: David S. Miller --- net/dsa/dsa.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index d5a162cda087..adb5325f4934 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -837,10 +837,11 @@ static inline void dsa_of_remove(struct device *dev) } #endif -static void dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, - struct device *parent, struct dsa_platform_data *pd) +static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, + struct device *parent, struct dsa_platform_data *pd) { int i; + unsigned configured = 0; dst->pd = pd; dst->master_netdev = dev; @@ -860,8 +861,16 @@ static void dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, dst->ds[i] = ds; if (ds->drv->poll_link != NULL) dst->link_poll_needed = 1; + + ++configured; } + /* + * If no switch was found, exit cleanly + */ + if (!configured) + return -EPROBE_DEFER; + /* * If we use a tagging format that doesn't have an ethertype * field, make sure that all packets from this point on get @@ -878,6 +887,8 @@ static void dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, dst->link_poll_timer.expires = round_jiffies(jiffies + HZ); add_timer(&dst->link_poll_timer); } + + return 0; } static int dsa_probe(struct platform_device *pdev) @@ -927,7 +938,9 @@ static int dsa_probe(struct platform_device *pdev) platform_set_drvdata(pdev, dst); - dsa_setup_dst(dst, dev, &pdev->dev, pd); + ret = dsa_setup_dst(dst, dev, &pdev->dev, pd); + if (ret) + goto out; return 0; From 0a7cc172a01e4a203667fb601cd80131db8d0c9a Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 6 Oct 
2015 10:59:56 -0700 Subject: [PATCH 40/92] openvswitch: Fix typos in CT headers These comments hadn't caught up to their implementations, fix them. Fixes: 7f8a436eaa2c "openvswitch: Add conntrack action" Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index c736344afed4..a9a4a59912e9 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -630,7 +630,7 @@ struct ovs_action_hash { */ enum ovs_ct_attr { OVS_CT_ATTR_UNSPEC, - OVS_CT_ATTR_FLAGS, /* u8 bitmask of OVS_CT_F_*. */ + OVS_CT_ATTR_FLAGS, /* u32 bitmask of OVS_CT_F_*. */ OVS_CT_ATTR_ZONE, /* u16 zone id. */ OVS_CT_ATTR_MARK, /* mark to associate with this connection. */ OVS_CT_ATTR_LABELS, /* labels to associate with this connection. */ @@ -705,7 +705,7 @@ enum ovs_action_attr { * data immediately followed by a mask. * The data must be zero for the unmasked * bits. */ - OVS_ACTION_ATTR_CT, /* One nested OVS_CT_ATTR_* . */ + OVS_ACTION_ATTR_CT, /* Nested OVS_CT_ATTR_* . */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ From b8f2257069f179c7bdedc9501c1623070c4c37bb Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 6 Oct 2015 10:59:57 -0700 Subject: [PATCH 41/92] openvswitch: Fix skb leak in ovs_fragment() If ovs_fragment() was unable to fragment the skb due to an L2 header that exceeds the supported length, skbs would be leaked. Fix the bug. Fixes: 7f8a436eaa2c "openvswitch: Add conntrack action" Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/openvswitch/actions.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index e23a61cc3d5c..4cb93f92d6be 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -684,7 +684,7 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru, { if (skb_network_offset(skb) > MAX_L2_LEN) { OVS_NLERR(1, "L2 header too long to fragment"); - return; + goto err; } if (ethertype == htons(ETH_P_IP)) { @@ -708,8 +708,7 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru, struct rt6_info ovs_rt; if (!v6ops) { - kfree_skb(skb); - return; + goto err; } prepare_frag(vport, skb); @@ -728,8 +727,12 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru, WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.", ovs_vport_name(vport), ntohs(ethertype), mru, vport->dev->mtu); - kfree_skb(skb); + goto err; } + + return; +err: + kfree_skb(skb); } static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, From ec0d043d05e6e3c0c2fac5de922c800c027c6386 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 6 Oct 2015 10:59:58 -0700 Subject: [PATCH 42/92] openvswitch: Ensure flow is valid before executing ct The ct action uses parts of the flow key, so we need to ensure that it is valid before executing that action. Fixes: 7f8a436eaa2c "openvswitch: Add conntrack action" Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/openvswitch/actions.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 4cb93f92d6be..c6a39bf2c3b9 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1102,6 +1102,12 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, break; case OVS_ACTION_ATTR_CT: + if (!is_flow_key_valid(key)) { + err = ovs_flow_key_update(skb, key); + if (err) + return err; + } + err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key, nla_data(a)); From 6f225952461b5e9b5520d0dc6e2ff0af57874fbb Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 6 Oct 2015 10:59:59 -0700 Subject: [PATCH 43/92] openvswitch: Reject ct_state unsupported bits Previously, if userspace specified ct_state bits in the flow key which are currently undefined (and therefore unsupported), then they would be ignored. This could cause unexpected behaviour in future if userspace is extended to support additional bits but attempts to communicate with the current version of the kernel. This patch rectifies the situation by rejecting such ct_state bits. Fixes: 7f8a436eaa2c "openvswitch: Add conntrack action" Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/openvswitch/conntrack.h | 12 ++++++++++++ net/openvswitch/flow_netlink.c | 6 ++++++ 2 files changed, 18 insertions(+) diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index 6bd603c6a031..d6eca8394254 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -34,6 +34,13 @@ int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *, void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb); void ovs_ct_free_action(const struct nlattr *a); + +static inline bool ovs_ct_state_supported(u8 state) +{ + return !(state & ~(OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | + OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | + OVS_CS_F_INVALID | OVS_CS_F_TRACKED)); +} #else #include @@ -46,6 +53,11 @@ static inline bool ovs_ct_verify(struct net *net, int attr) return false; } +static inline bool ovs_ct_state_supported(u8 state) +{ + return false; +} + static inline int ovs_ct_copy_action(struct net *net, const struct nlattr *nla, const struct sw_flow_key *key, struct sw_flow_actions **acts, bool log) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index a60e3b7684bc..d47b5c5c640e 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -816,6 +816,12 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) { u8 ct_state = nla_get_u8(a[OVS_KEY_ATTR_CT_STATE]); + if (!is_mask && !ovs_ct_state_supported(ct_state)) { + OVS_NLERR(log, "ct_state flags %02x unsupported", + ct_state); + return -EINVAL; + } + SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE); } From fbccce5965a58d56aaed9e9acd1bec75d8a66e87 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 6 Oct 2015 11:00:00 -0700 Subject: [PATCH 44/92] openvswitch: Extend ct_state match field to 32 bits The ct_state field was 
initially added as an 8-bit field, however six of the bits are already being used and use cases are already starting to appear that may push the limits of this field. This patch extends the field to 32 bits while retaining the internal representation of 8 bits. This should cover forward compatibility of the ABI for the foreseeable future. This patch also reorders the OVS_CS_F_* bits to be sequential. Suggested-by: Jarno Rajahalme Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 8 ++++---- net/openvswitch/conntrack.c | 2 +- net/openvswitch/conntrack.h | 4 ++-- net/openvswitch/flow_netlink.c | 8 ++++---- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index a9a4a59912e9..c861a4cf5fec 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -323,7 +323,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_MPLS, /* array of struct ovs_key_mpls. * The implementation may restrict * the accepted length of the array. */ - OVS_KEY_ATTR_CT_STATE, /* u8 bitmask of OVS_CS_F_* */ + OVS_KEY_ATTR_CT_STATE, /* u32 bitmask of OVS_CS_F_* */ OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */ OVS_KEY_ATTR_CT_MARK, /* u32 connection tracking mark */ OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking label */ @@ -449,9 +449,9 @@ struct ovs_key_ct_labels { #define OVS_CS_F_ESTABLISHED 0x02 /* Part of an existing connection. */ #define OVS_CS_F_RELATED 0x04 /* Related to an established * connection. */ -#define OVS_CS_F_INVALID 0x20 /* Could not track connection. */ -#define OVS_CS_F_REPLY_DIR 0x40 /* Flow is in the reply direction. */ -#define OVS_CS_F_TRACKED 0x80 /* Conntrack has occurred. */ +#define OVS_CS_F_REPLY_DIR 0x08 /* Flow is in the reply direction. */ +#define OVS_CS_F_INVALID 0x10 /* Could not track connection. */ +#define OVS_CS_F_TRACKED 0x20 /* Conntrack has occurred. 
*/ /** * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 7d80acfb80d0..466d5576fe3f 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -167,7 +167,7 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) { - if (nla_put_u8(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state)) + if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index d6eca8394254..da8714942c95 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -35,7 +35,7 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb); void ovs_ct_free_action(const struct nlattr *a); -static inline bool ovs_ct_state_supported(u8 state) +static inline bool ovs_ct_state_supported(u32 state) { return !(state & ~(OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | @@ -53,7 +53,7 @@ static inline bool ovs_ct_verify(struct net *net, int attr) return false; } -static inline bool ovs_ct_state_supported(u8 state) +static inline bool ovs_ct_state_supported(u32 state) { return false; } diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index d47b5c5c640e..171a691f1c32 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -291,7 +291,7 @@ size_t ovs_key_attr_size(void) + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */ - + nla_total_size(1) /* OVS_KEY_ATTR_CT_STATE */ + + nla_total_size(4) /* OVS_KEY_ATTR_CT_STATE */ + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */ + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ + 
nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ @@ -349,7 +349,7 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, .next = ovs_tunnel_key_lens, }, [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, - [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u8) }, + [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) }, @@ -814,10 +814,10 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) && ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) { - u8 ct_state = nla_get_u8(a[OVS_KEY_ATTR_CT_STATE]); + u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]); if (!is_mask && !ovs_ct_state_supported(ct_state)) { - OVS_NLERR(log, "ct_state flags %02x unsupported", + OVS_NLERR(log, "ct_state flags %08x unsupported", ct_state); return -EINVAL; } From ab38a7b5a4493a3658d891a8e91f9ffcb3d2defb Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 6 Oct 2015 11:00:01 -0700 Subject: [PATCH 45/92] openvswitch: Change CT_ATTR_FLAGS to CT_ATTR_COMMIT Previously, the CT_ATTR_FLAGS attribute, when nested under the OVS_ACTION_ATTR_CT, encoded a 32-bit bitmask of flags that modify the semantics of the ct action. It's more extensible to just represent each flag as a nested attribute, and this requires no additional error checking to reject flags that aren't currently supported. Suggested-by: Ben Pfaff Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- include/uapi/linux/openvswitch.h | 14 ++++---------- net/openvswitch/conntrack.c | 13 ++++++------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index c861a4cf5fec..036f73bc54cd 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -618,7 +618,9 @@ struct ovs_action_hash { /** * enum ovs_ct_attr - Attributes for %OVS_ACTION_ATTR_CT action. - * @OVS_CT_ATTR_FLAGS: u32 connection tracking flags. + * @OVS_CT_ATTR_COMMIT: If present, commits the connection to the conntrack + * table. This allows future packets for the same connection to be identified + * as 'established' or 'related'. * @OVS_CT_ATTR_ZONE: u16 connection tracking zone. * @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the * mask, the corresponding bit in the value is copied to the connection @@ -630,7 +632,7 @@ struct ovs_action_hash { */ enum ovs_ct_attr { OVS_CT_ATTR_UNSPEC, - OVS_CT_ATTR_FLAGS, /* u32 bitmask of OVS_CT_F_*. */ + OVS_CT_ATTR_COMMIT, /* No argument, commits connection. */ OVS_CT_ATTR_ZONE, /* u16 zone id. */ OVS_CT_ATTR_MARK, /* mark to associate with this connection. */ OVS_CT_ATTR_LABELS, /* labels to associate with this connection. */ @@ -641,14 +643,6 @@ enum ovs_ct_attr { #define OVS_CT_ATTR_MAX (__OVS_CT_ATTR_MAX - 1) -/* - * OVS_CT_ATTR_FLAGS flags - bitmask of %OVS_CT_F_* - * @OVS_CT_F_COMMIT: Commits the flow to the conntrack table. This allows - * future packets for the same connection to be identified as 'established' - * or 'related'. - */ -#define OVS_CT_F_COMMIT 0x01 - /** * enum ovs_action_attr - Action types. 
* diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 466d5576fe3f..80bf702715bb 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -47,7 +47,7 @@ struct ovs_conntrack_info { struct nf_conntrack_helper *helper; struct nf_conntrack_zone zone; struct nf_conn *ct; - u32 flags; + u8 commit : 1; u16 family; struct md_mark mark; struct md_labels labels; @@ -493,7 +493,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, return err; } - if (info->flags & OVS_CT_F_COMMIT) + if (info->commit) err = ovs_ct_commit(net, key, info, skb); else err = ovs_ct_lookup(net, key, info, skb); @@ -539,8 +539,7 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, } static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { - [OVS_CT_ATTR_FLAGS] = { .minlen = sizeof(u32), - .maxlen = sizeof(u32) }, + [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 }, [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), .maxlen = sizeof(u16) }, [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark), @@ -576,8 +575,8 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, } switch (type) { - case OVS_CT_ATTR_FLAGS: - info->flags = nla_get_u32(a); + case OVS_CT_ATTR_COMMIT: + info->commit = true; break; #ifdef CONFIG_NF_CONNTRACK_ZONES case OVS_CT_ATTR_ZONE: @@ -701,7 +700,7 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, if (!start) return -EMSGSIZE; - if (nla_put_u32(skb, OVS_CT_ATTR_FLAGS, ct_info->flags)) + if (ct_info->commit && nla_put_flag(skb, OVS_CT_ATTR_COMMIT)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) From c72eda0608a30ca6aa8722d4afdbd2557b3c9345 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 6 Oct 2015 15:03:53 -0400 Subject: [PATCH 46/92] af_unix: constify the sock parameter in unix_sk() Make unix_sk() just like inet[6]_sk() by constify'ing the sock parameter. 
Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/net/af_unix.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index cb1b9bbda332..b36d837c701e 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -64,7 +64,7 @@ struct unix_sock { struct socket_wq peer_wq; }; -static inline struct unix_sock *unix_sk(struct sock *sk) +static inline struct unix_sock *unix_sk(const struct sock *sk) { return (struct unix_sock *)sk; } From d40496a56430eac0d330378816954619899fe303 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 6 Oct 2015 17:23:47 -0700 Subject: [PATCH 47/92] act_mirred: clear sender cpu before sending to tx Similar to commit c29390c6dfee ("xps: must clear sender_cpu before forwarding") the skb->sender_cpu needs to be cleared when moving from Rx to Tx, otherwise the kernel could crash. Fixes: 2bd82484bb4c ("xps: fix xps for stacked devices") Cc: Eric Dumazet Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 2efaf4ee6040..32fcdecdb9e2 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -179,6 +179,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, skb2->skb_iif = skb->dev->ifindex; skb2->dev = dev; + skb_sender_cpu_clear(skb2); err = dev_queue_xmit(skb2); if (err) { From 6bf0577374cfb6c2301dbf4934a4f23ad3d72763 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 6 Oct 2015 20:46:07 -0700 Subject: [PATCH 48/92] bpf: clear sender_cpu before xmit Similar to commit c29390c6dfee ("xps: must clear sender_cpu before forwarding") the skb->sender_cpu needs to be cleared before xmit. Fixes: 3896d655f4d4 ("bpf: introduce bpf_clone_redirect() helper") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- net/core/filter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/filter.c b/net/core/filter.c index 87b78ef0c3d4..bb18c3680001 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1415,6 +1415,7 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) return dev_forward_skb(dev, skb2); skb2->dev = dev; + skb_sender_cpu_clear(skb2); return dev_queue_xmit(skb2); } From bcb9db49bb9da895ee809bfb2718d664dfb3ee2f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 7 Oct 2015 08:58:34 +0200 Subject: [PATCH 49/92] mlxsw: fix warnings for big-endian 32-bit dma_addr_t The recently added mlxsw driver produces warnings in ARM allmodconfig: drivers/net/ethernet/mellanox/mlxsw/pci.c: In function 'mlxsw_pci_cmd_exec': drivers/net/ethernet/mellanox/mlxsw/pci.c:1585:59: warning: right shift count >= width of type [-Wshift-count-overflow] linux/byteorder/big_endian.h:38:51: note: in definition of macro '__cpu_to_be32' drivers/net/ethernet/mellanox/mlxsw/pci.c:76:2: note: in expansion of macro 'iowrite32be' This uses upper_32_bits() to extract the bits while avoiding that warning. Signed-off-by: Arnd Bergmann Acked-by: Jiri Pirko Fixes: eda6500a987a "mlxsw: Add PCI bus implementation" Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 462cea31ecbb..cef866c37648 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1582,11 +1582,11 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod, if (in_mbox) memcpy(mlxsw_pci->cmd.in_mbox.buf, in_mbox, in_mbox_size); - mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_HI, in_mapaddr >> 32); - mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_LO, in_mapaddr); + mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_HI, upper_32_bits(in_mapaddr)); + mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_LO, lower_32_bits(in_mapaddr)); - mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_HI, out_mapaddr >> 32); - mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_LO, out_mapaddr); + mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_HI, upper_32_bits(out_mapaddr)); + mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_LO, lower_32_bits(out_mapaddr)); mlxsw_pci_write32(mlxsw_pci, CIR_IN_MODIFIER, in_mod); mlxsw_pci_write32(mlxsw_pci, CIR_TOKEN, 0); From 9d3a6386c8238bfacf57ee9dac68ec4b40b302fe Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 7 Oct 2015 10:26:38 +0200 Subject: [PATCH 50/92] net: mdio-octeon: Drop obsolete Kconfig advice "Y" was the right answer for MDIO_OCTEON when this option was only available on CAVIUM_OCTEON_SOC. But now that the option is visible on all (64-bit) systems, this piece of advice no longer makes sense. This helper module is selected automatically by drivers which need it anyway. Signed-off-by: Jean Delvare Fixes: a6d6786452 ("net: mdio-octeon: Modify driver to work on both ThunderX and Octeon") Cc: Florian Fainelli Cc: Sunil Goutham Cc: Radha Mohan Chintakuntla Cc: David Daney Cc: David S. Miller Acked-by: David Daney Signed-off-by: David S. 
Miller --- drivers/net/phy/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index c5ad98ace5d0..11e3975485c1 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -168,8 +168,6 @@ config MDIO_OCTEON busses. It is required by the Octeon and ThunderX ethernet device drivers. - If in doubt, say Y. - config MDIO_SUN4I tristate "Allwinner sun4i MDIO interface support" depends on ARCH_SUNXI From 85121d6ee6973f98789bf7343f7f636c01e2de19 Mon Sep 17 00:00:00 2001 From: Carol L Soto Date: Wed, 7 Oct 2015 12:31:46 -0400 Subject: [PATCH 51/92] net/mlx4: Remove shared_ports variable at mlx4_enable_msi_x If we get MAX_MSIX interrupts would like to have each receive ring with his own msix interrupt line. Do not need the shared_ports variable at mlx4_enable_msix Fixes: 9293267a3e2a ('net/mlx4_core: Capping number of requested MSIXs to MAX_MSIX') Signed-off-by: Carol L Soto Acked-by: Matan Barak Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 006757f80988..cc3a9897574c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2669,14 +2669,11 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) if (msi_x) { int nreq = dev->caps.num_ports * num_online_cpus() + 1; - bool shared_ports = false; nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs, nreq); - if (nreq > MAX_MSIX) { + if (nreq > MAX_MSIX) nreq = MAX_MSIX; - shared_ports = true; - } entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL); if (!entries) @@ -2699,9 +2696,6 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) bitmap_zero(priv->eq_table.eq[MLX4_EQ_ASYNC].actv_ports.ports, dev->caps.num_ports); - if (MLX4_IS_LEGACY_EQ_MODE(dev->caps)) - shared_ports = true; - for (i = 0; i < 
dev->caps.num_comp_vectors + 1; i++) { if (i == MLX4_EQ_ASYNC) continue; @@ -2709,7 +2703,7 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) priv->eq_table.eq[i].irq = entries[i + 1 - !!(i > MLX4_EQ_ASYNC)].vector; - if (shared_ports) { + if (MLX4_IS_LEGACY_EQ_MODE(dev->caps)) { bitmap_fill(priv->eq_table.eq[i].actv_ports.ports, dev->caps.num_ports); /* We don't set affinity hint when there From 820d39f3c497df6c8e040b8dcc7c19eeaa312701 Mon Sep 17 00:00:00 2001 From: Carol L Soto Date: Thu, 8 Oct 2015 15:26:15 +0300 Subject: [PATCH 52/92] net/mlx4_core: Avoid failing the interrupts test Test interrupts fails if not all completion vectors called request_irq. This case happens if only mlx4_en is loaded and we have more completion vectors than rx rings. Fixes: c66fa19c405a ('net/mlx4: Add EQ pool') Signed-off-by: Carol L Soto Acked-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/eq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 8e81e53c370e..c34488479365 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -1364,6 +1364,10 @@ int mlx4_test_interrupts(struct mlx4_dev *dev) * and performing a NOP command */ for(i = 0; !err && (i < dev->caps.num_comp_vectors); ++i) { + /* Make sure request_irq was called */ + if (!priv->eq_table.eq[i].have_irq) + continue; + /* Temporary use polling for command completions */ mlx4_cmd_use_polling(dev); From 13b7938883ea6e70e7e23f30dece6f79d7a2961d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 8 Oct 2015 15:26:17 +0300 Subject: [PATCH 53/92] net/mlx5: Fix typo in mlx5_query_port_pvlc We used the wrong register name for querying the PVLC register Fixes: a124d13ef59e ('net/mlx5_core: Add more query port helpers') Signed-off-by: Jiri Pirko Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 821caaab9bfb..3b9480fa3403 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -311,7 +311,7 @@ static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc, int err; memset(in, 0, sizeof(in)); - MLX5_SET(ptys_reg, in, local_port, local_port); + MLX5_SET(pvlc_reg, in, local_port, local_port); err = mlx5_core_access_reg(dev, in, sizeof(in), pvlc, pvlc_size, MLX5_REG_PVLC, 0, 0); From c07543431e9f3d126d083808efa0e76461d8833b Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Thu, 8 Oct 2015 15:26:18 +0300 Subject: [PATCH 54/92] net/mlx5e: Disable VLAN filter in promiscuous mode When the device was set to promiscuous mode, we didn't disable VLAN filtering, which is wrong behaviour, fix that. Now when the device is set to promiscuous mode RX packets sent over any VLAN (or no VLAN tag at all) will be accepted. Signed-off-by: Achiad Shochat Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlx5/core/en_flow_table.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c index e71563ce05d1..22d603f78273 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c @@ -598,6 +598,8 @@ void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) return; priv->vlan.filter_disabled = false; + if (priv->netdev->flags & IFF_PROMISC) + return; mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); } @@ -607,6 +609,8 @@ void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) return; priv->vlan.filter_disabled = true; + if (priv->netdev->flags & IFF_PROMISC) + return; mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); } @@ -717,8 +721,12 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled; bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; - if (enable_promisc) + if (enable_promisc) { mlx5e_add_eth_addr_rule(priv, &ea->promisc, MLX5E_PROMISC); + if (!priv->vlan.filter_disabled) + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, + 0); + } if (enable_allmulti) mlx5e_add_eth_addr_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); if (enable_broadcast) @@ -730,8 +738,12 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) mlx5e_del_eth_addr_from_flow_table(priv, &ea->broadcast); if (disable_allmulti) mlx5e_del_eth_addr_from_flow_table(priv, &ea->allmulti); - if (disable_promisc) + if (disable_promisc) { + if (!priv->vlan.filter_disabled) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, + 0); mlx5e_del_eth_addr_from_flow_table(priv, &ea->promisc); + } ea->promisc_enabled = promisc_enabled; ea->allmulti_enabled = allmulti_enabled; From d5a52095afddee121b97c4aac250f18d06803b58 Mon Sep 17 00:00:00 2001 From: Ido Shamay 
Date: Thu, 8 Oct 2015 15:26:19 +0300 Subject: [PATCH 55/92] MAINTAINERS: Update mlx4_en driver entry Remove Ido Shamay as co-maintainer for the mlx4 Ethernet driver, as he no longer works for Mellanox. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 797236befd27..4bdc1bd0c499 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6778,7 +6778,6 @@ F: drivers/scsi/megaraid/ MELLANOX ETHERNET DRIVER (mlx4_en) M: Amir Vadai -M: Ido Shamay L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com From 6ac644a8ae2dabf884a1b01e82e32d96ffe6eee5 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 7 Oct 2015 16:47:32 -0700 Subject: [PATCH 56/92] sch_hhf: fix return value of hhf_drop() Similar to commit c0afd9ce4d6a ("fq_codel: fix return value of fq_codel_drop()") ->drop() is supposed to return the number of bytes it dropped, but hhf_drop () returns the id of the bucket where it drops a packet from. Cc: Jamal Hadi Salim Cc: Terry Lam Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/sched/sch_hhf.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 9d15cb6b8cb1..86b04e31e60b 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -368,6 +368,15 @@ static unsigned int hhf_drop(struct Qdisc *sch) return bucket - q->buckets; } +static unsigned int hhf_qdisc_drop(struct Qdisc *sch) +{ + unsigned int prev_backlog; + + prev_backlog = sch->qstats.backlog; + hhf_drop(sch); + return prev_backlog - sch->qstats.backlog; +} + static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct hhf_sched_data *q = qdisc_priv(sch); @@ -696,7 +705,7 @@ static struct Qdisc_ops hhf_qdisc_ops __read_mostly = { .enqueue = hhf_enqueue, .dequeue = hhf_dequeue, .peek = qdisc_peek_dequeued, - .drop = hhf_drop, + .drop = hhf_qdisc_drop, .init = hhf_init, .reset = hhf_reset, .destroy = hhf_destroy, From 4b0c2541cbf223abd38b235146e30a3d207d3bfe Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Thu, 8 Oct 2015 15:17:37 +0200 Subject: [PATCH 57/92] mlxsw: switchx2: changing order of exit fallbacks Fixes: 31557f0f9755 ("mlxsw: Introduce Mellanox SwitchX-2 ASIC support") Signed-off-by: Elad Raz Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 3e52ee93438c..62cbbd1ada8d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -1069,9 +1069,9 @@ static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port) return 0; err_register_netdev: -err_port_admin_status_set: err_port_mac_learning_mode_set: err_port_stp_state_set: +err_port_admin_status_set: err_port_mtu_set: err_port_speed_set: err_port_swid_set: From bee1f753bfcb19660d8ad713b9dd3939630854b6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 8 Oct 2015 15:17:38 +0200 Subject: [PATCH 58/92] mlxsw: Fix bug in __mlxsw_item_bit_array_offset When calculating the shift needed in order to access a bit array element in a byte, we should multiply the index by the element size and not assume it is fixed at 2-bits. Fixes: 93c1edb27f9e ("mlxsw: Introduce Mellanox switch driver core") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/item.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/item.h b/drivers/net/ethernet/mellanox/mlxsw/item.h index ffd55d030ce2..36fb1cec53c9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/item.h +++ b/drivers/net/ethernet/mellanox/mlxsw/item.h @@ -187,6 +187,7 @@ __mlxsw_item_bit_array_offset(struct mlxsw_item *item, u16 index, u8 *shift) { u16 max_index, be_index; u16 offset; /* byte offset inside the array */ + u8 in_byte_index; BUG_ON(index && !item->element_size); if (item->offset % sizeof(u32) != 0 || @@ -199,7 +200,8 @@ __mlxsw_item_bit_array_offset(struct mlxsw_item *item, u16 index, u8 *shift) max_index = (item->size.bytes << 3) / item->element_size - 1; be_index = max_index - index; offset = be_index * item->element_size >> 3; - *shift = index % (BITS_PER_BYTE / item->element_size) << 1; + in_byte_index = index % (BITS_PER_BYTE / item->element_size); + *shift = in_byte_index * item->element_size; return item->offset + offset; } From f9468e8dc87e31f807fae04aae70380a461b95c6 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 8 Oct 2015 16:19:01 +0300 Subject: [PATCH 59/92] bnx2x: Prevent UDP 4-tuple configurations on older adapters Configuring 4-tuple RSS hashing for UDP [E.g., by using `ethtool -N rx-flow-hash udp4 sdfn'] on a 57710/57711 adapter would cause it to assert as HW does not support such a configuration. Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S.
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index aeb7ce64452e..be628bd9fb18 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -3351,6 +3351,13 @@ static int bnx2x_set_rss_flags(struct bnx2x *bp, struct ethtool_rxnfc *info) udp_rss_requested = 0; else return -EINVAL; + + if (CHIP_IS_E1x(bp) && udp_rss_requested) { + DP(BNX2X_MSG_ETHTOOL, + "57710, 57711 boards don't support RSS according to UDP 4-tuple\n"); + return -EINVAL; + } + if ((info->flow_type == UDP_V4_FLOW) && (bp->rss_conf_obj.udp_rss_v4 != udp_rss_requested)) { bp->rss_conf_obj.udp_rss_v4 = udp_rss_requested; From d9e4ce65b27694b0b70ff4d1cbbb740195fd916b Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 8 Oct 2015 18:19:39 +0200 Subject: [PATCH 60/92] ipv6: gre: setup default multicast routes over PtP links GRE point-to-point interfaces should also support ipv6 multicast. Setting up default multicast routes on interface creation was forgotten. Add it. Bugzilla: Cc: Julien Muchembled Cc: Eric Dumazet Cc: Nicolas Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 900113376d4e..36b85bd05ac8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3119,6 +3119,8 @@ static void addrconf_gre_config(struct net_device *dev) } addrconf_addr_gen(idev, true); + if (dev->flags & IFF_POINTOPOINT) + addrconf_add_mroute(dev); } #endif From 9ef2e965e55481a52d6d91ce61977a27836268d3 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 8 Oct 2015 18:19:53 +0200 Subject: [PATCH 61/92] ipv6: drop frames with attached skb->sk in forwarding This is a clone of commit 2ab957492d13b ("ip_forward: Drop frames with attached skb->sk") for ipv6. This commit has exactly the same reasons as the above mentioned commit, namely to prevent panics during netfilter reload or a misconfigured stack. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 92b1aa38f121..61d403ee1031 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -376,6 +376,9 @@ int ip6_forward(struct sk_buff *skb) if (skb->pkt_type != PACKET_HOST) goto drop; + if (unlikely(skb->sk)) + goto drop; + if (skb_warn_if_lro(skb)) goto drop; From e844463743095bc8b230f507de066d047c87476b Mon Sep 17 00:00:00 2001 From: "Arad, Ronen" Date: Fri, 9 Oct 2015 10:35:47 -0700 Subject: [PATCH 62/92] rtnetlink: fix gcc -Wconversion warning RTA_ALIGNTO is currently defined as 4. It has to be 4U to prevent warning for RTA_ALIGN and RTA_DATA expansions when -Wconversion gcc option is enabled. This follows NLMSG_ALIGNTO definition in <linux/netlink.h>. Signed-off-by: Ronen Arad Signed-off-by: David S.
Miller --- include/uapi/linux/rtnetlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 702024769c74..9d8f5d10c1e5 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -160,7 +160,7 @@ struct rtattr { /* Macros to handle rtattributes */ -#define RTA_ALIGNTO 4 +#define RTA_ALIGNTO 4U #define RTA_ALIGN(len) ( ((len)+RTA_ALIGNTO-1) & ~(RTA_ALIGNTO-1) ) #define RTA_OK(rta,len) ((len) >= (int)sizeof(struct rtattr) && \ (rta)->rta_len >= sizeof(struct rtattr) && \ From 4633dfc32c0019bed2996de9bbdbe7f3b518a44e Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Thu, 8 Oct 2015 19:20:14 +0530 Subject: [PATCH 63/92] mac80211: Fix hwflags debugfs file format Commit 30686bf7f5b3 ("mac80211: convert HW flags to unsigned long bitmap") accidentally removed the newline delimiter from the hwflags debugfs file. Fix this by adding back the newline between the HW flags. Cc: stable@vger.kernel.org [4.2] Signed-off-by: Mohammed Shafi Shajakhan [fix commit log] Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/mac80211/debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index ced6bf3be8d6..1560c8482bcb 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -149,7 +149,7 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, for (i = 0; i < NUM_IEEE80211_HW_FLAGS; i++) { if (test_bit(i, local->hw.flags)) - pos += scnprintf(pos, end - pos, "%s", + pos += scnprintf(pos, end - pos, "%s\n", hw_flag_names[i]); } From 3dd03e52a410818c0818924c2ba27a43725f5e94 Mon Sep 17 00:00:00 2001 From: Gerlando Falauto Date: Mon, 12 Oct 2015 09:18:40 +0200 Subject: [PATCH 64/92] net/fsl_pq_mdio: check TBI address for consistency with mapped range When configuring the MDIO subsystem it is also necessary to configure the TBI register. 
Make sure the TBI is contained within the mapped register range in order to: a) make sure the address is computed correctly b) make users aware that we're actually accessing that register In case of error, print a message but continue anyway. Signed-off-by: Gerlando Falauto Cc: Timur Tabi Cc: David S. Miller Cc: Kumar Gala Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fsl_pq_mdio.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c index 3c40f6b99224..5333d0afceba 100644 --- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c +++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c @@ -445,6 +445,16 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev) tbipa = data->get_tbipa(priv->map); + /* + * Add consistency check to make sure TBI is contained + * within the mapped range (not because we would get a + * segfault, rather to catch bugs in computing TBI + * address). Print error message but continue anyway. 
+ */ + if ((void *)tbipa > priv->map + resource_size(&res) - 4) + dev_err(&pdev->dev, "invalid register map (should be at least 0x%04x to contain TBI address)\n", + ((void *)tbipa - priv->map) + 4); + iowrite32be(be32_to_cpup(prop), tbipa); } } From 3bb35ac4978977cf414893ee721f442039c4b8a4 Mon Sep 17 00:00:00 2001 From: Gerlando Falauto Date: Mon, 12 Oct 2015 09:18:41 +0200 Subject: [PATCH 65/92] net/fsl_pq_mdio: fix computed address for the TBI register commit afae5ad78b342f401c28b0bb1adb3cd494cb125a "net/fsl_pq_mdio: streamline probing of MDIO nodes" added support for different types of MDIO devices: 1) Gianfar MDIO nodes that only map the MII registers 2) Gianfar MDIO nodes that map the full MDIO register set 3) eTSEC2 MDIO nodes (which map the full MDIO register set) 4) QE MDIO nodes (which map only the MII registers) However, the implementation for types 1 and 4 would mistakenly assume a mapping of the full MDIO register set, thereby computing the address for the TBI register starting from the containing structure. The TBI register would therefore be accessed at a wrong (much bigger) address, not giving the expected result at all. This patch restores the correct behavior we had prior to the above one. The consequences of this bug are apparent when trying to access a PHY with the same address as the value contained in the initial value of the TBI register (normally 0); in that case you'll get answers from the internal TBI device (even though MDIO/MDC pins are actually *also* toggling on the physical bus!). Beware that you also need to add a fake tbi node to your device tree with an unused address. Notice how this fix is related to commit 220669495bf8b68130a8218607147c7b74c28d2b "powerpc: Add TBI PHY node to first MDIO bus" which fixed the behavior in kernel 3.3, which was later broken by the above commit on kernel 3.7. Signed-off-by: Gerlando Falauto Cc: Timur Tabi Cc: David S. Miller Cc: Kumar Gala Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/fsl_pq_mdio.c | 24 +++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c index 5333d0afceba..55c36230e176 100644 --- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c +++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c @@ -198,17 +198,28 @@ static int fsl_pq_mdio_reset(struct mii_bus *bus) #if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE) /* + * Return the TBIPA address, starting from the address + * of the mapped GFAR MDIO registers (struct gfar) * This is mildly evil, but so is our hardware for doing this. * Also, we have to cast back to struct gfar because of * definition weirdness done in gianfar.h. */ -static uint32_t __iomem *get_gfar_tbipa(void __iomem *p) +static uint32_t __iomem *get_gfar_tbipa_from_mdio(void __iomem *p) { struct gfar __iomem *enet_regs = p; return &enet_regs->tbipa; } +/* + * Return the TBIPA address, starting from the address + * of the mapped GFAR MII registers (gfar_mii_regs[] within struct gfar) + */ +static uint32_t __iomem *get_gfar_tbipa_from_mii(void __iomem *p) +{ + return get_gfar_tbipa_from_mdio(container_of(p, struct gfar, gfar_mii_regs)); +} + /* * Return the TBIPAR address for an eTSEC2 node */ @@ -220,11 +231,12 @@ static uint32_t __iomem *get_etsec_tbipa(void __iomem *p) #if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE) /* - * Return the TBIPAR address for a QE MDIO node + * Return the TBIPAR address for a QE MDIO node, starting from the address + * of the mapped MII registers (struct fsl_pq_mii) */ static uint32_t __iomem *get_ucc_tbipa(void __iomem *p) { - struct fsl_pq_mdio __iomem *mdio = p; + struct fsl_pq_mdio __iomem *mdio = container_of(p, struct fsl_pq_mdio, mii); return &mdio->utbipar; } @@ -300,14 +312,14 @@ static const struct of_device_id fsl_pq_mdio_match[] = { .compatible = "fsl,gianfar-tbi", .data = &(struct fsl_pq_mdio_data) 
{ .mii_offset = 0, - .get_tbipa = get_gfar_tbipa, + .get_tbipa = get_gfar_tbipa_from_mii, }, }, { .compatible = "fsl,gianfar-mdio", .data = &(struct fsl_pq_mdio_data) { .mii_offset = 0, - .get_tbipa = get_gfar_tbipa, + .get_tbipa = get_gfar_tbipa_from_mii, }, }, { @@ -315,7 +327,7 @@ static const struct of_device_id fsl_pq_mdio_match[] = { .compatible = "gianfar", .data = &(struct fsl_pq_mdio_data) { .mii_offset = offsetof(struct fsl_pq_mdio, mii), - .get_tbipa = get_gfar_tbipa, + .get_tbipa = get_gfar_tbipa_from_mdio, }, }, { From ae4a9d6a6362df4e4e492c297157498d299bdb68 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 12 Oct 2015 03:47:17 -0400 Subject: [PATCH 66/92] be2net: fix BE3-R FW download compatibility check In the BE3 FW image, unlike Skyhawk's, the "asic_type_rev" field doesn't track the asic_rev of chip it is compatible with. When asic_type_rev is 0 the image is compatible only with pre-BE3-R chips (asic_rev < 0x10). Fix the current compatibility check to take care of this. We hit this issue when we try to flash old BE3 images (used prior to the release of BE3-R) on pre-BE3-R adapters. Fixes: a6e6ff6eee12f3e ("be2net: simplify UFI compatibility checking") Signed-off-by: Kalesh AP Signed-off-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 7bf51a1a0a77..86eed4761806 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4999,7 +4999,15 @@ static bool be_check_ufi_compatibility(struct be_adapter *adapter, return false; } - return (fhdr->asic_type_rev >= adapter->asic_rev); + /* In BE3 FW images the "asic_type_rev" field doesn't track the + * asic_rev of the chips it is compatible with. 
+ * When asic_type_rev is 0 the image is compatible only with + * pre-BE3-R chips (asic_rev < 0x10) + */ + if (BEx_chip(adapter) && fhdr->asic_type_rev == 0) + return adapter->asic_rev < 0x10; + else + return (fhdr->asic_type_rev >= adapter->asic_rev); } static int be_fw_download(struct be_adapter *adapter, const struct firmware* fw) From 0c8845679f3b8f030f3bafcdf226ed92545fa2a9 Mon Sep 17 00:00:00 2001 From: Suresh Reddy Date: Mon, 12 Oct 2015 03:47:18 -0400 Subject: [PATCH 67/92] be2net: release mcc-lock in a failure case in be_cmd_notify_wait() The mcc/mbox lock is not being released when be_cmd_copy() returns an error. Signed-off-by: Suresh Reddy Signed-off-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index eb323913cd39..9dc5ce11d996 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -851,8 +851,10 @@ static int be_cmd_notify_wait(struct be_adapter *adapter, return status; dest_wrb = be_cmd_copy(adapter, wrb); - if (!dest_wrb) - return -EBUSY; + if (!dest_wrb) { + status = -EBUSY; + goto unlock; + } if (use_mcc(adapter)) status = be_mcc_notify_wait(adapter); @@ -862,6 +864,7 @@ static int be_cmd_notify_wait(struct be_adapter *adapter, if (!status) memcpy(wrb, dest_wrb, sizeof(*wrb)); +unlock: be_cmd_unlock(adapter); return status; } From 8227e9901ded28d071f20313238e91e71d6bfdce Mon Sep 17 00:00:00 2001 From: Suresh Reddy Date: Mon, 12 Oct 2015 03:47:19 -0400 Subject: [PATCH 68/92] be2net: pad skb to meet minimum TX pkt size in BE3 On BE3 chips in SRIOV configs, the TX path stalls when a packet less than 32B is received from the host. A workaround to pad such packets already exists for the Skyhawk and Lancer chips. Use the same workaround for BE3 chips too. 
Signed-off-by: Suresh Reddy Signed-off-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 86eed4761806..821e0142fa2c 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1123,11 +1123,12 @@ static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter, struct sk_buff *skb, struct be_wrb_params *wrb_params) { - /* Lancer, SH-R ASICs have a bug wherein Packets that are 32 bytes or - * less may cause a transmit stall on that port. So the work-around is - * to pad short packets (<= 32 bytes) to a 36-byte length. + /* Lancer, SH and BE3 in SRIOV mode have a bug wherein + * packets that are 32b or less may cause a transmit stall + * on that port. The workaround is to pad such packets + * (len <= 32 bytes) to a minimum length of 36b. */ - if (unlikely(!BEx_chip(adapter) && skb->len <= 32)) { + if (skb->len <= 32) { if (skb_put_padto(skb, 36)) return NULL; } From 72ef3a88fa8e225301aaea296ffc95407de21986 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Mon, 12 Oct 2015 03:47:20 -0400 Subject: [PATCH 69/92] be2net: set pci_func_num while issuing GET_PROFILE_CONFIG cmd The FW requires the pf_num field in the cmd hdr to be set for it to return the specific function's descriptors in the GET_PROFILE_CONFIG cmd. If not set, the FW returns the descriptors of all the functions on the device. If the first descriptor is not what is being queried for, the driver will read wrong data. This patch fixes this issue by using the GET_CNTL_ATTRIB cmd to query the real pci_func_num of a function and then uses it in the GET_PROFILE_CONFIG cmd. Signed-off-by: Somnath Kotur Signed-off-by: Sathya Perla Signed-off-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be.h | 1 + drivers/net/ethernet/emulex/benet/be_cmds.c | 14 +++++++++++--- drivers/net/ethernet/emulex/benet/be_cmds.h | 10 ++++++++-- drivers/net/ethernet/emulex/benet/be_main.c | 9 +++++---- 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 821540913343..d463563e1f70 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -592,6 +592,7 @@ struct be_adapter { int be_get_temp_freq; struct be_hwmon hwmon_info; u8 pf_number; + u8 pci_func_num; struct rss_info rss_info; /* Filters for packets that need to be sent to BMC */ u32 bmc_filt_mask; diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 9dc5ce11d996..790284de5a99 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -2890,6 +2890,7 @@ int be_cmd_get_cntl_attributes(struct be_adapter *adapter) if (!status) { attribs = attribs_cmd.va + sizeof(struct be_cmd_resp_hdr); adapter->hba_port_num = attribs->hba_attribs.phy_port; + adapter->pci_func_num = attribs->pci_func_num; serial_num = attribs->hba_attribs.controller_serial_number; for (i = 0; i < CNTL_SERIAL_NUM_WORDS; i++) adapter->serial_num[i] = le32_to_cpu(serial_num[i]) & @@ -3712,7 +3713,6 @@ int be_cmd_get_func_config(struct be_adapter *adapter, struct be_resources *res) status = -EINVAL; goto err; } - adapter->pf_number = desc->pf_num; be_copy_nic_desc(res, desc); } @@ -3724,7 +3724,10 @@ err: return status; } -/* Will use MBOX only if MCCQ has not been created */ +/* Will use MBOX only if MCCQ has not been created + * non-zero domain => a PF is querying this on behalf of a VF + * zero domain => a PF or a VF is querying this for itself + */ int be_cmd_get_profile_config(struct be_adapter *adapter, struct be_resources *res, u8 query, u8 domain) { @@ -3751,10 
+3754,15 @@ int be_cmd_get_profile_config(struct be_adapter *adapter, OPCODE_COMMON_GET_PROFILE_CONFIG, cmd.size, &wrb, &cmd); - req->hdr.domain = domain; if (!lancer_chip(adapter)) req->hdr.version = 1; req->type = ACTIVE_PROFILE_TYPE; + /* When a function is querying profile information relating to + * itself hdr.pf_number must be set to it's pci_func_num + 1 + */ + req->hdr.domain = domain; + if (domain == 0) + req->hdr.pf_num = adapter->pci_func_num + 1; /* When QUERY_MODIFIABLE_FIELDS_TYPE bit is set, cmd returns the * descriptors with all bits set to "1" for the fields which can be diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index 7d178bdb112e..91155ea74f34 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -289,7 +289,9 @@ struct be_cmd_req_hdr { u32 timeout; /* dword 1 */ u32 request_length; /* dword 2 */ u8 version; /* dword 3 */ - u8 rsvd[3]; /* dword 3 */ + u8 rsvd1; /* dword 3 */ + u8 pf_num; /* dword 3 */ + u8 rsvd2; /* dword 3 */ }; #define RESP_HDR_INFO_OPCODE_SHIFT 0 /* bits 0 - 7 */ @@ -1652,7 +1654,11 @@ struct mgmt_hba_attribs { struct mgmt_controller_attrib { struct mgmt_hba_attribs hba_attribs; - u32 rsvd0[10]; + u32 rsvd0[2]; + u16 rsvd1; + u8 pci_func_num; + u8 rsvd2; + u32 rsvd3[7]; } __packed; struct be_cmd_req_cntl_attribs { diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 821e0142fa2c..eb48a977f8da 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4206,10 +4206,6 @@ static int be_get_config(struct be_adapter *adapter) int status, level; u16 profile_id; - status = be_cmd_get_cntl_attributes(adapter); - if (status) - return status; - status = be_cmd_query_fw_cfg(adapter); if (status) return status; @@ -4408,6 +4404,11 @@ static int be_setup(struct be_adapter *adapter) if (!lancer_chip(adapter)) 
be_cmd_req_native_mode(adapter); + /* Need to invoke this cmd first to get the PCI Function Number */ + status = be_cmd_get_cntl_attributes(adapter); + if (status) + return status; + if (!BE2_chip(adapter) && be_physfn(adapter)) be_alloc_sriov_res(adapter); From 196e3735fa66c8340d6fff9ec81f6201ffc60b7d Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 12 Oct 2015 03:47:21 -0400 Subject: [PATCH 70/92] be2net: remove vlan promisc capability from VF's profile descriptors The commit 435452aa8847 ("Prevent VFs from enabling VLAN promiscuous mode") fixed the PF driver to not include the VLAN promisc capability while provisioning the interface for a VF. But the fix did not remove this capability from the profile descriptor of the VF. This causes the VF driver to request this capability when it tries to create its interface at probe time. This could potentially cause the VF probe to fail if the FW enforces strict checking of the flags based on what was provisioned by the PF. This strict checking is not being done by FW currently but will be fixed in a future version. This patch fixes this issue by updating the VF's profile descriptor so that they match the interface capability flags provisioned by the PF. Fixes: 435452aa8847 ("Prevent VFs from enabling VLAN promiscuous mode") Signed-off-by: Kalesh AP Signed-off-by: Sathya Perla Signed-off-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 790284de5a99..1795c935ff02 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -1987,6 +1987,8 @@ int be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value) be_if_cap_flags(adapter)); } flags &= be_if_cap_flags(adapter); + if (!flags) + return -ENOTSUPP; return __be_cmd_rx_filter(adapter, flags, value); } @@ -3932,12 +3934,16 @@ static void be_fill_vf_res_template(struct be_adapter *adapter, vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS); } - - nic_vft->cap_flags = cpu_to_le32(vf_if_cap_flags); } else { num_vf_qs = 1; } + if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) { + nic_vft->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT); + vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS; + } + + nic_vft->cap_flags = cpu_to_le32(vf_if_cap_flags); nic_vft->rq_count = cpu_to_le16(num_vf_qs); nic_vft->txq_count = cpu_to_le16(num_vf_qs); nic_vft->rssq_count = cpu_to_le16(num_vf_qs); From 87aaf2caed8496404d3809edc30d38d4a4a5d273 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 12 Oct 2015 14:31:01 +0200 Subject: [PATCH 71/92] switchdev: check if the vlan id is in the proper vlan range VLANs 0 and 4095 are reserved and shouldn't be used, add checks to switchdev similar to the bridge. Also make sure ids above 4095 cannot be passed either. Fixes: 47f8328bb1a4 ("switchdev: add new switchdev bridge setlink") Signed-off-by: Nikolay Aleksandrov Acked-by: Scott Feldman Signed-off-by: David S. 
Miller --- net/switchdev/switchdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index fda38f830a10..77f5d17e2612 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -634,6 +635,8 @@ static int switchdev_port_br_afspec(struct net_device *dev, if (nla_len(attr) != sizeof(struct bridge_vlan_info)) return -EINVAL; vinfo = nla_data(attr); + if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) + return -EINVAL; vlan->flags = vinfo->flags; if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { if (vlan->vid_begin) From e332bc67cf5e5e5b71a1aec9750d0791aac65183 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 12 Oct 2015 11:02:08 -0500 Subject: [PATCH 72/92] ipv6: Don't call with rt6_uncached_list_flush_dev As originally written rt6_uncached_list_flush_dev makes no sense when called with dev == NULL as it attempts to flush all uncached routes regardless of network namespace when dev == NULL. Which is simply incorrect behavior. Furthermore at the point rt6_ifdown is called with dev == NULL no more network devices exist in the network namespace so even if the code in rt6_uncached_list_flush_dev were to attempt something sensible it would be meaningless. Therefore remove support in rt6_uncached_list_flush_dev for handling network devices where dev == NULL, and only call rt6_uncached_list_flush_dev when rt6_ifdown is called with a network device. Fixes: 8d0b94afdca8 ("ipv6: Keep track of DST_NOCACHE routes in case of iface down/unregister") Signed-off-by: "Eric W. Biederman" Reviewed-by: Martin KaFai Lau Tested-by: Martin KaFai Lau Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index cb32ce250db0..ed04e29a6aa3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -142,6 +142,9 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) struct net_device *loopback_dev = net->loopback_dev; int cpu; + if (dev == loopback_dev) + return; + for_each_possible_cpu(cpu) { struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); struct rt6_info *rt; @@ -151,14 +154,12 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) struct inet6_dev *rt_idev = rt->rt6i_idev; struct net_device *rt_dev = rt->dst.dev; - if (rt_idev && (rt_idev->dev == dev || !dev) && - rt_idev->dev != loopback_dev) { + if (rt_idev->dev == dev) { rt->rt6i_idev = in6_dev_get(loopback_dev); in6_dev_put(rt_idev); } - if (rt_dev && (rt_dev == dev || !dev) && - rt_dev != loopback_dev) { + if (rt_dev == dev) { rt->dst.dev = loopback_dev; dev_hold(rt->dst.dev); dev_put(rt_dev); @@ -2622,7 +2623,8 @@ void rt6_ifdown(struct net *net, struct net_device *dev) fib6_clean_all(net, fib6_ifdown, &adn); icmp6_clean_all(fib6_ifdown, &adn); - rt6_uncached_list_flush_dev(net, dev); + if (dev) + rt6_uncached_list_flush_dev(net, dev); } struct rt6_mtu_change_arg { From 0f8b8e28fb3241f9fd82ce13bac2b40c35e987e0 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Tue, 13 Oct 2015 12:41:51 -0400 Subject: [PATCH 73/92] tipc: eliminate risk of stalled link synchronization In commit 6e498158a827 ("tipc: move link synch and failover to link aggregation level") we introduced a new mechanism for performing link failover and synchronization. We have now detected a bug in this mechanism. During link synchronization we use the arrival of any packet on the tunnel link to trig a check for whether it has reached the synchronization point or not. 
This has turned out to be too permissive, since it may cause an arriving non-last SYNCH packet to end the synch state, just to see the next SYNCH packet initiate a new synch state with a new, higher synch point. This is not fatal, but should be avoided, because it may significantly extend the synchronization period, while at the same time we are not allowed to send NACKs if packets are lost. In the worst case, a low-traffic user may see its traffic stall until a LINK_PROTOCOL state message trigs the link to leave synchronization state. At the same time, LINK_PROTOCOL packets which happen to have a (non- valid) sequence number lower than the tunnel link's rcv_nxt value will be consistently dropped, and will never be able to resolve the situation described above. We fix this by exempting LINK_PROTOCOL packets from the sequence number check, as they should be. We also reduce (but don't completely eliminate) the risk of entering multiple synchronization states by only allowing the (logically) first SYNCH packet to initiate a synchronization state. This works independently of actual packet arrival order. Fixes: commit 6e498158a827 ("tipc: move link synch and failover to link aggregation level") Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/node.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 703875fd6cde..2c32a83037a3 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1116,7 +1116,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, } /* Ignore duplicate packets */ - if (less(oseqno, rcv_nxt)) + if ((usr != LINK_PROTOCOL) && less(oseqno, rcv_nxt)) return true; /* Initiate or update failover mode if applicable */ @@ -1146,8 +1146,8 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, if (!pl || !tipc_link_is_up(pl)) return true; - /* Initiate or update synch mode if applicable */ - if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG)) { + /* Initiate synch mode if applicable */ + if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG) && (oseqno == 1)) { syncpt = iseqno + exp_pkts - 1; if (!tipc_link_is_up(l)) { tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); From 077cb37fcf6f00a45f375161200b5ee0cd4e937b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 14 Oct 2015 01:09:40 -0700 Subject: [PATCH 74/92] ethtool: Use kcalloc instead of kmalloc for ethtool_get_strings It seems that kernel memory can leak into userspace by a kmalloc, ethtool_get_strings, then copy_to_user sequence. Avoid this by using kcalloc to zero fill the copied buffer. Signed-off-by: Joe Perches Acked-by: Ben Hutchings Signed-off-by: David S. 
Miller --- net/core/ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index b495ab1797fa..29edf74846fc 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1284,7 +1284,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) gstrings.len = ret; - data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); + data = kcalloc(gstrings.len, ETH_GSTRING_LEN, GFP_USER); if (!data) return -ENOMEM; From dde4b5ae65de659b9ec64bafdde0430459fcb495 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 14 Oct 2015 09:23:18 -0400 Subject: [PATCH 75/92] tipc: move fragment importance field to new header position In commit e3eea1eb47a ("tipc: clean up handling of message priorities") we introduced a field in the packet header for keeping track of the priority of fragments, since this value is not present in the specified protocol header. Since the value so far only is used at the transmitting end of the link, we have not yet officially defined it as part of the protocol. Unfortunately, the field we use for keeping this value, bits 13-15 in word 5, has turned out to be a poor choice; it is already used by the broadcast protocol for carrying the 'network id' field of the sending node. Since packet fragments also need to be transported across the broadcast protocol, the risk of conflict is obvious, and we see this happen when we use network identities larger than 2^13-1. This has escaped our testing because we have so far only been using small network id values. We now move this field to bits 0-2 in word 9, a field that is guaranteed to be unused by all involved protocols. Fixes: e3eea1eb47a ("tipc: clean up handling of message priorities") Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/msg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/msg.h b/net/tipc/msg.h index a82c5848d4bc..5351a3f97e8e 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -357,7 +357,7 @@ static inline u32 msg_importance(struct tipc_msg *m) if (likely((usr <= TIPC_CRITICAL_IMPORTANCE) && !msg_errcode(m))) return usr; if ((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER)) - return msg_bits(m, 5, 13, 0x7); + return msg_bits(m, 9, 0, 0x7); return TIPC_SYSTEM_IMPORTANCE; } @@ -366,7 +366,7 @@ static inline void msg_set_importance(struct tipc_msg *m, u32 i) int usr = msg_user(m); if (likely((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER))) - msg_set_bits(m, 5, 13, 0x7, i); + msg_set_bits(m, 9, 0, 0x7, i); else if (i < TIPC_SYSTEM_IMPORTANCE) msg_set_user(m, i); else From 53ca376eec4eb635e2249c8e89093f499e180731 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 15 Oct 2015 08:21:55 +0200 Subject: [PATCH 76/92] mlxsw: core: Fix race condition in __mlxsw_emad_transmit Under certain conditions EMAD responses can be returned from the device even before setting trans_active. This will cause the EMAD Rx listener to drop the EMAD response - as there are no active transactions - and timeouts will be generated. Fix this by setting trans_active before transmitting the EMAD skb. Fixes: 4ec14b7634b2 ("mlxsw: Add interface to access registers and process events") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index dbcaf5df8967..28c19cc1a17c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -374,26 +374,31 @@ static int __mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, int err; int ret; + mlxsw_core->emad.trans_active = true; + err = mlxsw_core_skb_transmit(mlxsw_core->driver_priv, skb, tx_info); if (err) { dev_err(mlxsw_core->bus_info->dev, "Failed to transmit EMAD (tid=%llx)\n", mlxsw_core->emad.tid); dev_kfree_skb(skb); - return err; + goto trans_inactive_out; } - mlxsw_core->emad.trans_active = true; ret = wait_event_timeout(mlxsw_core->emad.wait, !(mlxsw_core->emad.trans_active), msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS)); if (!ret) { dev_warn(mlxsw_core->bus_info->dev, "EMAD timed-out (tid=%llx)\n", mlxsw_core->emad.tid); - mlxsw_core->emad.trans_active = false; - return -EIO; + err = -EIO; + goto trans_inactive_out; } return 0; + +trans_inactive_out: + mlxsw_core->emad.trans_active = false; + return err; } static int mlxsw_emad_process_status(struct mlxsw_core *mlxsw_core, From 168b8a25c0ac30f427bfe6ad547779c4c363d042 Mon Sep 17 00:00:00 2001 From: Jakub Pawlowski Date: Fri, 16 Oct 2015 10:07:49 +0300 Subject: [PATCH 77/92] Bluetooth: Fix double scan updates When disable/enable scan command is issued twice, some controllers will return an error for the second request, i.e. requests with this command will fail on some controllers, and succeed on others. This patch makes sure that unnecessary scan disable/enable commands are not issued. When adding device to the auto connect whitelist when there is pending connect attempt, there is no need to update scan. hci_connect_le_scan_cleanup is conditionally executing hci_conn_params_del, that is calling hci_update_background_scan. 
Make the other case also update scan, and remove redundant call from hci_connect_le_scan_remove. When stopping interleaved discovery the state should be set to stopped only when both LE scanning and discovery have stopped. Signed-off-by: Jakub Pawlowski Acked-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 7 ++++--- net/bluetooth/hci_event.c | 7 ++++++- net/bluetooth/mgmt.c | 6 +++++- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index b4548c739a64..2ebcaaa6b855 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -91,10 +91,12 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn) * autoconnect action, remove them completely. If they are, just unmark * them as waiting for connection, by clearing explicit_connect field. */ - if (params->auto_connect == HCI_AUTO_CONN_EXPLICIT) + if (params->auto_connect == HCI_AUTO_CONN_EXPLICIT) { hci_conn_params_del(conn->hdev, bdaddr, bdaddr_type); - else + } else { params->explicit_connect = false; + hci_update_background_scan(conn->hdev); + } } /* This function requires the caller holds hdev->lock */ @@ -103,7 +105,6 @@ static void hci_connect_le_scan_remove(struct hci_conn *conn) hci_connect_le_scan_cleanup(conn); hci_conn_hash_del(conn->hdev, conn); - hci_update_background_scan(conn->hdev); } static void hci_acl_create_connection(struct hci_conn *conn) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 186041866315..509e41575633 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -55,7 +55,12 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) wake_up_bit(&hdev->flags, HCI_INQUIRY); hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + /* Set discovery state to stopped if we're not doing LE active + * scanning. 
+ */ + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN) || + hdev->le_scan_type != LE_SCAN_ACTIVE) + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); hci_dev_unlock(hdev); hci_conn_check_pending(hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ccaf5a436d8f..9a9bbc990d4f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6124,7 +6124,11 @@ static int hci_conn_params_set(struct hci_request *req, bdaddr_t *addr, case HCI_AUTO_CONN_ALWAYS: if (!is_connected(hdev, addr, addr_type)) { list_add(&params->action, &hdev->pend_le_conns); - __hci_update_background_scan(req); + /* If we are in scan phase of connecting, we were + * already added to pend_le_conns and scanning. + */ + if (params->auto_connect != HCI_AUTO_CONN_EXPLICIT) + __hci_update_background_scan(req); } break; } From b958f9a3e87766a88036616389eaaf3ad3bd5fc8 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 16 Oct 2015 10:07:50 +0300 Subject: [PATCH 78/92] Bluetooth: Fix reference counting for LE-scan based connections The code should never directly call hci_conn_hash_del since many cleanup & reference counting updates would be lost. Normally hci_conn_del is the right thing to do, but in the case of a connection doing LE scanning this could cause a deadlock due to doing a cancel_delayed_work_sync() on the same work callback that we were called from. Connections in the LE scanning state actually need very little cleanup - just a small subset of hci_conn_del. To solve the issue, refactor out these essential pieces into a new hci_conn_cleanup() function and call that from the two necessary places. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 53 +++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2ebcaaa6b855..4c240c1cb2cb 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -99,12 +99,41 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn) } } +static void hci_conn_cleanup(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + + if (test_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags)) + hci_conn_params_del(conn->hdev, &conn->dst, conn->dst_type); + + hci_chan_list_flush(conn); + + hci_conn_hash_del(hdev, conn); + + if (hdev->notify) + hdev->notify(hdev, HCI_NOTIFY_CONN_DEL); + + hci_conn_del_sysfs(conn); + + debugfs_remove_recursive(conn->debugfs); + + hci_dev_put(hdev); + + hci_conn_put(conn); +} + /* This function requires the caller holds hdev->lock */ static void hci_connect_le_scan_remove(struct hci_conn *conn) { hci_connect_le_scan_cleanup(conn); - hci_conn_hash_del(conn->hdev, conn); + /* We can't call hci_conn_del here since that would deadlock + * with trying to call cancel_delayed_work_sync(&conn->disc_work). + * Instead, call just hci_conn_cleanup() which contains the bare + * minimum cleanup operations needed for a connection in this + * state. 
+ */ + hci_conn_cleanup(conn); } static void hci_acl_create_connection(struct hci_conn *conn) @@ -582,27 +611,17 @@ int hci_conn_del(struct hci_conn *conn) } } - hci_chan_list_flush(conn); - if (conn->amp_mgr) amp_mgr_put(conn->amp_mgr); - hci_conn_hash_del(hdev, conn); - if (hdev->notify) - hdev->notify(hdev, HCI_NOTIFY_CONN_DEL); - skb_queue_purge(&conn->data_q); - hci_conn_del_sysfs(conn); - - debugfs_remove_recursive(conn->debugfs); - - if (test_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags)) - hci_conn_params_del(conn->hdev, &conn->dst, conn->dst_type); - - hci_dev_put(hdev); - - hci_conn_put(conn); + /* Remove the connection from the list and cleanup its remaining + * state. This is a separate function since for some cases like + * BT_CONNECT_SCAN we *only* want the cleanup part without the + * rest of hci_conn_del. + */ + hci_conn_cleanup(conn); return 0; } From 49c509220db990ad003060db2267b9bbb597cd94 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 16 Oct 2015 10:07:51 +0300 Subject: [PATCH 79/92] Bluetooth: Fix LE reconnection logic We can't use hci_explicit_connect_lookup() since that would only cover explicit connections, leaving normal reconnections completely untouched. Not using it in turn means leaving out entries in pend_le_reports. To fix this and simplify the logic move conn params from the reports list to the pend_le_conns list for the duration of an explicit connect. Once the connect is complete move the params back to the pend_le_reports list. This also means that the explicit connect lookup function only needs to look into the pend_le_conns list. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 4 ++-- net/bluetooth/hci_core.c | 7 ------- net/bluetooth/hci_event.c | 4 ++-- net/bluetooth/mgmt.c | 5 ++++- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 4c240c1cb2cb..d5c06eeab4a3 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1000,8 +1000,8 @@ static int hci_explicit_conn_params_set(struct hci_request *req, /* If we created new params, or existing params were marked as disabled, * mark them to be used just once to connect. */ - if (params->auto_connect == HCI_AUTO_CONN_DISABLED) { - params->auto_connect = HCI_AUTO_CONN_EXPLICIT; + if (params->auto_connect == HCI_AUTO_CONN_DISABLED || + params->auto_connect == HCI_AUTO_CONN_REPORT) { list_del_init(&params->action); list_add(&params->action, &hdev->pend_le_conns); } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index adcbc74c2432..e837539452fb 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2861,13 +2861,6 @@ struct hci_conn_params *hci_explicit_connect_lookup(struct hci_dev *hdev, return param; } - list_for_each_entry(param, &hdev->pend_le_reports, action) { - if (bacmp(&param->addr, addr) == 0 && - param->addr_type == addr_type && - param->explicit_connect) - return param; - } - return NULL; } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 509e41575633..bc31099d3b5b 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4653,8 +4653,8 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, /* If we're not connectable only connect devices that we have in * our pend_le_conns list. 
*/ - params = hci_explicit_connect_lookup(hdev, addr, addr_type); - + params = hci_pend_le_action_lookup(&hdev->pend_le_conns, addr, + addr_type); if (!params) return NULL; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 9a9bbc990d4f..4dbfe01546b3 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6117,7 +6117,10 @@ static int hci_conn_params_set(struct hci_request *req, bdaddr_t *addr, __hci_update_background_scan(req); break; case HCI_AUTO_CONN_REPORT: - list_add(&params->action, &hdev->pend_le_reports); + if (params->explicit_connect) + list_add(&params->action, &hdev->pend_le_conns); + else + list_add(&params->action, &hdev->pend_le_reports); __hci_update_background_scan(req); break; case HCI_AUTO_CONN_DIRECT: From 679d2b6f9d742b3f091868bd9a0634647ce7e782 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 16 Oct 2015 10:07:52 +0300 Subject: [PATCH 80/92] Bluetooth: Fix remove_device behavior for explicit connects Devices undergoing an explicit connect should not have their conn_params struct removed by the mgmt Remove Device command. This patch fixes the necessary checks in the command handler to correct the behavior. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 4dbfe01546b3..0ed94e6f4de9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6386,7 +6386,8 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (params->auto_connect == HCI_AUTO_CONN_DISABLED) { + if (params->auto_connect == HCI_AUTO_CONN_DISABLED || + params->auto_connect == HCI_AUTO_CONN_EXPLICIT) { err = cmd->cmd_complete(cmd, MGMT_STATUS_INVALID_PARAMS); mgmt_pending_remove(cmd); @@ -6422,6 +6423,10 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, if (p->auto_connect == HCI_AUTO_CONN_DISABLED) continue; device_removed(sk, hdev, &p->addr, p->addr_type); + if (p->explicit_connect) { + p->auto_connect = HCI_AUTO_CONN_EXPLICIT; + continue; + } list_del(&p->action); list_del(&p->list); kfree(p); From 9ad3e6ffe189a988389d88ce33101668cb2d54c6 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 16 Oct 2015 10:07:53 +0300 Subject: [PATCH 81/92] Bluetooth: Fix conn_params list update in hci_connect_le_scan_cleanup After clearing the params->explicit_connect variable the parameters may need to be either added back to the right list or potentially left absent from both the le_reports and the le_conns lists. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index d5c06eeab4a3..fe99025fb649 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -91,12 +91,27 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn) * autoconnect action, remove them completely. If they are, just unmark * them as waiting for connection, by clearing explicit_connect field. 
*/ - if (params->auto_connect == HCI_AUTO_CONN_EXPLICIT) { + params->explicit_connect = false; + + list_del_init(&params->action); + + switch (params->auto_connect) { + case HCI_AUTO_CONN_EXPLICIT: hci_conn_params_del(conn->hdev, bdaddr, bdaddr_type); - } else { - params->explicit_connect = false; - hci_update_background_scan(conn->hdev); + /* return instead of break to avoid duplicate scan update */ + return; + case HCI_AUTO_CONN_DIRECT: + case HCI_AUTO_CONN_ALWAYS: + list_add(&params->action, &conn->hdev->pend_le_conns); + break; + case HCI_AUTO_CONN_REPORT: + list_add(&params->action, &conn->hdev->pend_le_reports); + break; + default: + break; } + + hci_update_background_scan(conn->hdev); } static void hci_conn_cleanup(struct hci_conn *conn) From 5157b8a503fa834e8569c7fed06981e3d3d53db0 Mon Sep 17 00:00:00 2001 From: Jakub Pawlowski Date: Fri, 16 Oct 2015 10:07:54 +0300 Subject: [PATCH 82/92] Bluetooth: Fix initializing conn_params in scan phase This patch makes sure that conn_params that were created just for explicit_connect, will get properly deleted during cleanup. Signed-off-by: Jakub Pawlowski Acked-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 22 +++++++++++++++------- net/bluetooth/mgmt.c | 6 +++++- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index fe99025fb649..2dda439c8cb8 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1008,15 +1008,23 @@ static int hci_explicit_conn_params_set(struct hci_request *req, if (is_connected(hdev, addr, addr_type)) return -EISCONN; - params = hci_conn_params_add(hdev, addr, addr_type); - if (!params) - return -EIO; + params = hci_conn_params_lookup(hdev, addr, addr_type); + if (!params) { + params = hci_conn_params_add(hdev, addr, addr_type); + if (!params) + return -ENOMEM; - /* If we created new params, or existing params were marked as disabled, - * mark them to be used just once to connect. 
- */ + /* If we created new params, mark them to be deleted in + * hci_connect_le_scan_cleanup. It's different case than + * existing disabled params, those will stay after cleanup. + */ + params->auto_connect = HCI_AUTO_CONN_EXPLICIT; + } + + /* We're trying to connect, so make sure params are at pend_le_conns */ if (params->auto_connect == HCI_AUTO_CONN_DISABLED || - params->auto_connect == HCI_AUTO_CONN_REPORT) { + params->auto_connect == HCI_AUTO_CONN_REPORT || + params->auto_connect == HCI_AUTO_CONN_EXPLICIT) { list_del_init(&params->action); list_add(&params->action, &hdev->pend_le_conns); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 0ed94e6f4de9..c4fe2fee753f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3545,6 +3545,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, auth_type); } else { u8 addr_type; + struct hci_conn_params *p; /* Convert from L2CAP channel address type to HCI address type */ @@ -3562,7 +3563,10 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, * If connection parameters already exist, then they * will be kept and this function does nothing. */ - hci_conn_params_add(hdev, &cp->addr.bdaddr, addr_type); + p = hci_conn_params_add(hdev, &cp->addr.bdaddr, addr_type); + + if (p->auto_connect == HCI_AUTO_CONN_EXPLICIT) + p->auto_connect = HCI_AUTO_CONN_DISABLED; conn = hci_connect_le_scan(hdev, &cp->addr.bdaddr, addr_type, sec_level, From ebfa45f0d952e5e7bb30a7f9daaad681de138728 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 15 Oct 2015 16:39:57 -0700 Subject: [PATCH 83/92] ipv6: Move common init code for rt6_info to a new function rt6_info_init() Introduce rt6_info_init() to do the common init work for 'struct rt6_info' (after calling dst_alloc). It is a prep work to fix the rt6_info init logic in the ip6_blackhole_route(). 
Signed-off-by: Martin KaFai Lau Cc: Hannes Frederic Sowa Cc: Julian Anastasov Cc: Phil Sutter Cc: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/route.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ed04e29a6aa3..4198017a5aa7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -319,6 +319,15 @@ static const struct rt6_info ip6_blk_hole_entry_template = { #endif +static void rt6_info_init(struct rt6_info *rt) +{ + struct dst_entry *dst = &rt->dst; + + memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); + INIT_LIST_HEAD(&rt->rt6i_siblings); + INIT_LIST_HEAD(&rt->rt6i_uncached); +} + /* allocate dst with ip6_dst_ops */ static struct rt6_info *__ip6_dst_alloc(struct net *net, struct net_device *dev, @@ -327,13 +336,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net, struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0, DST_OBSOLETE_FORCE_CHK, flags); - if (rt) { - struct dst_entry *dst = &rt->dst; + if (rt) + rt6_info_init(rt); - memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); - INIT_LIST_HEAD(&rt->rt6i_siblings); - INIT_LIST_HEAD(&rt->rt6i_uncached); - } return rt; } From 0a1f59620068fb82a2e2aded202e62f4bb856d52 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 15 Oct 2015 16:39:58 -0700 Subject: [PATCH 84/92] ipv6: Initialize rt6_info properly in ip6_blackhole_route() ip6_blackhole_route() does not initialize the newly allocated rt6_info properly. This patch: 1. Call rt6_info_init() to initialize rt6i_siblings and rt6i_uncached 2. The current rt->dst._metrics init code is incorrect: - 'rt->dst._metrics = ort->dst._metrics' is not always safe - Not sure what dst_copy_metrics() is trying to do here considering ip6_rt_blackhole_cow_metrics() always returns NULL Fix: - Always do dst_copy_metrics() - Replace ip6_rt_blackhole_cow_metrics() with dst_cow_metrics_generic() 3. Mask out the RTF_PCPU bit from the newly allocated blackhole route. 
This bug triggers an oops (reported by Phil Sutter) in rt6_get_cookie(). It is because RTF_PCPU is set while rt->dst.from is NULL. Fixes: d52d3997f843 ("ipv6: Create percpu rt6_info") Signed-off-by: Martin KaFai Lau Reported-by: Phil Sutter Tested-by: Phil Sutter Cc: Hannes Frederic Sowa Cc: Julian Anastasov Cc: Phil Sutter Cc: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/route.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4198017a5aa7..968f31c01f89 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -248,12 +248,6 @@ static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, { } -static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst, - unsigned long old) -{ - return NULL; -} - static struct dst_ops ip6_dst_blackhole_ops = { .family = AF_INET6, .destroy = ip6_dst_destroy, @@ -262,7 +256,7 @@ static struct dst_ops ip6_dst_blackhole_ops = { .default_advmss = ip6_default_advmss, .update_pmtu = ip6_rt_blackhole_update_pmtu, .redirect = ip6_rt_blackhole_redirect, - .cow_metrics = ip6_rt_blackhole_cow_metrics, + .cow_metrics = dst_cow_metrics_generic, .neigh_lookup = ip6_neigh_lookup, }; @@ -1219,24 +1213,20 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0); if (rt) { + rt6_info_init(rt); + new = &rt->dst; - - memset(new + 1, 0, sizeof(*rt) - sizeof(*new)); - new->__use = 1; new->input = dst_discard; new->output = dst_discard_sk; - if (dst_metrics_read_only(&ort->dst)) - new->_metrics = ort->dst._metrics; - else - dst_copy_metrics(new, &ort->dst); + dst_copy_metrics(new, &ort->dst); rt->rt6i_idev = ort->rt6i_idev; if (rt->rt6i_idev) in6_dev_hold(rt->rt6i_idev); rt->rt6i_gateway = ort->rt6i_gateway; - rt->rt6i_flags = ort->rt6i_flags; + rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU; rt->rt6i_metric = 0; memcpy(&rt->rt6i_dst, 
&ort->rt6i_dst, sizeof(struct rt6key)); From 5f715c097965c0ad037f64393d0b95c50287775b Mon Sep 17 00:00:00 2001 From: Andrej Ota Date: Thu, 15 Oct 2015 00:14:37 +0200 Subject: [PATCH 85/92] via-rhine: fix VLAN receive handling regression. Because eth_type_trans() consumes ethernet header worth of bytes, a call to read TCI from end of packet using rhine_rx_vlan_tag() no longer works as it's reading from an invalid offset. Tested to be working on PCEngines Alix board. Fixes: 810f19bcb862 ("via-rhine: add consistent memory barrier in vlan receive code.") Signed-off-by: Andrej Ota Acked-by: Francois Romieu Signed-off-by: David S. Miller --- drivers/net/ethernet/via/via-rhine.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index a83263743665..2b7550c43f78 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -2134,10 +2134,11 @@ static int rhine_rx(struct net_device *dev, int limit) } skb_put(skb, pkt_len); - skb->protocol = eth_type_trans(skb, dev); rhine_rx_vlan_tag(skb, desc, data_size); + skb->protocol = eth_type_trans(skb, dev); + netif_receive_skb(skb); u64_stats_update_begin(&rp->rx_stats.syncp); From c7c49b8fde26b74277188bdc6c9dca38db6fa35b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Sep 2015 18:52:25 -0700 Subject: [PATCH 86/92] net: add pfmemalloc check in sk_add_backlog() Greg reported crashes hitting the following check in __sk_backlog_rcv() BUG_ON(!sock_flag(sk, SOCK_MEMALLOC)); The pfmemalloc bit is currently checked in sk_filter(). This works correctly for TCP, because sk_filter() is run in tcp_v[46]_rcv() before hitting the prequeue or backlog checks. For UDP or other protocols, this does not work, because the sk_filter() is run from sock_queue_rcv_skb(), which might be called _after_ backlog queuing if socket is owned by user by the time packet is processed by softirq handler. 
Fixes: b4b9e35585089 ("netvm: set PF_MEMALLOC as appropriate during SKB processing") Signed-off-by: Eric Dumazet Reported-by: Greg Thelen Signed-off-by: David S. Miller --- include/net/sock.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/net/sock.h b/include/net/sock.h index 7aa78440559a..e23717013a4e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -828,6 +828,14 @@ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *s if (sk_rcvqueues_full(sk, limit)) return -ENOBUFS; + /* + * If the skb was allocated from pfmemalloc reserves, only + * allow SOCK_MEMALLOC sockets to use it as this socket is + * helping free memory + */ + if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) + return -ENOMEM; + __sk_add_backlog(sk, skb); sk->sk_backlog.len += skb->truesize; return 0; From db65a3aaf29ecce2e34271d52e8d2336b97bd9fe Mon Sep 17 00:00:00 2001 From: "Arad, Ronen" Date: Thu, 15 Oct 2015 01:55:17 -0700 Subject: [PATCH 87/92] netlink: Trim skb to alloc size to avoid MSG_TRUNC netlink_dump() allocates skb based on the calculated min_dump_alloc or a per socket max_recvmsg_len. min_alloc_size is maximum space required for any single netdev attributes as calculated by rtnl_calcit(). max_recvmsg_len tracks the user provided buffer to netlink_recvmsg. It is capped at 16KiB. The intention is to avoid small allocations and to minimize the number of calls required to obtain dump information for all net devices. netlink_dump packs as many small messages as could fit within an skb that was sized for the largest single netdev information. The actual space available within an skb is larger than what is requested. It could be much larger and up to near 2x with align to next power of 2 approach. Allowing netlink_dump to use all the space available within the allocated skb increases the buffer size a user has to provide to avoid truncation (i.e. MSG_TRUNC flag set). 
It was observed that with many VLANs configured on at least one netdev, a larger buffer of near 64KiB was necessary to avoid "Message truncated" error in "ip link" or "bridge [-c[ompressvlans]] vlan show" when min_alloc_size was only little over 32KiB. This patch trims skb to allocated size in order to allow the user to avoid truncation with more reasonable buffer size. Signed-off-by: Ronen Arad Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8f060d7f9a0e..0a49a8c7c564 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2785,6 +2785,7 @@ static int netlink_dump(struct sock *sk) struct sk_buff *skb = NULL; struct nlmsghdr *nlh; int len, err = -ENOBUFS; + int alloc_min_size; int alloc_size; mutex_lock(nlk->cb_mutex); @@ -2793,9 +2794,6 @@ static int netlink_dump(struct sock *sk) goto errout_skb; } - cb = &nlk->cb; - alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); - if (!netlink_rx_is_mmaped(sk) && atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) goto errout_skb; @@ -2805,23 +2803,35 @@ static int netlink_dump(struct sock *sk) * to reduce number of system calls on dump operations, if user * ever provided a big enough buffer. 
*/ - if (alloc_size < nlk->max_recvmsg_len) { - skb = netlink_alloc_skb(sk, - nlk->max_recvmsg_len, - nlk->portid, + cb = &nlk->cb; + alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); + + if (alloc_min_size < nlk->max_recvmsg_len) { + alloc_size = nlk->max_recvmsg_len; + skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); - /* available room should be exact amount to avoid MSG_TRUNC */ - if (skb) - skb_reserve(skb, skb_tailroom(skb) - - nlk->max_recvmsg_len); } - if (!skb) + if (!skb) { + alloc_size = alloc_min_size; skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL); + } if (!skb) goto errout_skb; + + /* Trim skb to allocated size. User is expected to provide buffer as + * large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at + * netlink_recvmsg())). dump will pack as many smaller messages as + * could fit within the allocated skb. skb is typically allocated + * with larger space than required (could be as much as near 2x the + * requested size with align to next power of 2 approach). Allowing + * dump to use the excess space makes it difficult for a user to have a + * reasonable static buffer based on the expected largest dump of a + * single netdev. The outcome is MSG_TRUNC error. + */ + skb_reserve(skb, skb_tailroom(skb) - alloc_size); netlink_skb_set_owner_r(skb, sk); len = cb->dump(skb, cb); From 80083a3c02ef4451edeef31a6f9afe130078f2bf Mon Sep 17 00:00:00 2001 From: Chia-Sheng Chang Date: Fri, 16 Oct 2015 02:00:21 +0800 Subject: [PATCH 88/92] net: asix: add support for the Billionton GUSB2AM-1G-B USB adapter Just another AX88178-based 10/100/1000 USB-to-Ethernet dongle. This one shows up in lsusb as: "ID 08dd:0114 Billionton Systems, Inc". Signed-off-by: Chia-Sheng Chang Cc: "David S. 
Miller" Cc: Luca Ceresoli Cc: Christoph Jaeger Cc: "Woojung.Huh@microchip.com" Cc: Matthew Garrett Cc: Markus Elfring Cc: Charles Keepax Cc: netdev@vger.kernel.org Cc: linux-usb@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/usb/Kconfig | 1 + drivers/net/usb/asix_devices.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index fbb9325d1f6e..e66805eeffb4 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -164,6 +164,7 @@ config USB_NET_AX8817X * Aten UC210T * ASIX AX88172 * Billionton Systems, USB2AR + * Billionton Systems, GUSB2AM-1G-B * Buffalo LUA-U2-KTX * Corega FEther USB2-TX * D-Link DUB-E100 diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 1173a24feda3..5cabefc23494 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -958,6 +958,10 @@ static const struct usb_device_id products [] = { // Billionton Systems, USB2AR USB_DEVICE (0x08dd, 0x90ff), .driver_info = (unsigned long) &ax8817x_info, +}, { + // Billionton Systems, GUSB2AM-1G-B + USB_DEVICE(0x08dd, 0x0114), + .driver_info = (unsigned long) &ax88178_info, }, { // ATEN UC210T USB_DEVICE (0x0557, 0x2009), From 833b8f18adfcca04070a8a42d545a4553379d36f Mon Sep 17 00:00:00 2001 From: Insu Yun Date: Thu, 15 Oct 2015 18:02:28 +0000 Subject: [PATCH 89/92] xen-netback: correctly check failed allocation Since vzalloc can be failed in memory pressure, writes -ENOMEM to xenstore to indicate error. Signed-off-by: Insu Yun Acked-by: Wei Liu Signed-off-by: David S. 
Miller --- drivers/net/xen-netback/xenbus.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 929a6e7e5ecf..56ebd8267386 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -788,6 +788,12 @@ static void connect(struct backend_info *be) /* Use the number of queues requested by the frontend */ be->vif->queues = vzalloc(requested_num_queues * sizeof(struct xenvif_queue)); + if (!be->vif->queues) { + xenbus_dev_fatal(dev, -ENOMEM, + "allocating queues"); + return; + } + be->vif->num_queues = requested_num_queues; be->vif->stalled_queues = requested_num_queues; From 740dbc289155fdeed32438396370e70b684cd45e Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 16 Oct 2015 11:08:18 -0700 Subject: [PATCH 90/92] openvswitch: Scrub skb between namespaces If OVS receives a packet from another namespace, then the packet should be scrubbed. However, people have already begun to rely on the behaviour that skb->mark is preserved across namespaces, so retain this one field. This is mainly to address information leakage between namespaces when using OVS internal ports, but by placing it in ovs_vport_receive() it is more generally applicable, meaning it should not be overlooked if other port types are allowed to be moved into namespaces in future. Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Acked-by: Thomas Graf Signed-off-by: David S. 
Miller --- net/openvswitch/vport.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index fc5c0b9ccfe9..12a36ac21eda 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -444,6 +444,15 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, OVS_CB(skb)->input_vport = vport; OVS_CB(skb)->mru = 0; + if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) { + u32 mark; + + mark = skb->mark; + skb_scrub_packet(skb, true); + skb->mark = mark; + tun_info = NULL; + } + /* Extract flow from 'skb' into 'key'. */ error = ovs_flow_key_extract(tun_info, skb, &key); if (unlikely(error)) { From e277de5f3f7d6eed2a41920983c44c4df386b871 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 16 Oct 2015 16:36:00 -0700 Subject: [PATCH 91/92] tunnels: Don't require remote endpoint or ID during creation. Before lightweight tunnels existed, it really didn't make sense to create a tunnel that was not fully specified, such as without a destination IP address - the resulting packets would go nowhere. However, with lightweight tunnels, the opposite is true - it doesn't make sense to require this information when it will be provided later on by the route. This loosens the requirements for this information. An alternative would be to allow the relaxed version only when COLLECT_METADATA is enabled. However, since there are several variations on this theme (such as NBMA tunnels in GRE), just dropping the restrictions seems the most consistent across tunnels and with the existing configuration. CC: John Linville Signed-off-by: Jesse Gross Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller --- drivers/net/geneve.c | 12 ++++++------ drivers/net/vxlan.c | 7 +++---- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 8f5c02eed47d..cde29f8a37bf 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -870,14 +870,14 @@ static int geneve_newlink(struct net *net, struct net_device *dev, __be16 dst_port = htons(GENEVE_UDP_PORT); __u8 ttl = 0, tos = 0; bool metadata = false; - __be32 rem_addr; - __u32 vni; + __be32 rem_addr = 0; + __u32 vni = 0; - if (!data[IFLA_GENEVE_ID] || !data[IFLA_GENEVE_REMOTE]) - return -EINVAL; + if (data[IFLA_GENEVE_ID]) + vni = nla_get_u32(data[IFLA_GENEVE_ID]); - vni = nla_get_u32(data[IFLA_GENEVE_ID]); - rem_addr = nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); + if (data[IFLA_GENEVE_REMOTE]) + rem_addr = nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); if (data[IFLA_GENEVE_TTL]) ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index bbac1d35ed4e..afdc65fd5bc5 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2745,11 +2745,10 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, struct vxlan_config conf; int err; - if (!data[IFLA_VXLAN_ID]) - return -EINVAL; - memset(&conf, 0, sizeof(conf)); - conf.vni = nla_get_u32(data[IFLA_VXLAN_ID]); + + if (data[IFLA_VXLAN_ID]) + conf.vni = nla_get_u32(data[IFLA_VXLAN_ID]); if (data[IFLA_VXLAN_GROUP]) { conf.remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]); From 37850e37fcfb4dd831bc9e33221e8c49a732956f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 17 Oct 2015 14:22:46 -0700 Subject: [PATCH 92/92] net: bcmgenet: Fix early link interrupt enabling Link interrupts are enabled in init_umac(), which is too early for us to process them since we do not yet have a valid PHY device pointer. On BCM7425 chips for instance, we will crash calling phy_mac_interrupt() because phydev is NULL. 
Fix this by moving the link interrupts enabling in bcmgenet_netif_start(), under a specific function: bcmgenet_link_intr_enable() and while at it, update the comments surrounding the code. Fixes: 6cc8e6d4dcb36 ("net: bcmgenet: Delay PHY initialization to bcmgenet_open()") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 32 +++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 3bc701e4c59e..1805541b4240 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1683,6 +1683,24 @@ static void bcmgenet_intr_disable(struct bcmgenet_priv *priv) bcmgenet_intrl2_1_writel(priv, 0, INTRL2_CPU_MASK_CLEAR); } +static void bcmgenet_link_intr_enable(struct bcmgenet_priv *priv) +{ + u32 int0_enable = 0; + + /* Monitor cable plug/unplugged event for internal PHY, external PHY + * and MoCA PHY + */ + if (priv->internal_phy) { + int0_enable |= UMAC_IRQ_LINK_EVENT; + } else if (priv->ext_phy) { + int0_enable |= UMAC_IRQ_LINK_EVENT; + } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) { + if (priv->hw_params->flags & GENET_HAS_MOCA_LINK_DET) + int0_enable |= UMAC_IRQ_LINK_EVENT; + } + bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR); +} + static int init_umac(struct bcmgenet_priv *priv) { struct device *kdev = &priv->pdev->dev; @@ -1723,15 +1741,8 @@ static int init_umac(struct bcmgenet_priv *priv) /* Enable Tx default queue 16 interrupts */ int0_enable |= UMAC_IRQ_TXDMA_DONE; - /* Monitor cable plug/unplugged event for internal PHY */ - if (priv->internal_phy) { - int0_enable |= UMAC_IRQ_LINK_EVENT; - } else if (priv->ext_phy) { - int0_enable |= UMAC_IRQ_LINK_EVENT; - } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) { - if (priv->hw_params->flags & GENET_HAS_MOCA_LINK_DET) - int0_enable |= 
UMAC_IRQ_LINK_EVENT; - + /* Configure backpressure vectors for MoCA */ + if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) { reg = bcmgenet_bp_mc_get(priv); reg |= BIT(priv->hw_params->bp_in_en_shift); @@ -2645,6 +2656,9 @@ static void bcmgenet_netif_start(struct net_device *dev) netif_tx_start_all_queues(dev); + /* Monitor link interrupts now */ + bcmgenet_link_intr_enable(priv); + phy_start(priv->phydev); }