From bc8e71314e8444c6315c482441f3204c032ab327 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 12 Apr 2020 11:45:47 +0300 Subject: [PATCH 001/100] netfilter: flowtable: Free block_cb when being deleted Free block_cb memory when asked to be deleted. Fixes: 978703f42549 ("netfilter: flowtable: Add API for registering to flow table events") Signed-off-by: Roi Dayan Reviewed-by: Paul Blakey Reviewed-by: Oz Shlomo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index c0cb79495c35..4344e572b7f9 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -421,10 +421,12 @@ void nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, down_write(&flow_table->flow_block_lock); block_cb = flow_block_cb_lookup(block, cb, cb_priv); - if (block_cb) + if (block_cb) { list_del(&block_cb->list); - else + flow_block_cb_free(block_cb); + } else { WARN_ON(true); + } up_write(&flow_table->flow_block_lock); } EXPORT_SYMBOL_GPL(nf_flow_table_offload_del_cb); From e0c5c33dd237abbcaade9ff7b77d995797d6cb37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nils=20ANDR=C3=89-CHANG?= Date: Wed, 15 Apr 2020 15:00:52 +0100 Subject: [PATCH 002/100] MAINTAINERS: Update URL for wireless drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, http://wireless.kernel.org would redirect to https://wireless.wiki.kernel.org, however, this is no longer the case and most pages return 404. https is used because http://wireless.kernel.org/* redirects to https://wireless.wiki.kernel.org/* Signed-off-by: Nils ANDRÉ-CHANG Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200415140052.2lftkixe37llmtjl@nixos --- MAINTAINERS | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6851ef7cf1bd..59138946239a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -189,7 +189,7 @@ F: drivers/net/hamradio/6pack.c M: Johannes Berg L: linux-wireless@vger.kernel.org S: Maintained -W: http://wireless.kernel.org/ +W: https://wireless.wiki.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git F: Documentation/driver-api/80211/cfg80211.rst @@ -505,7 +505,7 @@ F: drivers/hwmon/adm1029.c ADM8211 WIRELESS DRIVER L: linux-wireless@vger.kernel.org S: Orphan -W: http://wireless.kernel.org/ +W: https://wireless.wiki.kernel.org/ F: drivers/net/wireless/admtek/adm8211.* ADP1653 FLASH CONTROLLER DRIVER @@ -2847,14 +2847,14 @@ M: Nick Kossifidis M: Luis Chamberlain L: linux-wireless@vger.kernel.org S: Maintained -W: http://wireless.kernel.org/en/users/Drivers/ath5k +W: https://wireless.wiki.kernel.org/en/users/Drivers/ath5k F: drivers/net/wireless/ath/ath5k/ ATHEROS ATH6KL WIRELESS DRIVER M: Kalle Valo L: linux-wireless@vger.kernel.org S: Supported -W: http://wireless.kernel.org/en/users/Drivers/ath6kl +W: https://wireless.wiki.kernel.org/en/users/Drivers/ath6kl T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git F: drivers/net/wireless/ath/ath6kl/ @@ -3017,7 +3017,7 @@ B43 WIRELESS DRIVER L: linux-wireless@vger.kernel.org L: b43-dev@lists.infradead.org S: Odd Fixes -W: http://wireless.kernel.org/en/users/Drivers/b43 +W: https://wireless.wiki.kernel.org/en/users/Drivers/b43 F: drivers/net/wireless/broadcom/b43/ B43LEGACY WIRELESS DRIVER @@ -3025,7 +3025,7 @@ M: Larry Finger L: linux-wireless@vger.kernel.org L: b43-dev@lists.infradead.org S: Maintained -W: http://wireless.kernel.org/en/users/Drivers/b43 +W: https://wireless.wiki.kernel.org/en/users/Drivers/b43 F: drivers/net/wireless/broadcom/b43legacy/ BACKLIGHT CLASS/SUBSYSTEM @@ -3840,7 +3840,7 @@ CARL9170 LINUX COMMUNITY WIRELESS DRIVER M: Christian Lamparter L: linux-wireless@vger.kernel.org S: Maintained -W: http://wireless.kernel.org/en/users/Drivers/carl9170 +W: https://wireless.wiki.kernel.org/en/users/Drivers/carl9170 F: drivers/net/wireless/ath/carl9170/ CAVIUM I2C DRIVER @@ -10064,7 +10064,7 @@ MAC80211 M: Johannes Berg L: linux-wireless@vger.kernel.org S: Maintained -W: http://wireless.kernel.org/ +W: https://wireless.wiki.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git F: Documentation/networking/mac80211-injection.txt @@ -12645,7 +12645,7 @@ F: fs/orangefs/ ORINOCO DRIVER L: linux-wireless@vger.kernel.org S: Orphan -W: http://wireless.kernel.org/en/users/Drivers/orinoco +W: https://wireless.wiki.kernel.org/en/users/Drivers/orinoco W: http://www.nongnu.org/orinoco/ F: drivers/net/wireless/intersil/orinoco/ @@ -12671,7 +12671,7 @@ P54 WIRELESS DRIVER M: Christian Lamparter L: linux-wireless@vger.kernel.org S: Maintained -W: http://wireless.kernel.org/en/users/Drivers/p54 +W: https://wireless.wiki.kernel.org/en/users/Drivers/p54 F: drivers/net/wireless/intersil/p54/ PACKING @@ -13592,7 +13592,7 @@ PRISM54 WIRELESS DRIVER M: Luis Chamberlain L: linux-wireless@vger.kernel.org S: Obsolete -W: http://wireless.kernel.org/en/users/Drivers/p54 +W: https://wireless.wiki.kernel.org/en/users/Drivers/p54 F: drivers/net/wireless/intersil/prism54/ PROC FILESYSTEM @@ -13932,7 +13932,7 @@ QUALCOMM ATHEROS ATH10K WIRELESS DRIVER M: Kalle Valo L: ath10k@lists.infradead.org S: Supported -W: http://wireless.kernel.org/en/users/Drivers/ath10k +W: https://wireless.wiki.kernel.org/en/users/Drivers/ath10k T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git F: drivers/net/wireless/ath/ath10k/ @@ -13947,7 +13947,7 @@ QUALCOMM ATHEROS ATH9K WIRELESS DRIVER M: QCA ath9k Development L: linux-wireless@vger.kernel.org S: Supported -W: http://wireless.kernel.org/en/users/Drivers/ath9k +W: https://wireless.wiki.kernel.org/en/users/Drivers/ath9k F: drivers/net/wireless/ath/ath9k/ QUALCOMM CAMERA SUBSYSTEM DRIVER @@ -14044,7 +14044,7 @@ QUALCOMM WCN36XX WIRELESS DRIVER M: Kalle Valo L: wcn36xx@lists.infradead.org S: Supported -W: http://wireless.kernel.org/en/users/Drivers/wcn36xx +W: https://wireless.wiki.kernel.org/en/users/Drivers/wcn36xx T: git git://github.com/KrasnikovEugene/wcn36xx.git F: drivers/net/wireless/ath/wcn36xx/ @@ -14272,7 +14272,7 @@ REALTEK WIRELESS DRIVER (rtlwifi family) M: Ping-Ke Shih L: linux-wireless@vger.kernel.org S: Maintained -W: http://wireless.kernel.org/ +W: https://wireless.wiki.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git F: drivers/net/wireless/realtek/rtlwifi/ @@ -14407,7 +14407,7 @@ RFKILL M: Johannes Berg L: linux-wireless@vger.kernel.org S: Maintained -W: http://wireless.kernel.org/ +W: https://wireless.wiki.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git F: Documentation/ABI/stable/sysfs-class-rfkill @@ -14556,7 +14556,7 @@ F: drivers/media/dvb-frontends/rtl2832_sdr* RTL8180 WIRELESS DRIVER L: linux-wireless@vger.kernel.org S: Orphan -W: http://wireless.kernel.org/ +W: https://wireless.wiki.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git F: drivers/net/wireless/realtek/rtl818x/rtl8180/ @@ -14566,7 +14566,7 @@ M: Hin-Tak Leung M: Larry Finger L: linux-wireless@vger.kernel.org S: Maintained -W: http://wireless.kernel.org/ +W: https://wireless.wiki.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git F: drivers/net/wireless/realtek/rtl818x/rtl8187/ @@ -16921,8 +16921,8 @@ F: drivers/media/platform/ti-vpe/ TI WILINK WIRELESS DRIVERS L: linux-wireless@vger.kernel.org S: Orphan -W: http://wireless.kernel.org/en/users/Drivers/wl12xx -W: http://wireless.kernel.org/en/users/Drivers/wl1251 +W: https://wireless.wiki.kernel.org/en/users/Drivers/wl12xx +W: https://wireless.wiki.kernel.org/en/users/Drivers/wl1251 T: git git://git.kernel.org/pub/scm/linux/kernel/git/luca/wl12xx.git F: drivers/net/wireless/ti/ F: include/linux/wl12xx.h @@ -18204,7 +18204,7 @@ M: Maya Erez L: linux-wireless@vger.kernel.org L: wil6210@qti.qualcomm.com S: Supported -W: http://wireless.kernel.org/en/users/Drivers/wil6210 +W: https://wireless.wiki.kernel.org/en/users/Drivers/wil6210 F: drivers/net/wireless/ath/wil6210/ WIMAX STACK From 1f8fbe9c93faf1dd56c7268126f9ffe10750f193 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Thu, 16 Apr 2020 21:28:03 +0300 Subject: [PATCH 003/100] MAINTAINERS: update list of qtnfmac maintainers Removing Avinash since tomorrow is his last day at Quantenna. Signed-off-by: Sergey Matyukevich Signed-off-by: Igor Mitsyanko Signed-off-by: Avinash Patil Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200416182803.31201-1-sergey.matyukevich.os@quantenna.com --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 59138946239a..8536659c6a5b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14050,7 +14050,6 @@ F: drivers/net/wireless/ath/wcn36xx/ QUANTENNA QTNFMAC WIRELESS DRIVER M: Igor Mitsyanko -M: Avinash Patil M: Sergey Matyukevich L: linux-wireless@vger.kernel.org S: Maintained From d03f228470a8c0a22b774d1f8d47071e0de4f6dd Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Wed, 15 Apr 2020 16:36:19 +0800 Subject: [PATCH 004/100] net: netrom: Fix potential nr_neigh refcnt leak in nr_add_node nr_add_node() invokes nr_neigh_get_dev(), which returns a local reference of the nr_neigh object to "nr_neigh" with increased refcnt. When nr_add_node() returns, "nr_neigh" becomes invalid, so the refcount should be decreased to keep refcount balanced. The issue happens in one normal path of nr_add_node(), which forgets to decrease the refcnt increased by nr_neigh_get_dev() and causes a refcnt leak. It should decrease the refcnt before the function returns like other normal paths do. Fix this issue by calling nr_neigh_put() before the nr_add_node() returns. Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Signed-off-by: David S. Miller --- net/netrom/nr_route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 79f12d8c7b86..0891ee02ca4f 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -208,6 +208,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, /* refcount initialized at 1 */ spin_unlock_bh(&nr_node_list_lock); + nr_neigh_put(nr_neigh); return 0; } nr_node_lock(nr_node); From 441870ee4240cf67b5d3ab8e16216a9ff42eb5d6 Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Wed, 15 Apr 2020 16:39:56 +0800 Subject: [PATCH 005/100] tipc: Fix potential tipc_aead refcnt leak in tipc_crypto_rcv tipc_crypto_rcv() invokes tipc_aead_get(), which returns a reference of the tipc_aead object to "aead" with increased refcnt. When tipc_crypto_rcv() returns, the original local reference of "aead" becomes invalid, so the refcount should be decreased to keep refcount balanced. The issue happens in one error path of tipc_crypto_rcv(). When TIPC message decryption status is EINPROGRESS or EBUSY, the function forgets to decrease the refcnt increased by tipc_aead_get() and causes a refcnt leak. Fix this issue by calling tipc_aead_put() on the error path when TIPC message decryption status is EINPROGRESS or EBUSY. Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Signed-off-by: David S. Miller --- net/tipc/crypto.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index c8c47fc72653..8c47ded2edb6 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -1712,6 +1712,7 @@ exit: case -EBUSY: this_cpu_inc(stats->stat[STAT_ASYNC]); *skb = NULL; + tipc_aead_put(aead); return rc; default: this_cpu_inc(stats->stat[STAT_NOK]); From de058420767df21e2b6b0f3bb36d1616fb962032 Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Wed, 15 Apr 2020 16:40:28 +0800 Subject: [PATCH 006/100] tipc: Fix potential tipc_node refcnt leak in tipc_rcv tipc_rcv() invokes tipc_node_find() twice, which returns a reference of the specified tipc_node object to "n" with increased refcnt. When tipc_rcv() returns or a new object is assigned to "n", the original local reference of "n" becomes invalid, so the refcount should be decreased to keep refcount balanced. The issue happens in some paths of tipc_rcv(), which forget to decrease the refcnt increased by tipc_node_find() and will cause a refcnt leak. Fix this issue by calling tipc_node_put() before the original object pointed by "n" becomes invalid. Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Signed-off-by: David S. Miller --- net/tipc/node.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 10292c942384..803a3a6d0f50 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2038,6 +2038,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) n = tipc_node_find_by_id(net, ehdr->id); } tipc_crypto_rcv(net, (n) ? n->crypto_rx : NULL, &skb, b); + tipc_node_put(n); if (!skb) return; @@ -2090,7 +2091,7 @@ rcv: /* Check/update node state before receiving */ if (unlikely(skb)) { if (unlikely(skb_linearize(skb))) - goto discard; + goto out_node_put; tipc_node_write_lock(n); if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) { if (le->link) { @@ -2119,6 +2120,7 @@ rcv: if (!skb_queue_empty(&xmitq)) tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n); +out_node_put: tipc_node_put(n); discard: kfree_skb(skb); From 7717cbec172c3554d470023b4020d5781961187e Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Wed, 15 Apr 2020 16:41:20 +0800 Subject: [PATCH 007/100] wimax/i2400m: Fix potential urb refcnt leak i2400mu_bus_bm_wait_for_ack() invokes usb_get_urb(), which increases the refcount of the "notif_urb". When i2400mu_bus_bm_wait_for_ack() returns, local variable "notif_urb" becomes invalid, so the refcount should be decreased to keep refcount balanced. The issue happens in all paths of i2400mu_bus_bm_wait_for_ack(), which forget to decrease the refcnt increased by usb_get_urb(), causing a refcnt leak. Fix this issue by calling usb_put_urb() before the i2400mu_bus_bm_wait_for_ack() returns. Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Signed-off-by: David S. Miller --- drivers/net/wimax/i2400m/usb-fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wimax/i2400m/usb-fw.c b/drivers/net/wimax/i2400m/usb-fw.c index 529ebca1e9e1..1f7709d24f35 100644 --- a/drivers/net/wimax/i2400m/usb-fw.c +++ b/drivers/net/wimax/i2400m/usb-fw.c @@ -354,6 +354,7 @@ out: usb_autopm_put_interface(i2400mu->usb_iface); d_fnend(8, dev, "(i2400m %p ack %p size %zu) = %ld\n", i2400m, ack, ack_size, (long) result); + usb_put_urb(¬if_urb); return result; error_exceeded: From 15ce30609d1e88d42fb1cd948f453e6d5f188249 Mon Sep 17 00:00:00 2001 From: Julien Beraud Date: Wed, 15 Apr 2020 14:24:31 +0200 Subject: [PATCH 008/100] net: stmmac: fix enabling socfpga's ptp_ref_clock There are 2 registers to write to enable a ptp ref clock coming from the fpga. One that enables the usage of the clock from the fpga for emac0 and emac1 as a ptp ref clock, and the other to allow signals from the fpga to reach emac0 and emac1. Currently, if the dwmac-socfpga has phymode set to PHY_INTERFACE_MODE_MII, PHY_INTERFACE_MODE_GMII, or PHY_INTERFACE_MODE_SGMII, both registers will be written and the ptp ref clock will be set as coming from the fpga. Separate the 2 register writes to only enable signals from the fpga to reach emac0 or emac1 when ptp ref clock is not coming from the fpga. Signed-off-by: Julien Beraud Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index fa32cd5b418e..70d41783329d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -291,16 +291,19 @@ static int socfpga_gen5_set_phy_mode(struct socfpga_dwmac *dwmac) phymode == PHY_INTERFACE_MODE_MII || phymode == PHY_INTERFACE_MODE_GMII || phymode == PHY_INTERFACE_MODE_SGMII) { - ctrl |= SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << (reg_shift / 2); regmap_read(sys_mgr_base_addr, SYSMGR_FPGAGRP_MODULE_REG, &module); module |= (SYSMGR_FPGAGRP_MODULE_EMAC << (reg_shift / 2)); regmap_write(sys_mgr_base_addr, SYSMGR_FPGAGRP_MODULE_REG, module); - } else { - ctrl &= ~(SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << (reg_shift / 2)); } + if (dwmac->f2h_ptp_ref_clk) + ctrl |= SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << (reg_shift / 2); + else + ctrl &= ~(SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK << + (reg_shift / 2)); + regmap_write(sys_mgr_base_addr, reg_offset, ctrl); /* Deassert reset for the phy configuration to be sampled by From 91a2559c1dc5b0f7e1256d42b1508935e8eabfbf Mon Sep 17 00:00:00 2001 From: Julien Beraud Date: Wed, 15 Apr 2020 14:24:32 +0200 Subject: [PATCH 009/100] net: stmmac: Fix sub-second increment In fine adjustement mode, which is the current default, the sub-second increment register is the number of nanoseconds that will be added to the clock when the accumulator overflows. At each clock cycle, the value of the addend register is added to the accumulator. Currently, we use 20ns = 1e09ns / 50MHz as this value whatever the frequency of the ptp clock actually is. The adjustment is then done on the addend register, only incrementing every X clock cycles X being the ratio between 50MHz and ptp_clock_rate (addend = 2^32 * 50MHz/ptp_clock_rate). This causes the following issues : - In case the frequency of the ptp clock is inferior or equal to 50MHz, the addend value calculation will overflow and the default addend value will be set to 0, causing the clock to not work at all. (For instance, for ptp_clock_rate = 50MHz, addend = 2^32). - The resolution of the timestamping clock is limited to 20ns while it is not needed, thus limiting the accuracy of the timestamping to 20ns. Fix this by setting sub-second increment to 2e09ns / ptp_clock_rate. It will allow to reach the minimum possible frequency for ptp_clk_ref, which is 5MHz for GMII 1000Mps Full-Duplex by setting the sub-second-increment to a higher value. For instance, for 25MHz, it gives ssinc = 80ns and default_addend = 2^31. It will also allow to use a lower value for sub-second-increment, thus improving the timestamping accuracy with frequencies higher than 100MHz, for instance, for 200MHz, ssinc = 10ns and default_addend = 2^31. v1->v2: - Remove modifications to the calculation of default addend, which broke compatibility with clock frequencies for which 2000000000 / ptp_clk_freq is not an integer. - Modify description according to discussions. Signed-off-by: Julien Beraud Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index fcf080243a0f..d291612eeafb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -27,12 +27,16 @@ static void config_sub_second_increment(void __iomem *ioaddr, unsigned long data; u32 reg_value; - /* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second - * formula = (1/ptp_clock) * 1000000000 - * where ptp_clock is 50MHz if fine method is used to update system + /* For GMAC3.x, 4.x versions, in "fine adjustement mode" set sub-second + * increment to twice the number of nanoseconds of a clock cycle. + * The calculation of the default_addend value by the caller will set it + * to mid-range = 2^31 when the remainder of this division is zero, + * which will make the accumulator overflow once every 2 ptp_clock + * cycles, adding twice the number of nanoseconds of a clock cycle : + * 2000000000ULL / ptp_clock. */ if (value & PTP_TCR_TSCFUPDT) - data = (1000000000ULL / 50000000); + data = (2000000000ULL / ptp_clock); else data = (1000000000ULL / ptp_clock); From 62e697767fac598518bc687d3e0dafdd7c2f09f5 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 15 Apr 2020 09:06:53 -0400 Subject: [PATCH 010/100] ipv6: rpl: fix full address compression This patch makes it impossible that cmpri or cmpre values are set to the value 16 which is not possible, because these are 4 bit values. We currently run in an overflow when assigning the value 16 to it. According to the standard a value of 16 can be interpreted as a full elided address which isn't possible to set as compression value. A reason why this cannot be set is that the current ipv6 header destination address should never show up inside the segments of the rpl header. In this case we run in a overflow and the address will have no compression at all. Means cmpri or compre is set to 0. As we handle cmpri and cmpre sometimes as unsigned char or 4 bit value inside the rpl header the current behaviour ends in an invalid header format. This patch simple use the best compression method if we ever run into the case that the destination address is showed up inside the rpl segments. We avoid the overflow handling and the rpl header is still valid, even when we have the destination address inside the rpl segments. Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- net/ipv6/rpl.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv6/rpl.c b/net/ipv6/rpl.c index d38b476fc7f2..307f336b5353 100644 --- a/net/ipv6/rpl.c +++ b/net/ipv6/rpl.c @@ -8,6 +8,7 @@ #include #define IPV6_PFXTAIL_LEN(x) (sizeof(struct in6_addr) - (x)) +#define IPV6_RPL_BEST_ADDR_COMPRESSION 15 static void ipv6_rpl_addr_decompress(struct in6_addr *dst, const struct in6_addr *daddr, @@ -73,7 +74,7 @@ static unsigned char ipv6_rpl_srh_calc_cmpri(const struct ipv6_rpl_sr_hdr *inhdr } } - return plen; + return IPV6_RPL_BEST_ADDR_COMPRESSION; } static unsigned char ipv6_rpl_srh_calc_cmpre(const struct in6_addr *daddr, @@ -83,10 +84,10 @@ static unsigned char ipv6_rpl_srh_calc_cmpre(const struct in6_addr *daddr, for (plen = 0; plen < sizeof(*daddr); plen++) { if (daddr->s6_addr[plen] != last_segment->s6_addr[plen]) - break; + return plen; } - return plen; + return IPV6_RPL_BEST_ADDR_COMPRESSION; } void ipv6_rpl_srh_compress(struct ipv6_rpl_sr_hdr *outhdr, From 310660a14b74c380b0ef5c12b66933d6a3d1b59f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Apr 2020 09:46:52 -0700 Subject: [PATCH 011/100] net/mlx4_en: avoid indirect call in TX completion Commit 9ecc2d86171a ("net/mlx4_en: add xdp forwarding and data write support") brought another indirect call in fast path. Use INDIRECT_CALL_2() helper to avoid the cost of the indirect call when/if CONFIG_RETPOLINE=y Signed-off-by: Eric Dumazet Cc: Tariq Toukan Cc: Willem de Bruijn Reviewed-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 4d5ca302c067..a30edb436f4a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "mlx4_en.h" @@ -261,6 +262,10 @@ static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, } } +INDIRECT_CALLABLE_DECLARE(u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u64 timestamp, + int napi_mode)); u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, @@ -329,6 +334,11 @@ u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, return tx_info->nr_txbb; } +INDIRECT_CALLABLE_DECLARE(u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u64 timestamp, + int napi_mode)); + u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, int index, u64 timestamp, @@ -449,7 +459,9 @@ bool mlx4_en_process_tx_cq(struct net_device *dev, timestamp = mlx4_en_get_cqe_ts(cqe); /* free next descriptor */ - last_nr_txbb = ring->free_tx_desc( + last_nr_txbb = INDIRECT_CALL_2(ring->free_tx_desc, + mlx4_en_free_tx_desc, + mlx4_en_recycle_tx_desc, priv, ring, ring_index, timestamp, napi_budget); From df1036da90108b1a9969721beab34f4c76228bcc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 17 Apr 2020 09:28:22 +0200 Subject: [PATCH 012/100] mptcp: fix splat when incoming connection is never accepted before exit/close Following snippet (replicated from syzkaller reproducer) generates warning: "IPv4: Attempt to release TCP socket in state 1". int main(void) { struct sockaddr_in sin1 = { .sin_family = 2, .sin_port = 0x4e20, .sin_addr.s_addr = 0x010000e0, }; struct sockaddr_in sin2 = { .sin_family = 2, .sin_addr.s_addr = 0x0100007f, }; struct sockaddr_in sin3 = { .sin_family = 2, .sin_port = 0x4e20, .sin_addr.s_addr = 0x0100007f, }; int r0 = socket(0x2, 0x1, 0x106); int r1 = socket(0x2, 0x1, 0x106); bind(r1, (void *)&sin1, sizeof(sin1)); connect(r1, (void *)&sin2, sizeof(sin2)); listen(r1, 3); return connect(r0, (void *)&sin3, 0x4d); } Reason is that the newly generated mptcp socket is closed via the ulp release of the tcp listener socket when its accept backlog gets purged. To fix this, delay setting the ESTABLISHED state until after userspace calls accept and via mptcp specific destructor. Fixes: 58b09919626bf ("mptcp: create msk early") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/9 Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 1 + net/mptcp/subflow.c | 25 ++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9936e33ac351..1c8b021b4537 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1431,6 +1431,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, newsk = new_mptcp_sock; mptcp_copy_inaddrs(newsk, ssk); list_add(&subflow->node, &msk->conn_list); + inet_sk_state_store(newsk, TCP_ESTABLISHED); bh_unlock_sock(new_mptcp_sock); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 50a8bea987c6..57a836fe4988 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -347,6 +347,29 @@ static bool subflow_hmac_valid(const struct request_sock *req, return ret; } +static void mptcp_sock_destruct(struct sock *sk) +{ + /* if new mptcp socket isn't accepted, it is free'd + * from the tcp listener sockets request queue, linked + * from req->sk. The tcp socket is released. + * This calls the ULP release function which will + * also remove the mptcp socket, via + * sock_put(ctx->conn). + * + * Problem is that the mptcp socket will not be in + * SYN_RECV state and doesn't have SOCK_DEAD flag. + * Both result in warnings from inet_sock_destruct. + */ + + if (sk->sk_state == TCP_SYN_RECV) { + sk->sk_state = TCP_CLOSE; + WARN_ON_ONCE(sk->sk_socket); + sock_orphan(sk); + } + + inet_sock_destruct(sk); +} + static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, @@ -422,7 +445,7 @@ create_child: /* new mpc subflow takes ownership of the newly * created mptcp socket */ - inet_sk_state_store(new_msk, TCP_ESTABLISHED); + new_msk->sk_destruct = mptcp_sock_destruct; mptcp_pm_new_connection(mptcp_sk(new_msk), 1); ctx->conn = new_msk; new_msk = NULL; From 9f5ca6a59816b406230adc440b6bb684fda90abe Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 17 Apr 2020 09:28:23 +0200 Subject: [PATCH 013/100] mptcp: fix 'Attempt to release TCP socket in state' warnings We need to set sk_state to CLOSED, else we will get following: IPv4: Attempt to release TCP socket in state 3 00000000b95f109e IPv4: Attempt to release TCP socket in state 10 00000000b95f109e First one is from inet_sock_destruct(), second one from mptcp_sk_clone failure handling. Setting sk_state to CLOSED isn't enough, we also need to orphan sk so it has DEAD flag set. Otherwise, a very similar warning is printed from inet_sock_destruct(). Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 7 +++++-- net/mptcp/subflow.c | 8 +++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1c8b021b4537..7e816c733ccb 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1355,12 +1355,15 @@ struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req) msk->subflow = NULL; if (unlikely(mptcp_token_new_accept(subflow_req->token, nsk))) { + nsk->sk_state = TCP_CLOSE; bh_unlock_sock(nsk); /* we can't call into mptcp_close() here - possible BH context - * free the sock directly + * free the sock directly. + * sk_clone_lock() sets nsk refcnt to two, hence call sk_free() + * too. */ - nsk->sk_prot->destroy(nsk); + sk_common_release(nsk); sk_free(nsk); return NULL; } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 57a836fe4988..bc46b5091b9d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -370,6 +370,12 @@ static void mptcp_sock_destruct(struct sock *sk) inet_sock_destruct(sk); } +static void mptcp_force_close(struct sock *sk) +{ + inet_sk_state_store(sk, TCP_CLOSE); + sk_common_release(sk); +} + static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, @@ -467,7 +473,7 @@ create_child: out: /* dispose of the left over mptcp master, if any */ if (unlikely(new_msk)) - sock_put(new_msk); + mptcp_force_close(new_msk); return child; close_child: From 9bacd256f1354883d3c1402655153367982bba49 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Apr 2020 07:10:23 -0700 Subject: [PATCH 014/100] tcp: cache line align MAX_TCP_HEADER TCP stack is dumb in how it cooks its output packets. Depending on MAX_HEADER value, we might chose a bad ending point for the headers. If we align the end of TCP headers to cache line boundary, we make sure to always use the smallest number of cache lines, which always help. Signed-off-by: Eric Dumazet Cc: Soheil Hassas Yeganeh Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 5fa9eacd965a..dcf9a72eeaa6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -51,7 +51,7 @@ extern struct inet_hashinfo tcp_hashinfo; extern struct percpu_counter tcp_orphan_count; void tcp_time_wait(struct sock *sk, int state, int timeo); -#define MAX_TCP_HEADER (128 + MAX_HEADER) +#define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER) #define MAX_TCP_OPTION_SPACE 40 #define TCP_MIN_SND_MSS 48 #define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE) From b4faef1739dd1f3b3981b8bf173a2266ea86b1eb Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Sat, 18 Apr 2020 16:28:32 +0800 Subject: [PATCH 015/100] netfilter: nat: fix error handling upon registering inet hook A case of warning was reported by syzbot. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 19934 at net/netfilter/nf_nat_core.c:1106 nf_nat_unregister_fn+0x532/0x5c0 net/netfilter/nf_nat_core.c:1106 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 19934 Comm: syz-executor.5 Not tainted 5.6.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x188/0x20d lib/dump_stack.c:118 panic+0x2e3/0x75c kernel/panic.c:221 __warn.cold+0x2f/0x35 kernel/panic.c:582 report_bug+0x27b/0x2f0 lib/bug.c:195 fixup_bug arch/x86/kernel/traps.c:175 [inline] fixup_bug arch/x86/kernel/traps.c:170 [inline] do_error_trap+0x12b/0x220 arch/x86/kernel/traps.c:267 do_invalid_op+0x32/0x40 arch/x86/kernel/traps.c:286 invalid_op+0x23/0x30 arch/x86/entry/entry_64.S:1027 RIP: 0010:nf_nat_unregister_fn+0x532/0x5c0 net/netfilter/nf_nat_core.c:1106 Code: ff df 48 c1 ea 03 80 3c 02 00 75 75 48 8b 44 24 10 4c 89 ef 48 c7 00 00 00 00 00 e8 e8 f8 53 fb e9 4d fe ff ff e8 ee 9c 16 fb <0f> 0b e9 41 fe ff ff e8 e2 45 54 fb e9 b5 fd ff ff 48 8b 7c 24 20 RSP: 0018:ffffc90005487208 EFLAGS: 00010246 RAX: 0000000000040000 RBX: 0000000000000004 RCX: ffffc9001444a000 RDX: 0000000000040000 RSI: ffffffff865c94a2 RDI: 0000000000000005 RBP: ffff88808b5cf000 R08: ffff8880a2620140 R09: fffffbfff14bcd79 R10: ffffc90005487208 R11: fffffbfff14bcd78 R12: 0000000000000000 R13: 0000000000000001 R14: 0000000000000001 R15: 0000000000000000 nf_nat_ipv6_unregister_fn net/netfilter/nf_nat_proto.c:1017 [inline] nf_nat_inet_register_fn net/netfilter/nf_nat_proto.c:1038 [inline] nf_nat_inet_register_fn+0xfc/0x140 net/netfilter/nf_nat_proto.c:1023 nf_tables_register_hook net/netfilter/nf_tables_api.c:224 [inline] nf_tables_addchain.constprop.0+0x82e/0x13c0 net/netfilter/nf_tables_api.c:1981 nf_tables_newchain+0xf68/0x16a0 net/netfilter/nf_tables_api.c:2235 nfnetlink_rcv_batch+0x83a/0x1610 net/netfilter/nfnetlink.c:433 nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:543 [inline] nfnetlink_rcv+0x3af/0x420 net/netfilter/nfnetlink.c:561 netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline] netlink_unicast+0x537/0x740 net/netlink/af_netlink.c:1329 netlink_sendmsg+0x882/0xe10 net/netlink/af_netlink.c:1918 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:672 ____sys_sendmsg+0x6bf/0x7e0 net/socket.c:2362 ___sys_sendmsg+0x100/0x170 net/socket.c:2416 __sys_sendmsg+0xec/0x1b0 net/socket.c:2449 do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:295 entry_SYSCALL_64_after_hwframe+0x49/0xb3 and to quiesce it, unregister NFPROTO_IPV6 hook instead of NFPROTO_INET in case of failing to register NFPROTO_IPV4 hook. Reported-by: syzbot Fixes: d164385ec572 ("netfilter: nat: add inet family nat support") Cc: Florian Westphal Cc: Stefano Brivio Signed-off-by: Hillf Danton Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_proto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 64eedc17037a..3d816a1e5442 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -1035,8 +1035,8 @@ int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops) ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); if (ret) - nf_nat_ipv6_unregister_fn(net, ops); - + nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, + ARRAY_SIZE(nf_nat_ipv6_ops)); return ret; } EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn); From 27de77cec985233bdf6546437b9761853265c505 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Fri, 17 Apr 2020 02:57:31 +0800 Subject: [PATCH 016/100] net: openvswitch: ovs_ct_exit to be done under ovs_lock syzbot wrote: | ============================= | WARNING: suspicious RCU usage | 5.7.0-rc1+ #45 Not tainted | ----------------------------- | net/openvswitch/conntrack.c:1898 RCU-list traversed in non-reader section!! | | other info that might help us debug this: | rcu_scheduler_active = 2, debug_locks = 1 | ... | | stack backtrace: | Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 | Workqueue: netns cleanup_net | Call Trace: | ... | ovs_ct_exit | ovs_exit_net | ops_exit_list.isra.7 | cleanup_net | process_one_work | worker_thread To avoid that warning, invoke the ovs_ct_exit under ovs_lock and add lockdep_ovsl_is_held as optional lockdep expression. Link: https://lore.kernel.org/lkml/000000000000e642a905a0cbee6e@google.com Fixes: 11efd5cb04a1 ("openvswitch: Support conntrack zone limit") Cc: Pravin B Shelar Cc: Yi-Hung Wei Reported-by: syzbot+7ef50afd3a211f879112@syzkaller.appspotmail.com Signed-off-by: Tonghao Zhang Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 3 ++- net/openvswitch/datapath.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e726159cfcfa..4340f25fe390 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1895,7 +1895,8 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net) struct hlist_head *head = &info->limits[i]; struct ovs_ct_limit *ct_limit; - hlist_for_each_entry_rcu(ct_limit, head, hlist_node) + hlist_for_each_entry_rcu(ct_limit, head, hlist_node, + lockdep_ovsl_is_held()) kfree_rcu(ct_limit, rcu); } kfree(ovs_net->ct_limit_info->limits); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d8ae541d22a8..94b024534987 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2466,8 +2466,10 @@ static void __net_exit ovs_exit_net(struct net *dnet) struct net *net; LIST_HEAD(head); - ovs_ct_exit(dnet); ovs_lock(); + + ovs_ct_exit(dnet); + list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) __dp_destroy(dp); From bdbe05b381ecb65494199abf044be3a8d5530b58 Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Sat, 18 Apr 2020 16:51:06 +0800 Subject: [PATCH 017/100] net: systemport: Omit superfluous error message in bcm_sysport_probe() In the function bcm_sysport_probe(), when get irq failed, the function platform_get_irq() logs an error message, so remove redundant message here. Signed-off-by: Tang Bin Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index af7ce5c5488c..c99e5a3fa746 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2475,7 +2475,6 @@ static int bcm_sysport_probe(struct platform_device *pdev) priv->wol_irq = platform_get_irq(pdev, 1); } if (priv->irq0 <= 0 || (priv->irq1 <= 0 && !priv->is_lite)) { - dev_err(&pdev->dev, "invalid interrupts\n"); ret = -EINVAL; goto err_free_netdev; } From 82c9ae440857840c56e05d4fb1427ee032531346 Mon Sep 17 00:00:00 2001 From: John Haxby Date: Sat, 18 Apr 2020 16:30:49 +0100 Subject: [PATCH 018/100] ipv6: fix restrict IPV6_ADDRFORM operation Commit b6f6118901d1 ("ipv6: restrict IPV6_ADDRFORM operation") fixed a problem found by syzbot an unfortunate logic error meant that it also broke IPV6_ADDRFORM. Rearrange the checks so that the earlier test is just one of the series of checks made before moving the socket from IPv6 to IPv4. Fixes: b6f6118901d1 ("ipv6: restrict IPV6_ADDRFORM operation") Signed-off-by: John Haxby Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index debdaeba5d8c..18d05403d3b5 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -183,15 +183,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = -EBUSY; break; } - } else if (sk->sk_protocol == IPPROTO_TCP) { - if (sk->sk_prot != &tcpv6_prot) { - retv = -EBUSY; - break; - } - break; - } else { + } + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_prot != &tcpv6_prot) { + retv = -EBUSY; break; } + if (sk->sk_protocol != IPPROTO_TCP) + break; if (sk->sk_state != TCP_ESTABLISHED) { retv = -ENOTCONN; break; From f0212a5ebfa6cd789ab47666b9cc169e6e688732 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 18 Apr 2020 19:14:57 +0100 Subject: [PATCH 019/100] net: stmmac: dwmac-meson8b: Add missing boundary to RGMII TX clock array Running with KASAN on a VIM3L systems leads to the following splat when probing the Ethernet device: ================================================================== BUG: KASAN: global-out-of-bounds in _get_maxdiv+0x74/0xd8 Read of size 4 at addr ffffa000090615f4 by task systemd-udevd/139 CPU: 1 PID: 139 Comm: systemd-udevd Tainted: G E 5.7.0-rc1-00101-g8624b7577b9c #781 Hardware name: amlogic w400/w400, BIOS 2020.01-rc5 03/12/2020 Call trace: dump_backtrace+0x0/0x2a0 show_stack+0x20/0x30 dump_stack+0xec/0x148 print_address_description.isra.12+0x70/0x35c __kasan_report+0xfc/0x1d4 kasan_report+0x4c/0x68 __asan_load4+0x9c/0xd8 _get_maxdiv+0x74/0xd8 clk_divider_bestdiv+0x74/0x5e0 clk_divider_round_rate+0x80/0x1a8 clk_core_determine_round_nolock.part.9+0x9c/0xd0 clk_core_round_rate_nolock+0xf0/0x108 clk_hw_round_rate+0xac/0xf0 clk_factor_round_rate+0xb8/0xd0 clk_core_determine_round_nolock.part.9+0x9c/0xd0 clk_core_round_rate_nolock+0xf0/0x108 clk_core_round_rate_nolock+0xbc/0x108 clk_core_set_rate_nolock+0xc4/0x2e8 clk_set_rate+0x58/0xe0 meson8b_dwmac_probe+0x588/0x72c [dwmac_meson8b] platform_drv_probe+0x78/0xd8 really_probe+0x158/0x610 driver_probe_device+0x140/0x1b0 device_driver_attach+0xa4/0xb0 __driver_attach+0xcc/0x1c8 bus_for_each_dev+0xf4/0x168 driver_attach+0x3c/0x50 bus_add_driver+0x238/0x2e8 driver_register+0xc8/0x1e8 __platform_driver_register+0x88/0x98 meson8b_dwmac_driver_init+0x28/0x1000 [dwmac_meson8b] do_one_initcall+0xa8/0x328 do_init_module+0xe8/0x368 load_module+0x3300/0x36b0 __do_sys_finit_module+0x120/0x1a8 __arm64_sys_finit_module+0x4c/0x60 el0_svc_common.constprop.2+0xe4/0x268 do_el0_svc+0x98/0xa8 el0_svc+0x24/0x68 el0_sync_handler+0x12c/0x318 el0_sync+0x158/0x180 The buggy address belongs to the variable: div_table.63646+0x34/0xfffffffffffffa40 [dwmac_meson8b] Memory state around the buggy address: ffffa00009061480: fa fa fa fa 00 00 00 01 fa fa fa fa 00 00 00 00 ffffa00009061500: 05 fa fa fa fa fa fa fa 00 04 fa fa fa fa fa fa >ffffa00009061580: 00 03 fa fa fa fa fa fa 00 00 00 00 00 00 fa fa ^ ffffa00009061600: fa fa fa fa 00 01 fa fa fa fa fa fa 01 fa fa fa ffffa00009061680: fa fa fa fa 00 01 fa fa fa fa fa fa 04 fa fa fa ================================================================== Digging into this indeed shows that the clock divider array is lacking a final fence, and that the clock subsystems goes in the weeds. Oh well. Let's add the empty structure that indicates the end of the array. Fixes: bd6f48546b9c ("net: stmmac: dwmac-meson8b: Fix the RGMII TX delay on Meson8b/8m2 SoCs") Signed-off-by: Marc Zyngier Cc: Martin Blumenstingl Reviewed-by: Martin Blumenstingl Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 0e2fa14f1423..a3934ca6a043 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -119,6 +119,7 @@ static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac) { .div = 5, .val = 5, }, { .div = 6, .val = 6, }, { .div = 7, .val = 7, }, + { /* end of array */ } }; clk_configs = devm_kzalloc(dev, sizeof(*clk_configs), GFP_KERNEL); From bd019427bf3623ee3c7d2845cf921bbf4c14846c Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Mon, 20 Apr 2020 15:26:54 +0530 Subject: [PATCH 020/100] cxgb4: fix large delays in PTP synchronization Fetching PTP sync information from mailbox is slow and can take up to 10 milliseconds. Reduce this unnecessary delay by directly reading the information from the corresponding registers. Fixes: 9c33e4208bce ("cxgb4: Add PTP Hardware Clock (PHC) support") Signed-off-by: Manoj Malviya Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cxgb4_ptp.c | 27 +++++-------------- drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 3 +++ 2 files changed, 9 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c index af1f40cbccc8..f5bc996ac77d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c @@ -311,32 +311,17 @@ static int cxgb4_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) */ static int cxgb4_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) { - struct adapter *adapter = (struct adapter *)container_of(ptp, - struct adapter, ptp_clock_info); - struct fw_ptp_cmd c; + struct adapter *adapter = container_of(ptp, struct adapter, + ptp_clock_info); u64 ns; - int err; - memset(&c, 0, sizeof(c)); - c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PTP_CMD) | - FW_CMD_REQUEST_F | - FW_CMD_READ_F | - FW_PTP_CMD_PORTID_V(0)); - c.retval_len16 = cpu_to_be32(FW_CMD_LEN16_V(sizeof(c) / 16)); - c.u.ts.sc = FW_PTP_SC_GET_TIME; - - err = t4_wr_mbox(adapter, adapter->mbox, &c, sizeof(c), &c); - if (err < 0) { - dev_err(adapter->pdev_dev, - "PTP: %s error %d\n", __func__, -err); - return err; - } + ns = t4_read_reg(adapter, T5_PORT_REG(0, MAC_PORT_PTP_SUM_LO_A)); + ns |= (u64)t4_read_reg(adapter, + T5_PORT_REG(0, MAC_PORT_PTP_SUM_HI_A)) << 32; /* convert to timespec*/ - ns = be64_to_cpu(c.u.ts.tm); *ts = ns_to_timespec64(ns); - - return err; + return 0; } /** diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index bb20e50ddb84..4a9fcd6c226c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -1906,6 +1906,9 @@ #define MAC_PORT_CFG2_A 0x818 +#define MAC_PORT_PTP_SUM_LO_A 0x990 +#define MAC_PORT_PTP_SUM_HI_A 0x994 + #define MPS_CMN_CTL_A 0x9000 #define COUNTPAUSEMCRX_S 5 From 5e20087d1b678965ae9df01eed03efedc1aef9f8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 20 Apr 2020 16:25:04 +0200 Subject: [PATCH 021/100] mptcp: handle mptcp listener destruction via rcu Following splat can occur during self test: BUG: KASAN: use-after-free in subflow_data_ready+0x156/0x160 Read of size 8 at addr ffff888100c35c28 by task mptcp_connect/4808 subflow_data_ready+0x156/0x160 tcp_child_process+0x6a3/0xb30 tcp_v4_rcv+0x2231/0x3730 ip_protocol_deliver_rcu+0x5c/0x860 ip_local_deliver_finish+0x220/0x360 ip_local_deliver+0x1c8/0x4e0 ip_rcv_finish+0x1da/0x2f0 ip_rcv+0xd0/0x3c0 __netif_receive_skb_one_core+0xf5/0x160 __netif_receive_skb+0x27/0x1c0 process_backlog+0x21e/0x780 net_rx_action+0x35f/0xe90 do_softirq+0x4c/0x50 [..] This occurs when accessing subflow_ctx->conn. Problem is that tcp_child_process() calls listen sockets' sk_data_ready() notification, but it doesn't hold the listener lock. Another cpu calling close() on the listener will then cause transition of refcount to 0. Fixes: 58b09919626bf ("mptcp: create msk early") Signed-off-by: Florian Westphal Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 7e816c733ccb..b57974a6b6cc 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1378,6 +1378,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req) msk->ack_seq = ack_seq; } + sock_reset_flag(nsk, SOCK_RCU_FREE); /* will be fully established after successful MPC subflow creation */ inet_sk_state_store(nsk, TCP_SYN_RECV); bh_unlock_sock(nsk); @@ -1779,6 +1780,8 @@ static int mptcp_listen(struct socket *sock, int backlog) goto unlock; } + sock_set_flag(sock->sk, SOCK_RCU_FREE); + err = ssock->ops->listen(ssock, backlog); inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk)); if (!err) From 4c8941de781cf71388d1490c6b85a02d1cec1ef4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 20 Apr 2020 16:25:05 +0200 Subject: [PATCH 022/100] mptcp: avoid flipping mp_capable field in syn_recv_sock() If multiple CPUs races on the same req_sock in syn_recv_sock(), flipping such field can cause inconsistent child socket status. When racing, the CPU losing the req ownership may still change the mptcp request socket mp_capable flag while the CPU owning the request is cloning the socket, leaving the child socket with 'is_mptcp' set but no 'mp_capable' flag. Such socket will stay with 'conn' field cleared, heading to oops in later mptcp callback. Address the issue tracking the fallback status in a local variable. Fixes: 58b09919626b ("mptcp: create msk early") Co-developed-by: Florian Westphal Signed-off-by: Florian Westphal Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 46 +++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index bc46b5091b9d..4fa190368507 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -376,6 +376,17 @@ static void mptcp_force_close(struct sock *sk) sk_common_release(sk); } +static void subflow_ulp_fallback(struct sock *sk, + struct mptcp_subflow_context *old_ctx) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + mptcp_subflow_tcp_fallback(sk, old_ctx); + icsk->icsk_ulp_ops = NULL; + rcu_assign_pointer(icsk->icsk_ulp_data, NULL); + tcp_sk(sk)->is_mptcp = 0; +} + static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, @@ -388,6 +399,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct tcp_options_received opt_rx; bool fallback_is_fatal = false; struct sock *new_msk = NULL; + bool fallback = false; struct sock *child; pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); @@ -412,14 +424,14 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, subflow_req->remote_key = opt_rx.mptcp.sndr_key; subflow_req->remote_key_valid = 1; } else { - subflow_req->mp_capable = 0; + fallback = true; goto create_child; } create_msk: new_msk = mptcp_sk_clone(listener->conn, req); if (!new_msk) - subflow_req->mp_capable = 0; + fallback = true; } else if (subflow_req->mp_join) { fallback_is_fatal = true; opt_rx.mptcp.mp_join = 0; @@ -438,12 +450,18 @@ create_child: if (child && *own_req) { struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child); - /* we have null ctx on TCP fallback, which is fatal on - * MPJ handshake + /* we need to fallback on ctx allocation failure and on pre-reqs + * checking above. In the latter scenario we additionally need + * to reset the context to non MPTCP status. */ - if (!ctx) { + if (!ctx || fallback) { if (fallback_is_fatal) goto close_child; + + if (ctx) { + subflow_ulp_fallback(child, ctx); + kfree_rcu(ctx, rcu); + } goto out; } @@ -474,6 +492,13 @@ out: /* dispose of the left over mptcp master, if any */ if (unlikely(new_msk)) mptcp_force_close(new_msk); + + /* check for expected invariant - should never trigger, just help + * catching eariler subtle bugs + */ + WARN_ON_ONCE(*own_req && child && tcp_sk(child)->is_mptcp && + (!mptcp_subflow_ctx(child) || + !mptcp_subflow_ctx(child)->conn)); return child; close_child: @@ -1076,17 +1101,6 @@ static void subflow_ulp_release(struct sock *sk) kfree_rcu(ctx, rcu); } -static void subflow_ulp_fallback(struct sock *sk, - struct mptcp_subflow_context *old_ctx) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - mptcp_subflow_tcp_fallback(sk, old_ctx); - icsk->icsk_ulp_ops = NULL; - rcu_assign_pointer(icsk->icsk_ulp_data, NULL); - tcp_sk(sk)->is_mptcp = 0; -} - static void subflow_ulp_clone(const struct request_sock *req, struct sock *newsk, const gfp_t priority) From fca5c82c086ea3871b103618b80558c479c8e597 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 20 Apr 2020 16:25:06 +0200 Subject: [PATCH 023/100] mptcp: drop req socket remote_key* fields We don't need them, as we can use the current ingress opt data instead. Setting them in syn_recv_sock() may causes inconsistent mptcp socket status, as per previous commit. Fixes: cc7972ea1932 ("mptcp: parse and emit MP_CAPABLE option according to v1 spec") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 8 +++++--- net/mptcp/protocol.h | 8 ++++---- net/mptcp/subflow.c | 20 ++++++++++---------- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b57974a6b6cc..b22a63ba2348 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1332,7 +1332,9 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) } #endif -struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req) +struct sock *mptcp_sk_clone(const struct sock *sk, + const struct tcp_options_received *opt_rx, + struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC); @@ -1370,9 +1372,9 @@ struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req) msk->write_seq = subflow_req->idsn + 1; atomic64_set(&msk->snd_una, msk->write_seq); - if (subflow_req->remote_key_valid) { + if (opt_rx->mptcp.mp_capable) { msk->can_ack = true; - msk->remote_key = subflow_req->remote_key; + msk->remote_key = opt_rx->mptcp.sndr_key; mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); ack_seq++; msk->ack_seq = ack_seq; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 67448002a2d7..a2b3048037d0 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -206,12 +206,10 @@ struct mptcp_subflow_request_sock { struct tcp_request_sock sk; u16 mp_capable : 1, mp_join : 1, - backup : 1, - remote_key_valid : 1; + backup : 1; u8 local_id; u8 remote_id; u64 local_key; - u64 remote_key; u64 idsn; u32 token; u32 ssn_offset; @@ -332,7 +330,9 @@ void mptcp_proto_init(void); int mptcp_proto_v6_init(void); #endif -struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req); +struct sock *mptcp_sk_clone(const struct sock *sk, + const struct tcp_options_received *opt_rx, + struct request_sock *req); void mptcp_get_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 4fa190368507..fabd06f2ff45 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -133,7 +133,6 @@ static void subflow_init_req(struct request_sock *req, subflow_req->mp_capable = 0; subflow_req->mp_join = 0; - subflow_req->remote_key_valid = 0; #ifdef CONFIG_TCP_MD5SIG /* no MPTCP if MD5SIG is enabled on this socket or we may run out of @@ -404,6 +403,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); + opt_rx.mptcp.mp_capable = 0; if (tcp_rsk(req)->is_mptcp == 0) goto create_child; @@ -418,18 +418,14 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, goto create_msk; } - opt_rx.mptcp.mp_capable = 0; mptcp_get_options(skb, &opt_rx); - if (opt_rx.mptcp.mp_capable) { - subflow_req->remote_key = opt_rx.mptcp.sndr_key; - subflow_req->remote_key_valid = 1; - } else { + if (!opt_rx.mptcp.mp_capable) { fallback = true; goto create_child; } create_msk: - new_msk = mptcp_sk_clone(listener->conn, req); + new_msk = mptcp_sk_clone(listener->conn, &opt_rx, req); if (!new_msk) fallback = true; } else if (subflow_req->mp_join) { @@ -473,6 +469,13 @@ create_child: mptcp_pm_new_connection(mptcp_sk(new_msk), 1); ctx->conn = new_msk; new_msk = NULL; + + /* with OoO packets we can reach here without ingress + * mpc option + */ + ctx->remote_key = opt_rx.mptcp.sndr_key; + ctx->fully_established = opt_rx.mptcp.mp_capable; + ctx->can_ack = opt_rx.mptcp.mp_capable; } else if (ctx->mp_join) { struct mptcp_sock *owner; @@ -1134,9 +1137,6 @@ static void subflow_ulp_clone(const struct request_sock *req, * is fully established only after we receive the remote key */ new_ctx->mp_capable = 1; - new_ctx->fully_established = subflow_req->remote_key_valid; - new_ctx->can_ack = subflow_req->remote_key_valid; - new_ctx->remote_key = subflow_req->remote_key; new_ctx->local_key = subflow_req->local_key; new_ctx->token = subflow_req->token; new_ctx->ssn_offset = subflow_req->ssn_offset; From 1c30fbc76b8f0c07c92a8ca4cd7c456612e17eb5 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 20 Apr 2020 15:01:33 +0000 Subject: [PATCH 024/100] team: fix hang in team_mode_get() When team mode is changed or set, the team_mode_get() is called to check whether the mode module is inserted or not. If the mode module is not inserted, it calls the request_module(). In the request_module(), it creates a child process, which is the "modprobe" process and waits for the done of the child process. At this point, the following locks were used. down_read(&cb_lock()); by genl_rcv() genl_lock(); by genl_rcv_msc() rtnl_lock(); by team_nl_cmd_options_set() mutex_lock(&team->lock); by team_nl_team_get() Concurrently, the team module could be removed by rmmod or "modprobe -r" The __exit function of team module is team_module_exit(), which calls team_nl_fini() and it tries to acquire following locks. down_write(&cb_lock); genl_lock(); Because of the genl_lock() and cb_lock, this process can't be finished earlier than request_module() routine. The problem secenario. CPU0 CPU1 team_mode_get request_module() modprobe -r team_mode_roundrobin team <--(B) modprobe team <--(A) team_mode_roundrobin By request_module(), the "modprobe team_mode_roundrobin" command will be executed. At this point, the modprobe process will decide that the team module should be inserted before team_mode_roundrobin. Because the team module is being removed. By the module infrastructure, the same module insert/remove operations can't be executed concurrently. So, (A) waits for (B) but (B) also waits for (A) because of locks. So that the hang occurs at this point. Test commands: while : do teamd -d & killall teamd & modprobe -rv team_mode_roundrobin & done The approach of this patch is to hold the reference count of the team module if the team module is compiled as a module. If the reference count of the team module is not zero while request_module() is being called, the team module will not be removed at that moment. So that the above scenario could not occur. Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Signed-off-by: Taehee Yoo Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 4004f98e50d9..04845a4017f9 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -468,6 +468,9 @@ static const struct team_mode *team_mode_get(const char *kind) struct team_mode_item *mitem; const struct team_mode *mode = NULL; + if (!try_module_get(THIS_MODULE)) + return NULL; + spin_lock(&mode_list_lock); mitem = __find_mode(kind); if (!mitem) { @@ -483,6 +486,7 @@ static const struct team_mode *team_mode_get(const char *kind) } spin_unlock(&mode_list_lock); + module_put(THIS_MODULE); return mode; } From a019b36123aec9700b21ae0724710f62928a8bc1 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 9 Apr 2020 09:46:20 +0200 Subject: [PATCH 025/100] net/mlx5: Fix failing fw tracer allocation on s390 On s390 FORCE_MAX_ZONEORDER is 9 instead of 11, thus a larger kzalloc() allocation as done for the firmware tracer will always fail. Looking at mlx5_fw_tracer_save_trace(), it is actually the driver itself that copies the debug data into the trace array and there is no need for the allocation to be contiguous in physical memory. We can therefor use kvzalloc() instead of kzalloc() and get rid of the large contiguous allcoation. Fixes: f53aaa31cce7 ("net/mlx5: FW tracer, implement tracer logic") Signed-off-by: Niklas Schnelle Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index c9c9b479bda5..5ce6ebbc7f10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -935,7 +935,7 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev) return NULL; } - tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); + tracer = kvzalloc(sizeof(*tracer), GFP_KERNEL); if (!tracer) return ERR_PTR(-ENOMEM); @@ -982,7 +982,7 @@ destroy_workqueue: tracer->dev = NULL; destroy_workqueue(tracer->work_queue); free_tracer: - kfree(tracer); + kvfree(tracer); return ERR_PTR(err); } @@ -1061,7 +1061,7 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer) mlx5_fw_tracer_destroy_log_buf(tracer); flush_workqueue(tracer->work_queue); destroy_workqueue(tracer->work_queue); - kfree(tracer); + kvfree(tracer); } static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data) From 70840b66da4d3b9a8962905d9111a53ee628beb3 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Mon, 6 Apr 2020 15:47:52 +0300 Subject: [PATCH 026/100] net/mlx5: CT: Change idr to xarray to protect parallel tuple id allocation After allowing parallel tuple insertion, we get the following trace: [ 5505.142249] ------------[ cut here ]------------ [ 5505.148155] WARNING: CPU: 21 PID: 13313 at lib/radix-tree.c:581 delete_node+0x16c/0x180 [ 5505.295553] CPU: 21 PID: 13313 Comm: kworker/u50:22 Tainted: G OE 5.6.0+ #78 [ 5505.304824] Hardware name: Supermicro Super Server/X10DRT-P, BIOS 2.0b 03/30/2017 [ 5505.313740] Workqueue: nf_flow_table_offload flow_offload_work_handler [nf_flow_table] [ 5505.323257] RIP: 0010:delete_node+0x16c/0x180 [ 5505.349862] RSP: 0018:ffffb19184eb7b30 EFLAGS: 00010282 [ 5505.356785] RAX: 0000000000000000 RBX: ffff904ac95b86d8 RCX: ffff904b6f938838 [ 5505.365190] RDX: 0000000000000000 RSI: ffff904ac954b908 RDI: ffff904ac954b920 [ 5505.373628] RBP: ffff904b4ac13060 R08: 0000000000000001 R09: 0000000000000000 [ 5505.382155] R10: 0000000000000000 R11: 0000000000000040 R12: 0000000000000000 [ 5505.390527] R13: ffffb19184eb7bfc R14: ffff904b6bef5800 R15: ffff90482c1203c0 [ 5505.399246] FS: 0000000000000000(0000) GS:ffff904c2fc80000(0000) knlGS:0000000000000000 [ 5505.408621] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 5505.415739] CR2: 00007f5d27006010 CR3: 0000000058c10006 CR4: 00000000001626e0 [ 5505.424547] Call Trace: [ 5505.428429] idr_alloc_u32+0x7b/0xc0 [ 5505.433803] mlx5_tc_ct_entry_add_rule+0xbf/0x950 [mlx5_core] [ 5505.441354] ? mlx5_fc_create+0x23c/0x370 [mlx5_core] [ 5505.448225] mlx5_tc_ct_block_flow_offload+0x874/0x10b0 [mlx5_core] [ 5505.456278] ? mlx5_tc_ct_block_flow_offload+0x63d/0x10b0 [mlx5_core] [ 5505.464532] nf_flow_offload_tuple.isra.21+0xc5/0x140 [nf_flow_table] [ 5505.472286] ? __kmalloc+0x217/0x2f0 [ 5505.477093] ? flow_rule_alloc+0x1c/0x30 [ 5505.482117] flow_offload_work_handler+0x1d0/0x290 [nf_flow_table] [ 5505.489674] ? process_one_work+0x17c/0x580 [ 5505.494922] process_one_work+0x202/0x580 [ 5505.500082] ? process_one_work+0x17c/0x580 [ 5505.505696] worker_thread+0x4c/0x3f0 [ 5505.510458] kthread+0x103/0x140 [ 5505.514989] ? process_one_work+0x580/0x580 [ 5505.520616] ? kthread_bind+0x10/0x10 [ 5505.525837] ret_from_fork+0x3a/0x50 [ 5505.570841] ---[ end trace 07995de9c56d6831 ]--- This happens from parallel deletes/adds to idr, as idr isn't protected. Fix that by using xarray as the tuple_ids allocator instead of idr. Fixes: 7da182a998d6 ("netfilter: flowtable: Use work entry per offload command") Reviewed-by: Roi Dayan Signed-off-by: Paul Blakey Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_ct.c | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 16416eaac39e..a172c5e39710 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "esw/chains.h" #include "en/tc_ct.h" @@ -35,7 +36,7 @@ struct mlx5_tc_ct_priv { struct mlx5_eswitch *esw; const struct net_device *netdev; struct idr fte_ids; - struct idr tuple_ids; + struct xarray tuple_ids; struct rhashtable zone_ht; struct mlx5_flow_table *ct; struct mlx5_flow_table *ct_nat; @@ -238,7 +239,7 @@ mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, mlx5_eswitch_del_offloaded_rule(esw, zone_rule->rule, attr); mlx5_modify_header_dealloc(esw->dev, attr->modify_hdr); - idr_remove(&ct_priv->tuple_ids, zone_rule->tupleid); + xa_erase(&ct_priv->tuple_ids, zone_rule->tupleid); } static void @@ -483,7 +484,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_esw_flow_attr *attr = &zone_rule->attr; struct mlx5_eswitch *esw = ct_priv->esw; struct mlx5_flow_spec *spec = NULL; - u32 tupleid = 1; + u32 tupleid; int err; zone_rule->nat = nat; @@ -493,12 +494,12 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, return -ENOMEM; /* Get tuple unique id */ - err = idr_alloc_u32(&ct_priv->tuple_ids, zone_rule, &tupleid, - TUPLE_ID_MAX, GFP_KERNEL); + err = xa_alloc(&ct_priv->tuple_ids, &tupleid, zone_rule, + XA_LIMIT(1, TUPLE_ID_MAX), GFP_KERNEL); if (err) { netdev_warn(ct_priv->netdev, "Failed to allocate tuple id, err: %d\n", err); - goto err_idr_alloc; + goto err_xa_alloc; } zone_rule->tupleid = tupleid; @@ -539,8 +540,8 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, err_rule: mlx5_modify_header_dealloc(esw->dev, attr->modify_hdr); err_mod_hdr: - idr_remove(&ct_priv->tuple_ids, zone_rule->tupleid); -err_idr_alloc: + xa_erase(&ct_priv->tuple_ids, zone_rule->tupleid); +err_xa_alloc: kfree(spec); return err; } @@ -1299,7 +1300,7 @@ mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv) } idr_init(&ct_priv->fte_ids); - idr_init(&ct_priv->tuple_ids); + xa_init_flags(&ct_priv->tuple_ids, XA_FLAGS_ALLOC1); mutex_init(&ct_priv->control_lock); rhashtable_init(&ct_priv->zone_ht, &zone_params); @@ -1334,7 +1335,7 @@ mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) rhashtable_destroy(&ct_priv->zone_ht); mutex_destroy(&ct_priv->control_lock); - idr_destroy(&ct_priv->tuple_ids); + xa_destroy(&ct_priv->tuple_ids); idr_destroy(&ct_priv->fte_ids); kfree(ct_priv); @@ -1352,7 +1353,7 @@ mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, if (!ct_priv || !tupleid) return true; - zone_rule = idr_find(&ct_priv->tuple_ids, tupleid); + zone_rule = xa_load(&ct_priv->tuple_ids, tupleid); if (!zone_rule) return false; From e7e0004abdd6f83ae4be5613b29ed396beff576c Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 11 Feb 2020 16:02:35 +0200 Subject: [PATCH 027/100] net/mlx5e: Don't trigger IRQ multiple times on XSK wakeup to avoid WQ overruns XSK wakeup function triggers NAPI by posting a NOP WQE to a special XSK ICOSQ. When the application floods the driver with wakeup requests by calling sendto() in a certain pattern that ends up in mlx5e_trigger_irq, the XSK ICOSQ may overflow. Multiple NOPs are not required and won't accelerate the process, so avoid posting a second NOP if there is one already on the way. This way we also avoid increasing the queue size (which might not help anyway). Fixes: db05815b36cb ("net/mlx5e: Add XSK zero-copy support") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 8 +++++--- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 6 +++++- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 12a61bf82c14..23701c0e36ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -367,6 +367,7 @@ enum { MLX5E_SQ_STATE_AM, MLX5E_SQ_STATE_TLS, MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, + MLX5E_SQ_STATE_PENDING_XSK_TX, }; struct mlx5e_sq_wqe_info { @@ -960,7 +961,7 @@ void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); -void mlx5e_poll_ico_cq(struct mlx5e_cq *cq); +int mlx5e_poll_ico_cq(struct mlx5e_cq *cq); bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq); void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c index fe2d596cb361..3bcdb5b2fc20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -33,6 +33,9 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &c->xskicosq.state))) return 0; + if (test_and_set_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->xskicosq.state)) + return 0; + spin_lock(&c->xskicosq_lock); mlx5e_trigger_irq(&c->xskicosq); spin_unlock(&c->xskicosq_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 6173faf542b0..e2beb89c1832 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -589,7 +589,7 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) return !!err; } -void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) +int mlx5e_poll_ico_cq(struct mlx5e_cq *cq) { struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); struct mlx5_cqe64 *cqe; @@ -597,11 +597,11 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) int i; if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) - return; + return 0; cqe = mlx5_cqwq_get_cqe(&cq->wq); if (likely(!cqe)) - return; + return 0; /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), * otherwise a cq overrun may occur @@ -650,6 +650,8 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) sq->cc = sqcc; mlx5_cqwq_update_db_record(&cq->wq); + + return i; } bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 87c49e7a164c..acb20215a33b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -152,7 +152,11 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) mlx5e_post_rx_wqes, rq); if (xsk_open) { - mlx5e_poll_ico_cq(&c->xskicosq.cq); + if (mlx5e_poll_ico_cq(&c->xskicosq.cq)) + /* Don't clear the flag if nothing was polled to prevent + * queueing more WQEs and overflowing XSKICOSQ. + */ + clear_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->xskicosq.state); busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq); busy_xsk |= mlx5e_napi_xsk_post(xsksq, xskrq); } From 96c34151d15779bcac4022e0f10e881ab33fd80f Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 16 Apr 2020 15:04:02 -0700 Subject: [PATCH 028/100] net/mlx5: Kconfig: convert imply usage to weak dependency MLX5_CORE uses the 'imply' keyword to depend on VXLAN, PTP_1588_CLOCK, MLXFW and PCI_HYPERV_INTERFACE. This was useful to force vxlan, ptp, etc.. to be reachable to mlx5 regardless of their config states. Due to the changes in the cited commit below, the semantics of 'imply' was changed to not force any restriction on the implied config. As a result of this change, the compilation of MLX5_CORE=y and VXLAN=m would result in undefined references, as VXLAN now would stay as 'm'. To fix this we change MLX5_CORE to have a weak dependency on these modules/configs and make sure they are reachable, by adding: depend on symbol || !symbol. For example: VXLAN=m MLX5_CORE=y, this will force MLX5_CORE to m Fixes: def2fbffe62c ("kconfig: allow symbols implied by y to become m") Signed-off-by: Saeed Mahameed Cc: Masahiro Yamada Cc: Nicolas Pitre Cc: Arnd Bergmann Reported-by: Randy Dunlap --- drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 312e0a1ad43d..7d69a3061f17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -7,10 +7,10 @@ config MLX5_CORE tristate "Mellanox 5th generation network adapters (ConnectX series) core driver" depends on PCI select NET_DEVLINK - imply PTP_1588_CLOCK - imply VXLAN - imply MLXFW - imply PCI_HYPERV_INTERFACE + depends on VXLAN || !VXLAN + depends on MLXFW || !MLXFW + depends on PTP_1588_CLOCK || !PTP_1588_CLOCK + depends on PCI_HYPERV_INTERFACE || !PCI_HYPERV_INTERFACE default n ---help--- Core driver for low level functionality of the ConnectX-4 and From dcdf4ce0ff4ba206fc362e149c8ae81d6a2f849c Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Wed, 8 Apr 2020 14:51:52 +0800 Subject: [PATCH 029/100] net/mlx5e: Get the latest values from counters in switchdev mode In the switchdev mode, when running "cat /sys/class/net/NIC/statistics/tx_packets", the ppcnt register is accessed to get the latest values. But currently this command can not get the correct values from ppcnt. From firmware manual, before getting the 802_3 counters, the 802_3 data layout should be set to the ppcnt register. When the command "cat /sys/class/net/NIC/statistics/tx_packets" is run, before updating 802_3 data layout with ppcnt register, the monitor counters are tested. The test result will decide the 802_3 data layout is updated or not. Actually the monitor counters do not support to monitor rx/tx stats of 802_3 in switchdev mode. So the rx/tx counters change will not trigger monitor counters. So the 802_3 data layout will not be updated in ppcnt register. Finally this command can not get the latest values from ppcnt register with 802_3 data layout. Fixes: 5c7e8bbb0257 ("net/mlx5e: Use monitor counters for update stats") Signed-off-by: Zhu Yanjun Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f02150a97ac8..b314adf438da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3583,7 +3583,12 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) struct mlx5e_vport_stats *vstats = &priv->stats.vport; struct mlx5e_pport_stats *pstats = &priv->stats.pport; - if (!mlx5e_monitor_counter_supported(priv)) { + /* In switchdev mode, monitor counters doesn't monitor + * rx/tx stats of 802_3. The update stats mechanism + * should keep the 802_3 layout counters updated + */ + if (!mlx5e_monitor_counter_supported(priv) || + mlx5e_is_uplink_rep(priv)) { /* update HW stats in background for next time */ mlx5e_queue_update_stats(priv); } From bc23d0e3f717ced21fbfacab3ab887d55e5ba367 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 16 Apr 2020 10:31:20 +0200 Subject: [PATCH 030/100] cpumap: Avoid warning when CONFIG_DEBUG_PER_CPU_MAPS is enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the kernel is built with CONFIG_DEBUG_PER_CPU_MAPS, the cpumap code can trigger a spurious warning if CONFIG_CPUMASK_OFFSTACK is also set. This happens because in this configuration, NR_CPUS can be larger than nr_cpumask_bits, so the initial check in cpu_map_alloc() is not sufficient to guard against hitting the warning in cpumask_check(). Fix this by explicitly checking the supplied key against the nr_cpumask_bits variable before calling cpu_possible(). Fixes: 6710e1126934 ("bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP") Reported-by: Xiumei Mu Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Tested-by: Xiumei Mu Acked-by: Jesper Dangaard Brouer Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200416083120.453718-1-toke@redhat.com --- kernel/bpf/cpumap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 70f71b154fa5..3fe0b006d2d2 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -469,7 +469,7 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value, return -EOVERFLOW; /* Make sure CPU is a valid possible cpu */ - if (!cpu_possible(key_cpu)) + if (key_cpu >= nr_cpumask_bits || !cpu_possible(key_cpu)) return -ENODEV; if (qsize == 0) { From 6e7e63cbb023976d828cdb22422606bf77baa8a9 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 17 Apr 2020 02:00:06 +0200 Subject: [PATCH 031/100] bpf: Forbid XADD on spilled pointers for unprivileged users When check_xadd() verifies an XADD operation on a pointer to a stack slot containing a spilled pointer, check_stack_read() verifies that the read, which is part of XADD, is valid. However, since the placeholder value -1 is passed as `value_regno`, check_stack_read() can only return a binary decision and can't return the type of the value that was read. The intent here is to verify whether the value read from the stack slot may be used as a SCALAR_VALUE; but since check_stack_read() doesn't check the type, and the type information is lost when check_stack_read() returns, this is not enforced, and a malicious user can abuse XADD to leak spilled kernel pointers. Fix it by letting check_stack_read() verify that the value is usable as a SCALAR_VALUE if no type information is passed to the caller. To be able to use __is_pointer_value() in check_stack_read(), move it up. Fix up the expected unprivileged error message for a BPF selftest that, until now, assumed that unprivileged users can use XADD on stack-spilled pointers. This also gives us a test for the behavior introduced in this patch for free. In theory, this could also be fixed by forbidding XADD on stack spills entirely, since XADD is a locked operation (for operations on memory with concurrency) and there can't be any concurrency on the BPF stack; but Alexei has said that he wants to keep XADD on stack slots working to avoid changes to the test suite [1]. The following BPF program demonstrates how to leak a BPF map pointer as an unprivileged user using this bug: // r7 = map_pointer BPF_LD_MAP_FD(BPF_REG_7, small_map), // r8 = launder(map_pointer) BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_7, -8), BPF_MOV64_IMM(BPF_REG_1, 0), ((struct bpf_insn) { .code = BPF_STX | BPF_DW | BPF_XADD, .dst_reg = BPF_REG_FP, .src_reg = BPF_REG_1, .off = -8 }), BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_FP, -8), // store r8 into map BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_7), BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -4), BPF_ST_MEM(BPF_W, BPF_REG_ARG2, 0, 0), BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN() [1] https://lore.kernel.org/bpf/20200416211116.qxqcza5vo2ddnkdq@ast-mbp.dhcp.thefacebook.com/ Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)") Signed-off-by: Jann Horn Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200417000007.10734-1-jannh@google.com --- kernel/bpf/verifier.c | 28 +++++++++++++------ .../bpf/verifier/value_illegal_alu.c | 1 + 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 38cfcf701eeb..9e92d3d5ffd1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2118,6 +2118,15 @@ static bool register_is_const(struct bpf_reg_state *reg) return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off); } +static bool __is_pointer_value(bool allow_ptr_leaks, + const struct bpf_reg_state *reg) +{ + if (allow_ptr_leaks) + return false; + + return reg->type != SCALAR_VALUE; +} + static void save_register_state(struct bpf_func_state *state, int spi, struct bpf_reg_state *reg) { @@ -2308,6 +2317,16 @@ static int check_stack_read(struct bpf_verifier_env *env, * which resets stack/reg liveness for state transitions */ state->regs[value_regno].live |= REG_LIVE_WRITTEN; + } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) { + /* If value_regno==-1, the caller is asking us whether + * it is acceptable to use this value as a SCALAR_VALUE + * (e.g. for XADD). + * We must not allow unprivileged callers to do that + * with spilled pointers. + */ + verbose(env, "leaking pointer from stack off %d\n", + off); + return -EACCES; } mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); } else { @@ -2673,15 +2692,6 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, return -EACCES; } -static bool __is_pointer_value(bool allow_ptr_leaks, - const struct bpf_reg_state *reg) -{ - if (allow_ptr_leaks) - return false; - - return reg->type != SCALAR_VALUE; -} - static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) { return cur_regs(env) + regno; diff --git a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c index 7f6c232cd842..ed1c2cea1dea 100644 --- a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c +++ b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c @@ -88,6 +88,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, + .errstr_unpriv = "leaking pointer from stack off -8", .errstr = "R0 invalid mem access 'inv'", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, From 8ff3571f7e1bf3f293cc5e3dc14f2943f4fa7fcf Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 17 Apr 2020 02:00:07 +0200 Subject: [PATCH 032/100] bpf: Fix handling of XADD on BTF memory check_xadd() can cause check_ptr_to_btf_access() to be executed with atype==BPF_READ and value_regno==-1 (meaning "just check whether the access is okay, don't tell me what type it will result in"). Handle that case properly and skip writing type information, instead of indexing into the registers at index -1 and writing into out-of-bounds memory. Note that at least at the moment, you can't actually write through a BTF pointer, so check_xadd() will reject the program after calling check_ptr_to_btf_access with atype==BPF_WRITE; but that's after the verifier has already corrupted memory. This patch assumes that BTF pointers are not available in unprivileged programs. Fixes: 9e15db66136a ("bpf: Implement accurate raw_tp context access via BTF") Signed-off-by: Jann Horn Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200417000007.10734-2-jannh@google.com --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9e92d3d5ffd1..9382609147f5 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3099,7 +3099,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, if (ret < 0) return ret; - if (atype == BPF_READ) { + if (atype == BPF_READ && value_regno >= 0) { if (ret == SCALAR_VALUE) { mark_reg_unknown(env, regs, value_regno); return 0; From aee194b14dd2b2bde6252b3acf57d36dccfc743a Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Sat, 18 Apr 2020 16:26:53 -0700 Subject: [PATCH 033/100] bpf, x86: Fix encoding for lower 8-bit registers in BPF_STX BPF_B This patch fixes an encoding bug in emit_stx for BPF_B when the source register is BPF_REG_FP. The current implementation for BPF_STX BPF_B in emit_stx saves one REX byte when the operands can be encoded using Mod-R/M alone. The lower 8 bits of registers %rax, %rbx, %rcx, and %rdx can be accessed without using a REX prefix via %al, %bl, %cl, and %dl, respectively. Other registers, (e.g., %rsi, %rdi, %rbp, %rsp) require a REX prefix to use their 8-bit equivalents (%sil, %dil, %bpl, %spl). The current code checks if the source for BPF_STX BPF_B is BPF_REG_1 or BPF_REG_2 (which map to %rdi and %rsi), in which case it emits the required REX prefix. However, it misses the case when the source is BPF_REG_FP (mapped to %rbp). The result is that BPF_STX BPF_B with BPF_REG_FP as the source operand will read from register %ch instead of the correct %bpl. This patch fixes the problem by fixing and refactoring the check on which registers need the extra REX byte. Since no BPF registers map to %rsp, there is no need to handle %spl. Fixes: 622582786c9e0 ("net: filter: x86: internal BPF JIT") Signed-off-by: Xi Wang Signed-off-by: Luke Nelson Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200418232655.23870-1-luke.r.nels@gmail.com --- arch/x86/net/bpf_jit_comp.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 5ea7c2cf7ab4..42b6709e6dc7 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -158,6 +158,19 @@ static bool is_ereg(u32 reg) BIT(BPF_REG_AX)); } +/* + * is_ereg_8l() == true if BPF register 'reg' is mapped to access x86-64 + * lower 8-bit registers dil,sil,bpl,spl,r8b..r15b, which need extra byte + * of encoding. al,cl,dl,bl have simpler encoding. + */ +static bool is_ereg_8l(u32 reg) +{ + return is_ereg(reg) || + (1 << reg) & (BIT(BPF_REG_1) | + BIT(BPF_REG_2) | + BIT(BPF_REG_FP)); +} + static bool is_axreg(u32 reg) { return reg == BPF_REG_0; @@ -598,9 +611,8 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) switch (size) { case BPF_B: /* Emit 'mov byte ptr [rax + off], al' */ - if (is_ereg(dst_reg) || is_ereg(src_reg) || - /* We have to add extra byte for x86 SIL, DIL regs */ - src_reg == BPF_REG_1 || src_reg == BPF_REG_2) + if (is_ereg(dst_reg) || is_ereg_8l(src_reg)) + /* Add extra byte for eregs or SIL,DIL,BPL in src_reg */ EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); else EMIT1(0x88); From d2b6c3ab70dbc0069a69c57edd8c96f365f06b7c Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Sat, 18 Apr 2020 16:26:54 -0700 Subject: [PATCH 034/100] bpf, selftests: Add test for BPF_STX BPF_B storing R10 This patch adds a test to test_verifier that writes the lower 8 bits of R10 (aka FP) using BPF_B to an array map and reads the result back. The expected behavior is that the result should be the same as first copying R10 to R9, and then storing / loading the lower 8 bits of R9. This test catches a bug that was present in the x86-64 JIT that caused an incorrect encoding for BPF_STX BPF_B when the source operand is R10. Signed-off-by: Xi Wang Signed-off-by: Luke Nelson Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200418232655.23870-2-luke.r.nels@gmail.com --- .../selftests/bpf/verifier/stack_ptr.c | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/stack_ptr.c b/tools/testing/selftests/bpf/verifier/stack_ptr.c index 7276620ef242..8bfeb77c60bd 100644 --- a/tools/testing/selftests/bpf/verifier/stack_ptr.c +++ b/tools/testing/selftests/bpf/verifier/stack_ptr.c @@ -315,3 +315,43 @@ }, .result = ACCEPT, }, +{ + "store PTR_TO_STACK in R10 to array map using BPF_B", + .insns = { + /* Load pointer to map. */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + /* Copy R10 to R9. */ + BPF_MOV64_REG(BPF_REG_9, BPF_REG_10), + /* Pollute other registers with unaligned values. */ + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_MOV64_IMM(BPF_REG_3, -1), + BPF_MOV64_IMM(BPF_REG_4, -1), + BPF_MOV64_IMM(BPF_REG_5, -1), + BPF_MOV64_IMM(BPF_REG_6, -1), + BPF_MOV64_IMM(BPF_REG_7, -1), + BPF_MOV64_IMM(BPF_REG_8, -1), + /* Store both R9 and R10 with BPF_B and read back. */ + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_10, 0), + BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_1, 0), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_9, 0), + BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_1, 0), + /* Should read back as same value. */ + BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 42, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, From a176e114ace4cca7df0e34b4bd90c301cdc6d653 Mon Sep 17 00:00:00 2001 From: Chris Rorvick Date: Fri, 17 Apr 2020 09:45:58 +0200 Subject: [PATCH 035/100] iwlwifi: actually check allocated conf_tlv pointer Commit 71bc0334a637 ("iwlwifi: check allocated pointer when allocating conf_tlvs") attempted to fix a typoe introduced by commit 17b809c9b22e ("iwlwifi: dbg: move debug data to a struct") but does not implement the check correctly. Fixes: 71bc0334a637 ("iwlwifi: check allocated pointer when allocating conf_tlvs") Tweeted-by: @grsecurity Signed-off-by: Chris Rorvick Signed-off-by: Sedat Dilek Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200417074558.12316-1-sedat.dilek@gmail.com --- drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index ff52e69c1c80..eeb750bdbda1 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -1467,7 +1467,7 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context) kmemdup(pieces->dbg_conf_tlv[i], pieces->dbg_conf_tlv_len[i], GFP_KERNEL); - if (!pieces->dbg_conf_tlv[i]) + if (!drv->fw.dbg.conf_tlv[i]) goto out_free_fw; } } From b98b33d5560a2d940f3b80f6768a6177bf3dfbc0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 17 Apr 2020 10:08:09 +0300 Subject: [PATCH 036/100] iwlwifi: pcie: actually release queue memory in TVQM The iwl_trans_pcie_dyn_txq_free() function only releases the frames that may be left on the queue by calling iwl_pcie_gen2_txq_unmap(), but doesn't actually free the DMA ring or byte-count tables for the queue. This leads to pretty large memory leaks (at least before my queue size improvements), in particular in monitor/sniffer mode on channel hopping since this happens on every channel change. This was also now more evident after the move to a DMA pool for the byte count tables, showing messages such as BUG iwlwifi:bc (...): Objects remaining in iwlwifi:bc on __kmem_cache_shutdown() This fixes https://bugzilla.kernel.org/show_bug.cgi?id=206811. Signed-off-by: Johannes Berg Fixes: 6b35ff91572f ("iwlwifi: pcie: introduce a000 TX queues management") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20200417100405.f5f4c4193ec1.Id5feebc9b4318041913a9c89fc1378bb5454292c@changeid --- drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 86fc00167817..9664dbc70ef1 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -1418,6 +1418,9 @@ void iwl_trans_pcie_dyn_txq_free(struct iwl_trans *trans, int queue) iwl_pcie_gen2_txq_unmap(trans, queue); + iwl_pcie_gen2_txq_free_memory(trans, trans_pcie->txq[queue]); + trans_pcie->txq[queue] = NULL; + IWL_DEBUG_TX_QUEUES(trans, "Deactivate queue %d\n", queue); } From 290d5e4951832e39d10f4184610dbf09038f8483 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Fri, 17 Apr 2020 10:08:10 +0300 Subject: [PATCH 037/100] iwlwifi: mvm: beacon statistics shouldn't go backwards We reset statistics also in case that we didn't reassoc so in this cases keep last beacon counter. Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Mordechay Goodstein Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20200417100405.1f9142751fbc.Ifbfd0f928a0a761110b8f4f2ca5483a61fb21131@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/rx.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c index 5ee33c8ae9d2..77b8def26edb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c @@ -8,7 +8,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 - 2019 Intel Corporation + * Copyright(c) 2018 - 2020 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -31,7 +31,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 - 2019 Intel Corporation + * Copyright(c) 2018 - 2020 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -566,6 +566,7 @@ void iwl_mvm_rx_rx_mpdu(struct iwl_mvm *mvm, struct napi_struct *napi, struct iwl_mvm_stat_data { struct iwl_mvm *mvm; + __le32 flags; __le32 mac_id; u8 beacon_filter_average_energy; void *general; @@ -606,6 +607,13 @@ static void iwl_mvm_stat_iterator(void *_data, u8 *mac, -general->beacon_average_energy[vif_id]; } + /* make sure that beacon statistics don't go backwards with TCM + * request to clear statistics + */ + if (le32_to_cpu(data->flags) & IWL_STATISTICS_REPLY_FLG_CLEAR) + mvmvif->beacon_stats.accu_num_beacons += + mvmvif->beacon_stats.num_beacons; + if (mvmvif->id != id) return; @@ -763,6 +771,7 @@ void iwl_mvm_handle_rx_statistics(struct iwl_mvm *mvm, flags = stats->flag; } + data.flags = flags; iwl_mvm_rx_stats_check_trigger(mvm, pkt); From d8d663970234fe885f29edf4f06394d0928c89f4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 17 Apr 2020 10:08:11 +0300 Subject: [PATCH 038/100] iwlwifi: pcie: indicate correct RB size to device In the context info, we need to indicate the correct RB size to the device so that it will not think we have 4k when we only use 2k. This seems to not have caused any issues right now, likely because the hardware no longer supports putting multiple entries into a single RB, and practically all of the entries should be smaller than 2k. Nevertheless, it's a bug, and we must advertise the right size to the device. Note that right now we can only tell it 2k vs. 4k, so for the cases where we have more, still use 4k. This needs to be fixed by the firmware first. Signed-off-by: Johannes Berg Fixes: cfdc20efebdc ("iwlwifi: pcie: use partial pages if applicable") Cc: stable@vger.kernel.org # v5.6 Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20200417100405.ae6cd345764f.I0985c55223decf70182b9ef1d8edf4179f537853@changeid --- .../intel/iwlwifi/pcie/ctxt-info-gen3.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c index 01f248ba8fec..9d5b1e51b50d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c @@ -129,6 +129,18 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans, int cmdq_size = max_t(u32, IWL_CMD_QUEUE_SIZE, trans->cfg->min_txq_size); + switch (trans_pcie->rx_buf_size) { + case IWL_AMSDU_DEF: + return -EINVAL; + case IWL_AMSDU_2K: + break; + case IWL_AMSDU_4K: + case IWL_AMSDU_8K: + case IWL_AMSDU_12K: + control_flags |= IWL_PRPH_SCRATCH_RB_SIZE_4K; + break; + } + /* Allocate prph scratch */ prph_scratch = dma_alloc_coherent(trans->dev, sizeof(*prph_scratch), &trans_pcie->prph_scratch_dma_addr, @@ -143,10 +155,8 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans, cpu_to_le16((u16)iwl_read32(trans, CSR_HW_REV)); prph_sc_ctrl->version.size = cpu_to_le16(sizeof(*prph_scratch) / 4); - control_flags = IWL_PRPH_SCRATCH_RB_SIZE_4K | - IWL_PRPH_SCRATCH_MTR_MODE | - (IWL_PRPH_MTR_FORMAT_256B & - IWL_PRPH_SCRATCH_MTR_FORMAT); + control_flags |= IWL_PRPH_SCRATCH_MTR_MODE; + control_flags |= IWL_PRPH_MTR_FORMAT_256B & IWL_PRPH_SCRATCH_MTR_FORMAT; /* initialize RX default queue */ prph_sc_ctrl->rbd_cfg.free_rbd_addr = From e5b72e3bc4763152e24bf4b8333bae21cc526c56 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 17 Apr 2020 10:08:12 +0300 Subject: [PATCH 039/100] iwlwifi: mvm: limit maximum queue appropriately Due to some hardware issues, queue 31 isn't usable on devices that have 32 queues (7000, 8000, 9000 families), which is correctly reflected in the configuration and TX queue initialization. However, the firmware API and queue allocation code assumes that there are 32 queues, and if something actually attempts to use #31 this leads to a NULL-pointer dereference since it's not allocated. Fix this by limiting to 31 in the IWL_MVM_DQA_MAX_DATA_QUEUE, and also add some code to catch this earlier in the future, if the configuration changes perhaps. Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20200417100405.98a79be2db6a.I3a4af6b03b87a6bc18db9b1ff9a812f397bee1fc@changeid --- drivers/net/wireless/intel/iwlwifi/fw/api/txq.h | 6 +++--- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 5 +++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h index 73196cbc7fbe..75d958bab0e3 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h @@ -8,7 +8,7 @@ * Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2019 Intel Corporation + * Copyright(c) 2019 - 2020 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -31,7 +31,7 @@ * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2019 Intel Corporation + * Copyright(c) 2019 - 2020 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -99,7 +99,7 @@ enum iwl_mvm_dqa_txq { IWL_MVM_DQA_MAX_MGMT_QUEUE = 8, IWL_MVM_DQA_AP_PROBE_RESP_QUEUE = 9, IWL_MVM_DQA_MIN_DATA_QUEUE = 10, - IWL_MVM_DQA_MAX_DATA_QUEUE = 31, + IWL_MVM_DQA_MAX_DATA_QUEUE = 30, }; enum iwl_mvm_tx_fifo { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 64ef3f3ba23b..251d6fbb1da5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -722,6 +722,11 @@ static int iwl_mvm_find_free_queue(struct iwl_mvm *mvm, u8 sta_id, lockdep_assert_held(&mvm->mutex); + if (WARN(maxq >= mvm->trans->trans_cfg->base_params->num_of_queues, + "max queue %d >= num_of_queues (%d)", maxq, + mvm->trans->trans_cfg->base_params->num_of_queues)) + maxq = mvm->trans->trans_cfg->base_params->num_of_queues - 1; + /* This should not be hit with new TX path */ if (WARN_ON(iwl_mvm_has_new_tx_api(mvm))) return -ENOSPC; From 38af8d5a90a8c3b41ff0484855e24bd55b43ce9d Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Fri, 17 Apr 2020 10:08:13 +0300 Subject: [PATCH 040/100] iwlwifi: mvm: Do not declare support for ACK Enabled Aggregation As this was not supposed to be enabled to begin with. Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20200417100405.53dbc3c6c36b.Idfe118546b92cc31548b2211472a5303c7de5909@changeid --- drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 9e9810d2b262..ccf0bc16465d 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -532,8 +532,7 @@ static struct ieee80211_sband_iftype_data iwl_he_capa[] = { IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US | IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8, .mac_cap_info[2] = - IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP | - IEEE80211_HE_MAC_CAP2_ACK_EN, + IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP, .mac_cap_info[3] = IEEE80211_HE_MAC_CAP3_OMI_CONTROL | IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2, @@ -617,8 +616,7 @@ static struct ieee80211_sband_iftype_data iwl_he_capa[] = { IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US | IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8, .mac_cap_info[2] = - IEEE80211_HE_MAC_CAP2_BSR | - IEEE80211_HE_MAC_CAP2_ACK_EN, + IEEE80211_HE_MAC_CAP2_BSR, .mac_cap_info[3] = IEEE80211_HE_MAC_CAP3_OMI_CONTROL | IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2, From e6d419f943318e2b903e380dfd52a8dda6db3021 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 17 Apr 2020 10:08:14 +0300 Subject: [PATCH 041/100] iwlwifi: mvm: fix inactive TID removal return value usage The function iwl_mvm_remove_inactive_tids() returns bool, so we should just check "if (ret)", not "if (ret >= 0)" (which would do nothing useful here). We obviously therefore cannot use the return value of the function for the free_queue, we need to use the queue (i) we're currently dealing with instead. Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20200417100405.9d862ed72535.I9e27ccc3ee3c8855fc13682592b571581925dfbd@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 251d6fbb1da5..56ae72debb96 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1169,9 +1169,9 @@ static int iwl_mvm_inactivity_check(struct iwl_mvm *mvm, u8 alloc_for_sta) inactive_tid_bitmap, &unshare_queues, &changetid_queues); - if (ret >= 0 && free_queue < 0) { + if (ret && free_queue < 0) { queue_owner = sta; - free_queue = ret; + free_queue = i; } /* only unlock sta lock - we still need the queue info lock */ spin_unlock_bh(&mvmsta->lock); From 1edd56e69dca9098e63d8d5815aeb83eeeb10a79 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 17 Apr 2020 13:37:11 +0300 Subject: [PATCH 042/100] iwlwifi: fix WGDS check when WRDS is disabled In the reference BIOS implementation, WRDS can be disabled without disabling WGDS. And this happens in most cases where WRDS is disabled, causing the WGDS without WRDS check and issue an error. To avoid this issue, we change the check so that we only considered it an error if the WRDS entry doesn't exist. If the entry (or the selected profile is disabled for any other reason), we just silently ignore WGDS. Cc: stable@vger.kernel.org # 4.14+ Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=205513 Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20200417133700.72ad25c3998b.I875d935cefd595ed7f640ddcfc7bc802627d2b7f@changeid --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 9 +++++-- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 25 +++++++++----------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index ba2aff3af0fe..e3a33388be70 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -296,9 +296,14 @@ int iwl_sar_select_profile(struct iwl_fw_runtime *fwrt, if (!prof->enabled) { IWL_DEBUG_RADIO(fwrt, "SAR profile %d is disabled.\n", profs[i]); - /* if one of the profiles is disabled, we fail all */ - return -ENOENT; + /* + * if one of the profiles is disabled, we + * ignore all of them and return 1 to + * differentiate disabled from other failures. + */ + return 1; } + IWL_DEBUG_INFO(fwrt, "SAR EWRD: chain %d profile index %d\n", i, profs[i]); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index a4038f289ab3..e67c452fa92c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -727,6 +727,7 @@ int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b) struct iwl_dev_tx_power_cmd_v4 v4; } cmd; + int ret; u16 len = 0; cmd.v5.v3.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_CHAINS); @@ -741,9 +742,14 @@ int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b) len = sizeof(cmd.v4.v3); - if (iwl_sar_select_profile(&mvm->fwrt, cmd.v5.v3.per_chain_restriction, - prof_a, prof_b)) - return -ENOENT; + ret = iwl_sar_select_profile(&mvm->fwrt, + cmd.v5.v3.per_chain_restriction, + prof_a, prof_b); + + /* return on error or if the profile is disabled (positive number) */ + if (ret) + return ret; + IWL_DEBUG_RADIO(mvm, "Sending REDUCE_TX_POWER_CMD per chain\n"); return iwl_mvm_send_cmd_pdu(mvm, REDUCE_TX_POWER_CMD, 0, len, &cmd); } @@ -1034,16 +1040,7 @@ static int iwl_mvm_sar_init(struct iwl_mvm *mvm) "EWRD SAR BIOS table invalid or unavailable. (%d)\n", ret); - ret = iwl_mvm_sar_select_profile(mvm, 1, 1); - /* - * If we don't have profile 0 from BIOS, just skip it. This - * means that SAR Geo will not be enabled either, even if we - * have other valid profiles. - */ - if (ret == -ENOENT) - return 1; - - return ret; + return iwl_mvm_sar_select_profile(mvm, 1, 1); } static int iwl_mvm_load_rt_fw(struct iwl_mvm *mvm) @@ -1272,7 +1269,7 @@ int iwl_mvm_up(struct iwl_mvm *mvm) ret = iwl_mvm_sar_init(mvm); if (ret == 0) { ret = iwl_mvm_sar_geo_init(mvm); - } else if (ret > 0 && !iwl_sar_get_wgds_table(&mvm->fwrt)) { + } else if (ret == -ENOENT && !iwl_sar_get_wgds_table(&mvm->fwrt)) { /* * If basic SAR is not available, we check for WGDS, * which should *not* be available either. If it is From 10e41f34a019b1e4487de2c2941fbc212404c3fe Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Sat, 18 Apr 2020 15:40:57 +0800 Subject: [PATCH 043/100] MAINTAINERS: update mt76 reviewers Roy no longer works here. Time to say goodbye, my friend. Signed-off-by: Ryder Lee Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/c171e0dfce9f2dad5ca6935eaf6004117f82e259.1587195398.git.ryder.lee@mediatek.com --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8536659c6a5b..2ba87d71abe6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10694,7 +10694,6 @@ MEDIATEK MT76 WIRELESS LAN DRIVER M: Felix Fietkau M: Lorenzo Bianconi R: Ryder Lee -R: Roy Luo L: linux-wireless@vger.kernel.org S: Maintained F: drivers/net/wireless/mediatek/mt76/ From 526f3d96b8f83b1b13d73bd0b5c79cc2c487ec8e Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 20 Apr 2020 09:04:24 +0200 Subject: [PATCH 044/100] cgroup, netclassid: remove double cond_resched Commit 018d26fcd12a ("cgroup, netclassid: periodically release file_lock on classid") added a second cond_resched to write_classid indirectly by update_classid_task. Remove the one in write_classid. Signed-off-by: Jiri Slaby Cc: Dmitry Yakunin Cc: Konstantin Khlebnikov Cc: David S. Miller Signed-off-by: David S. Miller --- net/core/netclassid_cgroup.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index b4c87fe31be2..41b24cd31562 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -127,10 +127,8 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, cs->classid = (u32)value; css_task_iter_start(css, 0, &it); - while ((p = css_task_iter_next(&it))) { + while ((p = css_task_iter_next(&it))) update_classid_task(p, cs->classid); - cond_resched(); - } css_task_iter_end(&it); return 0; From e9a9e519941c2a30f1ac973f84a555b86132265d Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Mon, 20 Apr 2020 20:34:31 +0800 Subject: [PATCH 045/100] ptp: Remove unneeded conversion to bool The '==' expression itself is bool, no need to convert it to bool again. This fixes the following coccicheck warning: drivers/ptp/ptp_ines.c:403:55-60: WARNING: conversion to bool not needed here drivers/ptp/ptp_ines.c:404:55-60: WARNING: conversion to bool not needed here Signed-off-by: Jason Yan Signed-off-by: David S. Miller --- drivers/ptp/ptp_ines.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ptp/ptp_ines.c b/drivers/ptp/ptp_ines.c index dfda54cbd866..52d77db39829 100644 --- a/drivers/ptp/ptp_ines.c +++ b/drivers/ptp/ptp_ines.c @@ -400,8 +400,8 @@ static int ines_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) ines_write32(port, ts_stat_rx, ts_stat_rx); ines_write32(port, ts_stat_tx, ts_stat_tx); - port->rxts_enabled = ts_stat_rx == TS_ENABLE ? true : false; - port->txts_enabled = ts_stat_tx == TS_ENABLE ? true : false; + port->rxts_enabled = ts_stat_rx == TS_ENABLE; + port->txts_enabled = ts_stat_tx == TS_ENABLE; spin_unlock_irqrestore(&port->lock, flags); From 7ff4f0631faa45d90775b2468a8d5ef580c7d359 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Mon, 20 Apr 2020 20:34:48 +0800 Subject: [PATCH 046/100] i40e: Remove unneeded conversion to bool The '==' expression itself is bool, no need to convert it to bool again. This fixes the following coccicheck warning: drivers/net/ethernet/intel/i40e/i40e_main.c:1614:52-57: WARNING: conversion to bool not needed here drivers/net/ethernet/intel/i40e/i40e_main.c:11439:52-57: WARNING: conversion to bool not needed here Signed-off-by: Jason Yan Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 8c3e753bfb9d..2a037ec244b9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1611,7 +1611,7 @@ static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, } } if (lut) { - bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false; + bool pf_lut = vsi->type == I40E_VSI_MAIN; ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size); if (ret) { @@ -11436,7 +11436,7 @@ static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed, } if (lut) { - bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false; + bool pf_lut = vsi->type == I40E_VSI_MAIN; ret = i40e_aq_get_rss_lut(hw, vsi->id, pf_lut, lut, lut_size); if (ret) { From c95576a34cf797f8ad34032a28b93bb7314a636d Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Mon, 20 Apr 2020 20:35:06 +0800 Subject: [PATCH 047/100] e1000: remove unneeded conversion to bool The '==' expression itself is bool, no need to convert it to bool again. This fixes the following coccicheck warning: drivers/net/ethernet/intel/e1000/e1000_main.c:1479:44-49: WARNING: conversion to bool not needed here Signed-off-by: Jason Yan Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/e1000/e1000_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index f7103356ef56..0d51cbc88028 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -1476,7 +1476,7 @@ static bool e1000_check_64k_bound(struct e1000_adapter *adapter, void *start, if (hw->mac_type == e1000_82545 || hw->mac_type == e1000_ce4100 || hw->mac_type == e1000_82546) { - return ((begin ^ (end - 1)) >> 16) != 0 ? false : true; + return ((begin ^ (end - 1)) >> 16) == 0; } return true; From 4dee15b4fd0d61ec6bbd179238191e959d34cf7a Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 20 Apr 2020 13:29:40 +0000 Subject: [PATCH 048/100] macvlan: fix null dereference in macvlan_device_event() In the macvlan_device_event(), the list_first_entry_or_null() is used. This function could return null pointer if there is no node. But, the macvlan module doesn't check the null pointer. So, null-ptr-deref would occur. bond0 | +----+-----+ | | macvlan0 macvlan1 | | dummy0 dummy1 The problem scenario. If dummy1 is removed, 1. ->dellink() of dummy1 is called. 2. NETDEV_UNREGISTER of dummy1 notification is sent to macvlan module. 3. ->dellink() of macvlan1 is called. 4. NETDEV_UNREGISTER of macvlan1 notification is sent to bond module. 5. __bond_release_one() is called and it internally calls dev_set_mac_address(). 6. dev_set_mac_address() calls the ->ndo_set_mac_address() of macvlan1, which is macvlan_set_mac_address(). 7. macvlan_set_mac_address() calls the dev_set_mac_address() with dummy1. 8. NETDEV_CHANGEADDR of dummy1 is sent to macvlan module. 9. In the macvlan_device_event(), it calls list_first_entry_or_null(). At this point, dummy1 and macvlan1 were removed. So, list_first_entry_or_null() will return NULL. Test commands: ip netns add nst ip netns exec nst ip link add bond0 type bond for i in {0..10} do ip netns exec nst ip link add dummy$i type dummy ip netns exec nst ip link add macvlan$i link dummy$i \ type macvlan mode passthru ip netns exec nst ip link set macvlan$i master bond0 done ip netns del nst Splat looks like: [ 40.585687][ T146] general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP DEI [ 40.587249][ T146] KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] [ 40.588342][ T146] CPU: 1 PID: 146 Comm: kworker/u8:2 Not tainted 5.7.0-rc1+ #532 [ 40.589299][ T146] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 40.590469][ T146] Workqueue: netns cleanup_net [ 40.591045][ T146] RIP: 0010:macvlan_device_event+0x4e2/0x900 [macvlan] [ 40.591905][ T146] Code: 00 00 00 00 00 fc ff df 80 3c 06 00 0f 85 45 02 00 00 48 89 da 48 b8 00 00 00 00 00 fc ff d2 [ 40.594126][ T146] RSP: 0018:ffff88806116f4a0 EFLAGS: 00010246 [ 40.594783][ T146] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 40.595653][ T146] RDX: 0000000000000000 RSI: ffff88806547ddd8 RDI: ffff8880540f1360 [ 40.596495][ T146] RBP: ffff88804011a808 R08: fffffbfff4fb8421 R09: fffffbfff4fb8421 [ 40.597377][ T146] R10: ffffffffa7dc2107 R11: 0000000000000000 R12: 0000000000000008 [ 40.598186][ T146] R13: ffff88804011a000 R14: ffff8880540f1000 R15: 1ffff1100c22de9a [ 40.599012][ T146] FS: 0000000000000000(0000) GS:ffff888067800000(0000) knlGS:0000000000000000 [ 40.600004][ T146] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 40.600665][ T146] CR2: 00005572d3a807b8 CR3: 000000005fcf4003 CR4: 00000000000606e0 [ 40.601485][ T146] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 40.602461][ T146] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 40.603443][ T146] Call Trace: [ 40.603871][ T146] ? nf_tables_dump_setelem+0xa0/0xa0 [nf_tables] [ 40.604587][ T146] ? macvlan_uninit+0x100/0x100 [macvlan] [ 40.605212][ T146] ? __module_text_address+0x13/0x140 [ 40.605842][ T146] notifier_call_chain+0x90/0x160 [ 40.606477][ T146] dev_set_mac_address+0x28e/0x3f0 [ 40.607117][ T146] ? netdev_notify_peers+0xc0/0xc0 [ 40.607762][ T146] ? __module_text_address+0x13/0x140 [ 40.608440][ T146] ? notifier_call_chain+0x90/0x160 [ 40.609097][ T146] ? dev_set_mac_address+0x1f0/0x3f0 [ 40.609758][ T146] dev_set_mac_address+0x1f0/0x3f0 [ 40.610402][ T146] ? __local_bh_enable_ip+0xe9/0x1b0 [ 40.611071][ T146] ? bond_hw_addr_flush+0x77/0x100 [bonding] [ 40.611823][ T146] ? netdev_notify_peers+0xc0/0xc0 [ 40.612461][ T146] ? bond_hw_addr_flush+0x77/0x100 [bonding] [ 40.613213][ T146] ? bond_hw_addr_flush+0x77/0x100 [bonding] [ 40.613963][ T146] ? __local_bh_enable_ip+0xe9/0x1b0 [ 40.614631][ T146] ? bond_time_in_interval.isra.31+0x90/0x90 [bonding] [ 40.615484][ T146] ? __bond_release_one+0x9f0/0x12c0 [bonding] [ 40.616230][ T146] __bond_release_one+0x9f0/0x12c0 [bonding] [ 40.616949][ T146] ? bond_enslave+0x47c0/0x47c0 [bonding] [ 40.617642][ T146] ? lock_downgrade+0x730/0x730 [ 40.618218][ T146] ? check_flags.part.42+0x450/0x450 [ 40.618850][ T146] ? __mutex_unlock_slowpath+0xd0/0x670 [ 40.619519][ T146] ? trace_hardirqs_on+0x30/0x180 [ 40.620117][ T146] ? wait_for_completion+0x250/0x250 [ 40.620754][ T146] bond_netdev_event+0x822/0x970 [bonding] [ 40.621460][ T146] ? __module_text_address+0x13/0x140 [ 40.622097][ T146] notifier_call_chain+0x90/0x160 [ 40.622806][ T146] rollback_registered_many+0x660/0xcf0 [ 40.623522][ T146] ? netif_set_real_num_tx_queues+0x780/0x780 [ 40.624290][ T146] ? notifier_call_chain+0x90/0x160 [ 40.624957][ T146] ? netdev_upper_dev_unlink+0x114/0x180 [ 40.625686][ T146] ? __netdev_adjacent_dev_unlink_neighbour+0x30/0x30 [ 40.626421][ T146] ? mutex_is_locked+0x13/0x50 [ 40.627016][ T146] ? unregister_netdevice_queue+0xf2/0x240 [ 40.627663][ T146] unregister_netdevice_many.part.134+0x13/0x1b0 [ 40.628362][ T146] default_device_exit_batch+0x2d9/0x390 [ 40.628987][ T146] ? unregister_netdevice_many+0x40/0x40 [ 40.629615][ T146] ? dev_change_net_namespace+0xcb0/0xcb0 [ 40.630279][ T146] ? prepare_to_wait_exclusive+0x2e0/0x2e0 [ 40.630943][ T146] ? ops_exit_list.isra.9+0x97/0x140 [ 40.631554][ T146] cleanup_net+0x441/0x890 [ ... ] Fixes: e289fd28176b ("macvlan: fix the problem when mac address changes for passthru mode") Reported-by: syzbot+5035b1f9dc7ea4558d5a@syzkaller.appspotmail.com Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index e7289d67268f..0482adc9916b 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1704,7 +1704,7 @@ static int macvlan_device_event(struct notifier_block *unused, struct macvlan_dev, list); - if (macvlan_sync_address(vlan->dev, dev->dev_addr)) + if (vlan && macvlan_sync_address(vlan->dev, dev->dev_addr)) return NOTIFY_BAD; break; From d7a5502b0bb8bde54973a2ee73e7bd72e6cb195c Mon Sep 17 00:00:00 2001 From: Dejin Zheng Date: Mon, 20 Apr 2020 23:06:41 +0800 Subject: [PATCH 049/100] net: broadcom: convert to devm_platform_ioremap_resource_byname() Use the function devm_platform_ioremap_resource_byname() to simplify source code which calls the functions platform_get_resource_byname() and devm_ioremap_resource(). Remove also a few error messages which became unnecessary with this software refactoring. Suggested-by: Markus Elfring Signed-off-by: Dejin Zheng Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bgmac-platform.c | 33 +++++++------------ 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c index c46c1b1416f7..a5d1a6cb9ce3 100644 --- a/drivers/net/ethernet/broadcom/bgmac-platform.c +++ b/drivers/net/ethernet/broadcom/bgmac-platform.c @@ -172,7 +172,6 @@ static int bgmac_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; struct bgmac *bgmac; - struct resource *regs; const u8 *mac_addr; bgmac = bgmac_alloc(&pdev->dev); @@ -202,31 +201,21 @@ static int bgmac_probe(struct platform_device *pdev) if (bgmac->irq < 0) return bgmac->irq; - regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "amac_base"); - if (!regs) { - dev_err(&pdev->dev, "Unable to obtain base resource\n"); - return -EINVAL; - } - - bgmac->plat.base = devm_ioremap_resource(&pdev->dev, regs); + bgmac->plat.base = + devm_platform_ioremap_resource_byname(pdev, "amac_base"); if (IS_ERR(bgmac->plat.base)) return PTR_ERR(bgmac->plat.base); - regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "idm_base"); - if (regs) { - bgmac->plat.idm_base = devm_ioremap_resource(&pdev->dev, regs); - if (IS_ERR(bgmac->plat.idm_base)) - return PTR_ERR(bgmac->plat.idm_base); - bgmac->feature_flags &= ~BGMAC_FEAT_IDM_MASK; - } + bgmac->plat.idm_base = + devm_platform_ioremap_resource_byname(pdev, "idm_base"); + if (IS_ERR(bgmac->plat.idm_base)) + return PTR_ERR(bgmac->plat.idm_base); + bgmac->feature_flags &= ~BGMAC_FEAT_IDM_MASK; - regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nicpm_base"); - if (regs) { - bgmac->plat.nicpm_base = devm_ioremap_resource(&pdev->dev, - regs); - if (IS_ERR(bgmac->plat.nicpm_base)) - return PTR_ERR(bgmac->plat.nicpm_base); - } + bgmac->plat.nicpm_base = + devm_platform_ioremap_resource_byname(pdev, "nicpm_base"); + if (IS_ERR(bgmac->plat.nicpm_base)) + return PTR_ERR(bgmac->plat.nicpm_base); bgmac->read = platform_bgmac_read; bgmac->write = platform_bgmac_write; From b9663b7ca6ff780555108394c9c1b409f63b99a7 Mon Sep 17 00:00:00 2001 From: Voon Weifeng Date: Mon, 20 Apr 2020 23:42:52 +0800 Subject: [PATCH 050/100] net: stmmac: Enable SERDES power up/down sequence This patch is to enable Intel SERDES power up/down sequence. The SERDES converts 8/10 bits data to SGMII signal. Below is an example of HW configuration for SGMII mode. The SERDES is located in the PHY IF in the diagram below. <-----------------GBE Controller---------->|<--External PHY chip--> +----------+ +----+ +---+ +----------+ | EQoS | <-GMII->| DW | < ------ > |PHY| <-SGMII-> | External | | MAC | |xPCS| |IF | | PHY | +----------+ +----+ +---+ +----------+ ^ ^ ^ ^ | | | | +---------------------MDIO-------------------------+ PHY IF configuration and status registers are accessible through mdio address 0x15 which is defined as mdio_adhoc_addr. During D0, The driver will need to power up PHY IF by changing the power state to P0. Likewise, for D3, the driver sets PHY IF power state to P3. Signed-off-by: Voon Weifeng Signed-off-by: Ong Boon Leong Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/dwmac-intel.c | 189 ++++++++++++++++++ .../net/ethernet/stmicro/stmmac/dwmac-intel.h | 23 +++ .../net/ethernet/stmicro/stmmac/stmmac_main.c | 23 +++ include/linux/stmmac.h | 2 + 4 files changed, 237 insertions(+) create mode 100644 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 5419d4e478c0..2e4aaedb93f5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -5,8 +5,13 @@ #include #include #include +#include "dwmac-intel.h" #include "stmmac.h" +struct intel_priv_data { + int mdio_adhoc_addr; /* mdio address for serdes & etc */ +}; + /* This struct is used to associate PCI Function of MAC controller on a board, * discovered via DMI, with the address of PHY connected to the MAC. The * negative value of the address means that MAC controller is not connected @@ -49,6 +54,172 @@ static int stmmac_pci_find_phy_addr(struct pci_dev *pdev, return -ENODEV; } +static int serdes_status_poll(struct stmmac_priv *priv, int phyaddr, + int phyreg, u32 mask, u32 val) +{ + unsigned int retries = 10; + int val_rd; + + do { + val_rd = mdiobus_read(priv->mii, phyaddr, phyreg); + if ((val_rd & mask) == (val & mask)) + return 0; + udelay(POLL_DELAY_US); + } while (--retries); + + return -ETIMEDOUT; +} + +static int intel_serdes_powerup(struct net_device *ndev, void *priv_data) +{ + struct intel_priv_data *intel_priv = priv_data; + struct stmmac_priv *priv = netdev_priv(ndev); + int serdes_phy_addr = 0; + u32 data = 0; + + if (!intel_priv->mdio_adhoc_addr) + return 0; + + serdes_phy_addr = intel_priv->mdio_adhoc_addr; + + /* assert clk_req */ + data = mdiobus_read(priv->mii, serdes_phy_addr, + SERDES_GCR0); + + data |= SERDES_PLL_CLK; + + mdiobus_write(priv->mii, serdes_phy_addr, + SERDES_GCR0, data); + + /* check for clk_ack assertion */ + data = serdes_status_poll(priv, serdes_phy_addr, + SERDES_GSR0, + SERDES_PLL_CLK, + SERDES_PLL_CLK); + + if (data) { + dev_err(priv->device, "Serdes PLL clk request timeout\n"); + return data; + } + + /* assert lane reset */ + data = mdiobus_read(priv->mii, serdes_phy_addr, + SERDES_GCR0); + + data |= SERDES_RST; + + mdiobus_write(priv->mii, serdes_phy_addr, + SERDES_GCR0, data); + + /* check for assert lane reset reflection */ + data = serdes_status_poll(priv, serdes_phy_addr, + SERDES_GSR0, + SERDES_RST, + SERDES_RST); + + if (data) { + dev_err(priv->device, "Serdes assert lane reset timeout\n"); + return data; + } + + /* move power state to P0 */ + data = mdiobus_read(priv->mii, serdes_phy_addr, + SERDES_GCR0); + + data &= ~SERDES_PWR_ST_MASK; + data |= SERDES_PWR_ST_P0 << SERDES_PWR_ST_SHIFT; + + mdiobus_write(priv->mii, serdes_phy_addr, + SERDES_GCR0, data); + + /* Check for P0 state */ + data = serdes_status_poll(priv, serdes_phy_addr, + SERDES_GSR0, + SERDES_PWR_ST_MASK, + SERDES_PWR_ST_P0 << SERDES_PWR_ST_SHIFT); + + if (data) { + dev_err(priv->device, "Serdes power state P0 timeout.\n"); + return data; + } + + return 0; +} + +static void intel_serdes_powerdown(struct net_device *ndev, void *intel_data) +{ + struct intel_priv_data *intel_priv = intel_data; + struct stmmac_priv *priv = netdev_priv(ndev); + int serdes_phy_addr = 0; + u32 data = 0; + + if (!intel_priv->mdio_adhoc_addr) + return; + + serdes_phy_addr = intel_priv->mdio_adhoc_addr; + + /* move power state to P3 */ + data = mdiobus_read(priv->mii, serdes_phy_addr, + SERDES_GCR0); + + data &= ~SERDES_PWR_ST_MASK; + data |= SERDES_PWR_ST_P3 << SERDES_PWR_ST_SHIFT; + + mdiobus_write(priv->mii, serdes_phy_addr, + SERDES_GCR0, data); + + /* Check for P3 state */ + data = serdes_status_poll(priv, serdes_phy_addr, + SERDES_GSR0, + SERDES_PWR_ST_MASK, + SERDES_PWR_ST_P3 << SERDES_PWR_ST_SHIFT); + + if (data) { + dev_err(priv->device, "Serdes power state P3 timeout\n"); + return; + } + + /* de-assert clk_req */ + data = mdiobus_read(priv->mii, serdes_phy_addr, + SERDES_GCR0); + + data &= ~SERDES_PLL_CLK; + + mdiobus_write(priv->mii, serdes_phy_addr, + SERDES_GCR0, data); + + /* check for clk_ack de-assert */ + data = serdes_status_poll(priv, serdes_phy_addr, + SERDES_GSR0, + SERDES_PLL_CLK, + (u32)~SERDES_PLL_CLK); + + if (data) { + dev_err(priv->device, "Serdes PLL clk de-assert timeout\n"); + return; + } + + /* de-assert lane reset */ + data = mdiobus_read(priv->mii, serdes_phy_addr, + SERDES_GCR0); + + data &= ~SERDES_RST; + + mdiobus_write(priv->mii, serdes_phy_addr, + SERDES_GCR0, data); + + /* check for de-assert lane reset reflection */ + data = serdes_status_poll(priv, serdes_phy_addr, + SERDES_GSR0, + SERDES_RST, + (u32)~SERDES_RST); + + if (data) { + dev_err(priv->device, "Serdes de-assert lane reset timeout\n"); + return; + } +} + static void common_default_data(struct plat_stmmacenet_data *plat) { plat->clk_csr = 2; /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ @@ -189,6 +360,9 @@ static int ehl_sgmii_data(struct pci_dev *pdev, plat->phy_addr = 0; plat->phy_interface = PHY_INTERFACE_MODE_SGMII; + plat->serdes_powerup = intel_serdes_powerup; + plat->serdes_powerdown = intel_serdes_powerdown; + return ehl_common_data(pdev, plat); } @@ -233,6 +407,8 @@ static int ehl_pse0_sgmii1g_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->phy_interface = PHY_INTERFACE_MODE_SGMII; + plat->serdes_powerup = intel_serdes_powerup; + plat->serdes_powerdown = intel_serdes_powerdown; return ehl_pse0_common_data(pdev, plat); } @@ -263,6 +439,8 @@ static int ehl_pse1_sgmii1g_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->phy_interface = PHY_INTERFACE_MODE_SGMII; + plat->serdes_powerup = intel_serdes_powerup; + plat->serdes_powerdown = intel_serdes_powerdown; return ehl_pse1_common_data(pdev, plat); } @@ -291,6 +469,8 @@ static int tgl_sgmii_data(struct pci_dev *pdev, plat->bus_id = 1; plat->phy_addr = 0; plat->phy_interface = PHY_INTERFACE_MODE_SGMII; + plat->serdes_powerup = intel_serdes_powerup; + plat->serdes_powerdown = intel_serdes_powerdown; return tgl_common_data(pdev, plat); } @@ -417,11 +597,17 @@ static int intel_eth_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct stmmac_pci_info *info = (struct stmmac_pci_info *)id->driver_data; + struct intel_priv_data *intel_priv; struct plat_stmmacenet_data *plat; struct stmmac_resources res; int i; int ret; + intel_priv = devm_kzalloc(&pdev->dev, sizeof(*intel_priv), + GFP_KERNEL); + if (!intel_priv) + return -ENOMEM; + plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL); if (!plat) return -ENOMEM; @@ -457,6 +643,9 @@ static int intel_eth_pci_probe(struct pci_dev *pdev, pci_set_master(pdev); + plat->bsp_priv = intel_priv; + intel_priv->mdio_adhoc_addr = 0x15; + ret = info->setup(pdev, plat); if (ret) return ret; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h new file mode 100644 index 000000000000..e723096c0b15 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020, Intel Corporation + * DWMAC Intel header file + */ + +#ifndef __DWMAC_INTEL_H__ +#define __DWMAC_INTEL_H__ + +#define POLL_DELAY_US 8 + +/* SERDES Register */ +#define SERDES_GSR0 0x5 /* Global Status Reg0 */ +#define SERDES_GCR0 0xb /* Global Configuration Reg0 */ + +/* SERDES defines */ +#define SERDES_PLL_CLK BIT(0) /* PLL clk valid signal */ +#define SERDES_RST BIT(2) /* Serdes Reset */ +#define SERDES_PWR_ST_MASK GENMASK(6, 4) /* Serdes Power state*/ +#define SERDES_PWR_ST_SHIFT 4 +#define SERDES_PWR_ST_P0 0x0 +#define SERDES_PWR_ST_P3 0x3 + +#endif /* __DWMAC_INTEL_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e6898fd5223f..565da6498c84 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4986,6 +4986,14 @@ int stmmac_dvr_probe(struct device *device, goto error_netdev_register; } + if (priv->plat->serdes_powerup) { + ret = priv->plat->serdes_powerup(ndev, + priv->plat->bsp_priv); + + if (ret < 0) + return ret; + } + #ifdef CONFIG_DEBUG_FS stmmac_init_fs(ndev); #endif @@ -5029,6 +5037,9 @@ int stmmac_dvr_remove(struct device *dev) stmmac_stop_all_dma(priv); + if (priv->plat->serdes_powerdown) + priv->plat->serdes_powerdown(ndev, priv->plat->bsp_priv); + stmmac_mac_set(priv, priv->ioaddr, false); netif_carrier_off(ndev); unregister_netdev(ndev); @@ -5081,6 +5092,9 @@ int stmmac_suspend(struct device *dev) /* Stop TX/RX DMA */ stmmac_stop_all_dma(priv); + if (priv->plat->serdes_powerdown) + priv->plat->serdes_powerdown(ndev, priv->plat->bsp_priv); + /* Enable Power down mode by programming the PMT regs */ if (device_may_wakeup(priv->device)) { stmmac_pmt(priv, priv->hw, priv->wolopts); @@ -5143,6 +5157,7 @@ int stmmac_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct stmmac_priv *priv = netdev_priv(ndev); + int ret; if (!netif_running(ndev)) return 0; @@ -5170,6 +5185,14 @@ int stmmac_resume(struct device *dev) stmmac_mdio_reset(priv->mii); } + if (priv->plat->serdes_powerup) { + ret = priv->plat->serdes_powerup(ndev, + priv->plat->bsp_priv); + + if (ret < 0) + return ret; + } + netif_device_attach(ndev); mutex_lock(&priv->lock); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index fbafb353e9be..bd964c31d333 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -177,6 +177,8 @@ struct plat_stmmacenet_data { struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES]; struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES]; void (*fix_mac_speed)(void *priv, unsigned int speed); + int (*serdes_powerup)(struct net_device *ndev, void *priv); + void (*serdes_powerdown)(struct net_device *ndev, void *priv); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); struct mac_device_info *(*setup)(void *priv); From 63edbcceef612bdd95fa28ce100460c7b79008a4 Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Mon, 20 Apr 2020 11:51:41 -0400 Subject: [PATCH 051/100] net: phy: microchip_t1: add lan87xx_phy_init to initialize the lan87xx phy. lan87xx_phy_init() initializes the lan87xx phy hardware including its TC10 Wake-up and Sleep features. Fixes: 3e50d2da5850 ("Add driver for Microchip LAN87XX T1 PHYs") Signed-off-by: Yuiko Oshino v0->v1: - Add more details in the commit message and source comments. - Update to the latest initialization sequences. - Add access_ereg_modify_changed(). - Fix access_ereg() to access SMI bank correctly. Signed-off-by: David S. Miller --- drivers/net/phy/microchip_t1.c | 171 +++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c index 001def4509c2..fed3e395f18e 100644 --- a/drivers/net/phy/microchip_t1.c +++ b/drivers/net/phy/microchip_t1.c @@ -3,9 +3,21 @@ #include #include +#include #include #include +/* External Register Control Register */ +#define LAN87XX_EXT_REG_CTL (0x14) +#define LAN87XX_EXT_REG_CTL_RD_CTL (0x1000) +#define LAN87XX_EXT_REG_CTL_WR_CTL (0x0800) + +/* External Register Read Data Register */ +#define LAN87XX_EXT_REG_RD_DATA (0x15) + +/* External Register Write Data Register */ +#define LAN87XX_EXT_REG_WR_DATA (0x16) + /* Interrupt Source Register */ #define LAN87XX_INTERRUPT_SOURCE (0x18) @@ -14,9 +26,160 @@ #define LAN87XX_MASK_LINK_UP (0x0004) #define LAN87XX_MASK_LINK_DOWN (0x0002) +/* phyaccess nested types */ +#define PHYACC_ATTR_MODE_READ 0 +#define PHYACC_ATTR_MODE_WRITE 1 +#define PHYACC_ATTR_MODE_MODIFY 2 + +#define PHYACC_ATTR_BANK_SMI 0 +#define PHYACC_ATTR_BANK_MISC 1 +#define PHYACC_ATTR_BANK_PCS 2 +#define PHYACC_ATTR_BANK_AFE 3 +#define PHYACC_ATTR_BANK_MAX 7 + #define DRIVER_AUTHOR "Nisar Sayed " #define DRIVER_DESC "Microchip LAN87XX T1 PHY driver" +struct access_ereg_val { + u8 mode; + u8 bank; + u8 offset; + u16 val; + u16 mask; +}; + +static int access_ereg(struct phy_device *phydev, u8 mode, u8 bank, + u8 offset, u16 val) +{ + u16 ereg = 0; + int rc = 0; + + if (mode > PHYACC_ATTR_MODE_WRITE || bank > PHYACC_ATTR_BANK_MAX) + return -EINVAL; + + if (bank == PHYACC_ATTR_BANK_SMI) { + if (mode == PHYACC_ATTR_MODE_WRITE) + rc = phy_write(phydev, offset, val); + else + rc = phy_read(phydev, offset); + return rc; + } + + if (mode == PHYACC_ATTR_MODE_WRITE) { + ereg = LAN87XX_EXT_REG_CTL_WR_CTL; + rc = phy_write(phydev, LAN87XX_EXT_REG_WR_DATA, val); + if (rc < 0) + return rc; + } else { + ereg = LAN87XX_EXT_REG_CTL_RD_CTL; + } + + ereg |= (bank << 8) | offset; + + rc = phy_write(phydev, LAN87XX_EXT_REG_CTL, ereg); + if (rc < 0) + return rc; + + if (mode == PHYACC_ATTR_MODE_READ) + rc = phy_read(phydev, LAN87XX_EXT_REG_RD_DATA); + + return rc; +} + +static int access_ereg_modify_changed(struct phy_device *phydev, + u8 bank, u8 offset, u16 val, u16 mask) +{ + int new = 0, rc = 0; + + if (bank > PHYACC_ATTR_BANK_MAX) + return -EINVAL; + + rc = access_ereg(phydev, PHYACC_ATTR_MODE_READ, bank, offset, val); + if (rc < 0) + return rc; + + new = val | (rc & (mask ^ 0xFFFF)); + rc = access_ereg(phydev, PHYACC_ATTR_MODE_WRITE, bank, offset, new); + + return rc; +} + +static int lan87xx_phy_init(struct phy_device *phydev) +{ + static const struct access_ereg_val init[] = { + /* TX Amplitude = 5 */ + {PHYACC_ATTR_MODE_MODIFY, PHYACC_ATTR_BANK_AFE, 0x0B, + 0x000A, 0x001E}, + /* Clear SMI interrupts */ + {PHYACC_ATTR_MODE_READ, PHYACC_ATTR_BANK_SMI, 0x18, + 0, 0}, + /* Clear MISC interrupts */ + {PHYACC_ATTR_MODE_READ, PHYACC_ATTR_BANK_MISC, 0x08, + 0, 0}, + /* Turn on TC10 Ring Oscillator (ROSC) */ + {PHYACC_ATTR_MODE_MODIFY, PHYACC_ATTR_BANK_MISC, 0x20, + 0x0020, 0x0020}, + /* WUR Detect Length to 1.2uS, LPC Detect Length to 1.09uS */ + {PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_PCS, 0x20, + 0x283C, 0}, + /* Wake_In Debounce Length to 39uS, Wake_Out Length to 79uS */ + {PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_MISC, 0x21, + 0x274F, 0}, + /* Enable Auto Wake Forward to Wake_Out, ROSC on, Sleep, + * and Wake_In to wake PHY + */ + {PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_MISC, 0x20, + 0x80A7, 0}, + /* Enable WUP Auto Fwd, Enable Wake on MDI, Wakeup Debouncer + * to 128 uS + */ + {PHYACC_ATTR_MODE_WRITE, PHYACC_ATTR_BANK_MISC, 0x24, + 0xF110, 0}, + /* Enable HW Init */ + {PHYACC_ATTR_MODE_MODIFY, PHYACC_ATTR_BANK_SMI, 0x1A, + 0x0100, 0x0100}, + }; + int rc, i; + + /* Start manual initialization procedures in Managed Mode */ + rc = access_ereg_modify_changed(phydev, PHYACC_ATTR_BANK_SMI, + 0x1a, 0x0000, 0x0100); + if (rc < 0) + return rc; + + /* Soft Reset the SMI block */ + rc = access_ereg_modify_changed(phydev, PHYACC_ATTR_BANK_SMI, + 0x00, 0x8000, 0x8000); + if (rc < 0) + return rc; + + /* Check to see if the self-clearing bit is cleared */ + usleep_range(1000, 2000); + rc = access_ereg(phydev, PHYACC_ATTR_MODE_READ, + PHYACC_ATTR_BANK_SMI, 0x00, 0); + if (rc < 0) + return rc; + if ((rc & 0x8000) != 0) + return -ETIMEDOUT; + + /* PHY Initialization */ + for (i = 0; i < ARRAY_SIZE(init); i++) { + if (init[i].mode == PHYACC_ATTR_MODE_MODIFY) { + rc = access_ereg_modify_changed(phydev, init[i].bank, + init[i].offset, + init[i].val, + init[i].mask); + } else { + rc = access_ereg(phydev, init[i].mode, init[i].bank, + init[i].offset, init[i].val); + } + if (rc < 0) + return rc; + } + + return 0; +} + static int lan87xx_phy_config_intr(struct phy_device *phydev) { int rc, val = 0; @@ -40,6 +203,13 @@ static int lan87xx_phy_ack_interrupt(struct phy_device *phydev) return rc < 0 ? rc : 0; } +static int lan87xx_config_init(struct phy_device *phydev) +{ + int rc = lan87xx_phy_init(phydev); + + return rc < 0 ? rc : 0; +} + static struct phy_driver microchip_t1_phy_driver[] = { { .phy_id = 0x0007c150, @@ -48,6 +218,7 @@ static struct phy_driver microchip_t1_phy_driver[] = { .features = PHY_BASIC_T1_FEATURES, + .config_init = lan87xx_config_init, .config_aneg = genphy_config_aneg, .ack_interrupt = lan87xx_phy_ack_interrupt, From 9175d3f38816835b0801bacbf4f6aff1a1672b71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Mon, 20 Apr 2020 11:25:07 -0700 Subject: [PATCH 052/100] ipv6: ndisc: RFC-ietf-6man-ra-pref64-09 is now published as RFC8781 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See: https://www.rfc-editor.org/authors/rfc8781.txt Cc: Erik Kline Cc: Jen Linkova Cc: Lorenzo Colitti Cc: Michael Haro Signed-off-by: Maciej Żenczykowski Fixes: c24a77edc9a7 ("ipv6: ndisc: add support for 'PREF64' dns64 prefix identifier") Signed-off-by: David S. Miller --- include/net/ndisc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 7d107113f988..9205a76d967a 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -41,7 +41,7 @@ enum { ND_OPT_DNSSL = 31, /* RFC6106 */ ND_OPT_6CO = 34, /* RFC6775 */ ND_OPT_CAPTIVE_PORTAL = 37, /* RFC7710 */ - ND_OPT_PREF64 = 38, /* RFC-ietf-6man-ra-pref64-09 */ + ND_OPT_PREF64 = 38, /* RFC8781 */ __ND_OPT_MAX }; From 0c922a4850eba2e668f73a3f1153196e09abb251 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 20 Apr 2020 17:13:51 -0600 Subject: [PATCH 053/100] xfrm: Always set XFRM_TRANSFORMED in xfrm{4,6}_output_finish IPSKB_XFRM_TRANSFORMED and IP6SKB_XFRM_TRANSFORMED are skb flags set by xfrm code to tell other skb handlers that the packet has been passed through the xfrm output functions. Simplify the code and just always set them rather than conditionally based on netfilter enabled thus making the flag available for other users. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/xfrm4_output.c | 2 -- net/ipv6/xfrm6_output.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 89ba7c87de5d..30ddb9dc9398 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -58,9 +58,7 @@ int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); -#ifdef CONFIG_NETFILTER IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; -#endif return xfrm_output(sk, skb); } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index fbe51d40bd7e..e34167f790e6 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -111,9 +111,7 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) { memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); -#ifdef CONFIG_NETFILTER IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; -#endif return xfrm_output(sk, skb); } From 16b9db1ce34ff00d6c18e82825125cfef0cdfb13 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 20 Apr 2020 17:13:52 -0600 Subject: [PATCH 054/100] vrf: Check skb for XFRM_TRANSFORMED flag To avoid a loop with qdiscs and xfrms, check if the skb has already gone through the qdisc attached to the VRF device and then to the xfrm layer. If so, no need for a second redirect. Fixes: 193125dbd8eb ("net: Introduce VRF device driver") Reported-by: Trev Larock Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 66e00ddc0d42..6f5d03b7d9c0 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -474,7 +474,8 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) return skb; - if (qdisc_tx_is_default(vrf_dev)) + if (qdisc_tx_is_default(vrf_dev) || + IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) return vrf_ip6_out_direct(vrf_dev, sk, skb); return vrf_ip6_out_redirect(vrf_dev, skb); @@ -686,7 +687,8 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, ipv4_is_lbcast(ip_hdr(skb)->daddr)) return skb; - if (qdisc_tx_is_default(vrf_dev)) + if (qdisc_tx_is_default(vrf_dev) || + IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) return vrf_ip_out_direct(vrf_dev, sk, skb); return vrf_ip_out_redirect(vrf_dev, skb); From 2e97b0cd1651a270f3a3fcf42115c51f3284c049 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 20 Apr 2020 20:26:51 -0700 Subject: [PATCH 055/100] net: dsa: b53: Lookup VID in ARL searches when VLAN is enabled When VLAN is enabled, and an ARL search is issued, we also need to compare the full {MAC,VID} tuple before returning a successful search result. Fixes: 1da6df85c6fb ("net: dsa: b53: Implement ARL add/del/dump operations") Reviewed-by: Andrew Lunn Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 68e2381694b9..fa9b9aca7b56 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1505,6 +1505,9 @@ static int b53_arl_read(struct b53_device *dev, u64 mac, continue; if ((mac_vid & ARLTBL_MAC_MASK) != mac) continue; + if (dev->vlan_enabled && + ((mac_vid >> ARLTBL_VID_S) & ARLTBL_VID_MASK) != vid) + continue; *idx = i; } From eab167f4851a19c514469dfa81147f77e17b5b20 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 20 Apr 2020 20:26:52 -0700 Subject: [PATCH 056/100] net: dsa: b53: Fix valid setting for MDB entries When support for the MDB entries was added, the valid bit was correctly changed to be assigned depending on the remaining port bitmask, that is, if there were no more ports added to the entry's port bitmask, the entry now becomes invalid. There was another assignment a few lines below that would override this which would invalidate entries even when there were still multiple ports left in the MDB entry. Fixes: 5d65b64a3d97 ("net: dsa: b53: Add support for MDB") Reviewed-by: Andrew Lunn Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index fa9b9aca7b56..e937bf365490 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1561,7 +1561,6 @@ static int b53_arl_op(struct b53_device *dev, int op, int port, ent.is_valid = !!(ent.port); } - ent.is_valid = is_valid; ent.vid = vid; ent.is_static = true; ent.is_age = false; From c2e77a18a7ed65eb48f6e389b6a59a0fd753646a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 20 Apr 2020 20:26:53 -0700 Subject: [PATCH 057/100] net: dsa: b53: Fix ARL register definitions The ARL {MAC,VID} tuple and the forward entry were off by 0x10 bytes, which means that when we read/wrote from/to ARL bin index 0, we were actually accessing the ARLA_RWCTRL register. Fixes: 1da6df85c6fb ("net: dsa: b53: Implement ARL add/del/dump operations") Reviewed-by: Andrew Lunn Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_regs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index 2a9f421680aa..d914e756cdab 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -304,7 +304,7 @@ * * BCM5325 and BCM5365 share most definitions below */ -#define B53_ARLTBL_MAC_VID_ENTRY(n) (0x10 * (n)) +#define B53_ARLTBL_MAC_VID_ENTRY(n) ((0x10 * (n)) + 0x10) #define ARLTBL_MAC_MASK 0xffffffffffffULL #define ARLTBL_VID_S 48 #define ARLTBL_VID_MASK_25 0xff @@ -316,7 +316,7 @@ #define ARLTBL_VALID_25 BIT(63) /* ARL Table Data Entry N Registers (32 bit) */ -#define B53_ARLTBL_DATA_ENTRY(n) ((0x10 * (n)) + 0x08) +#define B53_ARLTBL_DATA_ENTRY(n) ((0x10 * (n)) + 0x18) #define ARLTBL_DATA_PORT_ID_MASK 0x1ff #define ARLTBL_TC(tc) ((3 & tc) << 11) #define ARLTBL_AGE BIT(14) From 6344dbde6a27d10d16246d734b968f84887841e2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 20 Apr 2020 20:26:54 -0700 Subject: [PATCH 058/100] net: dsa: b53: Rework ARL bin logic When asking the ARL to read a MAC address, we will get a number of bins returned in a single read. Out of those bins, there can essentially be 3 states: - all bins are full, we have no space left, and we can either replace an existing address or return that full condition - the MAC address was found, then we need to return its bin index and modify that one, and only that one - the MAC address was not found and we have a least one bin free, we use that bin index location then The code would unfortunately fail on all counts. Fixes: 1da6df85c6fb ("net: dsa: b53: Implement ARL add/del/dump operations") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 30 ++++++++++++++++++++++++++---- drivers/net/dsa/b53/b53_regs.h | 3 +++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index e937bf365490..8cb41583bbad 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1483,6 +1483,7 @@ static int b53_arl_read(struct b53_device *dev, u64 mac, u16 vid, struct b53_arl_entry *ent, u8 *idx, bool is_valid) { + DECLARE_BITMAP(free_bins, B53_ARLTBL_MAX_BIN_ENTRIES); unsigned int i; int ret; @@ -1490,6 +1491,8 @@ static int b53_arl_read(struct b53_device *dev, u64 mac, if (ret) return ret; + bitmap_zero(free_bins, dev->num_arl_entries); + /* Read the bins */ for (i = 0; i < dev->num_arl_entries; i++) { u64 mac_vid; @@ -1501,16 +1504,24 @@ static int b53_arl_read(struct b53_device *dev, u64 mac, B53_ARLTBL_DATA_ENTRY(i), &fwd_entry); b53_arl_to_entry(ent, mac_vid, fwd_entry); - if (!(fwd_entry & ARLTBL_VALID)) + if (!(fwd_entry & ARLTBL_VALID)) { + set_bit(i, free_bins); continue; + } if ((mac_vid & ARLTBL_MAC_MASK) != mac) continue; if (dev->vlan_enabled && ((mac_vid >> ARLTBL_VID_S) & ARLTBL_VID_MASK) != vid) continue; *idx = i; + return 0; } + if (bitmap_weight(free_bins, dev->num_arl_entries) == 0) + return -ENOSPC; + + *idx = find_first_bit(free_bins, dev->num_arl_entries); + return -ENOENT; } @@ -1540,10 +1551,21 @@ static int b53_arl_op(struct b53_device *dev, int op, int port, if (op) return ret; - /* We could not find a matching MAC, so reset to a new entry */ - if (ret) { + switch (ret) { + case -ENOSPC: + dev_dbg(dev->dev, "{%pM,%.4d} no space left in ARL\n", + addr, vid); + return is_valid ? ret : 0; + case -ENOENT: + /* We could not find a matching MAC, so reset to a new entry */ + dev_dbg(dev->dev, "{%pM,%.4d} not found, using idx: %d\n", + addr, vid, idx); fwd_entry = 0; - idx = 1; + break; + default: + dev_dbg(dev->dev, "{%pM,%.4d} found, using idx: %d\n", + addr, vid, idx); + break; } /* For multicast address, the port is a bitmask and the validity diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index d914e756cdab..14f617e9173d 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -323,6 +323,9 @@ #define ARLTBL_STATIC BIT(15) #define ARLTBL_VALID BIT(16) +/* Maximum number of bin entries in the ARL for all switches */ +#define B53_ARLTBL_MAX_BIN_ENTRIES 4 + /* ARL Search Control Register (8 bit) */ #define B53_ARL_SRCH_CTL 0x50 #define B53_ARL_SRCH_CTL_25 0x20 From 64fec9493f7dc9bdd7233bcfe98985c45bd0e3c1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 20 Apr 2020 20:26:55 -0700 Subject: [PATCH 059/100] net: dsa: b53: b53_arl_rw_op() needs to select IVL or SVL Flip the IVL_SVL_SELECT bit correctly based on the VLAN enable status, the default is to perform Shared VLAN learning instead of Individual learning. Fixes: 1da6df85c6fb ("net: dsa: b53: Implement ARL add/del/dump operations") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 4 ++++ drivers/net/dsa/b53/b53_regs.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 8cb41583bbad..c283593bef17 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1474,6 +1474,10 @@ static int b53_arl_rw_op(struct b53_device *dev, unsigned int op) reg |= ARLTBL_RW; else reg &= ~ARLTBL_RW; + if (dev->vlan_enabled) + reg &= ~ARLTBL_IVL_SVL_SELECT; + else + reg |= ARLTBL_IVL_SVL_SELECT; b53_write8(dev, B53_ARLIO_PAGE, B53_ARLTBL_RW_CTRL, reg); return b53_arl_op_wait(dev); diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index 14f617e9173d..c90985c294a2 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -292,6 +292,7 @@ /* ARL Table Read/Write Register (8 bit) */ #define B53_ARLTBL_RW_CTRL 0x00 #define ARLTBL_RW BIT(0) +#define ARLTBL_IVL_SVL_SELECT BIT(6) #define ARLTBL_START_DONE BIT(7) /* MAC Address Index Register (48 bit) */ From 2c1dd4c110627c2a4f006643f074119205cfcff4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 21 Apr 2020 08:47:24 -0600 Subject: [PATCH 060/100] selftests: Fix suppress test in fib_tests.sh fib_tests is spewing errors: ... Cannot open network namespace "ns1": No such file or directory Cannot open network namespace "ns1": No such file or directory Cannot open network namespace "ns1": No such file or directory Cannot open network namespace "ns1": No such file or directory ping: connect: Network is unreachable Cannot open network namespace "ns1": No such file or directory Cannot open network namespace "ns1": No such file or directory ... Each test entry in fib_tests is supposed to do its own setup and cleanup. Right now the $IP commands in fib_suppress_test are failing because there is no ns1. Add the setup/cleanup and logging expected for each test. Fixes: ca7a03c41753 ("ipv6: do not free rt if FIB_LOOKUP_NOREF is set on suppress rule") Signed-off-by: David Ahern Cc: Jason A. Donenfeld Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_tests.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index b7616704b55e..84205c3a55eb 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -618,16 +618,22 @@ fib_nexthop_test() fib_suppress_test() { + echo + echo "FIB rule with suppress_prefixlength" + setup + $IP link add dummy1 type dummy $IP link set dummy1 up $IP -6 route add default dev dummy1 $IP -6 rule add table main suppress_prefixlength 0 - ping -f -c 1000 -W 1 1234::1 || true + ping -f -c 1000 -W 1 1234::1 >/dev/null 2>&1 $IP -6 rule del table main suppress_prefixlength 0 $IP link del dummy1 # If we got here without crashing, we're good. - return 0 + log_test 0 0 "FIB rule suppress test" + + cleanup } ################################################################################ From a1211bf9a7774706722ba3b18c6157d980319f79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Apr 2020 10:00:28 -0700 Subject: [PATCH 061/100] sched: etf: do not assume all sockets are full blown skb->sk does not always point to a full blown socket, we need to use sk_fullsock() before accessing fields which only make sense on full socket. BUG: KASAN: use-after-free in report_sock_error+0x286/0x300 net/sched/sch_etf.c:141 Read of size 1 at addr ffff88805eb9b245 by task syz-executor.5/9630 CPU: 1 PID: 9630 Comm: syz-executor.5 Not tainted 5.7.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x188/0x20d lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xd3/0x315 mm/kasan/report.c:382 __kasan_report.cold+0x35/0x4d mm/kasan/report.c:511 kasan_report+0x33/0x50 mm/kasan/common.c:625 report_sock_error+0x286/0x300 net/sched/sch_etf.c:141 etf_enqueue_timesortedlist+0x389/0x740 net/sched/sch_etf.c:170 __dev_xmit_skb net/core/dev.c:3710 [inline] __dev_queue_xmit+0x154a/0x30a0 net/core/dev.c:4021 neigh_hh_output include/net/neighbour.h:499 [inline] neigh_output include/net/neighbour.h:508 [inline] ip6_finish_output2+0xfb5/0x25b0 net/ipv6/ip6_output.c:117 __ip6_finish_output+0x442/0xab0 net/ipv6/ip6_output.c:143 ip6_finish_output+0x34/0x1f0 net/ipv6/ip6_output.c:153 NF_HOOK_COND include/linux/netfilter.h:296 [inline] ip6_output+0x239/0x810 net/ipv6/ip6_output.c:176 dst_output include/net/dst.h:435 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] ip6_xmit+0xe1a/0x2090 net/ipv6/ip6_output.c:280 tcp_v6_send_synack+0x4e7/0x960 net/ipv6/tcp_ipv6.c:521 tcp_rtx_synack+0x10d/0x1a0 net/ipv4/tcp_output.c:3916 inet_rtx_syn_ack net/ipv4/inet_connection_sock.c:669 [inline] reqsk_timer_handler+0x4c2/0xb40 net/ipv4/inet_connection_sock.c:763 call_timer_fn+0x1ac/0x780 kernel/time/timer.c:1405 expire_timers kernel/time/timer.c:1450 [inline] __run_timers kernel/time/timer.c:1774 [inline] __run_timers kernel/time/timer.c:1741 [inline] run_timer_softirq+0x623/0x1600 kernel/time/timer.c:1787 __do_softirq+0x26c/0x9f7 kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0x192/0x1d0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:546 [inline] smp_apic_timer_interrupt+0x19e/0x600 arch/x86/kernel/apic/apic.c:1140 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:829 RIP: 0010:des_encrypt+0x157/0x9c0 lib/crypto/des.c:792 Code: 85 22 06 00 00 41 31 dc 41 8b 4d 04 44 89 e2 41 83 e4 3f 4a 8d 3c a5 60 72 72 88 81 e2 3f 3f 3f 3f 48 89 f8 48 c1 e8 03 31 d9 <0f> b6 34 28 48 89 f8 c1 c9 04 83 e0 07 83 c0 03 40 38 f0 7c 09 40 RSP: 0018:ffffc90003b5f6c0 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff13 RAX: 1ffffffff10e4e55 RBX: 00000000d2f846d0 RCX: 00000000d2f846d0 RDX: 0000000012380612 RSI: ffffffff839863ca RDI: ffffffff887272a8 RBP: dffffc0000000000 R08: ffff888091d0a380 R09: 0000000000800081 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000012 R13: ffff8880a8ae8078 R14: 00000000c545c93e R15: 0000000000000006 cipher_crypt_one crypto/cipher.c:75 [inline] crypto_cipher_encrypt_one+0x124/0x210 crypto/cipher.c:82 crypto_cbcmac_digest_update+0x1b5/0x250 crypto/ccm.c:830 crypto_shash_update+0xc4/0x120 crypto/shash.c:119 shash_ahash_update+0xa3/0x110 crypto/shash.c:246 crypto_ahash_update include/crypto/hash.h:547 [inline] hash_sendmsg+0x518/0xad0 crypto/algif_hash.c:102 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:672 ____sys_sendmsg+0x308/0x7e0 net/socket.c:2362 ___sys_sendmsg+0x100/0x170 net/socket.c:2416 __sys_sendmmsg+0x195/0x480 net/socket.c:2506 __do_sys_sendmmsg net/socket.c:2535 [inline] __se_sys_sendmmsg net/socket.c:2532 [inline] __x64_sys_sendmmsg+0x99/0x100 net/socket.c:2532 do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:295 entry_SYSCALL_64_after_hwframe+0x49/0xb3 RIP: 0033:0x45c829 Code: 0d b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 db b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f6d9528ec78 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 00000000004fc080 RCX: 000000000045c829 RDX: 0000000000000001 RSI: 0000000020002640 RDI: 0000000000000004 RBP: 000000000078bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000000008d7 R14: 00000000004cb7aa R15: 00007f6d9528f6d4 Fixes: 4b15c7075352 ("net/sched: Make etf report drops on error_queue") Fixes: 25db26a91364 ("net/sched: Introduce the ETF Qdisc") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Vinicius Costa Gomes Reviewed-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- net/sched/sch_etf.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index b1da5589a0c6..c48f91075b5c 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -82,7 +82,7 @@ static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb) if (q->skip_sock_check) goto skip; - if (!sk) + if (!sk || !sk_fullsock(sk)) return false; if (!sock_flag(sk, SOCK_TXTIME)) @@ -137,8 +137,9 @@ static void report_sock_error(struct sk_buff *skb, u32 err, u8 code) struct sock_exterr_skb *serr; struct sk_buff *clone; ktime_t txtime = skb->tstamp; + struct sock *sk = skb->sk; - if (!skb->sk || !(skb->sk->sk_txtime_report_errors)) + if (!sk || !sk_fullsock(sk) || !(sk->sk_txtime_report_errors)) return; clone = skb_clone(skb, GFP_ATOMIC); @@ -154,7 +155,7 @@ static void report_sock_error(struct sk_buff *skb, u32 err, u8 code) serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */ serr->ee.ee_info = txtime; /* low part of tstamp */ - if (sock_queue_err_skb(skb->sk, clone)) + if (sock_queue_err_skb(sk, clone)) kfree_skb(clone); } From 72579e14a1d3d3d561039dfe7e5f47aaf22e3fd3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 21 Apr 2020 20:18:53 +0300 Subject: [PATCH 062/100] net: dsa: don't fail to probe if we couldn't set the MTU There is no reason to fail the probing of the switch if the MTU couldn't be configured correctly (either the switch port itself, or the host port) for whatever reason. MTU-sized traffic probably won't work, sure, but we can still probably limp on and support some form of communication anyway, which the users would probably appreciate more. Fixes: bfcb813203e6 ("net: dsa: configure the MTU for switch ports") Reported-by: Oleksij Rempel Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index e94eb1aac602..d1068803cd11 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1770,11 +1770,9 @@ int dsa_slave_create(struct dsa_port *port) rtnl_lock(); ret = dsa_slave_change_mtu(slave_dev, ETH_DATA_LEN); rtnl_unlock(); - if (ret && ret != -EOPNOTSUPP) { - dev_err(ds->dev, "error %d setting MTU on port %d\n", - ret, port->index); - goto out_free; - } + if (ret) + dev_warn(ds->dev, "nonfatal error %d setting MTU on port %d\n", + ret, port->index); netif_carrier_off(slave_dev); From 145cb2f7177d94bc54563ed26027e952ee0ae03c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jere=20Lepp=C3=A4nen?= Date: Tue, 21 Apr 2020 22:03:41 +0300 Subject: [PATCH 063/100] sctp: Fix bundling of SHUTDOWN with COOKIE-ACK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we start shutdown in sctp_sf_do_dupcook_a(), we want to bundle the SHUTDOWN with the COOKIE-ACK to ensure that the peer receives them at the same time and in the correct order. This bundling was broken by commit 4ff40b86262b ("sctp: set chunk transport correctly when it's a new asoc"), which assigns a transport for the COOKIE-ACK, but not for the SHUTDOWN. Fix this by passing a reference to the COOKIE-ACK chunk as an argument to sctp_sf_do_9_2_start_shutdown() and onward to sctp_make_shutdown(). This way the SHUTDOWN chunk is assigned the same transport as the COOKIE-ACK chunk, which allows them to be bundled. In sctp_sf_do_9_2_start_shutdown(), the void *arg parameter was previously unused. Now that we're taking it into use, it must be a valid pointer to a chunk, or NULL. There is only one call site where it's not, in sctp_sf_autoclose_timer_expire(). Fix that too. Fixes: 4ff40b86262b ("sctp: set chunk transport correctly when it's a new asoc") Signed-off-by: Jere Leppänen Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/sm_statefuns.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 6a16af4b1ef6..26788f4a3b9e 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1865,7 +1865,7 @@ static enum sctp_disposition sctp_sf_do_dupcook_a( */ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); return sctp_sf_do_9_2_start_shutdown(net, ep, asoc, - SCTP_ST_CHUNK(0), NULL, + SCTP_ST_CHUNK(0), repl, commands); } else { sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, @@ -5470,7 +5470,7 @@ enum sctp_disposition sctp_sf_do_9_2_start_shutdown( * in the Cumulative TSN Ack field the last sequential TSN it * has received from the peer. */ - reply = sctp_make_shutdown(asoc, NULL); + reply = sctp_make_shutdown(asoc, arg); if (!reply) goto nomem; @@ -6068,7 +6068,7 @@ enum sctp_disposition sctp_sf_autoclose_timer_expire( disposition = SCTP_DISPOSITION_CONSUME; if (sctp_outq_is_empty(&asoc->outqueue)) { disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type, - arg, commands); + NULL, commands); } return disposition; From 12dfd78e3a74825e6f0bc8df7ef9f938fbc6bfe3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jere=20Lepp=C3=A4nen?= Date: Tue, 21 Apr 2020 22:03:42 +0300 Subject: [PATCH 064/100] sctp: Fix SHUTDOWN CTSN Ack in the peer restart case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When starting shutdown in sctp_sf_do_dupcook_a(), get the value for SHUTDOWN Cumulative TSN Ack from the new association, which is reconstructed from the cookie, instead of the old association, which the peer doesn't have anymore. Otherwise the SHUTDOWN is either ignored or replied to with an ABORT by the peer because CTSN Ack doesn't match the peer's Initial TSN. Fixes: bdf6fa52f01b ("sctp: handle association restarts when the socket is closed.") Signed-off-by: Jere Leppänen Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 09050c1d5517..f7cb0b7faec2 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -858,7 +858,11 @@ struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc, struct sctp_chunk *retval; __u32 ctsn; - ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); + if (chunk && chunk->asoc) + ctsn = sctp_tsnmap_get_ctsn(&chunk->asoc->peer.tsn_map); + else + ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); + shut.cum_tsn_ack = htonl(ctsn); retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0, From 5cc4adbcfcad8a85fb19216c53bfc4e9fd9f6c7d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 21 Apr 2020 13:34:48 -0700 Subject: [PATCH 065/100] Documentation: add documentation of ping_group_range Support for non-root users to send ICMP ECHO requests was added back in Linux 3.0 kernel, but the documentation for the sysctl to enable it has been missing. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 6fcfd313dbe4..9375324aa8e1 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -983,6 +983,13 @@ ip_early_demux - BOOLEAN reduces overall throughput, in such case you should disable it. Default: 1 +ping_group_range - 2 INTEGERS + Restrict ICMP_PROTO datagram sockets to users in the group range. + The default is "1 0", meaning, that nobody (not even root) may + create ping sockets. Setting it to "100 100" would grant permissions + to the single group. "0 4294967295" would enable it for the world, "100 + 4294967295" would enable it for the users, but not daemons. + tcp_early_demux - BOOLEAN Enable early demux for established TCP sockets. Default: 1 From a53c102872ad6e34e1518e25899dc9498c27f8b1 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 21 Apr 2020 17:48:27 -0600 Subject: [PATCH 066/100] vrf: Fix IPv6 with qdisc and xfrm When a qdisc is attached to the VRF device, the packet goes down the ndo xmit function which is setup to send the packet back to the VRF driver which does a lookup to send the packet out. The lookup in the VRF driver is not considering xfrm policies. Change it to use ip6_dst_lookup_flow rather than ip6_route_output. Fixes: 35402e313663 ("net: Add IPv6 support to VRF device") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 6f5d03b7d9c0..56f8aab46f89 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -188,8 +188,8 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, fl6.flowi6_proto = iph->nexthdr; fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; - dst = ip6_route_output(net, NULL, &fl6); - if (dst == dst_null) + dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL); + if (IS_ERR(dst) || dst == dst_null) goto err; skb_dst_drop(skb); From 9a19371bf029d784aa37ee623ce175205f43ccfd Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 22 Apr 2020 18:24:56 +0200 Subject: [PATCH 067/100] mptcp: fix data_fin handing in RX path The data fin flag is set only via a DSS option, but mptcp_incoming_options() copies it unconditionally from the provided RX options. Since we do not clear all the mptcp sock RX options in a socket free/alloc cycle, we can end-up with a stray data_fin value while parsing e.g. MPC packets. That would lead to mapping data corruption and will trigger a few WARN_ON() in the RX path. Instead of adding a costly memset(), fetch the data_fin flag only for DSS packets - when we always explicitly initialize such bit at option parsing time. Fixes: 648ef4b88673 ("mptcp: Implement MPTCP receive path") Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/options.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index faf57585b892..4a7c467b99db 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -876,12 +876,11 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, mpext->data_seq = mp_opt->data_seq; mpext->subflow_seq = mp_opt->subflow_seq; mpext->dsn64 = mp_opt->dsn64; + mpext->data_fin = mp_opt->data_fin; } mpext->data_len = mp_opt->data_len; mpext->use_map = 1; } - - mpext->data_fin = mp_opt->data_fin; } void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts) From 31fa51ad7c5664d0e6530e3d537e2eb025aa1925 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Wed, 22 Apr 2020 20:52:54 +0300 Subject: [PATCH 068/100] MAINTAINERS: update dpaa2-eth maintainer list Add myself as another maintainer of dpaa2-eth. Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 6851ef7cf1bd..d5e4d13880b2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5173,6 +5173,7 @@ S: Maintained F: drivers/soc/fsl/dpio DPAA2 ETHERNET DRIVER +M: Ioana Ciornei M: Ioana Radulescu L: netdev@vger.kernel.org S: Maintained From c0259664c6879e1045d6d6703f37501690f6760f Mon Sep 17 00:00:00 2001 From: Salvatore Bonaccorso Date: Wed, 22 Apr 2020 21:07:53 +0200 Subject: [PATCH 069/100] netlabel: Kconfig: Update reference for NetLabel Tools project The NetLabel Tools project has moved from http://netlabel.sf.net to a GitHub project. Update to directly refer to the new home for the tools. Signed-off-by: Salvatore Bonaccorso Acked-by: Paul Moore Signed-off-by: David S. Miller --- net/netlabel/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig index 64280a1d3906..07b03c306f28 100644 --- a/net/netlabel/Kconfig +++ b/net/netlabel/Kconfig @@ -14,6 +14,6 @@ config NETLABEL Documentation/netlabel as well as the NetLabel SourceForge project for configuration tools and additional documentation. - * http://netlabel.sf.net + * https://github.com/netlabel/netlabel_tools If you are unsure, say N. From 7c74b0bec918c1e0ca0b4208038c156eacf8f13f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 22 Apr 2020 15:40:20 -0600 Subject: [PATCH 070/100] ipv4: Update fib_select_default to handle nexthop objects A user reported [0] hitting the WARN_ON in fib_info_nh: [ 8633.839816] ------------[ cut here ]------------ [ 8633.839819] WARNING: CPU: 0 PID: 1719 at include/net/nexthop.h:251 fib_select_path+0x303/0x381 ... [ 8633.839846] RIP: 0010:fib_select_path+0x303/0x381 ... [ 8633.839848] RSP: 0018:ffffb04d407f7d00 EFLAGS: 00010286 [ 8633.839850] RAX: 0000000000000000 RBX: ffff9460b9897ee8 RCX: 00000000000000fe [ 8633.839851] RDX: 0000000000000000 RSI: 00000000ffffffff RDI: 0000000000000000 [ 8633.839852] RBP: ffff946076049850 R08: 0000000059263a83 R09: ffff9460840e4000 [ 8633.839853] R10: 0000000000000014 R11: 0000000000000000 R12: ffffb04d407f7dc0 [ 8633.839854] R13: ffffffffa4ce3240 R14: 0000000000000000 R15: ffff9460b7681f60 [ 8633.839857] FS: 00007fcac2e02700(0000) GS:ffff9460bdc00000(0000) knlGS:0000000000000000 [ 8633.839858] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 8633.839859] CR2: 00007f27beb77e28 CR3: 0000000077734000 CR4: 00000000000006f0 [ 8633.839867] Call Trace: [ 8633.839871] ip_route_output_key_hash_rcu+0x421/0x890 [ 8633.839873] ip_route_output_key_hash+0x5e/0x80 [ 8633.839876] ip_route_output_flow+0x1a/0x50 [ 8633.839878] __ip4_datagram_connect+0x154/0x310 [ 8633.839880] ip4_datagram_connect+0x28/0x40 [ 8633.839882] __sys_connect+0xd6/0x100 ... The WARN_ON is triggered in fib_select_default which is invoked when there are multiple default routes. Update the function to use fib_info_nhc and convert the nexthop checks to use fib_nh_common. Add test case that covers the affected code path. [0] https://github.com/FRRouting/frr/issues/6089 Fixes: 493ced1ac47c ("ipv4: Allow routes to use nexthop objects") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 6 +++--- tools/testing/selftests/net/fib_nexthops.sh | 23 +++++++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 6ed8c9317179..55ca2e521828 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -2014,7 +2014,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) hlist_for_each_entry_rcu(fa, fa_head, fa_list) { struct fib_info *next_fi = fa->fa_info; - struct fib_nh *nh; + struct fib_nh_common *nhc; if (fa->fa_slen != slen) continue; @@ -2037,8 +2037,8 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) fa->fa_type != RTN_UNICAST) continue; - nh = fib_info_nh(next_fi, 0); - if (!nh->fib_nh_gw4 || nh->fib_nh_scope != RT_SCOPE_LINK) + nhc = fib_info_nhc(next_fi, 0); + if (!nhc->nhc_gw_family || nhc->nhc_scope != RT_SCOPE_LINK) continue; fib_alias_accessed(fa); diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 796670ebc65b..6560ed796ac4 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -749,6 +749,29 @@ ipv4_fcnal_runtime() run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" log_test $? 0 "Ping - multipath" + run_cmd "$IP ro delete 172.16.101.1/32 nhid 122" + + # + # multiple default routes + # - tests fib_select_default + run_cmd "$IP nexthop add id 501 via 172.16.1.2 dev veth1" + run_cmd "$IP ro add default nhid 501" + run_cmd "$IP ro add default via 172.16.1.3 dev veth1 metric 20" + run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" + log_test $? 0 "Ping - multiple default routes, nh first" + + # flip the order + run_cmd "$IP ro del default nhid 501" + run_cmd "$IP ro del default via 172.16.1.3 dev veth1 metric 20" + run_cmd "$IP ro add default via 172.16.1.2 dev veth1 metric 20" + run_cmd "$IP nexthop replace id 501 via 172.16.1.3 dev veth1" + run_cmd "$IP ro add default nhid 501 metric 20" + run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" + log_test $? 0 "Ping - multiple default routes, nh second" + + run_cmd "$IP nexthop delete nhid 501" + run_cmd "$IP ro del default" + # # IPv4 with blackhole nexthops # From 257d7d4f0e69f5e8e3d38351bdcab896719dba04 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 20 Apr 2020 10:18:43 -0600 Subject: [PATCH 071/100] libbpf: Only check mode flags in get_xdp_id The commit in the Fixes tag changed get_xdp_id to only return prog_id if flags is 0, but there are other XDP flags than the modes - e.g., XDP_FLAGS_UPDATE_IF_NOEXIST. Since the intention was only to look at MODE flags, clear other ones before checking if flags is 0. Fixes: f07cbad29741 ("libbpf: Fix bpf_get_link_xdp_id flags handling") Signed-off-by: David Ahern Signed-off-by: Alexei Starovoitov Acked-by: Andrey Ignatov --- tools/lib/bpf/netlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 0b709fd10bba..312f887570b2 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -321,6 +321,8 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags) { + flags &= XDP_FLAGS_MODES; + if (info->attach_mode != XDP_ATTACHED_MULTI && !flags) return info->prog_id; if (flags & XDP_FLAGS_DRV_MODE) From 0379861217dc2dd46e3bc517010060065b0dd6fc Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 22 Apr 2020 02:01:54 +0000 Subject: [PATCH 072/100] mac80211_hwsim: use GFP_ATOMIC under spin lock A spin lock is taken here so we should use GFP_ATOMIC. Fixes: 5d44fe7c9808 ("mac80211_hwsim: add frame transmission support over virtio") Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20200422020154.112088-1-weiyongjun1@huawei.com Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 7c4b7c31d07a..0528d4cb4d37 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -4068,7 +4068,7 @@ static void hwsim_virtio_rx_work(struct work_struct *work) } vq = hwsim_vqs[HWSIM_VQ_RX]; sg_init_one(sg, skb->head, skb_end_offset(skb)); - err = virtqueue_add_inbuf(vq, sg, 1, skb, GFP_KERNEL); + err = virtqueue_add_inbuf(vq, sg, 1, skb, GFP_ATOMIC); if (WARN(err, "virtqueue_add_inbuf returned %d\n", err)) nlmsg_free(skb); else From 829e7573c45a97e0f6a923a8e6e4589c26a26df2 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Tue, 21 Apr 2020 12:04:46 +0300 Subject: [PATCH 073/100] net: phy: marvell10g: limit soft reset to 88x3310 The MV_V2_PORT_CTRL_SWRST bit in MV_V2_PORT_CTRL is reserved on 88E2110. Setting SWRST on 88E2110 breaks packets transfer after interface down/up cycle. Fixes: 8f48c2ac85ed ("net: marvell10g: soft-reset the PHY when coming out of low power") Signed-off-by: Baruch Siach Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 95e3f4644aeb..ff12492771ab 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -246,7 +246,8 @@ static int mv3310_power_up(struct phy_device *phydev) ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, MV_V2_PORT_CTRL, MV_V2_PORT_CTRL_PWRDOWN); - if (priv->firmware_ver < 0x00030000) + if (phydev->drv->phy_id != MARVELL_PHY_ID_88X3310 || + priv->firmware_ver < 0x00030000) return ret; return phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MV_V2_PORT_CTRL, From c391eb8366ae052d571bb2841f1ccb4d39f3ceb8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 22 Apr 2020 12:36:41 +0300 Subject: [PATCH 074/100] mlxsw: Fix some IS_ERR() vs NULL bugs The mlxsw_sp_acl_rulei_create() function is supposed to return an error pointer from mlxsw_afa_block_create(). The problem is that these functions both return NULL instead of error pointers. Half the callers expect NULL and half expect error pointers so it could lead to a NULL dereference on failure. This patch changes both of them to return error pointers and changes all the callers which checked for NULL to check for IS_ERR() instead. Fixes: 4cda7d8d7098 ("mlxsw: core: Introduce flexible actions support") Signed-off-by: Dan Carpenter Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c | 4 ++-- drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c | 4 ++-- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index 70a104e728f6..c3d04319ff44 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -380,7 +380,7 @@ struct mlxsw_afa_block *mlxsw_afa_block_create(struct mlxsw_afa *mlxsw_afa) block = kzalloc(sizeof(*block), GFP_KERNEL); if (!block) - return NULL; + return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&block->resource_list); block->afa = mlxsw_afa; @@ -408,7 +408,7 @@ err_second_set_create: mlxsw_afa_set_destroy(block->first_set); err_first_set_create: kfree(block); - return NULL; + return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL(mlxsw_afa_block_create); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c index 6c66a0f1b79e..ad69913f19c1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c @@ -88,8 +88,8 @@ static int mlxsw_sp2_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv, * to be written using PEFA register to all indexes for all regions. */ afa_block = mlxsw_afa_block_create(mlxsw_sp->afa); - if (!afa_block) { - err = -ENOMEM; + if (IS_ERR(afa_block)) { + err = PTR_ERR(afa_block); goto err_afa_block; } err = mlxsw_afa_block_continue(afa_block); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 67ee880a8727..01cff711bbd2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -464,7 +464,7 @@ mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl, rulei = kzalloc(sizeof(*rulei), GFP_KERNEL); if (!rulei) - return NULL; + return ERR_PTR(-ENOMEM); if (afa_block) { rulei->act_block = afa_block; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c index 346f4a5fe053..221aa6a474eb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c @@ -199,8 +199,8 @@ mlxsw_sp_mr_tcam_afa_block_create(struct mlxsw_sp *mlxsw_sp, int err; afa_block = mlxsw_afa_block_create(mlxsw_sp->afa); - if (!afa_block) - return ERR_PTR(-ENOMEM); + if (IS_ERR(afa_block)) + return afa_block; err = mlxsw_afa_block_append_allocated_counter(afa_block, counter_index); From cc8e7c69db4dcc565ed3020f97ddd6debab6cbe8 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 22 Apr 2020 17:29:50 +0200 Subject: [PATCH 075/100] vxlan: use the correct nlattr array in NL_SET_ERR_MSG_ATTR IFLA_VXLAN_* attributes are in the data array, which is correctly used when fetching the value, but not when setting the extended ack. Because IFLA_VXLAN_MAX < IFLA_MAX, we avoid out of bounds array accesses, but we don't provide a pointer to the invalid attribute to userspace. Fixes: 653ef6a3e4af ("vxlan: change vxlan_[config_]validate() to use netlink_ext_ack for error reporting") Fixes: b4d3069783bc ("vxlan: Allow configuration of DF behaviour") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 45308b3350cf..a5b415fed11e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3144,7 +3144,7 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[], u32 id = nla_get_u32(data[IFLA_VXLAN_ID]); if (id >= VXLAN_N_VID) { - NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_ID], + NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_ID], "VXLAN ID must be lower than 16777216"); return -ERANGE; } @@ -3155,7 +3155,7 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[], = nla_data(data[IFLA_VXLAN_PORT_RANGE]); if (ntohs(p->high) < ntohs(p->low)) { - NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_PORT_RANGE], + NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_PORT_RANGE], "Invalid source port range"); return -EINVAL; } @@ -3165,7 +3165,7 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[], enum ifla_vxlan_df df = nla_get_u8(data[IFLA_VXLAN_DF]); if (df < 0 || df > VXLAN_DF_MAX) { - NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_DF], + NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_DF], "Invalid DF attribute"); return -EINVAL; } From 9a7b5b50de8a764671ba1800fe4c52d3b7013901 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 22 Apr 2020 17:29:51 +0200 Subject: [PATCH 076/100] geneve: use the correct nlattr array in NL_SET_ERR_MSG_ATTR IFLA_GENEVE_* attributes are in the data array, which is correctly used when fetching the value, but not when setting the extended ack. Because IFLA_GENEVE_MAX < IFLA_MAX, we avoid out of bounds array accesses, but we don't provide a pointer to the invalid attribute to userspace. Fixes: a025fb5f49ad ("geneve: Allow configuration of DF behaviour") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/geneve.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 09f279c0182b..6b461be1820b 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1207,7 +1207,7 @@ static int geneve_validate(struct nlattr *tb[], struct nlattr *data[], enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]); if (df < 0 || df > GENEVE_DF_MAX) { - NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_GENEVE_DF], + NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_DF], "Invalid DF attribute"); return -EINVAL; } From ce222748078592afb51b810dc154531aeba4f512 Mon Sep 17 00:00:00 2001 From: Vishal Kulkarni Date: Wed, 22 Apr 2020 21:20:07 +0530 Subject: [PATCH 077/100] cxgb4: fix adapter crash due to wrong MC size In the absence of MC1, the size calculation function cudbg_mem_region_size() was returing wrong MC size and resulted in adapter crash. This patch adds new argument to cudbg_mem_region_size() which will have actual size and returns error to caller in the absence of MC1. Fixes: a1c69520f785 ("cxgb4: collect MC memory dump") Signed-off-by: Vishal Kulkarni " Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 19c11568113a..7b9cd69f9844 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -1049,9 +1049,9 @@ static void cudbg_t4_fwcache(struct cudbg_init *pdbg_init, } } -static unsigned long cudbg_mem_region_size(struct cudbg_init *pdbg_init, - struct cudbg_error *cudbg_err, - u8 mem_type) +static int cudbg_mem_region_size(struct cudbg_init *pdbg_init, + struct cudbg_error *cudbg_err, + u8 mem_type, unsigned long *region_size) { struct adapter *padap = pdbg_init->adap; struct cudbg_meminfo mem_info; @@ -1060,15 +1060,23 @@ static unsigned long cudbg_mem_region_size(struct cudbg_init *pdbg_init, memset(&mem_info, 0, sizeof(struct cudbg_meminfo)); rc = cudbg_fill_meminfo(padap, &mem_info); - if (rc) + if (rc) { + cudbg_err->sys_err = rc; return rc; + } cudbg_t4_fwcache(pdbg_init, cudbg_err); rc = cudbg_meminfo_get_mem_index(padap, &mem_info, mem_type, &mc_idx); - if (rc) + if (rc) { + cudbg_err->sys_err = rc; return rc; + } - return mem_info.avail[mc_idx].limit - mem_info.avail[mc_idx].base; + if (region_size) + *region_size = mem_info.avail[mc_idx].limit - + mem_info.avail[mc_idx].base; + + return 0; } static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init, @@ -1076,7 +1084,12 @@ static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init, struct cudbg_error *cudbg_err, u8 mem_type) { - unsigned long size = cudbg_mem_region_size(pdbg_init, cudbg_err, mem_type); + unsigned long size = 0; + int rc; + + rc = cudbg_mem_region_size(pdbg_init, cudbg_err, mem_type, &size); + if (rc) + return rc; return cudbg_read_fw_mem(pdbg_init, dbg_buff, mem_type, size, cudbg_err); From 6ed79cec3ced6e346a10a70120fcee5f03591bab Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Thu, 23 Apr 2020 10:16:31 +0800 Subject: [PATCH 078/100] net: ethernet: ixp4xx: Add error handling in ixp4xx_eth_probe() The function ixp4xx_eth_probe() does not perform sufficient error checking after executing devm_ioremap_resource(), which can result in crashes if a critical error path is encountered. Fixes: f458ac479777 ("ARM/net: ixp4xx: Pass ethernet physical base as resource") Signed-off-by: Zhang Shengju Signed-off-by: Tang Bin Signed-off-by: David S. Miller --- drivers/net/ethernet/xscale/ixp4xx_eth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 269596c15133..2e5202923510 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -1387,6 +1387,8 @@ static int ixp4xx_eth_probe(struct platform_device *pdev) return -ENODEV; regs_phys = res->start; port->regs = devm_ioremap_resource(dev, res); + if (IS_ERR(port->regs)) + return PTR_ERR(port->regs); switch (port->id) { case IXP4XX_ETH_NPEA: From b4e0f9a926ec557cc0b91216957afd1b711bd45f Mon Sep 17 00:00:00 2001 From: Bo YU Date: Thu, 23 Apr 2020 10:10:03 +0800 Subject: [PATCH 079/100] mptcp/pm_netlink.c : add check for nla_put_in/6_addr Normal there should be checked for nla_put_in6_addr like other usage in net. Detected by CoverityScan, CID# 1461639 Fixes: 01cacb00b35c ("mptcp: add netlink-based PM") Signed-off-by: Bo YU Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 86d61ab34c7c..b78edf237ba0 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -599,12 +599,14 @@ static int mptcp_nl_fill_addr(struct sk_buff *skb, nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->ifindex)) goto nla_put_failure; - if (addr->family == AF_INET) - nla_put_in_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR4, - addr->addr.s_addr); + if (addr->family == AF_INET && + nla_put_in_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR4, + addr->addr.s_addr)) + goto nla_put_failure; #if IS_ENABLED(CONFIG_MPTCP_IPV6) - else if (addr->family == AF_INET6) - nla_put_in6_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR6, &addr->addr6); + else if (addr->family == AF_INET6 && + nla_put_in6_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR6, &addr->addr6)) + goto nla_put_failure; #endif nla_nest_end(skb, attr); return 0; From f35d12971b4d814cdb2f659d76b42f0c545270b6 Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Thu, 23 Apr 2020 13:13:03 +0800 Subject: [PATCH 080/100] net/x25: Fix x25_neigh refcnt leak when receiving frame x25_lapb_receive_frame() invokes x25_get_neigh(), which returns a reference of the specified x25_neigh object to "nb" with increased refcnt. When x25_lapb_receive_frame() returns, local variable "nb" becomes invalid, so the refcount should be decreased to keep refcount balanced. The reference counting issue happens in one path of x25_lapb_receive_frame(). When pskb_may_pull() returns false, the function forgets to decrease the refcnt increased by x25_get_neigh(), causing a refcnt leak. Fix this issue by calling x25_neigh_put() when pskb_may_pull() returns false. Fixes: cb101ed2c3c7 ("x25: Handle undersized/fragmented skbs") Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Signed-off-by: David S. Miller --- net/x25/x25_dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 00e782335cb0..25bf72ee6cad 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -115,8 +115,10 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, goto drop; } - if (!pskb_may_pull(skb, 1)) + if (!pskb_may_pull(skb, 1)) { + x25_neigh_put(nb); return 0; + } switch (skb->data[0]) { From d97793af11e48161dee00fa877e34b65e7a20da9 Mon Sep 17 00:00:00 2001 From: Rohit Maheshwari Date: Thu, 23 Apr 2020 12:18:55 +0530 Subject: [PATCH 081/100] chcr: Fix CPU hard lockup Soft lock should be taken in place of hard lock. Signed-off-by: Rohit Maheshwari Signed-off-by: David S. Miller --- drivers/crypto/chelsio/chcr_ktls.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_ktls.c b/drivers/crypto/chelsio/chcr_ktls.c index cd1769ecdc1c..e92b352fb0ad 100644 --- a/drivers/crypto/chelsio/chcr_ktls.c +++ b/drivers/crypto/chelsio/chcr_ktls.c @@ -120,12 +120,10 @@ out: static int chcr_ktls_update_connection_state(struct chcr_ktls_info *tx_info, int new_state) { - unsigned long flags; - /* This function can be called from both rx (interrupt context) and tx * queue contexts. */ - spin_lock_irqsave(&tx_info->lock, flags); + spin_lock_bh(&tx_info->lock); switch (tx_info->connection_state) { case KTLS_CONN_CLOSED: tx_info->connection_state = new_state; @@ -169,7 +167,7 @@ static int chcr_ktls_update_connection_state(struct chcr_ktls_info *tx_info, pr_err("unknown KTLS connection state\n"); break; } - spin_unlock_irqrestore(&tx_info->lock, flags); + spin_unlock_bh(&tx_info->lock); return tx_info->connection_state; } From 796a8fa28980050bf1995617f0876484f3dc1026 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 23 Apr 2020 08:57:42 +0100 Subject: [PATCH 082/100] net: phy: bcm84881: clear settings on link down Clear the link partner advertisement, speed, duplex and pause when the link goes down, as other phylib drivers do. This avoids the stale link partner, speed and duplex settings being reported via ethtool. Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/bcm84881.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/bcm84881.c b/drivers/net/phy/bcm84881.c index 3840d2adbbb9..9717a1626f3f 100644 --- a/drivers/net/phy/bcm84881.c +++ b/drivers/net/phy/bcm84881.c @@ -155,9 +155,6 @@ static int bcm84881_read_status(struct phy_device *phydev) if (phydev->autoneg == AUTONEG_ENABLE && !phydev->autoneg_complete) phydev->link = false; - if (!phydev->link) - return 0; - linkmode_zero(phydev->lp_advertising); phydev->speed = SPEED_UNKNOWN; phydev->duplex = DUPLEX_UNKNOWN; @@ -165,6 +162,9 @@ static int bcm84881_read_status(struct phy_device *phydev) phydev->asym_pause = 0; phydev->mdix = 0; + if (!phydev->link) + return 0; + if (phydev->autoneg_complete) { val = genphy_c45_read_lpa(phydev); if (val < 0) From 9d3cdd446e702f8c177b2d029fc72676e61a4ae8 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 23 Apr 2020 09:39:03 -0600 Subject: [PATCH 083/100] net: meth: remove spurious copyright text Evidently, at some point in the pre-githistorious past, drivers/net/ethernet/sgi/meth.h somehow contained some code from the "snull" driver from the Linux Device Drivers book. A comment crediting that source, asserting copyright ownership by the LDD authors, and imposing the LDD2 license terms was duly added to the file. Any code that may have been derived from snull is long gone, and the distribution terms are not GPL-compatible. Since the copyright claim is not based in fact (if it ever was), simply remove it and the distribution terms as well. Reported-by: Jan Kiszka Acked-by: Alessandro Rubini CC: Ralf Baechle CC: Kate Stewart CC: "Fendt, Oliver" Signed-off-by: Jonathan Corbet Signed-off-by: David S. Miller --- drivers/net/ethernet/sgi/meth.h | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/net/ethernet/sgi/meth.h b/drivers/net/ethernet/sgi/meth.h index 5b145c6bad60..2ba15c263e8b 100644 --- a/drivers/net/ethernet/sgi/meth.h +++ b/drivers/net/ethernet/sgi/meth.h @@ -1,19 +1,3 @@ - -/* - * snull.h -- definitions for the network module - * - * Copyright (C) 2001 Alessandro Rubini and Jonathan Corbet - * Copyright (C) 2001 O'Reilly & Associates - * - * The source code in this file can be freely used, adapted, - * and redistributed in source or binary form, so long as an - * acknowledgment appears in derived source files. The citation - * should list that the code comes from the book "Linux Device - * Drivers" by Alessandro Rubini and Jonathan Corbet, published - * by O'Reilly & Associates. No warranty is attached; - * we cannot take responsibility for errors or fitness for use. - */ - /* version dependencies have been confined to a separate file */ /* Tunable parameters */ From a6d0b83f25073bdf08b8547aeff961a62c6ab229 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 23 Apr 2020 15:44:17 -0700 Subject: [PATCH 084/100] net: bcmgenet: correct per TX/RX ring statistics The change to track net_device_stats per ring to better support SMP missed updating the rx_dropped member. The ndo_get_stats method is also needed to combine the results for ethtool statistics (-S) before filling in the ethtool structure. Fixes: 37a30b435b92 ("net: bcmgenet: Track per TX/RX rings statistics") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index d975338bf78d..c4765bbe527b 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -934,6 +934,8 @@ static void bcmgenet_get_ethtool_stats(struct net_device *dev, if (netif_running(dev)) bcmgenet_update_mib_counters(priv); + dev->netdev_ops->ndo_get_stats(dev); + for (i = 0; i < BCMGENET_STATS_LEN; i++) { const struct bcmgenet_stats *s; char *p; @@ -3156,6 +3158,7 @@ static struct net_device_stats *bcmgenet_get_stats(struct net_device *dev) dev->stats.rx_packets = rx_packets; dev->stats.rx_errors = rx_errors; dev->stats.rx_missed_errors = rx_errors; + dev->stats.rx_dropped = rx_dropped; return &dev->stats; } From 6cb5f3ea4654faf8c28b901266e960b1a4787b26 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 Apr 2020 11:13:49 +0200 Subject: [PATCH 085/100] mac80211: populate debugfs only after cfg80211 init When fixing the initialization race, we neglected to account for the fact that debugfs is initialized in wiphy_register(), and some debugfs things went missing (or rather were rerooted to the global debugfs root). Fix this by adding debugfs entries only after wiphy_register(). This requires some changes in the rate control code since it currently adds debugfs at alloc time, which can no longer be done after the reordering. Reported-by: Jouni Malinen Reported-by: kernel test robot Reported-by: Hauke Mehrtens Reported-by: Felix Fietkau Cc: stable@vger.kernel.org Fixes: 52e04b4ce5d0 ("mac80211: fix race in ieee80211_register_hw()") Signed-off-by: Johannes Berg Acked-by: Sumit Garg Link: https://lore.kernel.org/r/20200423111344.0e00d3346f12.Iadc76a03a55093d94391fc672e996a458702875d@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlegacy/3945-rs.c | 2 +- drivers/net/wireless/intel/iwlegacy/4965-rs.c | 2 +- drivers/net/wireless/intel/iwlwifi/dvm/rs.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rc.c | 2 +- include/net/mac80211.h | 4 +++- net/mac80211/main.c | 5 ++-- net/mac80211/rate.c | 15 ++++-------- net/mac80211/rate.h | 23 +++++++++++++++++++ net/mac80211/rc80211_minstrel_ht.c | 19 ++++++++++----- 10 files changed, 51 insertions(+), 25 deletions(-) diff --git a/drivers/net/wireless/intel/iwlegacy/3945-rs.c b/drivers/net/wireless/intel/iwlegacy/3945-rs.c index 6209f85a71dd..0af9e997c9f6 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945-rs.c +++ b/drivers/net/wireless/intel/iwlegacy/3945-rs.c @@ -374,7 +374,7 @@ out: } static void * -il3945_rs_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) +il3945_rs_alloc(struct ieee80211_hw *hw) { return hw->priv; } diff --git a/drivers/net/wireless/intel/iwlegacy/4965-rs.c b/drivers/net/wireless/intel/iwlegacy/4965-rs.c index 7c6e2c863497..0a02d8aca320 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-rs.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-rs.c @@ -2474,7 +2474,7 @@ il4965_rs_fill_link_cmd(struct il_priv *il, struct il_lq_sta *lq_sta, } static void * -il4965_rs_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) +il4965_rs_alloc(struct ieee80211_hw *hw) { return hw->priv; } diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/rs.c b/drivers/net/wireless/intel/iwlwifi/dvm/rs.c index 226165db7dfd..dac809df7f1d 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/rs.c @@ -3019,7 +3019,7 @@ static void rs_fill_link_cmd(struct iwl_priv *priv, cpu_to_le16(priv->lib->bt_params->agg_time_limit); } -static void *rs_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) +static void *rs_alloc(struct ieee80211_hw *hw) { return hw->priv; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index c1aba2bf73cf..00e7fdbaeb7f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -3665,7 +3665,7 @@ static void rs_fill_lq_cmd(struct iwl_mvm *mvm, cpu_to_le16(iwl_mvm_coex_agg_time_limit(mvm, sta)); } -static void *rs_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) +static void *rs_alloc(struct ieee80211_hw *hw) { return hw->priv; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rc.c b/drivers/net/wireless/realtek/rtlwifi/rc.c index 0c7d74902d33..4b5ea0ec9109 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rc.c +++ b/drivers/net/wireless/realtek/rtlwifi/rc.c @@ -261,7 +261,7 @@ static void rtl_rate_update(void *ppriv, { } -static void *rtl_rate_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) +static void *rtl_rate_alloc(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); return rtlpriv; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b6b4de0e4b5e..97fec4d310ac 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6007,7 +6007,9 @@ enum rate_control_capabilities { struct rate_control_ops { unsigned long capa; const char *name; - void *(*alloc)(struct ieee80211_hw *hw, struct dentry *debugfsdir); + void *(*alloc)(struct ieee80211_hw *hw); + void (*add_debugfs)(struct ieee80211_hw *hw, void *priv, + struct dentry *debugfsdir); void (*free)(void *priv); void *(*alloc_sta)(void *priv, struct ieee80211_sta *sta, gfp_t gfp); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 0e9ad60fb2b3..6423173bb87e 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1183,8 +1183,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->tx_headroom = max_t(unsigned int , local->hw.extra_tx_headroom, IEEE80211_TX_STATUS_HEADROOM); - debugfs_hw_add(local); - /* * if the driver doesn't specify a max listen interval we * use 5 which should be a safe default @@ -1273,6 +1271,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (result < 0) goto fail_wiphy_register; + debugfs_hw_add(local); + rate_control_add_debugfs(local); + rtnl_lock(); /* add one default STA interface if supported */ diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index a1e9fc7878aa..b051f125d3af 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -214,17 +214,16 @@ static ssize_t rcname_read(struct file *file, char __user *userbuf, ref->ops->name, len); } -static const struct file_operations rcname_ops = { +const struct file_operations rcname_ops = { .read = rcname_read, .open = simple_open, .llseek = default_llseek, }; #endif -static struct rate_control_ref *rate_control_alloc(const char *name, - struct ieee80211_local *local) +static struct rate_control_ref * +rate_control_alloc(const char *name, struct ieee80211_local *local) { - struct dentry *debugfsdir = NULL; struct rate_control_ref *ref; ref = kmalloc(sizeof(struct rate_control_ref), GFP_KERNEL); @@ -234,13 +233,7 @@ static struct rate_control_ref *rate_control_alloc(const char *name, if (!ref->ops) goto free; -#ifdef CONFIG_MAC80211_DEBUGFS - debugfsdir = debugfs_create_dir("rc", local->hw.wiphy->debugfsdir); - local->debugfs.rcdir = debugfsdir; - debugfs_create_file("name", 0400, debugfsdir, ref, &rcname_ops); -#endif - - ref->priv = ref->ops->alloc(&local->hw, debugfsdir); + ref->priv = ref->ops->alloc(&local->hw); if (!ref->priv) goto free; return ref; diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 5397c6dad056..79b44d3db171 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -60,6 +60,29 @@ static inline void rate_control_add_sta_debugfs(struct sta_info *sta) #endif } +extern const struct file_operations rcname_ops; + +static inline void rate_control_add_debugfs(struct ieee80211_local *local) +{ +#ifdef CONFIG_MAC80211_DEBUGFS + struct dentry *debugfsdir; + + if (!local->rate_ctrl) + return; + + if (!local->rate_ctrl->ops->add_debugfs) + return; + + debugfsdir = debugfs_create_dir("rc", local->hw.wiphy->debugfsdir); + local->debugfs.rcdir = debugfsdir; + debugfs_create_file("name", 0400, debugfsdir, + local->rate_ctrl, &rcname_ops); + + local->rate_ctrl->ops->add_debugfs(&local->hw, local->rate_ctrl->priv, + debugfsdir); +#endif +} + void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata); /* Get a reference to the rate control algorithm. If `name' is NULL, get the diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 694a31978a04..5dc3e5bc4e64 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1635,7 +1635,7 @@ minstrel_ht_init_cck_rates(struct minstrel_priv *mp) } static void * -minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) +minstrel_ht_alloc(struct ieee80211_hw *hw) { struct minstrel_priv *mp; @@ -1673,7 +1673,17 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) mp->update_interval = HZ / 10; mp->new_avg = true; + minstrel_ht_init_cck_rates(mp); + + return mp; +} + #ifdef CONFIG_MAC80211_DEBUGFS +static void minstrel_ht_add_debugfs(struct ieee80211_hw *hw, void *priv, + struct dentry *debugfsdir) +{ + struct minstrel_priv *mp = priv; + mp->fixed_rate_idx = (u32) -1; debugfs_create_u32("fixed_rate_idx", S_IRUGO | S_IWUGO, debugfsdir, &mp->fixed_rate_idx); @@ -1681,12 +1691,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) &mp->sample_switch); debugfs_create_bool("new_avg", S_IRUGO | S_IWUSR, debugfsdir, &mp->new_avg); -#endif - - minstrel_ht_init_cck_rates(mp); - - return mp; } +#endif static void minstrel_ht_free(void *priv) @@ -1725,6 +1731,7 @@ static const struct rate_control_ops mac80211_minstrel_ht = { .alloc = minstrel_ht_alloc, .free = minstrel_ht_free, #ifdef CONFIG_MAC80211_DEBUGFS + .add_debugfs = minstrel_ht_add_debugfs, .add_sta_debugfs = minstrel_ht_add_sta_debugfs, #endif .get_expected_throughput = minstrel_ht_get_expected_throughput, From 8ca47eb9f9e4e10e7e7fa695731a88941732c38d Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Thu, 9 Apr 2020 13:59:06 +0530 Subject: [PATCH 086/100] mac80211: sta_info: Add lockdep condition for RCU list usage The function sta_info_get_by_idx() uses RCU list primitive. It is called with local->sta_mtx held from mac80211/cfg.c. Add lockdep expression to avoid any false positive RCU list warnings. Signed-off-by: Madhuparna Bhowmik Link: https://lore.kernel.org/r/20200409082906.27427-1-madhuparnabhowmik10@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f8d5c2515829..cd8487bc6fc2 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -231,7 +231,8 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; int i = 0; - list_for_each_entry_rcu(sta, &local->sta_list, list) { + list_for_each_entry_rcu(sta, &local->sta_list, list, + lockdep_is_held(&local->sta_mtx)) { if (sdata != sta->sdata) continue; if (i < idx) { From 7f327080364abccf923fa5a5b24e038eb0ba1407 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 23 Apr 2020 13:40:47 +0000 Subject: [PATCH 087/100] macsec: avoid to set wrong mtu When a macsec interface is created, the mtu is calculated with the lower interface's mtu value. If the mtu of lower interface is lower than the length, which is needed by macsec interface, macsec's mtu value will be overflowed. So, if the lower interface's mtu is too low, macsec interface's mtu should be set to 0. Test commands: ip link add dummy0 mtu 10 type dummy ip link add macsec0 link dummy0 type macsec ip link show macsec0 Before: 11: macsec0@dummy0: mtu 4294967274 After: 11: macsec0@dummy0: mtu 0 Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/macsec.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index a183250ff66a..758baf7cb8a1 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -4002,11 +4002,11 @@ static int macsec_newlink(struct net *net, struct net_device *dev, struct netlink_ext_ack *extack) { struct macsec_dev *macsec = macsec_priv(dev); - struct net_device *real_dev; - int err; - sci_t sci; - u8 icv_len = DEFAULT_ICV_LEN; rx_handler_func_t *rx_handler; + u8 icv_len = DEFAULT_ICV_LEN; + struct net_device *real_dev; + int err, mtu; + sci_t sci; if (!tb[IFLA_LINK]) return -EINVAL; @@ -4033,7 +4033,11 @@ static int macsec_newlink(struct net *net, struct net_device *dev, if (data && data[IFLA_MACSEC_ICV_LEN]) icv_len = nla_get_u8(data[IFLA_MACSEC_ICV_LEN]); - dev->mtu = real_dev->mtu - icv_len - macsec_extra_len(true); + mtu = real_dev->mtu - icv_len - macsec_extra_len(true); + if (mtu < 0) + dev->mtu = 0; + else + dev->mtu = mtu; rx_handler = rtnl_dereference(real_dev->rx_handler); if (rx_handler && rx_handler != macsec_handle_frame) From ecaeceb8a8a145d93c7e136f170238229165348f Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 23 Apr 2020 16:02:11 -0700 Subject: [PATCH 088/100] net: bcmgenet: suppress warnings on failed Rx SKB allocations The driver is designed to drop Rx packets and reclaim the buffers when an allocation fails, and the network interface needs to safely handle this packet loss. Therefore, an allocation failure of Rx SKBs is relatively benign. However, the output of the warning message occurs with a high scheduling priority that can cause excessive jitter/latency for other high priority processing. This commit suppresses the warning messages to prevent scheduling problems while retaining the failure count in the statistics of the network interface. Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index c4765bbe527b..79636c78127c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1624,7 +1624,8 @@ static struct sk_buff *bcmgenet_rx_refill(struct bcmgenet_priv *priv, dma_addr_t mapping; /* Allocate a new Rx skb */ - skb = netdev_alloc_skb(priv->dev, priv->rx_buf_len + SKB_ALIGNMENT); + skb = __netdev_alloc_skb(priv->dev, priv->rx_buf_len + SKB_ALIGNMENT, + GFP_ATOMIC | __GFP_NOWARN); if (!skb) { priv->mib.alloc_rx_buff_failed++; netif_err(priv, rx_err, priv->dev, From 3554e54a46125030c534820c297ed7f6c3907e24 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 23 Apr 2020 16:13:30 -0700 Subject: [PATCH 089/100] net: systemport: suppress warnings on failed Rx SKB allocations The driver is designed to drop Rx packets and reclaim the buffers when an allocation fails, and the network interface needs to safely handle this packet loss. Therefore, an allocation failure of Rx SKBs is relatively benign. However, the output of the warning message occurs with a high scheduling priority that can cause excessive jitter/latency for other high priority processing. This commit suppresses the warning messages to prevent scheduling problems while retaining the failure count in the statistics of the network interface. Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index c99e5a3fa746..b25356e21a1e 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -664,7 +664,8 @@ static struct sk_buff *bcm_sysport_rx_refill(struct bcm_sysport_priv *priv, dma_addr_t mapping; /* Allocate a new SKB for a new packet */ - skb = netdev_alloc_skb(priv->netdev, RX_BUF_LENGTH); + skb = __netdev_alloc_skb(priv->netdev, RX_BUF_LENGTH, + GFP_ATOMIC | __GFP_NOWARN); if (!skb) { priv->mib.alloc_rx_buff_failed++; netif_err(priv, rx_err, ndev, "SKB alloc failed\n"); From a33d3147945543f9ded67a052f358a75595f1ecb Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Wed, 22 Apr 2020 10:23:24 +0200 Subject: [PATCH 090/100] bpf: Fix reStructuredText markup The patch fixes: $ scripts/bpf_helpers_doc.py > bpf-helpers.rst $ rst2man bpf-helpers.rst > bpf-helpers.7 bpf-helpers.rst:1105: (WARNING/2) Inline strong start-string without end-string. Signed-off-by: Jakub Wilk Signed-off-by: Alexei Starovoitov Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200422082324.2030-1-jwilk@jwilk.net --- include/uapi/linux/bpf.h | 2 +- tools/include/uapi/linux/bpf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2e29a671d67e..7bbf1b65be10 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1642,7 +1642,7 @@ union bpf_attr { * ifindex, but doesn't require a map to do so. * Return * **XDP_REDIRECT** on success, or the value of the two lower bits - * of the **flags* argument on error. + * of the *flags* argument on error. * * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) * Description diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 2e29a671d67e..7bbf1b65be10 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1642,7 +1642,7 @@ union bpf_attr { * ifindex, but doesn't require a map to do so. * Return * **XDP_REDIRECT** on success, or the value of the two lower bits - * of the **flags* argument on error. + * of the *flags* argument on error. * * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) * Description From 5fa9a98fb10380e48a398998cd36a85e4ef711d6 Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Wed, 22 Apr 2020 10:36:29 -0700 Subject: [PATCH 091/100] bpf, x86_32: Fix incorrect encoding in BPF_LDX zero-extension The current JIT uses the following sequence to zero-extend into the upper 32 bits of the destination register for BPF_LDX BPF_{B,H,W}, when the destination register is not on the stack: EMIT3(0xC7, add_1reg(0xC0, dst_hi), 0); The problem is that C7 /0 encodes a MOV instruction that requires a 4-byte immediate; the current code emits only 1 byte of the immediate. This means that the first 3 bytes of the next instruction will be treated as the rest of the immediate, breaking the stream of instructions. This patch fixes the problem by instead emitting "xor dst_hi,dst_hi" to clear the upper 32 bits. This fixes the problem and is more efficient than using MOV to load a zero immediate. This bug may not be currently triggerable as BPF_REG_AX is the only register not stored on the stack and the verifier uses it in a limited way, and the verifier implements a zero-extension optimization. But the JIT should avoid emitting incorrect encodings regardless. Fixes: 03f5781be2c7b ("bpf, x86_32: add eBPF JIT compiler for ia32") Signed-off-by: Xi Wang Signed-off-by: Luke Nelson Signed-off-by: Alexei Starovoitov Reviewed-by: H. Peter Anvin (Intel) Acked-by: Wang YanQing Link: https://lore.kernel.org/bpf/20200422173630.8351-1-luke.r.nels@gmail.com --- arch/x86/net/bpf_jit_comp32.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 4d2a7a764602..cc9ad3892ea6 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -1854,7 +1854,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, STACK_VAR(dst_hi)); EMIT(0x0, 4); } else { - EMIT3(0xC7, add_1reg(0xC0, dst_hi), 0); + /* xor dst_hi,dst_hi */ + EMIT2(0x33, + add_2reg(0xC0, dst_hi, dst_hi)); } break; case BPF_DW: From 50fe7ebb6475711c15b3397467e6424e20026d94 Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Wed, 22 Apr 2020 10:36:30 -0700 Subject: [PATCH 092/100] bpf, x86_32: Fix clobbering of dst for BPF_JSET The current JIT clobbers the destination register for BPF_JSET BPF_X and BPF_K by using "and" and "or" instructions. This is fine when the destination register is a temporary loaded from a register stored on the stack but not otherwise. This patch fixes the problem (for both BPF_K and BPF_X) by always loading the destination register into temporaries since BPF_JSET should not modify the destination register. This bug may not be currently triggerable as BPF_REG_AX is the only register not stored on the stack and the verifier uses it in a limited way. Fixes: 03f5781be2c7b ("bpf, x86_32: add eBPF JIT compiler for ia32") Signed-off-by: Xi Wang Signed-off-by: Luke Nelson Signed-off-by: Alexei Starovoitov Acked-by: Wang YanQing Link: https://lore.kernel.org/bpf/20200422173630.8351-2-luke.r.nels@gmail.com --- arch/x86/net/bpf_jit_comp32.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index cc9ad3892ea6..ba7d9ccfc662 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -2015,8 +2015,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_JMP | BPF_JSET | BPF_X: case BPF_JMP32 | BPF_JSET | BPF_X: { bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP; - u8 dreg_lo = dstk ? IA32_EAX : dst_lo; - u8 dreg_hi = dstk ? IA32_EDX : dst_hi; + u8 dreg_lo = IA32_EAX; + u8 dreg_hi = IA32_EDX; u8 sreg_lo = sstk ? IA32_ECX : src_lo; u8 sreg_hi = sstk ? IA32_EBX : src_hi; @@ -2028,6 +2028,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst_hi)); + } else { + /* mov dreg_lo,dst_lo */ + EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo)); + if (is_jmp64) + /* mov dreg_hi,dst_hi */ + EMIT2(0x89, + add_2reg(0xC0, dreg_hi, dst_hi)); } if (sstk) { @@ -2052,8 +2059,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP32 | BPF_JSET | BPF_K: { bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP; - u8 dreg_lo = dstk ? IA32_EAX : dst_lo; - u8 dreg_hi = dstk ? IA32_EDX : dst_hi; + u8 dreg_lo = IA32_EAX; + u8 dreg_hi = IA32_EDX; u8 sreg_lo = IA32_ECX; u8 sreg_hi = IA32_EBX; u32 hi; @@ -2066,6 +2073,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst_hi)); + } else { + /* mov dreg_lo,dst_lo */ + EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo)); + if (is_jmp64) + /* mov dreg_hi,dst_hi */ + EMIT2(0x89, + add_2reg(0xC0, dreg_hi, dst_hi)); } /* mov ecx,imm32 */ From 5ca1ca01fae1e90f8d010eb1d83374f28dc11ee6 Mon Sep 17 00:00:00 2001 From: Wang YanQing Date: Thu, 23 Apr 2020 13:06:37 +0800 Subject: [PATCH 093/100] bpf, x86_32: Fix logic error in BPF_LDX zero-extension When verifier_zext is true, we don't need to emit code for zero-extension. Fixes: 836256bf5f37 ("x32: bpf: eliminate zero extension code-gen") Signed-off-by: Wang YanQing Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200423050637.GA4029@udknight --- arch/x86/net/bpf_jit_comp32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index ba7d9ccfc662..66cd150b7e54 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -1847,7 +1847,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_B: case BPF_H: case BPF_W: - if (!bpf_prog->aux->verifier_zext) + if (bpf_prog->aux->verifier_zext) break; if (dstk) { EMIT3(0xC7, add_1reg(0x40, IA32_EBP), From 4adb7a4a151c65ac7e9c3a1aa462c84190d48385 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 23 Apr 2020 22:20:44 -0700 Subject: [PATCH 094/100] bpf: Fix leak in LINK_UPDATE and enforce empty old_prog_fd Fix bug of not putting bpf_link in LINK_UPDATE command. Also enforce zeroed old_prog_fd if no BPF_F_REPLACE flag is specified. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200424052045.4002963-1-andriin@fb.com --- kernel/bpf/syscall.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index d85f37239540..bca58c235ac0 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3628,8 +3628,10 @@ static int link_update(union bpf_attr *attr) return PTR_ERR(link); new_prog = bpf_prog_get(attr->link_update.new_prog_fd); - if (IS_ERR(new_prog)) - return PTR_ERR(new_prog); + if (IS_ERR(new_prog)) { + ret = PTR_ERR(new_prog); + goto out_put_link; + } if (flags & BPF_F_REPLACE) { old_prog = bpf_prog_get(attr->link_update.old_prog_fd); @@ -3638,6 +3640,9 @@ static int link_update(union bpf_attr *attr) old_prog = NULL; goto out_put_progs; } + } else if (attr->link_update.old_prog_fd) { + ret = -EINVAL; + goto out_put_progs; } #ifdef CONFIG_CGROUP_BPF @@ -3653,6 +3658,8 @@ out_put_progs: bpf_prog_put(old_prog); if (ret) bpf_prog_put(new_prog); +out_put_link: + bpf_link_put(link); return ret; } From 03f87c0b45b177ba5f6b4a9bbe9f95e4aba31026 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 24 Apr 2020 15:34:27 +0200 Subject: [PATCH 095/100] bpf: Propagate expected_attach_type when verifying freplace programs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some program types, the verifier relies on the expected_attach_type of the program being verified in the verification process. However, for freplace programs, the attach type was not propagated along with the verifier ops, so the expected_attach_type would always be zero for freplace programs. This in turn caused the verifier to sometimes make the wrong call for freplace programs. For all existing uses of expected_attach_type for this purpose, the result of this was only false negatives (i.e., freplace functions would be rejected by the verifier even though they were valid programs for the target they were replacing). However, should a false positive be introduced, this can lead to out-of-bounds accesses and/or crashes. The fix introduced in this patch is to propagate the expected_attach_type to the freplace program during verification, and reset it after that is done. Fixes: be8704ff07d2 ("bpf: Introduce dynamic program extensions") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/158773526726.293902.13257293296560360508.stgit@toke.dk --- kernel/bpf/verifier.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9382609147f5..fa1d8245b925 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10497,6 +10497,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) return -EINVAL; } env->ops = bpf_verifier_ops[tgt_prog->type]; + prog->expected_attach_type = tgt_prog->expected_attach_type; } if (!tgt_prog->jited) { verbose(env, "Can attach to only JITed progs\n"); @@ -10841,6 +10842,13 @@ err_release_maps: * them now. Otherwise free_used_maps() will release them. */ release_maps(env); + + /* extension progs temporarily inherit the attach_type of their targets + for verification purposes, so set it back to zero before returning + */ + if (env->prog->type == BPF_PROG_TYPE_EXT) + env->prog->expected_attach_type = 0; + *prog = env->prog; err_unlock: if (!is_priv) From 1d8a0af5ee1ad219a9ebd9b43559b165bcb3ff40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 24 Apr 2020 15:34:28 +0200 Subject: [PATCH 096/100] selftests/bpf: Add test for freplace program with expected_attach_type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a new selftest that tests the ability to attach an freplace program to a program type that relies on the expected_attach_type of the target program to pass verification. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/158773526831.293902.16011743438619684815.stgit@toke.dk --- .../selftests/bpf/prog_tests/fexit_bpf2bpf.c | 30 +++++++++++++++---- .../selftests/bpf/progs/connect4_prog.c | 28 +++++++++-------- .../selftests/bpf/progs/freplace_connect4.c | 18 +++++++++++ 3 files changed, 58 insertions(+), 18 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/freplace_connect4.c diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index cde463af7071..c2642517e1d8 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -5,7 +5,8 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, const char *target_obj_file, int prog_cnt, - const char **prog_name) + const char **prog_name, + bool run_prog) { struct bpf_object *obj = NULL, *pkt_obj; int err, pkt_fd, i; @@ -18,7 +19,8 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, &pkt_obj, &pkt_fd); - if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) + if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", + target_obj_file, err, errno)) return; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, .attach_prog_fd = pkt_fd, @@ -33,7 +35,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, obj = bpf_object__open_file(obj_file, &opts); if (CHECK(IS_ERR_OR_NULL(obj), "obj_open", - "failed to open fexit_bpf2bpf: %ld\n", + "failed to open %s: %ld\n", obj_file, PTR_ERR(obj))) goto close_prog; @@ -49,6 +51,10 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) goto close_prog; } + + if (!run_prog) + goto close_prog; + data_map = bpf_object__find_map_by_name(obj, "fexit_bp.bss"); if (CHECK(!data_map, "find_data_map", "data map not found\n")) goto close_prog; @@ -89,7 +95,7 @@ static void test_target_no_callees(void) test_fexit_bpf2bpf_common("./fexit_bpf2bpf_simple.o", "./test_pkt_md_access.o", ARRAY_SIZE(prog_name), - prog_name); + prog_name, true); } static void test_target_yes_callees(void) @@ -103,7 +109,7 @@ static void test_target_yes_callees(void) test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o", "./test_pkt_access.o", ARRAY_SIZE(prog_name), - prog_name); + prog_name, true); } static void test_func_replace(void) @@ -120,7 +126,18 @@ static void test_func_replace(void) test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o", "./test_pkt_access.o", ARRAY_SIZE(prog_name), - prog_name); + prog_name, true); +} + +static void test_func_replace_verify(void) +{ + const char *prog_name[] = { + "freplace/do_bind", + }; + test_fexit_bpf2bpf_common("./freplace_connect4.o", + "./connect4_prog.o", + ARRAY_SIZE(prog_name), + prog_name, false); } void test_fexit_bpf2bpf(void) @@ -128,4 +145,5 @@ void test_fexit_bpf2bpf(void) test_target_no_callees(); test_target_yes_callees(); test_func_replace(); + test_func_replace_verify(); } diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index 75085119c5bb..ad3c498a8150 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -18,11 +18,25 @@ int _version SEC("version") = 1; +__attribute__ ((noinline)) +int do_bind(struct bpf_sock_addr *ctx) +{ + struct sockaddr_in sa = {}; + + sa.sin_family = AF_INET; + sa.sin_port = bpf_htons(0); + sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4); + + if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) + return 0; + + return 1; +} + SEC("cgroup/connect4") int connect_v4_prog(struct bpf_sock_addr *ctx) { struct bpf_sock_tuple tuple = {}; - struct sockaddr_in sa; struct bpf_sock *sk; /* Verify that new destination is available. */ @@ -56,17 +70,7 @@ int connect_v4_prog(struct bpf_sock_addr *ctx) ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4); ctx->user_port = bpf_htons(DST_REWRITE_PORT4); - /* Rewrite source. */ - memset(&sa, 0, sizeof(sa)); - - sa.sin_family = AF_INET; - sa.sin_port = bpf_htons(0); - sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4); - - if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) - return 0; - - return 1; + return do_bind(ctx) ? 1 : 0; } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/freplace_connect4.c b/tools/testing/selftests/bpf/progs/freplace_connect4.c new file mode 100644 index 000000000000..a0ae84230699 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_connect4.c @@ -0,0 +1,18 @@ +#include +#include +#include +#include +#include +#include +#include + +SEC("freplace/do_bind") +int new_do_bind(struct bpf_sock_addr *ctx) +{ + struct sockaddr_in sa = {}; + + bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)); + return 0; +} + +char _license[] SEC("license") = "GPL"; From 32e4c6f4bc00366ede053ab6bb8109ec0fae0e54 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 24 Apr 2020 11:29:11 -0700 Subject: [PATCH 097/100] bpftool: Respect the -d option in struct_ops cmd In the prog cmd, the "-d" option turns on the verifier log. This is missed in the "struct_ops" cmd and this patch fixes it. Fixes: 65c93628599d ("bpftool: Add struct_ops support") Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200424182911.1259355-1-kafai@fb.com --- tools/bpf/bpftool/struct_ops.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index 0fe0d584c57e..e17738479edc 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -479,6 +479,7 @@ static int do_unregister(int argc, char **argv) static int do_register(int argc, char **argv) { + struct bpf_object_load_attr load_attr = {}; const struct bpf_map_def *def; struct bpf_map_info info = {}; __u32 info_len = sizeof(info); @@ -499,7 +500,12 @@ static int do_register(int argc, char **argv) set_max_rlimit(); - if (bpf_object__load(obj)) { + load_attr.obj = obj; + if (verifier_logs) + /* log_level1 + log_level2 + stats, but not stable UAPI */ + load_attr.log_level = 1 + 2 + 4; + + if (bpf_object__load_xattr(&load_attr)) { bpf_object__close(obj); return -1; } From 6f302bfb221470e712ce3e5911fb83cdca174387 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Thu, 23 Apr 2020 10:32:40 +0800 Subject: [PATCH 098/100] bpf: Make bpf_link_fops static Fix the following sparse warning: kernel/bpf/syscall.c:2289:30: warning: symbol 'bpf_link_fops' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: Zou Wei Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1587609160-117806-1-git-send-email-zou_wei@huawei.com --- kernel/bpf/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index bca58c235ac0..7626b8024471 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2283,7 +2283,7 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) } #endif -const struct file_operations bpf_link_fops = { +static const struct file_operations bpf_link_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = bpf_link_show_fdinfo, #endif From dfc55ace9939e1e8703ad37ddbba41aaa31cc0cb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 21 Apr 2020 18:24:07 -0700 Subject: [PATCH 099/100] tools/runqslower: Ensure own vmlinux.h is picked up first Reorder include paths to ensure that runqslower sources are picking up vmlinux.h, generated by runqslower's own Makefile. When runqslower is built from selftests/bpf, due to current -I$(BPF_INCLUDE) -I$(OUTPUT) ordering, it might pick up not-yet-complete vmlinux.h, generated by selftests Makefile, which could lead to compilation errors like [0]. So ensure that -I$(OUTPUT) goes first and rely on runqslower's Makefile own dependency chain to ensure vmlinux.h is properly completed before source code relying on it is compiled. [0] https://travis-ci.org/github/libbpf/libbpf/jobs/677905925 Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200422012407.176303-1-andriin@fb.com --- tools/bpf/runqslower/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index 39edd68afa8e..8a6f82e56a24 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -8,7 +8,7 @@ BPFTOOL ?= $(DEFAULT_BPFTOOL) LIBBPF_SRC := $(abspath ../../lib/bpf) BPFOBJ := $(OUTPUT)/libbpf.a BPF_INCLUDE := $(OUTPUT) -INCLUDES := -I$(BPF_INCLUDE) -I$(OUTPUT) -I$(abspath ../../lib) +INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../lib) CFLAGS := -g -Wall # Try to detect best kernel BTF source From e1cebd841b0aa1ceda771706d54a0501986a3c88 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 21 Apr 2020 17:37:53 -0700 Subject: [PATCH 100/100] selftests/bpf: Fix a couple of broken test_btf cases Commit 51c39bb1d5d1 ("bpf: Introduce function-by-function verification") introduced function linkage flag and changed the error message from "vlen != 0" to "Invalid func linkage" and broke some fake BPF programs. Adjust the test accordingly. AFACT, the programs don't really need any arguments and only look at BTF for maps, so let's drop the args altogether. Before: BTF raw test[103] (func (Non zero vlen)): do_test_raw:3703:FAIL expected err_str:vlen != 0 magic: 0xeb9f version: 1 flags: 0x0 hdr_len: 24 type_off: 0 type_len: 72 str_off: 72 str_len: 10 btf_total_size: 106 [1] INT (anon) size=4 bits_offset=0 nr_bits=32 encoding=SIGNED [2] INT (anon) size=4 bits_offset=0 nr_bits=32 encoding=(none) [3] FUNC_PROTO (anon) return=0 args=(1 a, 2 b) [4] FUNC func type_id=3 Invalid func linkage BTF libbpf test[1] (test_btf_haskv.o): libbpf: load bpf program failed: Invalid argument libbpf: -- BEGIN DUMP LOG --- libbpf: Validating test_long_fname_2() func#1... Arg#0 type PTR in test_long_fname_2() is not supported yet. processed 0 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0 libbpf: -- END LOG -- libbpf: failed to load program 'dummy_tracepoint' libbpf: failed to load object 'test_btf_haskv.o' do_test_file:4201:FAIL bpf_object__load: -4007 BTF libbpf test[2] (test_btf_newkv.o): libbpf: load bpf program failed: Invalid argument libbpf: -- BEGIN DUMP LOG --- libbpf: Validating test_long_fname_2() func#1... Arg#0 type PTR in test_long_fname_2() is not supported yet. processed 0 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0 libbpf: -- END LOG -- libbpf: failed to load program 'dummy_tracepoint' libbpf: failed to load object 'test_btf_newkv.o' do_test_file:4201:FAIL bpf_object__load: -4007 BTF libbpf test[3] (test_btf_nokv.o): libbpf: load bpf program failed: Invalid argument libbpf: -- BEGIN DUMP LOG --- libbpf: Validating test_long_fname_2() func#1... Arg#0 type PTR in test_long_fname_2() is not supported yet. processed 0 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0 libbpf: -- END LOG -- libbpf: failed to load program 'dummy_tracepoint' libbpf: failed to load object 'test_btf_nokv.o' do_test_file:4201:FAIL bpf_object__load: -4007 Fixes: 51c39bb1d5d1 ("bpf: Introduce function-by-function verification") Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200422003753.124921-1-sdf@google.com --- .../selftests/bpf/progs/test_btf_haskv.c | 18 +++++------------- .../selftests/bpf/progs/test_btf_newkv.c | 18 +++++------------- .../selftests/bpf/progs/test_btf_nokv.c | 18 +++++------------- tools/testing/selftests/bpf/test_btf.c | 2 +- 4 files changed, 16 insertions(+), 40 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c index 88b0566da13d..31538c9ed193 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_haskv.c @@ -20,20 +20,12 @@ struct bpf_map_def SEC("maps") btf_map = { BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts); -struct dummy_tracepoint_args { - unsigned long long pad; - struct sock *sock; -}; - __attribute__((noinline)) -int test_long_fname_2(struct dummy_tracepoint_args *arg) +int test_long_fname_2(void) { struct ipv_counts *counts; int key = 0; - if (!arg->sock) - return 0; - counts = bpf_map_lookup_elem(&btf_map, &key); if (!counts) return 0; @@ -44,15 +36,15 @@ int test_long_fname_2(struct dummy_tracepoint_args *arg) } __attribute__((noinline)) -int test_long_fname_1(struct dummy_tracepoint_args *arg) +int test_long_fname_1(void) { - return test_long_fname_2(arg); + return test_long_fname_2(); } SEC("dummy_tracepoint") -int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +int _dummy_tracepoint(void *arg) { - return test_long_fname_1(arg); + return test_long_fname_1(); } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c index a924e53c8e9d..6c5560162746 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c @@ -28,20 +28,12 @@ struct { __type(value, struct ipv_counts); } btf_map SEC(".maps"); -struct dummy_tracepoint_args { - unsigned long long pad; - struct sock *sock; -}; - __attribute__((noinline)) -int test_long_fname_2(struct dummy_tracepoint_args *arg) +int test_long_fname_2(void) { struct ipv_counts *counts; int key = 0; - if (!arg->sock) - return 0; - counts = bpf_map_lookup_elem(&btf_map, &key); if (!counts) return 0; @@ -57,15 +49,15 @@ int test_long_fname_2(struct dummy_tracepoint_args *arg) } __attribute__((noinline)) -int test_long_fname_1(struct dummy_tracepoint_args *arg) +int test_long_fname_1(void) { - return test_long_fname_2(arg); + return test_long_fname_2(); } SEC("dummy_tracepoint") -int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +int _dummy_tracepoint(void *arg) { - return test_long_fname_1(arg); + return test_long_fname_1(); } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c index 983aedd1c072..506da7fd2da2 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_nokv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_nokv.c @@ -17,20 +17,12 @@ struct bpf_map_def SEC("maps") btf_map = { .max_entries = 4, }; -struct dummy_tracepoint_args { - unsigned long long pad; - struct sock *sock; -}; - __attribute__((noinline)) -int test_long_fname_2(struct dummy_tracepoint_args *arg) +int test_long_fname_2(void) { struct ipv_counts *counts; int key = 0; - if (!arg->sock) - return 0; - counts = bpf_map_lookup_elem(&btf_map, &key); if (!counts) return 0; @@ -41,15 +33,15 @@ int test_long_fname_2(struct dummy_tracepoint_args *arg) } __attribute__((noinline)) -int test_long_fname_1(struct dummy_tracepoint_args *arg) +int test_long_fname_1(void) { - return test_long_fname_2(arg); + return test_long_fname_2(); } SEC("dummy_tracepoint") -int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +int _dummy_tracepoint(void *arg) { - return test_long_fname_1(arg); + return test_long_fname_1(); } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 8da77cda5f4a..305fae8f80a9 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -2854,7 +2854,7 @@ static struct btf_raw_test raw_tests[] = { .value_type_id = 1, .max_entries = 4, .btf_load_err = true, - .err_str = "vlen != 0", + .err_str = "Invalid func linkage", }, {