From 955d3411a17f590364238bd0d3329b61f20c1cd2 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 30 Dec 2018 12:46:01 +0100 Subject: [PATCH 0001/1436] batman-adv: Avoid WARN on net_device without parent in netns It is not allowed to use WARN* helpers on potential incorrect input from the user or transient problems because systems configured as panic_on_warn will reboot due to such a problem. A NULL return value of __dev_get_by_index can be caused by various problems which can either be related to the system configuration or problems (incorrectly returned network namespaces) in other (virtual) net_device drivers. batman-adv should not cause a (harmful) WARN in this situation and instead only report it via a simple message. Fixes: b7eddd0b3950 ("batman-adv: prevent using any virtual device created on batman-adv as hard-interface") Reported-by: syzbot+c764de0fcfadca9a8595@syzkaller.appspotmail.com Reported-by: Dmitry Vyukov Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/hard-interface.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 508f4416dfc9..415d494cbe22 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -20,7 +20,6 @@ #include "main.h" #include -#include #include #include #include @@ -179,8 +178,10 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) parent_dev = __dev_get_by_index((struct net *)parent_net, dev_get_iflink(net_dev)); /* if we got a NULL parent_dev there is something broken.. */ - if (WARN(!parent_dev, "Cannot find parent device")) + if (!parent_dev) { + pr_err("Cannot find parent device\n"); return false; + } if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net)) return false; From 9114daa825fc3f335f9bea3313ce667090187280 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 31 Dec 2018 22:31:01 +0100 Subject: [PATCH 0002/1436] batman-adv: Force mac header to start of data on xmit The caller of ndo_start_xmit may not already have called skb_reset_mac_header. The returned value of skb_mac_header/eth_hdr therefore can be in the wrong position and even outside the current skbuff. This for example happens when the user binds to the device using a PF_PACKET-SOCK_RAW with enabled qdisc-bypass: int opt = 4; setsockopt(sock, SOL_PACKET, PACKET_QDISC_BYPASS, &opt, sizeof(opt)); Since eth_hdr is used all over the codebase, the batadv_interface_tx function must always take care of resetting it. Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Reported-by: syzbot+9d7405c7faa390e60b4e@syzkaller.appspotmail.com Reported-by: syzbot+7d20bc3f1ddddc0f9079@syzkaller.appspotmail.com Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/soft-interface.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 5db5a0a4c959..b85ca809e509 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -221,6 +221,8 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, netif_trans_update(soft_iface); vid = batadv_get_vid(skb, 0); + + skb_reset_mac_header(skb); ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { From c9a8a92a9961dcd822015ee406e1a011b772112e Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Wed, 2 Jan 2019 20:35:52 +0100 Subject: [PATCH 0003/1436] ARM: dts: rockchip: remove qos_cif1 from rk3188 power-domain While the rk3066 does have 2 camera interfaces, the rk3188 does not, so there also isn't a QoS block for that non-existing interface, so remove it. Fixes: e6e1869f0b71 ("ARM: dts: rockchip: add rk3066/rk3188 power-domains") Signed-off-by: Johan Jonker Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3188.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/rk3188.dtsi b/arch/arm/boot/dts/rk3188.dtsi index 4acb501dd3f8..3ed49898f4b2 100644 --- a/arch/arm/boot/dts/rk3188.dtsi +++ b/arch/arm/boot/dts/rk3188.dtsi @@ -719,7 +719,6 @@ pm_qos = <&qos_lcdc0>, <&qos_lcdc1>, <&qos_cif0>, - <&qos_cif1>, <&qos_ipp>, <&qos_rga>; }; From 4fb873c9648e383206e0a91cef9b03aa54066aca Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 5 Nov 2018 16:36:07 +0100 Subject: [PATCH 0004/1436] drm/imx: ignore plane updates on disabled crtcs This patch fixes backtraces like the following when sending SIGKILL to a process with a currently pending plane update: [drm:ipu_plane_atomic_check] CRTC should be enabled [drm:drm_framebuffer_remove] *ERROR* failed to commit ------------[ cut here ]------------ WARNING: CPU: 3 PID: 63 at drivers/gpu/drm/drm_framebuffer.c:926 drm_framebuffer_remove+0x47c/0x498 atomic remove_fb failed with -22 Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-plane.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index c390924de93d..21e964f6ab5c 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -370,9 +370,9 @@ static int ipu_plane_atomic_check(struct drm_plane *plane, if (ret) return ret; - /* CRTC should be enabled */ + /* nothing to check when disabling or disabled */ if (!crtc_state->enable) - return -EINVAL; + return 0; switch (plane->type) { case DRM_PLANE_TYPE_PRIMARY: From 4ab88516b097ac8c23e00e7c3a26fbcd16feeefd Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Mon, 10 Dec 2018 11:14:29 +0100 Subject: [PATCH 0005/1436] arm64: dts: allwinner: a64: Fix the video engine compatible When introducing the video-codec node for the video engine, the compatible for the H5 was used instead of the compatible for the A64. Use the right compatible instead. Fixes: d60ce24740d2 ("arm64: dts: allwinner: a64: Add Video Engine node") Signed-off-by: Paul Kocialkowski Signed-off-by: Maxime Ripard --- arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi index 837a03dee875..2abb335145a6 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi @@ -390,7 +390,7 @@ }; video-codec@1c0e000 { - compatible = "allwinner,sun50i-h5-video-engine"; + compatible = "allwinner,sun50i-a64-video-engine"; reg = <0x01c0e000 0x1000>; clocks = <&ccu CLK_BUS_VE>, <&ccu CLK_VE>, <&ccu CLK_DRAM_VE>; From 8443e4843e1c2594bf5664e1d993a1be71d1befb Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 23 Dec 2018 20:24:13 +0200 Subject: [PATCH 0006/1436] ARM: OMAP: dts: N950/N9: fix onenand timings Commit a758f50f10cf ("mtd: onenand: omap2: Configure driver from DT") started using DT specified timings for GPMC, and as a result the OneNAND stopped working on N950/N9 as we had wrong values in the DT. Fix by updating the values to bootloader timings that have been tested to be working on both Nokia N950 and N9. Fixes: a758f50f10cf ("mtd: onenand: omap2: Configure driver from DT") Signed-off-by: Aaro Koskinen Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-n950-n9.dtsi | 42 ++++++++++++++++++---------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi index 0d9b85317529..e142e6c70a59 100644 --- a/arch/arm/boot/dts/omap3-n950-n9.dtsi +++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi @@ -370,6 +370,19 @@ compatible = "ti,omap2-onenand"; reg = <0 0 0x20000>; /* CS0, offset 0, IO size 128K */ + /* + * These timings are based on CONFIG_OMAP_GPMC_DEBUG=y reported + * bootloader set values when booted with v4.19 using both N950 + * and N9 devices (OneNAND Manufacturer: Samsung): + * + * gpmc cs0 before gpmc_cs_program_settings: + * cs0 GPMC_CS_CONFIG1: 0xfd001202 + * cs0 GPMC_CS_CONFIG2: 0x00181800 + * cs0 GPMC_CS_CONFIG3: 0x00030300 + * cs0 GPMC_CS_CONFIG4: 0x18001804 + * cs0 GPMC_CS_CONFIG5: 0x03171d1d + * cs0 GPMC_CS_CONFIG6: 0x97080000 + */ gpmc,sync-read; gpmc,sync-write; gpmc,burst-length = <16>; @@ -379,26 +392,27 @@ gpmc,device-width = <2>; gpmc,mux-add-data = <2>; gpmc,cs-on-ns = <0>; - gpmc,cs-rd-off-ns = <87>; - gpmc,cs-wr-off-ns = <87>; + gpmc,cs-rd-off-ns = <122>; + gpmc,cs-wr-off-ns = <122>; gpmc,adv-on-ns = <0>; - gpmc,adv-rd-off-ns = <10>; - gpmc,adv-wr-off-ns = <10>; - gpmc,oe-on-ns = <15>; - gpmc,oe-off-ns = <87>; + gpmc,adv-rd-off-ns = <15>; + gpmc,adv-wr-off-ns = <15>; + gpmc,oe-on-ns = <20>; + gpmc,oe-off-ns = <122>; gpmc,we-on-ns = <0>; - gpmc,we-off-ns = <87>; - gpmc,rd-cycle-ns = <112>; - gpmc,wr-cycle-ns = <112>; - gpmc,access-ns = <81>; + gpmc,we-off-ns = <122>; + gpmc,rd-cycle-ns = <148>; + gpmc,wr-cycle-ns = <148>; + gpmc,access-ns = <117>; gpmc,page-burst-access-ns = <15>; gpmc,bus-turnaround-ns = <0>; gpmc,cycle2cycle-delay-ns = <0>; gpmc,wait-monitoring-ns = <0>; - gpmc,clk-activation-ns = <5>; - gpmc,wr-data-mux-bus-ns = <30>; - gpmc,wr-access-ns = <81>; - gpmc,sync-clk-ps = <15000>; + gpmc,clk-activation-ns = <10>; + gpmc,wr-data-mux-bus-ns = <40>; + gpmc,wr-access-ns = <117>; + + gpmc,sync-clk-ps = <15000>; /* TBC; Where this value came? */ /* * MTD partition table corresponding to Nokia's MeeGo 1.2 From ef4a55b9197a8f844ea0663138e902dcce3e2f36 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 7 Jan 2019 09:52:43 -0800 Subject: [PATCH 0007/1436] ARM: dts: omap4-droid4: Fix typo in cpcap IRQ flags We're now getting the following error: genirq: Setting trigger mode 1 for irq 230 failed (regmap_irq_set_type+0x0/0x15c) cpcap-usb-phy cpcap-usb-phy.0: could not get irq dp: -524 Cc: Sebastian Reichel Reported-by: Pavel Machek Tested-by: Pavel Machek Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi index ddc7a7bb33c0..f57acf8f66b9 100644 --- a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi +++ b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi @@ -105,7 +105,7 @@ interrupts-extended = < &cpcap 15 0 &cpcap 14 0 &cpcap 28 0 &cpcap 19 0 &cpcap 18 0 &cpcap 17 0 &cpcap 16 0 &cpcap 49 0 - &cpcap 48 1 + &cpcap 48 0 >; interrupt-names = "id_ground", "id_float", "se0conn", "vbusvld", From 2bb7babaae8780dd257abeb354c70cc176fd9dfa Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 7 Jan 2019 13:42:40 -0800 Subject: [PATCH 0008/1436] ARM: dts: Remove unnecessary idle flags for omap5 uart3 Looks like I accidentally left some extra flags for uart3 to not idle it. This happened as I generated the data from a running system where these flags are set dynamically on boot by omap_hwmod_setup_earlycon_flags() if earlycon is enabled. We can just remove them. Fixes: 4c387984618f ("ARM: dts: omap5: Add l4 interconnect hierarchy and ti-sysc data") Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-l4.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/boot/dts/omap5-l4.dtsi b/arch/arm/boot/dts/omap5-l4.dtsi index 9c7e309d9c2c..0960348002ad 100644 --- a/arch/arm/boot/dts/omap5-l4.dtsi +++ b/arch/arm/boot/dts/omap5-l4.dtsi @@ -1046,8 +1046,6 @@ , ; ti,syss-mask = <1>; - ti,no-reset-on-init; - ti,no-idle-on-init; /* Domains (V, P, C): core, core_pwrdm, l4per_clkdm */ clocks = <&l4per_clkctrl OMAP5_UART3_CLKCTRL 0>; clock-names = "fck"; From aa9ad54285c78e7f69b20b3734c0c79aa98af3c3 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 8 Jan 2019 15:25:26 +0800 Subject: [PATCH 0009/1436] ARM: dts: sun6i: Add clock-output-names to osc24M clock The osc24M clock does not have a "clock-output-names" property, which means that the clock name is derived from the node name in Linux. The node name was changed in commit acfd5bbe2641 ("ARM: dts: sun6i: Change clock node names to avoid warnings"). This breaks Linux as the sunxi-ng clock driver implicitly depends on the external clock being named "osc24M". Add a "clock-output-names" property to restore the previous behavior. Fixes: acfd5bbe2641 ("ARM: dts: sun6i: Change clock node names to avoid warnings") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun6i-a31.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi index 353d90f99b40..13304b8c5139 100644 --- a/arch/arm/boot/dts/sun6i-a31.dtsi +++ b/arch/arm/boot/dts/sun6i-a31.dtsi @@ -216,6 +216,7 @@ #clock-cells = <0>; compatible = "fixed-clock"; clock-frequency = <24000000>; + clock-output-names = "osc24M"; }; osc32k: clk-32k { From cc4bddade114b696ab27c1a77cfc7040151306da Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 8 Jan 2019 20:18:40 +0100 Subject: [PATCH 0010/1436] ARM: dts: sun8i: h3: Add ethernet0 alias to Beelink X2 Because "ethernet0" alias is missing, U-Boot doesn't generate board specific MAC address. Effect of this is random MAC address every boot and thus new IP address is assigned to the board. Fix this by adding alias. Fixes: 7389172fc3ed ("ARM: dts: sun8i: h3: Enable dwmac-sun8i on the Beelink X2") Signed-off-by: Jernej Skrabec [Maxime: Removed unneeded comment] Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun8i-h3-beelink-x2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts b/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts index 5d23667dc2d2..25540b7694d5 100644 --- a/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts +++ b/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts @@ -53,7 +53,7 @@ aliases { serial0 = &uart0; - /* ethernet0 is the H3 emac, defined in sun8i-h3.dtsi */ + ethernet0 = &emac; ethernet1 = &sdiowifi; }; From b1360dcfdaa1d55952e5ec8dd9d99f88965d7ac9 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Wed, 9 Jan 2019 19:16:04 +0100 Subject: [PATCH 0011/1436] arm64: dts: allwinner: a64: Fix USB OTG regulator Currently, AXP803 driver assumes that reg_drivevbus is input which is wrong. Unfortunate consequence of that is that none of the USB ports work on the board, even USB HOST port, because USB PHY driver probing fails due to missing regulator. Fix that by adding "x-powers,drive-vbus-en" property to AXP803 node. Fixes: 14ff5d8f9151 ("arm64: dts: allwinner: a64: Orange Pi Win: Enable USB OTG socket") Cc: stable@vger.kernel.org Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard --- arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts index b0c64f75792c..8974b5a1d3b1 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts @@ -188,6 +188,7 @@ reg = <0x3a3>; interrupt-parent = <&r_intc>; interrupts = <0 IRQ_TYPE_LEVEL_LOW>; + x-powers,drive-vbus-en; /* set N_VBUSEN as output pin */ }; }; From 6375d40322f06925f5f24cd555b12567c830502b Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 20 Dec 2018 11:55:22 +0800 Subject: [PATCH 0012/1436] brcmsmac: remove set but not used variables 'phybw40, maxtargetpwr' Fixes gcc '-Wunused-but-set-variable' warning: drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_lcn.c:1202:5: warning: variable 'phybw40' set but not used [-Wunused-but-set-variable] drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_lcn.c:4625:5: warning: variable 'phybw40' set but not used [-Wunused-but-set-variable] drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_lcn.c:4834:5: warning: variable 'phybw40' set but not used [-Wunused-but-set-variable] drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_lcn.c:3085:17: warning: variable 'maxtargetpwr' set but not used [-Wunused-but-set-variable] drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_lcn.c:4215:17: warning: variable 'maxtargetpwr' set but not used [-Wunused-but-set-variable] Signed-off-by: YueHaibing Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmsmac/phy/phy_lcn.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_lcn.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_lcn.c index e78a93a45741..c6e107f41948 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_lcn.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_lcn.c @@ -1199,8 +1199,6 @@ wlc_lcnphy_rx_iq_est(struct brcms_phy *pi, { int wait_count = 0; bool result = true; - u8 phybw40; - phybw40 = CHSPEC_IS40(pi->radio_chanspec); mod_phy_reg(pi, 0x6da, (0x1 << 5), (1) << 5); @@ -3082,7 +3080,7 @@ static void wlc_lcnphy_tx_pwr_ctrl_init(struct brcms_phy_pub *ppi) u8 bbmult; struct phytbl_info tab; s32 a1, b0, b1; - s32 tssi, pwr, maxtargetpwr, mintargetpwr; + s32 tssi, pwr, mintargetpwr; bool suspend; struct brcms_phy *pi = container_of(ppi, struct brcms_phy, pubpi_ro); @@ -3119,7 +3117,6 @@ static void wlc_lcnphy_tx_pwr_ctrl_init(struct brcms_phy_pub *ppi) b0 = pi->txpa_2g[0]; b1 = pi->txpa_2g[1]; a1 = pi->txpa_2g[2]; - maxtargetpwr = wlc_lcnphy_tssi2dbm(10, a1, b0, b1); mintargetpwr = wlc_lcnphy_tssi2dbm(125, a1, b0, b1); tab.tbl_id = LCNPHY_TBL_ID_TXPWRCTL; @@ -4212,7 +4209,7 @@ static void wlc_lcnphy_periodic_cal(struct brcms_phy *pi) s8 index; struct phytbl_info tab; s32 a1, b0, b1; - s32 tssi, pwr, maxtargetpwr, mintargetpwr; + s32 tssi, pwr, mintargetpwr; struct brcms_phy_lcnphy *pi_lcn = pi->u.pi_lcnphy; pi->phy_lastcal = pi->sh->now; @@ -4249,7 +4246,6 @@ static void wlc_lcnphy_periodic_cal(struct brcms_phy *pi) b0 = pi->txpa_2g[0]; b1 = pi->txpa_2g[1]; a1 = pi->txpa_2g[2]; - maxtargetpwr = wlc_lcnphy_tssi2dbm(10, a1, b0, b1); mintargetpwr = wlc_lcnphy_tssi2dbm(125, a1, b0, b1); tab.tbl_id = LCNPHY_TBL_ID_TXPWRCTL; @@ -4622,13 +4618,10 @@ static void wlc_lcnphy_radio_init(struct brcms_phy *pi) static void wlc_lcnphy_tbl_init(struct brcms_phy *pi) { uint idx; - u8 phybw40; struct phytbl_info tab; const struct phytbl_info *tb; u32 val; - phybw40 = CHSPEC_IS40(pi->radio_chanspec); - for (idx = 0; idx < dot11lcnphytbl_info_sz_rev0; idx++) wlc_lcnphy_write_table(pi, &dot11lcnphytbl_info_rev0[idx]); @@ -4831,9 +4824,7 @@ static void wlc_lcnphy_baseband_init(struct brcms_phy *pi) void wlc_phy_init_lcnphy(struct brcms_phy *pi) { - u8 phybw40; struct brcms_phy_lcnphy *pi_lcn = pi->u.pi_lcnphy; - phybw40 = CHSPEC_IS40(pi->radio_chanspec); pi_lcn->lcnphy_cal_counter = 0; pi_lcn->lcnphy_cal_temper = pi_lcn->lcnphy_rawtempsense; From ba2ffc96321c8433606ceeb85c9e722b8113e5a7 Mon Sep 17 00:00:00 2001 From: Zumeng Chen Date: Wed, 19 Dec 2018 15:50:29 +0800 Subject: [PATCH 0013/1436] wlcore: Fix memory leak in case wl12xx_fetch_firmware failure Release fw_status, raw_fw_status, and tx_res_if when wl12xx_fetch_firmware failed instead of meaningless goto out to avoid the following memory leak reports(Only the last one listed): unreferenced object 0xc28a9a00 (size 512): comm "kworker/0:4", pid 31298, jiffies 2783204 (age 203.290s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<6624adab>] kmemleak_alloc+0x40/0x74 [<500ddb31>] kmem_cache_alloc_trace+0x1ac/0x270 [] wl12xx_chip_wakeup+0xc4/0x1fc [wlcore] [<76c5db53>] wl1271_op_add_interface+0x4a4/0x8f4 [wlcore] [] drv_add_interface+0xa4/0x1a0 [mac80211] [<65bac325>] ieee80211_reconfig+0x9c0/0x1644 [mac80211] [<2817c80e>] ieee80211_restart_work+0x90/0xc8 [mac80211] [<7e1d425a>] process_one_work+0x284/0x42c [<55f9432e>] worker_thread+0x2fc/0x48c [] kthread+0x148/0x160 [<63144b13>] ret_from_fork+0x14/0x2c [< (null)>] (null) [<1f6e7715>] 0xffffffff Signed-off-by: Zumeng Chen Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wlcore/main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 26b187336875..2e12de813a5b 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -1085,8 +1085,11 @@ static int wl12xx_chip_wakeup(struct wl1271 *wl, bool plt) goto out; ret = wl12xx_fetch_firmware(wl, plt); - if (ret < 0) - goto out; + if (ret < 0) { + kfree(wl->fw_status); + kfree(wl->raw_fw_status); + kfree(wl->tx_res_if); + } out: return ret; From 72255c807156adeb167444c4206c9e5eece22287 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 20 Dec 2018 14:05:50 +0000 Subject: [PATCH 0014/1436] ray_cs: fix array out-of-bounds access Currently array element org[3] is being accessed, however the array is only 3 elements in size, so this looks like an off-by-one out-of-bounds error. Fix this by using org[2], which I believe was the original intent. This issue has existed in the driver back in the pre-git days, so no idea when it was introduced. Detected by CoverityScan, CID#711344 ("Out-of-bounds read") Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/ray_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 33ad87528d9a..8b2741c8edf2 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -959,7 +959,7 @@ static int translate_frame(ray_dev_t *local, struct tx_msg __iomem *ptx, if (proto == htons(ETH_P_AARP) || proto == htons(ETH_P_IPX)) { /* This is the selective translation table, only 2 entries */ writeb(0xf8, - &((struct snaphdr_t __iomem *)ptx->var)->org[3]); + &((struct snaphdr_t __iomem *)ptx->var)->org[2]); } /* Copy body of ethernet packet without ethernet header */ memcpy_toio((void __iomem *)&ptx->var + From 4d95f99c59b8b814bcf09ba86020d937ec7caa86 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 20 Dec 2018 17:40:58 +0100 Subject: [PATCH 0015/1436] brcmfmac: Add DMI nvram filename quirk for PoV TAB-P1006W-232 tablet The Point of View TAB-P1006W-232 tablet contains quite generic names in the sys_vendor and product_name DMI strings, without this patch brcmfmac will try to load: brcmfmac43340-sdio.Insyde-BayTrail.txt as nvram file which is a bit too generic. Add a DMI quirk so that a unique and clearly identifiable nvram file name is used on the PoV TAB-P1006W-232 tablet. Signed-off-by: Hans de Goede Signed-off-by: Kalle Valo --- .../wireless/broadcom/brcm80211/brcmfmac/dmi.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c index 51d76ac45075..7535cb0d4ac0 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c @@ -43,6 +43,10 @@ static const struct brcmf_dmi_data meegopad_t08_data = { BRCM_CC_43340_CHIP_ID, 2, "meegopad-t08" }; +static const struct brcmf_dmi_data pov_tab_p1006w_data = { + BRCM_CC_43340_CHIP_ID, 2, "pov-tab-p1006w-data" +}; + static const struct dmi_system_id dmi_platform_data[] = { { /* Match for the GPDwin which unfortunately uses somewhat @@ -81,6 +85,17 @@ static const struct dmi_system_id dmi_platform_data[] = { }, .driver_data = (void *)&meegopad_t08_data, }, + { + /* Point of View TAB-P1006W-232 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Insyde"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "BayTrail"), + /* Note 105b is Foxcon's USB/PCI vendor id */ + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "105B"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "0E57"), + }, + .driver_data = (void *)&pov_tab_p1006w_data, + }, {} }; From 51c8d24101c79ffce3e79137e2cee5dfeb956dd7 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 22 Dec 2018 10:34:54 +0000 Subject: [PATCH 0016/1436] cw1200: fix missing unlock on error in cw1200_hw_scan() Add the missing unlock before return from function cw1200_hw_scan() in the error handling case. Fixes: 4f68ef64cd7f ("cw1200: Fix concurrency use-after-free bugs in cw1200_hw_scan()") Signed-off-by: Wei Yongjun Acked-by: Jia-Ju Bai Signed-off-by: Kalle Valo --- drivers/net/wireless/st/cw1200/scan.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/st/cw1200/scan.c b/drivers/net/wireless/st/cw1200/scan.c index 0a9eac93dd01..71e9b91cf15b 100644 --- a/drivers/net/wireless/st/cw1200/scan.c +++ b/drivers/net/wireless/st/cw1200/scan.c @@ -84,8 +84,11 @@ int cw1200_hw_scan(struct ieee80211_hw *hw, frame.skb = ieee80211_probereq_get(hw, priv->vif->addr, NULL, 0, req->ie_len); - if (!frame.skb) + if (!frame.skb) { + mutex_unlock(&priv->conf_mutex); + up(&priv->scan.lock); return -ENOMEM; + } if (req->ie_len) skb_put_data(frame.skb, req->ie, req->ie_len); From 06605b0d38b491c6dc52f4aee3e588e1309fbfaf Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 23 Dec 2018 09:57:15 +0100 Subject: [PATCH 0017/1436] cw1200: drop useless LIST_HEAD Drop LIST_HEAD where the variable it declares has never been used. The semantic patch that fixes this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier x; @@ - LIST_HEAD(x); ... when != x // Fixes: a910e4a94f692 ("cw1200: add driver for the ST-E CW1100 & CW1200 WLAN chipsets") Signed-off-by: Julia Lawall Signed-off-by: Kalle Valo --- drivers/net/wireless/st/cw1200/queue.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/st/cw1200/queue.c b/drivers/net/wireless/st/cw1200/queue.c index 7c31b63b8258..7895efefa95d 100644 --- a/drivers/net/wireless/st/cw1200/queue.c +++ b/drivers/net/wireless/st/cw1200/queue.c @@ -283,7 +283,6 @@ int cw1200_queue_put(struct cw1200_queue *queue, struct cw1200_txpriv *txpriv) { int ret = 0; - LIST_HEAD(gc_list); struct cw1200_queue_stats *stats = queue->stats; if (txpriv->link_id >= queue->stats->map_capacity) From 42daad3343be4a4e1ee03e30a5f5cc731dadfef5 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 25 Dec 2018 19:22:24 -0600 Subject: [PATCH 0018/1436] brcmfmac: add a check for the status of usb_register usb_register() may fail, so let's check its status and issue an error message if it fails. Signed-off-by: Kangjie Lu Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index a4308c6e72d7..76cfaf6999c8 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -1550,6 +1550,10 @@ void brcmf_usb_exit(void) void brcmf_usb_register(void) { + int ret; + brcmf_dbg(USB, "Enter\n"); - usb_register(&brcmf_usbdrvr); + ret = usb_register(&brcmf_usbdrvr); + if (ret) + brcmf_err("usb_register failed %d\n", ret); } From 7fdcb8e1266093eb2ff1bc3ab430daf4775388ad Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Thu, 27 Dec 2018 14:56:18 +0530 Subject: [PATCH 0019/1436] rsi: add support for hardware scan offload With the current approach of scanning, roaming delays are observed. Firmware has support for back ground scanning. To get this advantage, mac80211 hardware scan is implemented, which decides type of scan to do based on connected state. When station is in not connected, driver returns with special value 1 to trigger software scan in mac80211. In case of connected state, background scan will be triggered. Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Siva Rebbagondla Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_hal.c | 3 + drivers/net/wireless/rsi/rsi_91x_mac80211.c | 67 ++++++++++ drivers/net/wireless/rsi/rsi_91x_main.c | 1 + drivers/net/wireless/rsi/rsi_91x_mgmt.c | 133 ++++++++++++++++++++ drivers/net/wireless/rsi/rsi_main.h | 22 ++++ drivers/net/wireless/rsi/rsi_mgmt.h | 35 ++++++ 6 files changed, 261 insertions(+) diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c index 182b06629371..1dbaab2a96b7 100644 --- a/drivers/net/wireless/rsi/rsi_91x_hal.c +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c @@ -100,6 +100,9 @@ int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb) mgmt_desc->frame_type = TX_DOT11_MGMT; mgmt_desc->header_len = MIN_802_11_HDR_LEN; mgmt_desc->xtend_desc_size = header_size - FRAME_DESC_SZ; + + if (ieee80211_is_probe_req(wh->frame_control)) + mgmt_desc->frame_info = cpu_to_le16(RSI_INSERT_SEQ_IN_FW); mgmt_desc->frame_info |= cpu_to_le16(RATE_INFO_ENABLE); if (is_broadcast_ether_addr(wh->addr1)) mgmt_desc->frame_info |= cpu_to_le16(RSI_BROADCAST_PKT); diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index e56fc83faf0e..aded1ae4fad5 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -229,6 +229,68 @@ static void rsi_register_rates_channels(struct rsi_hw *adapter, int band) /* sbands->ht_cap.mcs.rx_highest = 0x82; */ } +static int rsi_mac80211_hw_scan_start(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_scan_request *hw_req) +{ + struct cfg80211_scan_request *scan_req = &hw_req->req; + struct rsi_hw *adapter = hw->priv; + struct rsi_common *common = adapter->priv; + struct ieee80211_bss_conf *bss = &vif->bss_conf; + + rsi_dbg(INFO_ZONE, "***** Hardware scan start *****\n"); + + if (common->fsm_state != FSM_MAC_INIT_DONE) + return -ENODEV; + + if ((common->wow_flags & RSI_WOW_ENABLED) || + scan_req->n_channels == 0) + return -EINVAL; + + /* Scan already in progress. So return */ + if (common->bgscan_en) + return -EBUSY; + + /* If STA is not connected, return with special value 1, in order + * to start sw_scan in mac80211 + */ + if (!bss->assoc) + return 1; + + mutex_lock(&common->mutex); + common->hwscan = scan_req; + if (!rsi_send_bgscan_params(common, RSI_START_BGSCAN)) { + if (!rsi_send_bgscan_probe_req(common, vif)) { + rsi_dbg(INFO_ZONE, "Background scan started...\n"); + common->bgscan_en = true; + } + } + mutex_unlock(&common->mutex); + + return 0; +} + +static void rsi_mac80211_cancel_hw_scan(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct rsi_hw *adapter = hw->priv; + struct rsi_common *common = adapter->priv; + struct cfg80211_scan_info info; + + rsi_dbg(INFO_ZONE, "***** Hardware scan stop *****\n"); + mutex_lock(&common->mutex); + + if (common->bgscan_en) { + if (!rsi_send_bgscan_params(common, RSI_STOP_BGSCAN)) + common->bgscan_en = false; + info.aborted = false; + ieee80211_scan_completed(adapter->hw, &info); + rsi_dbg(INFO_ZONE, "Back ground scan cancelled\b\n"); + } + common->hwscan = NULL; + mutex_unlock(&common->mutex); +} + /** * rsi_mac80211_detach() - This function is used to de-initialize the * Mac80211 stack. @@ -1917,6 +1979,8 @@ static const struct ieee80211_ops mac80211_ops = { .suspend = rsi_mac80211_suspend, .resume = rsi_mac80211_resume, #endif + .hw_scan = rsi_mac80211_hw_scan_start, + .cancel_hw_scan = rsi_mac80211_cancel_hw_scan, }; /** @@ -1999,6 +2063,9 @@ int rsi_mac80211_attach(struct rsi_common *common) common->max_stations = wiphy->max_ap_assoc_sta; rsi_dbg(ERR_ZONE, "Max Stations Allowed = %d\n", common->max_stations); hw->sta_data_size = sizeof(struct rsi_sta); + + wiphy->max_scan_ssids = RSI_MAX_SCAN_SSIDS; + wiphy->max_scan_ie_len = RSI_MAX_SCAN_IE_LEN; wiphy->flags = WIPHY_FLAG_REPORTS_OBSS; wiphy->flags |= WIPHY_FLAG_AP_UAPSD; wiphy->features |= NL80211_FEATURE_INACTIVITY_TIMER; diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c index 01d99ed985ee..ca3a55ed72e4 100644 --- a/drivers/net/wireless/rsi/rsi_91x_main.c +++ b/drivers/net/wireless/rsi/rsi_91x_main.c @@ -328,6 +328,7 @@ struct rsi_hw *rsi_91x_init(u16 oper_mode) } rsi_default_ps_params(adapter); + init_bgscan_params(common); spin_lock_init(&adapter->ps_lock); timer_setup(&common->roc_timer, rsi_roc_timeout, 0); init_completion(&common->wlan_init_completion); diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index 1095df7d9573..404241424a62 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -15,6 +15,7 @@ */ #include +#include #include "rsi_mgmt.h" #include "rsi_common.h" #include "rsi_ps.h" @@ -236,6 +237,18 @@ static void rsi_set_default_parameters(struct rsi_common *common) common->dtim_cnt = RSI_DTIM_COUNT; } +void init_bgscan_params(struct rsi_common *common) +{ + memset((u8 *)&common->bgscan, 0, sizeof(struct rsi_bgscan_params)); + common->bgscan.bgscan_threshold = RSI_DEF_BGSCAN_THRLD; + common->bgscan.roam_threshold = RSI_DEF_ROAM_THRLD; + common->bgscan.bgscan_periodicity = RSI_BGSCAN_PERIODICITY; + common->bgscan.num_bgscan_channels = 0; + common->bgscan.two_probe = 1; + common->bgscan.active_scan_duration = RSI_ACTIVE_SCAN_TIME; + common->bgscan.passive_scan_duration = RSI_PASSIVE_SCAN_TIME; +} + /** * rsi_set_contention_vals() - This function sets the contention values for the * backoff procedure. @@ -1628,6 +1641,107 @@ int rsi_send_wowlan_request(struct rsi_common *common, u16 flags, } #endif +int rsi_send_bgscan_params(struct rsi_common *common, int enable) +{ + struct rsi_bgscan_params *params = &common->bgscan; + struct cfg80211_scan_request *scan_req = common->hwscan; + struct rsi_bgscan_config *bgscan; + struct sk_buff *skb; + u16 frame_len = sizeof(*bgscan); + u8 i; + + rsi_dbg(MGMT_TX_ZONE, "%s: Sending bgscan params frame\n", __func__); + + skb = dev_alloc_skb(frame_len); + if (!skb) + return -ENOMEM; + memset(skb->data, 0, frame_len); + + bgscan = (struct rsi_bgscan_config *)skb->data; + rsi_set_len_qno(&bgscan->desc_dword0.len_qno, + (frame_len - FRAME_DESC_SZ), RSI_WIFI_MGMT_Q); + bgscan->desc_dword0.frame_type = BG_SCAN_PARAMS; + bgscan->bgscan_threshold = cpu_to_le16(params->bgscan_threshold); + bgscan->roam_threshold = cpu_to_le16(params->roam_threshold); + if (enable) + bgscan->bgscan_periodicity = + cpu_to_le16(params->bgscan_periodicity); + bgscan->active_scan_duration = + cpu_to_le16(params->active_scan_duration); + bgscan->passive_scan_duration = + cpu_to_le16(params->passive_scan_duration); + bgscan->two_probe = params->two_probe; + + bgscan->num_bgscan_channels = scan_req->n_channels; + for (i = 0; i < bgscan->num_bgscan_channels; i++) + bgscan->channels2scan[i] = + cpu_to_le16(scan_req->channels[i]->hw_value); + + skb_put(skb, frame_len); + + return rsi_send_internal_mgmt_frame(common, skb); +} + +/* This function sends the probe request to be used by firmware in + * background scan + */ +int rsi_send_bgscan_probe_req(struct rsi_common *common, + struct ieee80211_vif *vif) +{ + struct cfg80211_scan_request *scan_req = common->hwscan; + struct rsi_bgscan_probe *bgscan; + struct sk_buff *skb; + struct sk_buff *probereq_skb; + u16 frame_len = sizeof(*bgscan); + size_t ssid_len = 0; + u8 *ssid = NULL; + + rsi_dbg(MGMT_TX_ZONE, + "%s: Sending bgscan probe req frame\n", __func__); + + if (common->priv->sc_nvifs <= 0) + return -ENODEV; + + if (scan_req->n_ssids) { + ssid = scan_req->ssids[0].ssid; + ssid_len = scan_req->ssids[0].ssid_len; + } + + skb = dev_alloc_skb(frame_len + MAX_BGSCAN_PROBE_REQ_LEN); + if (!skb) + return -ENOMEM; + memset(skb->data, 0, frame_len + MAX_BGSCAN_PROBE_REQ_LEN); + + bgscan = (struct rsi_bgscan_probe *)skb->data; + bgscan->desc_dword0.frame_type = BG_SCAN_PROBE_REQ; + bgscan->flags = cpu_to_le16(HOST_BG_SCAN_TRIG); + if (common->band == NL80211_BAND_5GHZ) { + bgscan->mgmt_rate = cpu_to_le16(RSI_RATE_6); + bgscan->def_chan = cpu_to_le16(40); + } else { + bgscan->mgmt_rate = cpu_to_le16(RSI_RATE_1); + bgscan->def_chan = cpu_to_le16(11); + } + bgscan->channel_scan_time = cpu_to_le16(RSI_CHANNEL_SCAN_TIME); + + probereq_skb = ieee80211_probereq_get(common->priv->hw, vif->addr, ssid, + ssid_len, scan_req->ie_len); + + memcpy(&skb->data[frame_len], probereq_skb->data, probereq_skb->len); + + bgscan->probe_req_length = cpu_to_le16(probereq_skb->len); + + rsi_set_len_qno(&bgscan->desc_dword0.len_qno, + (frame_len - FRAME_DESC_SZ + probereq_skb->len), + RSI_WIFI_MGMT_Q); + + skb_put(skb, frame_len + probereq_skb->len); + + dev_kfree_skb(probereq_skb); + + return rsi_send_internal_mgmt_frame(common, skb); +} + /** * rsi_handle_ta_confirm_type() - This function handles the confirm frames. * @common: Pointer to the driver private structure. @@ -1771,9 +1885,28 @@ static int rsi_handle_ta_confirm_type(struct rsi_common *common, return 0; } break; + + case SCAN_REQUEST: + rsi_dbg(INFO_ZONE, "Set channel confirm\n"); + break; + case WAKEUP_SLEEP_REQUEST: rsi_dbg(INFO_ZONE, "Wakeup/Sleep confirmation.\n"); return rsi_handle_ps_confirm(adapter, msg); + + case BG_SCAN_PROBE_REQ: + rsi_dbg(INFO_ZONE, "BG scan complete event\n"); + if (common->bgscan_en) { + struct cfg80211_scan_info info; + + if (!rsi_send_bgscan_params(common, RSI_STOP_BGSCAN)) + common->bgscan_en = 0; + info.aborted = false; + ieee80211_scan_completed(adapter->hw, &info); + } + rsi_dbg(INFO_ZONE, "Background scan completed\n"); + break; + default: rsi_dbg(INFO_ZONE, "%s: Invalid TA confirm pkt received\n", __func__); diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h index a084f224bb03..4dc0c0123469 100644 --- a/drivers/net/wireless/rsi/rsi_main.h +++ b/drivers/net/wireless/rsi/rsi_main.h @@ -164,6 +164,24 @@ struct transmit_q_stats { u32 total_tx_pkt_freed[NUM_EDCA_QUEUES + 2]; }; +#define MAX_BGSCAN_CHANNELS_DUAL_BAND 38 +#define MAX_BGSCAN_PROBE_REQ_LEN 0x64 +#define RSI_DEF_BGSCAN_THRLD 0x0 +#define RSI_DEF_ROAM_THRLD 0xa +#define RSI_BGSCAN_PERIODICITY 0x1e +#define RSI_ACTIVE_SCAN_TIME 0x14 +#define RSI_PASSIVE_SCAN_TIME 0x46 +#define RSI_CHANNEL_SCAN_TIME 20 +struct rsi_bgscan_params { + u16 bgscan_threshold; + u16 roam_threshold; + u16 bgscan_periodicity; + u8 num_bgscan_channels; + u8 two_probe; + u16 active_scan_duration; + u16 passive_scan_duration; +}; + struct vif_priv { bool is_ht; bool sgi; @@ -289,6 +307,10 @@ struct rsi_common { bool eapol4_confirm; void *bt_adapter; + + struct cfg80211_scan_request *hwscan; + struct rsi_bgscan_params bgscan; + u8 bgscan_en; }; struct eepromrw_info { diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h index 359fbdf85739..ea83faa15c7e 100644 --- a/drivers/net/wireless/rsi/rsi_mgmt.h +++ b/drivers/net/wireless/rsi/rsi_mgmt.h @@ -228,6 +228,9 @@ #define RSI_MAX_TX_AGGR_FRMS 8 #define RSI_MAX_RX_AGGR_FRMS 8 +#define RSI_MAX_SCAN_SSIDS 16 +#define RSI_MAX_SCAN_IE_LEN 256 + enum opmode { RSI_OPMODE_UNSUPPORTED = -1, RSI_OPMODE_AP = 0, @@ -623,6 +626,34 @@ struct rsi_wowlan_req { u16 host_sleep_status; } __packed; +#define RSI_START_BGSCAN 1 +#define RSI_STOP_BGSCAN 0 +#define HOST_BG_SCAN_TRIG BIT(4) +struct rsi_bgscan_config { + struct rsi_cmd_desc_dword0 desc_dword0; + __le64 reserved; + __le32 reserved1; + __le16 bgscan_threshold; + __le16 roam_threshold; + __le16 bgscan_periodicity; + u8 num_bgscan_channels; + u8 two_probe; + __le16 active_scan_duration; + __le16 passive_scan_duration; + __le16 channels2scan[MAX_BGSCAN_CHANNELS_DUAL_BAND]; +} __packed; + +struct rsi_bgscan_probe { + struct rsi_cmd_desc_dword0 desc_dword0; + __le64 reserved; + __le32 reserved1; + __le16 mgmt_rate; + __le16 flags; + __le16 def_chan; + __le16 channel_scan_time; + __le16 probe_req_length; +} __packed; + static inline u32 rsi_get_queueno(u8 *addr, u16 offset) { return (le16_to_cpu(*(__le16 *)&addr[offset]) & 0x7000) >> 12; @@ -694,4 +725,8 @@ int rsi_send_wowlan_request(struct rsi_common *common, u16 flags, #endif int rsi_send_ps_request(struct rsi_hw *adapter, bool enable, struct ieee80211_vif *vif); +void init_bgscan_params(struct rsi_common *common); +int rsi_send_bgscan_params(struct rsi_common *common, int enable); +int rsi_send_bgscan_probe_req(struct rsi_common *common, + struct ieee80211_vif *vif); #endif From 5a1c18b761ddb299a06746948b9ec2814b04fa92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 2 Jan 2019 00:00:01 +0100 Subject: [PATCH 0020/1436] bcma: keep a direct pointer to the struct device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Accessing struct device is pretty useful/common so having a direct pointer: 1) Simplifies some code 2) Makes bcma_bus_get_host_dev() unneeded 3) Allows further improvements like using dev_* printing helpers Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/bcma_private.h | 1 - drivers/bcma/driver_gpio.c | 2 +- drivers/bcma/host_pci.c | 2 ++ drivers/bcma/host_soc.c | 4 ++-- drivers/bcma/main.c | 45 +++++++++---------------------------- include/linux/bcma/bcma.h | 11 +++------ 6 files changed, 18 insertions(+), 47 deletions(-) diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index a4aac370f21f..1f0e66310b23 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -33,7 +33,6 @@ int __init bcma_bus_early_register(struct bcma_bus *bus); int bcma_bus_suspend(struct bcma_bus *bus); int bcma_bus_resume(struct bcma_bus *bus); #endif -struct device *bcma_bus_get_host_dev(struct bcma_bus *bus); /* scan.c */ void bcma_detect_chip(struct bcma_bus *bus); diff --git a/drivers/bcma/driver_gpio.c b/drivers/bcma/driver_gpio.c index 2c0ffb77d738..a5df3d111334 100644 --- a/drivers/bcma/driver_gpio.c +++ b/drivers/bcma/driver_gpio.c @@ -183,7 +183,7 @@ int bcma_gpio_init(struct bcma_drv_cc *cc) chip->direction_input = bcma_gpio_direction_input; chip->direction_output = bcma_gpio_direction_output; chip->owner = THIS_MODULE; - chip->parent = bcma_bus_get_host_dev(bus); + chip->parent = bus->dev; #if IS_BUILTIN(CONFIG_OF) chip->of_node = cc->core->dev.of_node; #endif diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c index 63410ecfe640..f52239feb4cb 100644 --- a/drivers/bcma/host_pci.c +++ b/drivers/bcma/host_pci.c @@ -196,6 +196,8 @@ static int bcma_host_pci_probe(struct pci_dev *dev, goto err_pci_release_regions; } + bus->dev = &dev->dev; + /* Map MMIO */ err = -ENOMEM; bus->mmio = pci_iomap(dev, 0, ~0UL); diff --git a/drivers/bcma/host_soc.c b/drivers/bcma/host_soc.c index 2dce34789329..c8073b509a2b 100644 --- a/drivers/bcma/host_soc.c +++ b/drivers/bcma/host_soc.c @@ -179,7 +179,6 @@ int __init bcma_host_soc_register(struct bcma_soc *soc) /* Host specific */ bus->hosttype = BCMA_HOSTTYPE_SOC; bus->ops = &bcma_host_soc_ops; - bus->host_pdev = NULL; /* Initialize struct, detect chip */ bcma_init_bus(bus); @@ -213,6 +212,8 @@ static int bcma_host_soc_probe(struct platform_device *pdev) if (!bus) return -ENOMEM; + bus->dev = dev; + /* Map MMIO */ bus->mmio = of_iomap(np, 0); if (!bus->mmio) @@ -221,7 +222,6 @@ static int bcma_host_soc_probe(struct platform_device *pdev) /* Host specific */ bus->hosttype = BCMA_HOSTTYPE_SOC; bus->ops = &bcma_host_soc_ops; - bus->host_pdev = pdev; /* Initialize struct, detect chip */ bcma_init_bus(bus); diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index fc1f4acdd189..6535614a7dc1 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -223,8 +223,8 @@ unsigned int bcma_core_irq(struct bcma_device *core, int num) mips_irq = bcma_core_mips_irq(core); return mips_irq <= 4 ? mips_irq + 2 : 0; } - if (bus->host_pdev) - return bcma_of_get_irq(&bus->host_pdev->dev, core, num); + if (bus->dev) + return bcma_of_get_irq(bus->dev, core, num); return 0; case BCMA_HOSTTYPE_SDIO: return 0; @@ -239,18 +239,18 @@ void bcma_prepare_core(struct bcma_bus *bus, struct bcma_device *core) core->dev.release = bcma_release_core_dev; core->dev.bus = &bcma_bus_type; dev_set_name(&core->dev, "bcma%d:%d", bus->num, core->core_index); - core->dev.parent = bcma_bus_get_host_dev(bus); - if (core->dev.parent) - bcma_of_fill_device(core->dev.parent, core); + core->dev.parent = bus->dev; + if (bus->dev) + bcma_of_fill_device(bus->dev, core); switch (bus->hosttype) { case BCMA_HOSTTYPE_PCI: - core->dma_dev = &bus->host_pci->dev; + core->dma_dev = bus->dev; core->irq = bus->host_pci->irq; break; case BCMA_HOSTTYPE_SOC: - if (IS_ENABLED(CONFIG_OF) && bus->host_pdev) { - core->dma_dev = &bus->host_pdev->dev; + if (IS_ENABLED(CONFIG_OF) && bus->dev) { + core->dma_dev = bus->dev; } else { core->dev.dma_mask = &core->dev.coherent_dma_mask; core->dma_dev = &core->dev; @@ -261,28 +261,6 @@ void bcma_prepare_core(struct bcma_bus *bus, struct bcma_device *core) } } -struct device *bcma_bus_get_host_dev(struct bcma_bus *bus) -{ - switch (bus->hosttype) { - case BCMA_HOSTTYPE_PCI: - if (bus->host_pci) - return &bus->host_pci->dev; - else - return NULL; - case BCMA_HOSTTYPE_SOC: - if (bus->host_pdev) - return &bus->host_pdev->dev; - else - return NULL; - case BCMA_HOSTTYPE_SDIO: - if (bus->host_sdio) - return &bus->host_sdio->dev; - else - return NULL; - } - return NULL; -} - void bcma_init_bus(struct bcma_bus *bus) { mutex_lock(&bcma_buses_mutex); @@ -402,7 +380,6 @@ int bcma_bus_register(struct bcma_bus *bus) { int err; struct bcma_device *core; - struct device *dev; /* Scan for devices (cores) */ err = bcma_bus_scan(bus); @@ -425,10 +402,8 @@ int bcma_bus_register(struct bcma_bus *bus) bcma_core_pci_early_init(&bus->drv_pci[0]); } - dev = bcma_bus_get_host_dev(bus); - if (dev) { - of_platform_default_populate(dev->of_node, NULL, dev); - } + if (bus->dev) + of_platform_default_populate(bus->dev->of_node, NULL, bus->dev); /* Cores providing flash access go before SPROM init */ list_for_each_entry(core, &bus->cores, list) { diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index ef61f3607e99..60b94b944e9f 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -332,6 +332,8 @@ extern int bcma_arch_register_fallback_sprom( struct ssb_sprom *out)); struct bcma_bus { + struct device *dev; + /* The MMIO area. */ void __iomem *mmio; @@ -339,14 +341,7 @@ struct bcma_bus { enum bcma_hosttype hosttype; bool host_is_pcie2; /* Used for BCMA_HOSTTYPE_PCI only */ - union { - /* Pointer to the PCI bus (only for BCMA_HOSTTYPE_PCI) */ - struct pci_dev *host_pci; - /* Pointer to the SDIO device (only for BCMA_HOSTTYPE_SDIO) */ - struct sdio_func *host_sdio; - /* Pointer to platform device (only for BCMA_HOSTTYPE_SOC) */ - struct platform_device *host_pdev; - }; + struct pci_dev *host_pci; /* PCI bus pointer (BCMA_HOSTTYPE_PCI only) */ struct bcma_chipinfo chipinfo; From 777bc4801a6868fcbff09ffb6e30f023e7c5ed38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 2 Jan 2019 00:00:02 +0100 Subject: [PATCH 0021/1436] bcma: use dev_* printing functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It provides more meaningful messages. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/bcma_private.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index 1f0e66310b23..6eded32d1aac 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -10,13 +10,13 @@ #include #define bcma_err(bus, fmt, ...) \ - pr_err("bus%d: " fmt, (bus)->num, ##__VA_ARGS__) + dev_err((bus)->dev, "bus%d: " fmt, (bus)->num, ##__VA_ARGS__) #define bcma_warn(bus, fmt, ...) \ - pr_warn("bus%d: " fmt, (bus)->num, ##__VA_ARGS__) + dev_warn((bus)->dev, "bus%d: " fmt, (bus)->num, ##__VA_ARGS__) #define bcma_info(bus, fmt, ...) \ - pr_info("bus%d: " fmt, (bus)->num, ##__VA_ARGS__) + dev_info((bus)->dev, "bus%d: " fmt, (bus)->num, ##__VA_ARGS__) #define bcma_debug(bus, fmt, ...) \ - pr_debug("bus%d: " fmt, (bus)->num, ##__VA_ARGS__) + dev_dbg((bus)->dev, "bus%d: " fmt, (bus)->num, ##__VA_ARGS__) struct bcma_bus; From 3a33bd840523aaa06f4429fbfd38922bf0dc2e8d Mon Sep 17 00:00:00 2001 From: Lo-Hsiang Lo Date: Mon, 7 Jan 2019 08:46:16 +0000 Subject: [PATCH 0022/1436] brcmfmac: fix system warning message during wowl suspend There is a system warning message, warn_slowpath-fmt, during suspend while using supplicant join AP and enable wowl feature by IW command. It's caused by brcmf_pno_remove_request path can't find the reqid. This fix will not go to remove pno request function if there is no pno scan. Acked-by: Arend van Spriel Signed-off-by: Lo-Hsiang Lo Signed-off-by: Chi-Hsien Lin Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/pno.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pno.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pno.c index ffa243e2e2d0..37b403877228 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pno.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pno.c @@ -496,6 +496,11 @@ int brcmf_pno_stop_sched_scan(struct brcmf_if *ifp, u64 reqid) brcmf_dbg(TRACE, "reqid=%llu\n", reqid); pi = ifp_to_pno(ifp); + + /* No PNO request */ + if (!pi->n_reqs) + return 0; + err = brcmf_pno_remove_request(pi, reqid); if (err) return err; From 4ad0be160544ffbdafb7cec39bb8e6dd0a97317a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 7 Jan 2019 14:33:27 +0100 Subject: [PATCH 0023/1436] brcmfmac: Use firmware_request_nowarn for the clm_blob The linux-firmware brcmfmac firmware files contain an embedded table with per country allowed channels and strength info. For recent hardware these versions of the firmware are specially build for linux-firmware, the firmware files directly available from Cypress rely on a separate clm_blob file for this info. For some unknown reason Cypress refuses to provide the standard firmware files + clm_blob files it uses elsewhere for inclusion into linux-firmware, instead relying on these special builds with the clm_blob info embedded. This means that the linux-firmware firmware versions often lag behind, but I digress. The brcmfmac driver does support the separate clm_blob file and always tries to load this. Currently we use request_firmware for this. This means that on any standard install, using the standard combo of linux-kernel + linux-firmware, we will get a warning: "Direct firmware load for ... failed with error -2" On top of this, brcmfmac itself prints: "no clm_blob available (err=-2), device may have limited channels available". This commit switches to firmware_request_nowarn, fixing almost any brcmfmac device logging the warning (it leaves the brcmfmac info message in place). Signed-off-by: Hans de Goede Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c index 1f1e95a15a17..0ce1d8174e6d 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c @@ -149,7 +149,7 @@ static int brcmf_c_process_clm_blob(struct brcmf_if *ifp) return err; } - err = request_firmware(&clm, clm_name, bus->dev); + err = firmware_request_nowarn(&clm, clm_name, bus->dev); if (err) { brcmf_info("no clm_blob available (err=%d), device may have limited channels available\n", err); From e35e26b26e955c53e61c154ba26b9bb15da6b858 Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Fri, 7 Dec 2018 10:52:31 +0000 Subject: [PATCH 0024/1436] arm: dts: meson: Fix IRQ trigger type for macirq A long running stress test on a custom board shipping an AXG SoCs and a Realtek RTL8211F PHY revealed that after a few hours the connection speed would drop drastically, from ~1000Mbps to ~3Mbps. At the same time the 'macirq' (eth0) IRQ would stop being triggered at all and as consequence the GMAC IRQs never ACKed. After a painful investigation the problem seemed to be due to a wrong defined IRQ type for the GMAC IRQ that should be LEVEL_HIGH instead of EDGE_RISING. The change in the macirq IRQ type also solved another long standing issue affecting this SoC/PHY where EEE was causing the network connection to die after stressing it with iperf3 (even though much sooner). It's now possible to remove the 'eee-broken-1000t' quirk as well. Fixes: 9c15795a4f96 ("ARM: dts: meson8b-odroidc1: ethernet support") Signed-off-by: Carlo Caione Reviewed-by: Martin Blumenstingl Tested-by: Martin Blumenstingl Signed-off-by: Kevin Hilman --- arch/arm/boot/dts/meson.dtsi | 2 +- arch/arm/boot/dts/meson8b-odroidc1.dts | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi index e4645f612712..2ab74860d962 100644 --- a/arch/arm/boot/dts/meson.dtsi +++ b/arch/arm/boot/dts/meson.dtsi @@ -274,7 +274,7 @@ compatible = "amlogic,meson6-dwmac", "snps,dwmac"; reg = <0xc9410000 0x10000 0xc1108108 0x4>; - interrupts = ; + interrupts = ; interrupt-names = "macirq"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/meson8b-odroidc1.dts b/arch/arm/boot/dts/meson8b-odroidc1.dts index 58669abda259..a951a6632d0c 100644 --- a/arch/arm/boot/dts/meson8b-odroidc1.dts +++ b/arch/arm/boot/dts/meson8b-odroidc1.dts @@ -221,7 +221,6 @@ /* Realtek RTL8211F (0x001cc916) */ eth_phy: ethernet-phy@0 { reg = <0>; - eee-broken-1000t; interrupt-parent = <&gpio_intc>; /* GPIOH_3 */ interrupts = <17 IRQ_TYPE_LEVEL_LOW>; From 3fb348e030319f20ebbde082a449d4bf8a96f2fd Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 29 Dec 2018 13:57:09 +0100 Subject: [PATCH 0025/1436] ARM: dts: meson8b: odroidc1: mark the SD card detection GPIO active-low After commit 89a5e15bcba87d ("gpio/mmc/of: Respect polarity in the device tree") SD cards are not detected anymore. The CD GPIO is "active low" on Odroid-C1. The MMC dt-bindings specify: "[...] using the "cd-inverted" property means, that the CD line is active high, i.e. it is high, when a card is inserted". Fix the description of the SD card by marking it as GPIO_ACTIVE_LOW and drop the "cd-inverted" property. This makes the definition consistent with the existing dt-bindings and fixes the check whether an SD card is inserted. Fixes: e03efbce6bebf5 ("ARM: dts: meson8b-odroidc1: add microSD support") Signed-off-by: Martin Blumenstingl Reviewed-by: Linus Walleij Tested-by: Anand Moon Signed-off-by: Kevin Hilman --- arch/arm/boot/dts/meson8b-odroidc1.dts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/meson8b-odroidc1.dts b/arch/arm/boot/dts/meson8b-odroidc1.dts index a951a6632d0c..0f0a46ddf3ff 100644 --- a/arch/arm/boot/dts/meson8b-odroidc1.dts +++ b/arch/arm/boot/dts/meson8b-odroidc1.dts @@ -272,8 +272,7 @@ cap-sd-highspeed; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&tflash_vdd>; vqmmc-supply = <&tf_io>; From c8bfe65fb1fb7a43d766df1dfa379406112cba61 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 29 Dec 2018 13:57:10 +0100 Subject: [PATCH 0026/1436] ARM: dts: meson8b: ec100: mark the SD card detection GPIO active-low After commit 89a5e15bcba87d ("gpio/mmc/of: Respect polarity in the device tree") SD cards are not detected anymore. The CD GPIO is "active low" on the EC-100. The MMC dt-bindings specify: "[...] using the "cd-inverted" property means, that the CD line is active high, i.e. it is high, when a card is inserted". Fix the description of the SD card by marking it as GPIO_ACTIVE_LOW and drop the "cd-inverted" property. This makes the definition consistent with the existing dt-bindings and fixes the check whether an SD card is inserted. Fixes: bbedc1f1d90e33 ("ARM: dts: meson8b: Add support for the Endless Mini (EC-100)") Signed-off-by: Martin Blumenstingl Reviewed-by: Linus Walleij Signed-off-by: Kevin Hilman --- arch/arm/boot/dts/meson8b-ec100.dts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/meson8b-ec100.dts b/arch/arm/boot/dts/meson8b-ec100.dts index 0872f6e3abf5..d50fc2f60fa3 100644 --- a/arch/arm/boot/dts/meson8b-ec100.dts +++ b/arch/arm/boot/dts/meson8b-ec100.dts @@ -205,8 +205,7 @@ cap-sd-highspeed; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vcc_3v3>; }; From 8615f5596335db0978cea593dcd0070dc5f8b116 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 29 Dec 2018 13:57:11 +0100 Subject: [PATCH 0027/1436] ARM: dts: meson8m2: mxiii-plus: mark the SD card detection GPIO active-low After commit 89a5e15bcba87d ("gpio/mmc/of: Respect polarity in the device tree") SD cards are not detected anymore. The CD GPIO is "active low" on the MXIII-Plus. The MMC dt-bindings specify: "[...] using the "cd-inverted" property means, that the CD line is active high, i.e. it is high, when a card is inserted". Fix the description of the SD card by marking it as GPIO_ACTIVE_LOW and drop the "cd-inverted" property. This makes the definition consistent with the existing dt-bindings and fixes the check whether an SD card is inserted. Fixes: 35ee52bea66c74 ("ARM: dts: meson8m2: add support for the Tronsmart MXIII Plus") Signed-off-by: Martin Blumenstingl Reviewed-by: Linus Walleij Signed-off-by: Kevin Hilman --- arch/arm/boot/dts/meson8m2-mxiii-plus.dts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/meson8m2-mxiii-plus.dts b/arch/arm/boot/dts/meson8m2-mxiii-plus.dts index f5853610b20b..6ac02beb5fa7 100644 --- a/arch/arm/boot/dts/meson8m2-mxiii-plus.dts +++ b/arch/arm/boot/dts/meson8m2-mxiii-plus.dts @@ -206,8 +206,7 @@ cap-sd-highspeed; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vcc_3v3>; }; From 9fe310546559d123103c39dc85eb54dc1316f450 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 6 Nov 2018 12:29:44 +0100 Subject: [PATCH 0028/1436] mt76: add size check for additional rx fragments So far the code only validates the buffer size of the first skb. Extend this check to cover additional fragments as well, in case the size is corrupted during a DMA reset. Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/dma.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index e2ba26378575..710a77fccf63 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -430,6 +430,14 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget) if (!data) break; + if (q->buf_size < len + q->buf_offset) { + dev_kfree_skb(q->rx_head); + q->rx_head = NULL; + + skb_free_frag(data); + continue; + } + if (q->rx_head) { mt76_add_fragment(dev, q, data, len, more); continue; From cbbfd73767cd7a43fbebe712cb837d7ff06b99f2 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 29 Nov 2018 13:13:54 +0100 Subject: [PATCH 0029/1436] mt76: throttle transmission of buffered multicast packets Avoids drowning out regular transmissions Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c index 66315410aebe..6974acc75e2b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c @@ -121,9 +121,10 @@ static void mt76x02_pre_tbtt_tasklet(unsigned long arg) ieee80211_iterate_active_interfaces_atomic(mt76_hw(dev), IEEE80211_IFACE_ITER_RESUME_ALL, mt76x02_add_buffered_bc, &data); - } while (nframes != skb_queue_len(&data.q)); + } while (nframes != skb_queue_len(&data.q) && + skb_queue_len(&data.q) < 8); - if (!nframes) + if (!skb_queue_len(&data.q)) return; for (i = 0; i < ARRAY_SIZE(data.tail); i++) { From 7267a7965dd7b0f2a0800538c8080f3e683846fc Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 13 Nov 2018 20:54:17 +0100 Subject: [PATCH 0030/1436] mt76: request tx status for powersave released EOSP packet Allows mac80211 to keep track of the service period Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index 7b711058807d..f08509c1670d 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -330,7 +330,8 @@ mt76_queue_ps_skb(struct mt76_dev *dev, struct ieee80211_sta *sta, info->control.flags |= IEEE80211_TX_CTRL_PS_RESPONSE; if (last) - info->flags |= IEEE80211_TX_STATUS_EOSP; + info->flags |= IEEE80211_TX_STATUS_EOSP | + IEEE80211_TX_CTL_REQ_TX_STATUS; mt76_skb_set_moredata(skb, !last); dev->queue_ops->tx_queue_skb(dev, hwq, skb, wcid, sta); From 4d4b12bc56d33d68ad04382f4c15c42bf4616270 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 1 Dec 2018 10:55:06 +0100 Subject: [PATCH 0031/1436] mt76: dma: remove napi from mt76_dma_rx_fill signature Remove napi from mt76_dma_rx_fill routine signature since it is a leftover of a previous implementation and it is no longer used Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/dma.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 710a77fccf63..b7fd2e110663 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -318,7 +318,7 @@ free: EXPORT_SYMBOL_GPL(mt76_dma_tx_queue_skb); static int -mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q, bool napi) +mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q) { dma_addr_t addr; void *buf; @@ -392,7 +392,7 @@ mt76_dma_rx_reset(struct mt76_dev *dev, enum mt76_rxq_id qid) mt76_dma_rx_cleanup(dev, q); mt76_dma_sync_idx(dev, q); - mt76_dma_rx_fill(dev, q, false); + mt76_dma_rx_fill(dev, q); } static void @@ -471,7 +471,7 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget) dev->drv->rx_skb(dev, q - dev->q_rx, skb); } - mt76_dma_rx_fill(dev, q, true); + mt76_dma_rx_fill(dev, q); return done; } @@ -512,7 +512,7 @@ mt76_dma_init(struct mt76_dev *dev) for (i = 0; i < ARRAY_SIZE(dev->q_rx); i++) { netif_napi_add(&dev->napi_dev, &dev->napi[i], mt76_dma_rx_poll, 64); - mt76_dma_rx_fill(dev, &dev->q_rx[i], false); + mt76_dma_rx_fill(dev, &dev->q_rx[i]); skb_queue_head_init(&dev->rx_skb[i]); napi_enable(&dev->napi[i]); } From 0ecf94dc3695a494dd4f495fc0913049d0e99891 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 1 Dec 2018 15:01:19 +0100 Subject: [PATCH 0032/1436] mt76: usb: do not build the skb if reported len does not fit in buf_size Precompute data length in order to avoid to allocate the related skb data structure if reported length does not fit in queue buf_size Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/usb.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c index b061263453d4..14ff06c5764e 100644 --- a/drivers/net/wireless/mediatek/mt76/usb.c +++ b/drivers/net/wireless/mediatek/mt76/usb.c @@ -407,17 +407,15 @@ mt76u_process_rx_entry(struct mt76_dev *dev, struct urb *urb) if (len < 0) return 0; + data_len = min_t(int, len, urb->sg[0].length - MT_DMA_HDR_LEN); + if (MT_DMA_HDR_LEN + data_len > SKB_WITH_OVERHEAD(q->buf_size)) + return 0; + skb = build_skb(data, q->buf_size); if (!skb) return 0; - data_len = min_t(int, len, urb->sg[0].length - MT_DMA_HDR_LEN); skb_reserve(skb, MT_DMA_HDR_LEN); - if (skb->tail + data_len > skb->end) { - dev_kfree_skb(skb); - return 1; - } - __skb_put(skb, data_len); len -= data_len; From 85b7e2acd31bf975c5163e0c6d949fa3742921b1 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sun, 2 Dec 2018 13:06:34 +0100 Subject: [PATCH 0033/1436] mt76: Add missing include of linux/module.h MODULE_FIRMWARE() is used in usb_mcu.c and provided by linux/module.h, but this header file is not directly included. This causes problems in backports with some kernel versions. Add the missing include. Signed-off-by: Hauke Mehrtens Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x0/usb_mcu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/usb_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76x0/usb_mcu.c index 9d7585029df9..f391d2d21fbc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/usb_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/usb_mcu.c @@ -15,6 +15,7 @@ */ #include #include +#include #include "mt76x0.h" #include "mcu.h" From 374eb1b55540c99af4e2417a2d51ff5f56cfdb70 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 6 Dec 2018 12:40:23 +0100 Subject: [PATCH 0034/1436] mt76: fix typo in mt76x02_check_mac_err routine Reconfigure properly MT_MAC_SYS_CTRL register after mac sw-reset in mt76x02_check_mac_err routine Fixes: 73556561ab9f ("mt76x0: use mt76x02_mac_work as stats handler") Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index c08bf371e527..490368dc9325 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -774,8 +774,8 @@ static void mt76x02_check_mac_err(struct mt76x02_dev *dev) mt76_set(dev, MT_MAC_SYS_CTRL, MT_MAC_SYS_CTRL_RESET_CSR); udelay(10); - mt76_clear(dev, MT_MAC_SYS_CTRL, - MT_MAC_SYS_CTRL_ENABLE_TX | MT_MAC_SYS_CTRL_ENABLE_RX); + mt76_wr(dev, MT_MAC_SYS_CTRL, + MT_MAC_SYS_CTRL_ENABLE_TX | MT_MAC_SYS_CTRL_ENABLE_RX); } void mt76x02_mac_work(struct work_struct *work) From 4989338ed3053372fcf5173a2a4800ac99de07d9 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 6 Dec 2018 17:07:39 +0100 Subject: [PATCH 0035/1436] mt76: mac: run mt76x02_mac_work routine atomically Grab mt76_dev mutex in mt76x02_mac_work handler since it runs concurrently with mt76x{0,2}_set_channel routines Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index 490368dc9325..64841f40cdcf 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -784,6 +784,8 @@ void mt76x02_mac_work(struct work_struct *work) mac_work.work); int i, idx; + mutex_lock(&dev->mt76.mutex); + mt76x02_update_channel(&dev->mt76); for (i = 0, idx = 0; i < 16; i++) { u32 val = mt76_rr(dev, MT_TX_AGG_CNT(i)); @@ -796,6 +798,8 @@ void mt76x02_mac_work(struct work_struct *work) if (!dev->beacon_mask) mt76x02_check_mac_err(dev); + mutex_unlock(&dev->mt76.mutex); + mt76_tx_status_check(&dev->mt76, NULL, false); ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mac_work, From e207afa0232d40606e025059c18c70b89a7d5912 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 6 Dec 2018 19:11:55 +0100 Subject: [PATCH 0036/1436] mt76: usb: avoid queue/status spinlocks while passing tx status to mac80211 As already done for pcie code in commit 79d1c94c9c78 ("mt76: avoid queue/status spinlocks while passing tx status to mac80211") make sure that no tx related spinlocks are taken during the ieee80211_tx_status call Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/usb.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c index 14ff06c5764e..6a2507524c6c 100644 --- a/drivers/net/wireless/mediatek/mt76/usb.c +++ b/drivers/net/wireless/mediatek/mt76/usb.c @@ -583,6 +583,7 @@ static void mt76u_stop_rx(struct mt76_dev *dev) static void mt76u_tx_tasklet(unsigned long data) { struct mt76_dev *dev = (struct mt76_dev *)data; + struct mt76_queue_entry entry; struct mt76u_buf *buf; struct mt76_queue *q; bool wake; @@ -597,17 +598,18 @@ static void mt76u_tx_tasklet(unsigned long data) if (!buf->done || !q->queued) break; - dev->drv->tx_complete_skb(dev, q, - &q->entry[q->head], - false); - if (q->entry[q->head].schedule) { q->entry[q->head].schedule = false; q->swq_queued--; } + entry = q->entry[q->head]; q->head = (q->head + 1) % q->ndesc; q->queued--; + + spin_unlock_bh(&q->lock); + dev->drv->tx_complete_skb(dev, q, &entry, false); + spin_lock_bh(&q->lock); } mt76_txq_schedule(dev, q); wake = i < IEEE80211_NUM_ACS && q->queued < q->ndesc - 8; From b9f81643b6aea47602adc3a61edf5ad207d640ec Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 10 Dec 2018 11:22:00 +0100 Subject: [PATCH 0037/1436] mt76x0: pci: fix ACS support Fix Automatic Channel Selection (ACS) support in mt76x0e driver configuring properly MT_CH_TIME_CFG register Fixes: 6250318694ca ("mt76x0: pci: add get_survey support") Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x0/pci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c index d895b6f3dc44..1906cc62690f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c @@ -141,6 +141,14 @@ static int mt76x0e_register_device(struct mt76x02_dev *dev) mt76_clear(dev, 0x110, BIT(9)); mt76_set(dev, MT_MAX_LEN_CFG, BIT(13)); + mt76_wr(dev, MT_CH_TIME_CFG, + MT_CH_TIME_CFG_TIMER_EN | + MT_CH_TIME_CFG_TX_AS_BUSY | + MT_CH_TIME_CFG_RX_AS_BUSY | + MT_CH_TIME_CFG_NAV_AS_BUSY | + MT_CH_TIME_CFG_EIFS_AS_BUSY | + FIELD_PREP(MT_CH_TIME_CFG_CH_TIMER_CLR, 1)); + err = mt76x0_register_device(dev); if (err < 0) return err; From 20ce270e5a7da08fdf946ed674ba10c71d18fdfb Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 10 Dec 2018 11:53:55 +0100 Subject: [PATCH 0038/1436] mt76x02: do not set protection on set_rts_threshold callback Use set_rts_threshold calback to enable/disable threshold only for legacy traffic. Protection for HT and VHT traffic is defined by HT operation element and it's provided by remote AP or by hostapd. Signed-off-by: Stanislaw Gruszka Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 16 +--------------- drivers/net/wireless/mediatek/mt76/mt76x02_mac.h | 2 +- .../net/wireless/mediatek/mt76/mt76x02_util.c | 2 +- 3 files changed, 3 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index 64841f40cdcf..d3f1d57fb758 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -715,7 +715,7 @@ void mt76x02_tx_complete_skb(struct mt76_dev *mdev, struct mt76_queue *q, } EXPORT_SYMBOL_GPL(mt76x02_tx_complete_skb); -void mt76x02_mac_set_tx_protection(struct mt76x02_dev *dev, u32 val) +void mt76x02_mac_set_rts_thresh(struct mt76x02_dev *dev, u32 val) { u32 data = 0; @@ -729,20 +729,6 @@ void mt76x02_mac_set_tx_protection(struct mt76x02_dev *dev, u32 val) MT_PROT_CFG_CTRL | MT_PROT_CFG_RTS_THRESH, data); mt76_rmw(dev, MT_OFDM_PROT_CFG, MT_PROT_CFG_CTRL | MT_PROT_CFG_RTS_THRESH, data); - mt76_rmw(dev, MT_MM20_PROT_CFG, - MT_PROT_CFG_CTRL | MT_PROT_CFG_RTS_THRESH, data); - mt76_rmw(dev, MT_MM40_PROT_CFG, - MT_PROT_CFG_CTRL | MT_PROT_CFG_RTS_THRESH, data); - mt76_rmw(dev, MT_GF20_PROT_CFG, - MT_PROT_CFG_CTRL | MT_PROT_CFG_RTS_THRESH, data); - mt76_rmw(dev, MT_GF40_PROT_CFG, - MT_PROT_CFG_CTRL | MT_PROT_CFG_RTS_THRESH, data); - mt76_rmw(dev, MT_TX_PROT_CFG6, - MT_PROT_CFG_CTRL | MT_PROT_CFG_RTS_THRESH, data); - mt76_rmw(dev, MT_TX_PROT_CFG7, - MT_PROT_CFG_CTRL | MT_PROT_CFG_RTS_THRESH, data); - mt76_rmw(dev, MT_TX_PROT_CFG8, - MT_PROT_CFG_CTRL | MT_PROT_CFG_RTS_THRESH, data); } void mt76x02_update_channel(struct mt76_dev *mdev) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h index 4e597004c445..d1a8ed171537 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h @@ -194,7 +194,7 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, struct mt76x02_tx_status *stat, u8 *update); int mt76x02_mac_process_rx(struct mt76x02_dev *dev, struct sk_buff *skb, void *rxi); -void mt76x02_mac_set_tx_protection(struct mt76x02_dev *dev, u32 val); +void mt76x02_mac_set_rts_thresh(struct mt76x02_dev *dev, u32 val); void mt76x02_mac_setaddr(struct mt76x02_dev *dev, u8 *addr); void mt76x02_mac_write_txwi(struct mt76x02_dev *dev, struct mt76x02_txwi *txwi, struct sk_buff *skb, struct mt76_wcid *wcid, diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c index 38bd466cff16..364e7b2ebf99 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c @@ -463,7 +463,7 @@ int mt76x02_set_rts_threshold(struct ieee80211_hw *hw, u32 val) return -EINVAL; mutex_lock(&dev->mt76.mutex); - mt76x02_mac_set_tx_protection(dev, val); + mt76x02_mac_set_rts_thresh(dev, val); mutex_unlock(&dev->mt76.mutex); return 0; From 493d2dfab34bb88563a6a64801c7d604537ca958 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 10 Dec 2018 11:53:56 +0100 Subject: [PATCH 0039/1436] mt76x02: fixup MT_PROT_RATE_* defines On new mt76 chips, phy mode is configured by last 3 bits of rate value. Hence OFDM bit is marked by 0x2000 instead of 0x4000. Acked-by: Lorenzo Bianconi Signed-off-by: Stanislaw Gruszka Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02_regs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h b/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h index f7de77d09d28..d1ac847adce5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h @@ -440,9 +440,9 @@ #define MT_PROT_TXOP_ALLOW_GF40 BIT(25) #define MT_PROT_RTS_THR_EN BIT(26) #define MT_PROT_RATE_CCK_11 0x0003 -#define MT_PROT_RATE_OFDM_6 0x4000 -#define MT_PROT_RATE_OFDM_24 0x4004 -#define MT_PROT_RATE_DUP_OFDM_24 0x4084 +#define MT_PROT_RATE_OFDM_6 0x2000 +#define MT_PROT_RATE_OFDM_24 0x2004 +#define MT_PROT_RATE_DUP_OFDM_24 0x2084 #define MT_PROT_TXOP_ALLOW_ALL GENMASK(25, 20) #define MT_PROT_TXOP_ALLOW_BW20 (MT_PROT_TXOP_ALLOW_ALL & \ ~MT_PROT_TXOP_ALLOW_MM40 & \ From 26a7b5473191d362b09a190fafd0dc2059cbf527 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 10 Dec 2018 11:53:57 +0100 Subject: [PATCH 0040/1436] mt76x02: set protection according to ht operation element Configure protection based on information that are provided to us either by remote AP or by hostapd via HT operation IE. Signed-off-by: Stanislaw Gruszka Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt76x02_mac.c | 83 +++++++++++++++++++ .../net/wireless/mediatek/mt76/mt76x02_mac.h | 2 + .../net/wireless/mediatek/mt76/mt76x02_regs.h | 1 + .../net/wireless/mediatek/mt76/mt76x02_util.c | 4 + 4 files changed, 90 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index d3f1d57fb758..ddc1c3146feb 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -731,6 +731,89 @@ void mt76x02_mac_set_rts_thresh(struct mt76x02_dev *dev, u32 val) MT_PROT_CFG_CTRL | MT_PROT_CFG_RTS_THRESH, data); } +void mt76x02_mac_set_tx_protection(struct mt76x02_dev *dev, bool legacy_prot, + int ht_mode) +{ + int mode = ht_mode & IEEE80211_HT_OP_MODE_PROTECTION; + bool non_gf = !!(ht_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT); + u32 prot[6]; + u32 vht_prot[3]; + int i; + u16 rts_thr; + + for (i = 0; i < ARRAY_SIZE(prot); i++) { + prot[i] = mt76_rr(dev, MT_CCK_PROT_CFG + i * 4); + prot[i] &= ~MT_PROT_CFG_CTRL; + if (i >= 2) + prot[i] &= ~MT_PROT_CFG_RATE; + } + + for (i = 0; i < ARRAY_SIZE(vht_prot); i++) { + vht_prot[i] = mt76_rr(dev, MT_TX_PROT_CFG6 + i * 4); + vht_prot[i] &= ~(MT_PROT_CFG_CTRL | MT_PROT_CFG_RATE); + } + + rts_thr = mt76_get_field(dev, MT_TX_RTS_CFG, MT_TX_RTS_CFG_THRESH); + + if (rts_thr != 0xffff) + prot[0] |= MT_PROT_CTRL_RTS_CTS; + + if (legacy_prot) { + prot[1] |= MT_PROT_CTRL_CTS2SELF; + + prot[2] |= MT_PROT_RATE_CCK_11; + prot[3] |= MT_PROT_RATE_CCK_11; + prot[4] |= MT_PROT_RATE_CCK_11; + prot[5] |= MT_PROT_RATE_CCK_11; + + vht_prot[0] |= MT_PROT_RATE_CCK_11; + vht_prot[1] |= MT_PROT_RATE_CCK_11; + vht_prot[2] |= MT_PROT_RATE_CCK_11; + } else { + if (rts_thr != 0xffff) + prot[1] |= MT_PROT_CTRL_RTS_CTS; + + prot[2] |= MT_PROT_RATE_OFDM_24; + prot[3] |= MT_PROT_RATE_DUP_OFDM_24; + prot[4] |= MT_PROT_RATE_OFDM_24; + prot[5] |= MT_PROT_RATE_DUP_OFDM_24; + + vht_prot[0] |= MT_PROT_RATE_OFDM_24; + vht_prot[1] |= MT_PROT_RATE_DUP_OFDM_24; + vht_prot[2] |= MT_PROT_RATE_SGI_OFDM_24; + } + + switch (mode) { + case IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER: + case IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED: + prot[2] |= MT_PROT_CTRL_RTS_CTS; + prot[3] |= MT_PROT_CTRL_RTS_CTS; + prot[4] |= MT_PROT_CTRL_RTS_CTS; + prot[5] |= MT_PROT_CTRL_RTS_CTS; + vht_prot[0] |= MT_PROT_CTRL_RTS_CTS; + vht_prot[1] |= MT_PROT_CTRL_RTS_CTS; + vht_prot[2] |= MT_PROT_CTRL_RTS_CTS; + break; + case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ: + prot[3] |= MT_PROT_CTRL_RTS_CTS; + prot[5] |= MT_PROT_CTRL_RTS_CTS; + vht_prot[1] |= MT_PROT_CTRL_RTS_CTS; + vht_prot[2] |= MT_PROT_CTRL_RTS_CTS; + break; + } + + if (non_gf) { + prot[4] |= MT_PROT_CTRL_RTS_CTS; + prot[5] |= MT_PROT_CTRL_RTS_CTS; + } + + for (i = 0; i < ARRAY_SIZE(prot); i++) + mt76_wr(dev, MT_CCK_PROT_CFG + i * 4, prot[i]); + + for (i = 0; i < ARRAY_SIZE(vht_prot); i++) + mt76_wr(dev, MT_TX_PROT_CFG6 + i * 4, vht_prot[i]); +} + void mt76x02_update_channel(struct mt76_dev *mdev) { struct mt76x02_dev *dev = container_of(mdev, struct mt76x02_dev, mt76); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h index d1a8ed171537..91c76a050f7e 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h @@ -194,6 +194,8 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, struct mt76x02_tx_status *stat, u8 *update); int mt76x02_mac_process_rx(struct mt76x02_dev *dev, struct sk_buff *skb, void *rxi); +void mt76x02_mac_set_tx_protection(struct mt76x02_dev *dev, bool legacy_prot, + int ht_mode); void mt76x02_mac_set_rts_thresh(struct mt76x02_dev *dev, u32 val); void mt76x02_mac_setaddr(struct mt76x02_dev *dev, u8 *addr); void mt76x02_mac_write_txwi(struct mt76x02_dev *dev, struct mt76x02_txwi *txwi, diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h b/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h index d1ac847adce5..20ad07638699 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h @@ -443,6 +443,7 @@ #define MT_PROT_RATE_OFDM_6 0x2000 #define MT_PROT_RATE_OFDM_24 0x2004 #define MT_PROT_RATE_DUP_OFDM_24 0x2084 +#define MT_PROT_RATE_SGI_OFDM_24 0x2104 #define MT_PROT_TXOP_ALLOW_ALL GENMASK(25, 20) #define MT_PROT_TXOP_ALLOW_BW20 (MT_PROT_TXOP_ALLOW_ALL & \ ~MT_PROT_TXOP_ALLOW_MM40 & \ diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c index 364e7b2ebf99..aac64d0969b8 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c @@ -661,6 +661,10 @@ void mt76x02_bss_info_changed(struct ieee80211_hw *hw, tasklet_enable(&dev->pre_tbtt_tasklet); } + if (changed & BSS_CHANGED_HT || changed & BSS_CHANGED_ERP_CTS_PROT) + mt76x02_mac_set_tx_protection(dev, info->use_cts_prot, + info->ht_operation_mode); + if (changed & BSS_CHANGED_BEACON_INT) { mt76_rmw_field(dev, MT_BEACON_TIME_CFG, MT_BEACON_TIME_CFG_INTVAL, From 9bc29420d745e775d36d224e4a6999ee77a600e1 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 10 Dec 2018 11:53:58 +0100 Subject: [PATCH 0041/1436] mt76x0: configure MT_VHT_HT_FBK_CFG1 Configure MT_VHT_HT_FBK_CFG1 values similar like vendor driver. Signed-off-by: Stanislaw Gruszka Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h b/drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h index a1657922758e..0290ba5869a5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h @@ -88,6 +88,7 @@ static const struct mt76_reg_pair mt76x0_mac_reg_table[] = { { MT_TX_PROT_CFG6, 0xe3f42004 }, { MT_TX_PROT_CFG7, 0xe3f42084 }, { MT_TX_PROT_CFG8, 0xe3f42104 }, + { MT_VHT_HT_FBK_CFG1, 0xedcba980 }, }; static const struct mt76_reg_pair mt76x0_bbp_init_tab[] = { From bf349ce16ef45bf5a0290586124be18a09826860 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 10 Dec 2018 12:54:05 +0100 Subject: [PATCH 0042/1436] mt76x2: add static qualifier to mt76x2_init_hardware Add static qualifier to mt76x2_init_hardware routine since it is used just in pci_init.c Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h | 1 - drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h b/drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h index b259e4b50f1e..28ec19acf9db 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h @@ -53,7 +53,6 @@ struct mt76x02_dev *mt76x2_alloc_device(struct device *pdev); int mt76x2_register_device(struct mt76x02_dev *dev); void mt76x2_phy_power_on(struct mt76x02_dev *dev); -int mt76x2_init_hardware(struct mt76x02_dev *dev); void mt76x2_stop_hardware(struct mt76x02_dev *dev); int mt76x2_eeprom_init(struct mt76x02_dev *dev); int mt76x2_apply_calibration_data(struct mt76x02_dev *dev, int channel); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c index 7f4ea2d00f42..2f05eacb8946 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c @@ -260,7 +260,7 @@ mt76x2_power_on(struct mt76x02_dev *dev) mt76x2_power_on_rf(dev, 1); } -int mt76x2_init_hardware(struct mt76x02_dev *dev) +static int mt76x2_init_hardware(struct mt76x02_dev *dev) { int ret; From 35c572818ca2a1d9bd3a76eab6366b9a299f0a0b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 12 Dec 2018 22:51:53 +0100 Subject: [PATCH 0043/1436] mt76: dfs: run mt76x02_dfs_set_domain atomically Grab mt76_dev mutex in mt76x02_dfs_set_domain since it runs concurrently with mt76x{0,2}_set_channel routines Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c b/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c index 054609c634a2..5bd523b091b6 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c @@ -881,12 +881,14 @@ mt76x02_dfs_set_domain(struct mt76x02_dev *dev, { struct mt76x02_dfs_pattern_detector *dfs_pd = &dev->dfs_pd; + mutex_lock(&dev->mt76.mutex); if (dfs_pd->region != region) { tasklet_disable(&dfs_pd->dfs_tasklet); dfs_pd->region = region; mt76x02_dfs_init_params(dev); tasklet_enable(&dfs_pd->dfs_tasklet); } + mutex_unlock(&dev->mt76.mutex); } void mt76x02_regd_notifier(struct wiphy *wiphy, From 3fd612df8ac79d8ac902985cb570f1723a82e979 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 12 Dec 2018 22:51:54 +0100 Subject: [PATCH 0044/1436] mt76x2: init: set default value for MT_TX_LINK_CFG Update default value for MT_TX_LINK_CFG according to vendor driver Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x2/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/init.c b/drivers/net/wireless/mediatek/mt76/mt76x2/init.c index 54a9b5fac787..f8534362e2c8 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/init.c @@ -143,6 +143,7 @@ void mt76_write_mac_initvals(struct mt76x02_dev *dev) { MT_VHT_HT_FBK_CFG1, 0xedcba980 }, { MT_PROT_AUTO_TX_CFG, 0x00830083 }, { MT_HT_CTRL_CFG, 0x000001ff }, + { MT_TX_LINK_CFG, 0x00001020 }, }; struct mt76_reg_pair prot_vals[] = { { MT_CCK_PROT_CFG, DEFAULT_PROT_CFG_CCK }, From f82ce8d9bd49ed1c3487662c734badfbf560cfe0 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 12 Dec 2018 22:51:55 +0100 Subject: [PATCH 0045/1436] mt76: add energy detect CCA support to mt76x{0,2}e drivers Ported from the reference driver. Should fix compliance with ETSI regulatories on preventing transmission while energy detect values are above the threshold. The code has been tested using an ath9k device running tx99 as noise generator Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt76x0/pci.c | 1 + .../net/wireless/mediatek/mt76/mt76x0/phy.c | 2 + drivers/net/wireless/mediatek/mt76/mt76x02.h | 6 ++ .../net/wireless/mediatek/mt76/mt76x02_dfs.c | 4 + .../net/wireless/mediatek/mt76/mt76x02_mac.c | 85 +++++++++++++++++++ .../net/wireless/mediatek/mt76/mt76x02_mac.h | 2 + .../net/wireless/mediatek/mt76/mt76x02_regs.h | 8 ++ .../wireless/mediatek/mt76/mt76x2/pci_init.c | 1 + .../wireless/mediatek/mt76/mt76x2/pci_phy.c | 2 + 9 files changed, 111 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c index 1906cc62690f..720b05e325a5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c @@ -147,6 +147,7 @@ static int mt76x0e_register_device(struct mt76x02_dev *dev) MT_CH_TIME_CFG_RX_AS_BUSY | MT_CH_TIME_CFG_NAV_AS_BUSY | MT_CH_TIME_CFG_EIFS_AS_BUSY | + MT_CH_CCA_RC_EN | FIELD_PREP(MT_CH_TIME_CFG_CH_TIMER_CLR, 1)); err = mt76x0_register_device(dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c b/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c index 1eb1a802ed20..b2b38b9fcac7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c @@ -1013,6 +1013,8 @@ int mt76x0_phy_set_channel(struct mt76x02_dev *dev, mt76x0_phy_calibrate(dev, false); mt76x0_phy_set_txpower(dev); + mt76x02_edcca_init(dev); + ieee80211_queue_delayed_work(dev->mt76.hw, &dev->cal_work, MT_CALIBRATE_INTERVAL); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02.h b/drivers/net/wireless/mediatek/mt76/mt76x02.h index 6782665049dd..f383fdd914c5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02.h @@ -101,6 +101,12 @@ struct mt76x02_dev { u8 slottime; struct mt76x02_dfs_pattern_detector dfs_pd; + + /* edcca monitor */ + bool ed_tx_blocked; + bool ed_monitor; + u8 ed_trigger; + u8 ed_silent; }; extern struct ieee80211_rate mt76x02_rates[12]; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c b/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c index 5bd523b091b6..19fdcab746a0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c @@ -884,6 +884,10 @@ mt76x02_dfs_set_domain(struct mt76x02_dev *dev, mutex_lock(&dev->mt76.mutex); if (dfs_pd->region != region) { tasklet_disable(&dfs_pd->dfs_tasklet); + + dev->ed_monitor = region == NL80211_DFS_ETSI; + mt76x02_edcca_init(dev); + dfs_pd->region = region; mt76x02_dfs_init_params(dev); tasklet_enable(&dfs_pd->dfs_tasklet); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index ddc1c3146feb..2e06e1f41810 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -847,6 +847,88 @@ static void mt76x02_check_mac_err(struct mt76x02_dev *dev) MT_MAC_SYS_CTRL_ENABLE_TX | MT_MAC_SYS_CTRL_ENABLE_RX); } +static void +mt76x02_edcca_tx_enable(struct mt76x02_dev *dev, bool enable) +{ + if (enable) { + u32 data; + + mt76_set(dev, MT_MAC_SYS_CTRL, MT_MAC_SYS_CTRL_ENABLE_TX); + mt76_set(dev, MT_AUTO_RSP_CFG, MT_AUTO_RSP_EN); + /* enable pa-lna */ + data = mt76_rr(dev, MT_TX_PIN_CFG); + data |= MT_TX_PIN_CFG_TXANT | + MT_TX_PIN_CFG_RXANT | + MT_TX_PIN_RFTR_EN | + MT_TX_PIN_TRSW_EN; + mt76_wr(dev, MT_TX_PIN_CFG, data); + } else { + mt76_clear(dev, MT_MAC_SYS_CTRL, MT_MAC_SYS_CTRL_ENABLE_TX); + mt76_clear(dev, MT_AUTO_RSP_CFG, MT_AUTO_RSP_EN); + /* disable pa-lna */ + mt76_clear(dev, MT_TX_PIN_CFG, MT_TX_PIN_CFG_TXANT); + mt76_clear(dev, MT_TX_PIN_CFG, MT_TX_PIN_CFG_RXANT); + } + dev->ed_tx_blocked = !enable; +} + +void mt76x02_edcca_init(struct mt76x02_dev *dev) +{ + dev->ed_trigger = 0; + dev->ed_silent = 0; + + if (dev->ed_monitor) { + struct ieee80211_channel *chan = dev->mt76.chandef.chan; + u8 ed_th = chan->band == NL80211_BAND_5GHZ ? 0x0e : 0x20; + + mt76_clear(dev, MT_TX_LINK_CFG, MT_TX_CFACK_EN); + mt76_set(dev, MT_TXOP_CTRL_CFG, MT_TXOP_ED_CCA_EN); + mt76_rmw(dev, MT_BBP(AGC, 2), GENMASK(15, 0), + ed_th << 8 | ed_th); + if (!is_mt76x2(dev)) + mt76_set(dev, MT_TXOP_HLDR_ET, + MT_TXOP_HLDR_TX40M_BLK_EN); + } else { + mt76_set(dev, MT_TX_LINK_CFG, MT_TX_CFACK_EN); + mt76_clear(dev, MT_TXOP_CTRL_CFG, MT_TXOP_ED_CCA_EN); + if (is_mt76x2(dev)) { + mt76_wr(dev, MT_BBP(AGC, 2), 0x00007070); + } else { + mt76_wr(dev, MT_BBP(AGC, 2), 0x003a6464); + mt76_clear(dev, MT_TXOP_HLDR_ET, + MT_TXOP_HLDR_TX40M_BLK_EN); + } + } + mt76x02_edcca_tx_enable(dev, true); +} +EXPORT_SYMBOL_GPL(mt76x02_edcca_init); + +#define MT_EDCCA_TH 90 +#define MT_EDCCA_BLOCK_TH 2 +static void mt76x02_edcca_check(struct mt76x02_dev *dev) +{ + u32 val, busy; + + val = mt76_rr(dev, MT_ED_CCA_TIMER); + busy = (val * 100) / jiffies_to_usecs(MT_CALIBRATE_INTERVAL); + busy = min_t(u32, busy, 100); + + if (busy > MT_EDCCA_TH) { + dev->ed_trigger++; + dev->ed_silent = 0; + } else { + dev->ed_silent++; + dev->ed_trigger = 0; + } + + if (dev->ed_trigger > MT_EDCCA_BLOCK_TH && + !dev->ed_tx_blocked) + mt76x02_edcca_tx_enable(dev, false); + else if (dev->ed_silent > MT_EDCCA_BLOCK_TH && + dev->ed_tx_blocked) + mt76x02_edcca_tx_enable(dev, true); +} + void mt76x02_mac_work(struct work_struct *work) { struct mt76x02_dev *dev = container_of(work, struct mt76x02_dev, @@ -867,6 +949,9 @@ void mt76x02_mac_work(struct work_struct *work) if (!dev->beacon_mask) mt76x02_check_mac_err(dev); + if (dev->ed_monitor) + mt76x02_edcca_check(dev); + mutex_unlock(&dev->mt76.mutex); mt76_tx_status_check(&dev->mt76, NULL, false); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h index 91c76a050f7e..735fe96440ba 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h @@ -212,4 +212,6 @@ int mt76x02_mac_set_beacon(struct mt76x02_dev *dev, u8 vif_idx, struct sk_buff *skb); void mt76x02_mac_set_beacon_enable(struct mt76x02_dev *dev, u8 vif_idx, bool val); + +void mt76x02_edcca_init(struct mt76x02_dev *dev); #endif diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h b/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h index 20ad07638699..b8d1ffbac6b5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h @@ -318,6 +318,7 @@ #define MT_CH_TIME_CFG_NAV_AS_BUSY BIT(3) #define MT_CH_TIME_CFG_EIFS_AS_BUSY BIT(4) #define MT_CH_TIME_CFG_MDRDY_CNT_EN BIT(5) +#define MT_CH_CCA_RC_EN BIT(6) #define MT_CH_TIME_CFG_CH_TIMER_CLR GENMASK(9, 8) #define MT_CH_TIME_CFG_MDRDY_CLR GENMASK(11, 10) @@ -378,6 +379,9 @@ #define MT_TX_PWR_CFG_4 0x1324 #define MT_TX_PIN_CFG 0x1328 #define MT_TX_PIN_CFG_TXANT GENMASK(3, 0) +#define MT_TX_PIN_CFG_RXANT GENMASK(11, 8) +#define MT_TX_PIN_RFTR_EN BIT(16) +#define MT_TX_PIN_TRSW_EN BIT(18) #define MT_TX_BAND_CFG 0x132c #define MT_TX_BAND_CFG_UPPER_40M BIT(0) @@ -398,6 +402,7 @@ #define MT_TXOP_CTRL_CFG 0x1340 #define MT_TXOP_TRUN_EN GENMASK(5, 0) #define MT_TXOP_EXT_CCA_DLY GENMASK(15, 8) +#define MT_TXOP_ED_CCA_EN BIT(20) #define MT_TX_RTS_CFG 0x1344 #define MT_TX_RTS_CFG_RETRY_LIMIT GENMASK(7, 0) @@ -409,6 +414,7 @@ #define MT_TX_RETRY_CFG 0x134c #define MT_TX_LINK_CFG 0x1350 +#define MT_TX_CFACK_EN BIT(12) #define MT_VHT_HT_FBK_CFG0 0x1354 #define MT_VHT_HT_FBK_CFG1 0x1358 #define MT_LG_FBK_CFG0 0x135c @@ -512,6 +518,7 @@ #define MT_RX_FILTR_CFG_CTRL_RSV BIT(16) #define MT_AUTO_RSP_CFG 0x1404 +#define MT_AUTO_RSP_EN BIT(0) #define MT_AUTO_RSP_PREAMB_SHORT BIT(4) #define MT_LEGACY_BASIC_RATE 0x1408 #define MT_HT_BASIC_RATE 0x140c @@ -533,6 +540,7 @@ #define MT_PN_PAD_MODE 0x150c #define MT_TXOP_HLDR_ET 0x1608 +#define MT_TXOP_HLDR_TX40M_BLK_EN BIT(1) #define MT_PROT_AUTO_TX_CFG 0x1648 #define MT_PROT_AUTO_TX_CFG_PROT_PADJ GENMASK(11, 8) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c index 2f05eacb8946..9f313824eead 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c @@ -151,6 +151,7 @@ static int mt76x2_mac_reset(struct mt76x02_dev *dev, bool hard) MT_CH_TIME_CFG_RX_AS_BUSY | MT_CH_TIME_CFG_NAV_AS_BUSY | MT_CH_TIME_CFG_EIFS_AS_BUSY | + MT_CH_CCA_RC_EN | FIELD_PREP(MT_CH_TIME_CFG_CH_TIMER_CLR, 1)); mt76x02_set_tx_ackto(dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_phy.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_phy.c index da7cd40f56ff..65ed62229a5b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_phy.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_phy.c @@ -254,6 +254,8 @@ int mt76x2_phy_set_channel(struct mt76x02_dev *dev, 0x38); } + mt76x02_edcca_init(dev); + ieee80211_queue_delayed_work(mt76_hw(dev), &dev->cal_work, MT_CALIBRATE_INTERVAL); From c09f4d0a1f81720342d4d0e1d4a2b601903f132a Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 28 Dec 2018 11:00:47 +0100 Subject: [PATCH 0046/1436] mt76: mac: minor optimizations in mt76x02_mac_tx_rate_val Do not set bw variable to zero for legacy rates since it is already initialized to zero. Moreover set nss to 1 just for legacy rates since nss will be properly set for VHT/HT rates Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index 2e06e1f41810..cc584ea4a8d7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -130,10 +130,8 @@ static __le16 mt76x02_mac_tx_rate_val(struct mt76x02_dev *dev, const struct ieee80211_tx_rate *rate, u8 *nss_val) { + u8 phy, rate_idx, nss, bw = 0; u16 rateval; - u8 phy, rate_idx; - u8 nss = 1; - u8 bw = 0; if (rate->flags & IEEE80211_TX_RC_VHT_MCS) { rate_idx = rate->idx; @@ -164,7 +162,7 @@ mt76x02_mac_tx_rate_val(struct mt76x02_dev *dev, phy = val >> 8; rate_idx = val & 0xff; - bw = 0; + nss = 1; } rateval = FIELD_PREP(MT_RXWI_RATE_INDEX, rate_idx); From 87e86f90196fa074f8f6ac0e77d1287d9239878b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 3 Dec 2018 15:34:24 +0100 Subject: [PATCH 0047/1436] mt76: dma: do not build skb if reported len does not fit in buf_size Precompute data length in order to avoid to allocate the related skb data structure if reported length does not fit in queue buf_size Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/dma.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index b7fd2e110663..1db163c40dcf 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -417,10 +417,9 @@ mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data, static int mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget) { + int len, data_len, done = 0; struct sk_buff *skb; unsigned char *data; - int len; - int done = 0; bool more; while (done < budget) { @@ -430,7 +429,12 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget) if (!data) break; - if (q->buf_size < len + q->buf_offset) { + if (q->rx_head) + data_len = q->buf_size; + else + data_len = SKB_WITH_OVERHEAD(q->buf_size); + + if (data_len < len + q->buf_offset) { dev_kfree_skb(q->rx_head); q->rx_head = NULL; @@ -448,12 +452,7 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget) skb_free_frag(data); continue; } - skb_reserve(skb, q->buf_offset); - if (skb->tail + len > skb->end) { - dev_kfree_skb(skb); - continue; - } if (q == &dev->q_rx[MT_RXQ_MCU]) { u32 *rxfce = (u32 *) skb->cb; From c1e0d2be0acff5e99a59ddcc5af415e48abc6c5e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 28 Dec 2018 15:44:09 +0100 Subject: [PATCH 0048/1436] mt76: mmio: introduce mt76x02_check_tx_hang watchdog Port mt76x02_check_tx_hang watchdog from vendor driver in order to perform a device reset when tx mac/dma logic hangs. Tx mac/dma stuck has been observed when the device is heavy loaded or in a noisy environment. Moreover introduce wdt delayed work in order to run tx_hang watchdog. For the moment run mt76x02_check_tx_hang watchdog just on mt76x2 devices since the issue has not been observed on mt76x0 driver yet Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76.h | 1 + drivers/net/wireless/mediatek/mt76/mt76x02.h | 8 ++ .../wireless/mediatek/mt76/mt76x02_debugfs.c | 2 + .../net/wireless/mediatek/mt76/mt76x02_mac.c | 1 - .../net/wireless/mediatek/mt76/mt76x02_mmio.c | 124 ++++++++++++++++++ .../net/wireless/mediatek/mt76/mt76x02_util.c | 2 + .../wireless/mediatek/mt76/mt76x2/pci_init.c | 1 + .../wireless/mediatek/mt76/mt76x2/pci_main.c | 2 + 8 files changed, 140 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 5cd508a68609..8ef430d8afc4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -421,6 +421,7 @@ struct mt76_dev { struct mt76_queue q_tx[__MT_TXQ_MAX]; struct mt76_queue q_rx[__MT_RXQ_MAX]; const struct mt76_queue_ops *queue_ops; + int tx_dma_idx[4]; wait_queue_head_t tx_wait; struct sk_buff_head status_list; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02.h b/drivers/net/wireless/mediatek/mt76/mt76x02.h index f383fdd914c5..f594e8c82500 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02.h @@ -28,6 +28,9 @@ #define MT_CALIBRATE_INTERVAL HZ +#define MT_WATCHDOG_TIME (HZ / 10) +#define MT_TX_HANG_TH 10 + #define MT_MAX_CHAINS 2 struct mt76x02_rx_freq_cal { s8 high_gain[MT_MAX_CHAINS]; @@ -79,6 +82,7 @@ struct mt76x02_dev { struct tasklet_struct pre_tbtt_tasklet; struct delayed_work cal_work; struct delayed_work mac_work; + struct delayed_work wdt_work; u32 aggr_stats[32]; @@ -89,6 +93,9 @@ struct mt76x02_dev { u8 tbtt_count; u16 beacon_int; + u32 tx_hang_reset; + u8 tx_hang_check; + struct mt76x02_calibration cal; s8 target_power; @@ -142,6 +149,7 @@ s8 mt76x02_tx_get_max_txpwr_adj(struct mt76x02_dev *dev, const struct ieee80211_tx_rate *rate); s8 mt76x02_tx_get_txpwr_adj(struct mt76x02_dev *dev, s8 txpwr, s8 max_txpwr_adj); +void mt76x02_wdt_work(struct work_struct *work); void mt76x02_tx_set_txpwr_auto(struct mt76x02_dev *dev, s8 txpwr); void mt76x02_set_tx_ackto(struct mt76x02_dev *dev); void mt76x02_set_coverage_class(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c b/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c index a9d52ba1e270..7580c5c986ff 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c @@ -133,5 +133,7 @@ void mt76x02_init_debugfs(struct mt76x02_dev *dev) read_txpower); debugfs_create_devm_seqfile(dev->mt76.dev, "agc", dir, read_agc); + + debugfs_create_u32("tx_hang_reset", 0400, dir, &dev->tx_hang_reset); } EXPORT_SYMBOL_GPL(mt76x02_init_debugfs); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index cc584ea4a8d7..6adea0278284 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -943,7 +943,6 @@ void mt76x02_mac_work(struct work_struct *work) dev->aggr_stats[idx++] += val >> 16; } - /* XXX: check beacon stuck for ap mode */ if (!dev->beacon_mask) mt76x02_check_mac_err(dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c index 6974acc75e2b..6236c6121c01 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c @@ -385,3 +385,127 @@ void mt76x02_mac_start(struct mt76x02_dev *dev) MT_INT_TX_STAT); } EXPORT_SYMBOL_GPL(mt76x02_mac_start); + +static bool mt76x02_tx_hang(struct mt76x02_dev *dev) +{ + u32 dma_idx, prev_dma_idx; + struct mt76_queue *q; + int i; + + for (i = 0; i < 4; i++) { + q = &dev->mt76.q_tx[i]; + + if (!q->queued) + continue; + + prev_dma_idx = dev->mt76.tx_dma_idx[i]; + dma_idx = ioread32(&q->regs->dma_idx); + dev->mt76.tx_dma_idx[i] = dma_idx; + + if (prev_dma_idx == dma_idx) + break; + } + + return i < 4; +} + +static void mt76x02_watchdog_reset(struct mt76x02_dev *dev) +{ + u32 mask = dev->mt76.mmio.irqmask; + int i; + + ieee80211_stop_queues(dev->mt76.hw); + set_bit(MT76_RESET, &dev->mt76.state); + + tasklet_disable(&dev->pre_tbtt_tasklet); + tasklet_disable(&dev->tx_tasklet); + + for (i = 0; i < ARRAY_SIZE(dev->mt76.napi); i++) + napi_disable(&dev->mt76.napi[i]); + + mutex_lock(&dev->mt76.mutex); + + if (dev->beacon_mask) + mt76_clear(dev, MT_BEACON_TIME_CFG, + MT_BEACON_TIME_CFG_BEACON_TX | + MT_BEACON_TIME_CFG_TBTT_EN); + + mt76x02_irq_disable(dev, mask); + + /* perform device reset */ + mt76_clear(dev, MT_TXOP_CTRL_CFG, MT_TXOP_ED_CCA_EN); + mt76_wr(dev, MT_MAC_SYS_CTRL, 0); + mt76_clear(dev, MT_WPDMA_GLO_CFG, + MT_WPDMA_GLO_CFG_TX_DMA_EN | MT_WPDMA_GLO_CFG_RX_DMA_EN); + usleep_range(5000, 10000); + mt76_wr(dev, MT_INT_SOURCE_CSR, 0xffffffff); + + /* let fw reset DMA */ + mt76_set(dev, 0x734, 0x3); + + for (i = 0; i < ARRAY_SIZE(dev->mt76.q_tx); i++) + mt76_queue_tx_cleanup(dev, i, true); + + for (i = 0; i < ARRAY_SIZE(dev->mt76.q_rx); i++) + mt76_queue_rx_reset(dev, i); + + mt76_wr(dev, MT_MAC_SYS_CTRL, + MT_MAC_SYS_CTRL_ENABLE_TX | MT_MAC_SYS_CTRL_ENABLE_RX); + mt76_set(dev, MT_WPDMA_GLO_CFG, + MT_WPDMA_GLO_CFG_TX_DMA_EN | MT_WPDMA_GLO_CFG_RX_DMA_EN); + if (dev->ed_monitor) + mt76_set(dev, MT_TXOP_CTRL_CFG, MT_TXOP_ED_CCA_EN); + + if (dev->beacon_mask) + mt76_set(dev, MT_BEACON_TIME_CFG, + MT_BEACON_TIME_CFG_BEACON_TX | + MT_BEACON_TIME_CFG_TBTT_EN); + + mt76x02_irq_enable(dev, mask); + + mutex_unlock(&dev->mt76.mutex); + + clear_bit(MT76_RESET, &dev->mt76.state); + + tasklet_enable(&dev->tx_tasklet); + tasklet_schedule(&dev->tx_tasklet); + + tasklet_enable(&dev->pre_tbtt_tasklet); + + for (i = 0; i < ARRAY_SIZE(dev->mt76.napi); i++) { + napi_enable(&dev->mt76.napi[i]); + napi_schedule(&dev->mt76.napi[i]); + } + + ieee80211_wake_queues(dev->mt76.hw); + + mt76_txq_schedule_all(&dev->mt76); +} + +static void mt76x02_check_tx_hang(struct mt76x02_dev *dev) +{ + if (mt76x02_tx_hang(dev)) { + if (++dev->tx_hang_check < MT_TX_HANG_TH) + return; + + mt76x02_watchdog_reset(dev); + + dev->tx_hang_reset++; + dev->tx_hang_check = 0; + memset(dev->mt76.tx_dma_idx, 0xff, + sizeof(dev->mt76.tx_dma_idx)); + } else { + dev->tx_hang_check = 0; + } +} + +void mt76x02_wdt_work(struct work_struct *work) +{ + struct mt76x02_dev *dev = container_of(work, struct mt76x02_dev, + wdt_work.work); + + mt76x02_check_tx_hang(dev); + + ieee80211_queue_delayed_work(mt76_hw(dev), &dev->wdt_work, + MT_WATCHDOG_TIME); +} diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c index aac64d0969b8..3c878f18e05d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c @@ -93,6 +93,8 @@ void mt76x02_init_device(struct mt76x02_dev *dev) MT_DMA_HDR_LEN; wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION); } else { + INIT_DELAYED_WORK(&dev->wdt_work, mt76x02_wdt_work); + mt76x02_dfs_init_detector(dev); wiphy->reg_notifier = mt76x02_regd_notifier; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c index 9f313824eead..34418e575542 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c @@ -301,6 +301,7 @@ void mt76x2_stop_hardware(struct mt76x02_dev *dev) { cancel_delayed_work_sync(&dev->cal_work); cancel_delayed_work_sync(&dev->mac_work); + cancel_delayed_work_sync(&dev->wdt_work); mt76x02_mcu_set_radio_state(dev, false); mt76x2_mac_stop(dev, false); } diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c index b54a32397486..6a87b88eb036 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c @@ -34,6 +34,8 @@ mt76x2_start(struct ieee80211_hw *hw) ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mac_work, MT_CALIBRATE_INTERVAL); + ieee80211_queue_delayed_work(mt76_hw(dev), &dev->wdt_work, + MT_WATCHDOG_TIME); set_bit(MT76_STATE_RUNNING, &dev->mt76.state); From 32fb47fd9c5553ab77ad0380e1ac421d7b3ac564 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 30 Dec 2018 13:26:40 +0000 Subject: [PATCH 0049/1436] mt76: make const array 'data' static, shrinks object size Don't populate the const array 'data' on the stack but instead make it static. Makes the object code smaller by 78 bytes: Before: text data bss dec hex filename 5438 1080 0 6518 1976 mediatek/mt76/mt76x2/usb_mcu.o After: text data bss dec hex filename 5296 1144 0 6440 1928 mediatek/mt76/mt76x2/usb_mcu.o (gcc version 8.2.0 x86_64) Signed-off-by: Colin Ian King Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x2/usb_mcu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_mcu.c index 45a95ee3a415..152d41fe9ff5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_mcu.c @@ -39,7 +39,7 @@ static void mt76x2u_mcu_load_ivb(struct mt76x02_dev *dev) static void mt76x2u_mcu_enable_patch(struct mt76x02_dev *dev) { struct mt76_usb *usb = &dev->mt76.usb; - const u8 data[] = { + static const u8 data[] = { 0x6f, 0xfc, 0x08, 0x01, 0x20, 0x04, 0x00, 0x00, 0x00, 0x09, 0x00, From e76deac6f00bd855215454228fbb70f2f5f0eea4 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 31 Dec 2018 15:42:54 +0100 Subject: [PATCH 0050/1436] mt76: dma: avoid indirect call in mt76_dma_tx_queue_skb Call mt76_dma_add_buf routine directly in mt76_dma_tx_queue_skb and avoid indirect call if not necessary Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 1db163c40dcf..e769c8a555dd 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -300,7 +300,7 @@ int mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q, if (q->queued + (n + 1) / 2 >= q->ndesc - 1) goto unmap; - return dev->queue_ops->add_buf(dev, q, buf, n, tx_info, skb, t); + return mt76_dma_add_buf(dev, q, buf, n, tx_info, skb, t); unmap: ret = -ENOMEM; From 3121742915b552f1994775f9d34512ccc0a82e66 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 10 Jan 2019 12:14:04 +0100 Subject: [PATCH 0051/1436] mt76: use proper name for __MT76x02_H macro Use proper name for __MT76x02_H macro in mt76x02.h Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02.h b/drivers/net/wireless/mediatek/mt76/mt76x02.h index f594e8c82500..d8bd0509a790 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02.h @@ -15,8 +15,8 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#ifndef __MT76X02_UTIL_H -#define __MT76X02_UTIL_H +#ifndef __MT76x02_H +#define __MT76x02_H #include @@ -238,4 +238,4 @@ mt76x02_rx_get_sta_wcid(struct mt76x02_sta *sta, bool unicast) return &sta->vif->group_wcid; } -#endif +#endif /* __MT76x02_H */ From 5c9decdfbb409d3c410c3e2308cb6110557eb0a7 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 10 Jan 2019 14:27:06 +0100 Subject: [PATCH 0052/1436] mt76: add led support to mt76x0e driver Move mt76x02 led support in mt76x02-lib module in order to add tpt led trigger to mt76x0e driver Tested-by: LGA1150 Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt76x02_regs.h | 23 ++++++++ .../net/wireless/mediatek/mt76/mt76x02_util.c | 59 +++++++++++++++++++ .../net/wireless/mediatek/mt76/mt76x2/mcu.h | 23 -------- .../wireless/mediatek/mt76/mt76x2/pci_init.c | 54 ----------------- 4 files changed, 82 insertions(+), 77 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h b/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h index b8d1ffbac6b5..7401cb94fb72 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h @@ -230,6 +230,29 @@ #define MT_COM_REG2 0x0738 #define MT_COM_REG3 0x073C +#define MT_LED_CTRL 0x0770 +#define MT_LED_CTRL_REPLAY(_n) BIT(0 + (8 * (_n))) +#define MT_LED_CTRL_POLARITY(_n) BIT(1 + (8 * (_n))) +#define MT_LED_CTRL_TX_BLINK_MODE(_n) BIT(2 + (8 * (_n))) +#define MT_LED_CTRL_KICK(_n) BIT(7 + (8 * (_n))) + +#define MT_LED_TX_BLINK_0 0x0774 +#define MT_LED_TX_BLINK_1 0x0778 + +#define MT_LED_S0_BASE 0x077C +#define MT_LED_S0(_n) (MT_LED_S0_BASE + 8 * (_n)) +#define MT_LED_S1_BASE 0x0780 +#define MT_LED_S1(_n) (MT_LED_S1_BASE + 8 * (_n)) +#define MT_LED_STATUS_OFF_MASK GENMASK(31, 24) +#define MT_LED_STATUS_OFF(_v) (((_v) << __ffs(MT_LED_STATUS_OFF_MASK)) & \ + MT_LED_STATUS_OFF_MASK) +#define MT_LED_STATUS_ON_MASK GENMASK(23, 16) +#define MT_LED_STATUS_ON(_v) (((_v) << __ffs(MT_LED_STATUS_ON_MASK)) & \ + MT_LED_STATUS_ON_MASK) +#define MT_LED_STATUS_DURATION_MASK GENMASK(15, 8) +#define MT_LED_STATUS_DURATION(_v) (((_v) << __ffs(MT_LED_STATUS_DURATION_MASK)) & \ + MT_LED_STATUS_DURATION_MASK) + #define MT_FCE_PSE_CTRL 0x0800 #define MT_FCE_PARAMETERS 0x0804 #define MT_FCE_CSO 0x0808 diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c index 3c878f18e05d..d80c8eb5a0ec 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c @@ -75,6 +75,58 @@ static const struct ieee80211_iface_combination mt76x02_if_comb[] = { } }; +static void +mt76x02_led_set_config(struct mt76_dev *mdev, u8 delay_on, + u8 delay_off) +{ + struct mt76x02_dev *dev = container_of(mdev, struct mt76x02_dev, + mt76); + u32 val; + + val = MT_LED_STATUS_DURATION(0xff) | + MT_LED_STATUS_OFF(delay_off) | + MT_LED_STATUS_ON(delay_on); + + mt76_wr(dev, MT_LED_S0(mdev->led_pin), val); + mt76_wr(dev, MT_LED_S1(mdev->led_pin), val); + + val = MT_LED_CTRL_REPLAY(mdev->led_pin) | + MT_LED_CTRL_KICK(mdev->led_pin); + if (mdev->led_al) + val |= MT_LED_CTRL_POLARITY(mdev->led_pin); + mt76_wr(dev, MT_LED_CTRL, val); +} + +static int +mt76x02_led_set_blink(struct led_classdev *led_cdev, + unsigned long *delay_on, + unsigned long *delay_off) +{ + struct mt76_dev *mdev = container_of(led_cdev, struct mt76_dev, + led_cdev); + u8 delta_on, delta_off; + + delta_off = max_t(u8, *delay_off / 10, 1); + delta_on = max_t(u8, *delay_on / 10, 1); + + mt76x02_led_set_config(mdev, delta_on, delta_off); + + return 0; +} + +static void +mt76x02_led_set_brightness(struct led_classdev *led_cdev, + enum led_brightness brightness) +{ + struct mt76_dev *mdev = container_of(led_cdev, struct mt76_dev, + led_cdev); + + if (!brightness) + mt76x02_led_set_config(mdev, 0, 0xff); + else + mt76x02_led_set_config(mdev, 0xff, 0); +} + void mt76x02_init_device(struct mt76x02_dev *dev) { struct ieee80211_hw *hw = mt76_hw(dev); @@ -109,6 +161,13 @@ void mt76x02_init_device(struct mt76x02_dev *dev) BIT(NL80211_IFTYPE_ADHOC); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_VHT_IBSS); + + /* init led callbacks */ + if (IS_ENABLED(CONFIG_MT76_LEDS)) { + dev->mt76.led_cdev.brightness_set = + mt76x02_led_set_brightness; + dev->mt76.led_cdev.blink_set = mt76x02_led_set_blink; + } } hw->sta_data_size = sizeof(struct mt76x02_sta); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/mcu.h b/drivers/net/wireless/mediatek/mt76/mt76x2/mcu.h index acfa2b570c7c..40ef43926c06 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/mcu.h @@ -26,29 +26,6 @@ #define MT_MCU_PCIE_REMAP_BASE2 0x0744 #define MT_MCU_PCIE_REMAP_BASE3 0x0748 -#define MT_LED_CTRL 0x0770 -#define MT_LED_CTRL_REPLAY(_n) BIT(0 + (8 * (_n))) -#define MT_LED_CTRL_POLARITY(_n) BIT(1 + (8 * (_n))) -#define MT_LED_CTRL_TX_BLINK_MODE(_n) BIT(2 + (8 * (_n))) -#define MT_LED_CTRL_KICK(_n) BIT(7 + (8 * (_n))) - -#define MT_LED_TX_BLINK_0 0x0774 -#define MT_LED_TX_BLINK_1 0x0778 - -#define MT_LED_S0_BASE 0x077C -#define MT_LED_S0(_n) (MT_LED_S0_BASE + 8 * (_n)) -#define MT_LED_S1_BASE 0x0780 -#define MT_LED_S1(_n) (MT_LED_S1_BASE + 8 * (_n)) -#define MT_LED_STATUS_OFF_MASK GENMASK(31, 24) -#define MT_LED_STATUS_OFF(_v) (((_v) << __ffs(MT_LED_STATUS_OFF_MASK)) & \ - MT_LED_STATUS_OFF_MASK) -#define MT_LED_STATUS_ON_MASK GENMASK(23, 16) -#define MT_LED_STATUS_ON(_v) (((_v) << __ffs(MT_LED_STATUS_ON_MASK)) & \ - MT_LED_STATUS_ON_MASK) -#define MT_LED_STATUS_DURATION_MASK GENMASK(15, 8) -#define MT_LED_STATUS_DURATION(_v) (((_v) << __ffs(MT_LED_STATUS_DURATION_MASK)) & \ - MT_LED_STATUS_DURATION_MASK) - #define MT_MCU_ROM_PATCH_OFFSET 0x80000 #define MT_MCU_ROM_PATCH_ADDR 0x90000 diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c index 34418e575542..4347d5e7a915 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c @@ -342,54 +342,6 @@ struct mt76x02_dev *mt76x2_alloc_device(struct device *pdev) return dev; } -static void mt76x2_led_set_config(struct mt76_dev *mt76, u8 delay_on, - u8 delay_off) -{ - struct mt76x02_dev *dev = container_of(mt76, struct mt76x02_dev, - mt76); - u32 val; - - val = MT_LED_STATUS_DURATION(0xff) | - MT_LED_STATUS_OFF(delay_off) | - MT_LED_STATUS_ON(delay_on); - - mt76_wr(dev, MT_LED_S0(mt76->led_pin), val); - mt76_wr(dev, MT_LED_S1(mt76->led_pin), val); - - val = MT_LED_CTRL_REPLAY(mt76->led_pin) | - MT_LED_CTRL_KICK(mt76->led_pin); - if (mt76->led_al) - val |= MT_LED_CTRL_POLARITY(mt76->led_pin); - mt76_wr(dev, MT_LED_CTRL, val); -} - -static int mt76x2_led_set_blink(struct led_classdev *led_cdev, - unsigned long *delay_on, - unsigned long *delay_off) -{ - struct mt76_dev *mt76 = container_of(led_cdev, struct mt76_dev, - led_cdev); - u8 delta_on, delta_off; - - delta_off = max_t(u8, *delay_off / 10, 1); - delta_on = max_t(u8, *delay_on / 10, 1); - - mt76x2_led_set_config(mt76, delta_on, delta_off); - return 0; -} - -static void mt76x2_led_set_brightness(struct led_classdev *led_cdev, - enum led_brightness brightness) -{ - struct mt76_dev *mt76 = container_of(led_cdev, struct mt76_dev, - led_cdev); - - if (!brightness) - mt76x2_led_set_config(mt76, 0, 0xff); - else - mt76x2_led_set_config(mt76, 0xff, 0); -} - int mt76x2_register_device(struct mt76x02_dev *dev) { int ret; @@ -404,12 +356,6 @@ int mt76x2_register_device(struct mt76x02_dev *dev) mt76x02_config_mac_addr_list(dev); - /* init led callbacks */ - if (IS_ENABLED(CONFIG_MT76_LEDS)) { - dev->mt76.led_cdev.brightness_set = mt76x2_led_set_brightness; - dev->mt76.led_cdev.blink_set = mt76x2_led_set_blink; - } - ret = mt76_register_device(&dev->mt76, true, mt76x02_rates, ARRAY_SIZE(mt76x02_rates)); if (ret) From 2d2d478576d71000b29c52668c5712c825ee9af8 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Wed, 9 Jan 2019 10:13:55 +0800 Subject: [PATCH 0053/1436] pinctrl: mediatek: fix Kconfig build errors for moore core on i386 or x86_64: Lots of build errors for drivers/pinctrl/mediatek/pinctrl-moore.c when CONFIG_OF is not enabled (but COMPILE_TEST is). first this: WARNING: unmet direct dependencies detected for PINCTRL_MTK_MOORE Depends on [n]: PINCTRL [=y] && (ARCH_MEDIATEK || COMPILE_TEST [=y]) && OF [=n] Selected by [y]: - PINCTRL_MT7623 [=y] && PINCTRL [=y] && (ARCH_MEDIATEK || COMPILE_TEST [=y]) && (MACH_MT7623 || COMPILE_TEST [=y]) and then: ../drivers/pinctrl/mediatek/pinctrl-moore.c:22:44: error: array type has incomplete element type static const struct pinconf_generic_params mtk_custom_bindings[] = { (etc) Fixes: b5af33df50e9 ("pinctrl: mediatek: improve Kconfig dependencies") Cc: stable@vger.kernel.org Reported-by: Randy Dunlap Signed-off-by: Ryder Lee Acked-by: Sean Wang Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pinctrl/mediatek/Kconfig b/drivers/pinctrl/mediatek/Kconfig index 1817786ab6aa..a005cbccb4f7 100644 --- a/drivers/pinctrl/mediatek/Kconfig +++ b/drivers/pinctrl/mediatek/Kconfig @@ -45,12 +45,14 @@ config PINCTRL_MT2701 config PINCTRL_MT7623 bool "Mediatek MT7623 pin control with generic binding" depends on MACH_MT7623 || COMPILE_TEST + depends on OF default MACH_MT7623 select PINCTRL_MTK_MOORE config PINCTRL_MT7629 bool "Mediatek MT7629 pin control" depends on MACH_MT7629 || COMPILE_TEST + depends on OF default MACH_MT7629 select PINCTRL_MTK_MOORE @@ -92,6 +94,7 @@ config PINCTRL_MT6797 config PINCTRL_MT7622 bool "MediaTek MT7622 pin control" + depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_MOORE From 00ccd4532c8a61e6c65ca62eb7b6da6a272c30f1 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sat, 12 Jan 2019 09:02:51 +0800 Subject: [PATCH 0054/1436] ARM: dts: vf610-bk4: fix incorrect #address-cells for dspi3 The dspi3 is used as slave controller on vf610-bk4, and the default '#address-cells = <1>;' setting in vfxxx.dtsi causes the following DTC warning. DTC arch/arm/boot/dts/vf610-bk4.dtb ../arch/arm/boot/dts/vfxxx.dtsi:550.24-563.6: Warning (spi_bus_bridge): /soc/aips-bus@40080000/spi@400ad000: incorrect #address-cells for SPI bus also defined at ../arch/arm/boot/dts/vf610-bk4.dts:107.8-119.3 arch/arm/boot/dts/vf610-bk4.dtb: Warning (spi_bus_reg): Failed prerequisite 'spi_bus_bridge' For spi device used as slave controller, '#address-cells' should be 0. Let's overwrite the property in vf610-bk4.dts to fix the warning. Reported-by: Stephen Rothwell Signed-off-by: Shawn Guo --- arch/arm/boot/dts/vf610-bk4.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/vf610-bk4.dts b/arch/arm/boot/dts/vf610-bk4.dts index 689c8930dce3..b08d561d6748 100644 --- a/arch/arm/boot/dts/vf610-bk4.dts +++ b/arch/arm/boot/dts/vf610-bk4.dts @@ -110,11 +110,11 @@ bus-num = <3>; status = "okay"; spi-slave; + #address-cells = <0>; - slave@0 { + slave { compatible = "lwn,bk4"; spi-max-frequency = <30000000>; - reg = <0>; }; }; From 08b88e80a1f476970284235278bf27a273b94118 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 21 Dec 2018 18:12:03 +0000 Subject: [PATCH 0055/1436] ARM: dts: imx: replace gpio-key,wakeup with wakeup-source property Most of the legacy "gpio-key,wakeup" and "enable-sdio-wakeup" boolean properties are already replaced with "wakeup-source". However few occurrences of old property has popped up again, probably from the remnants in downstream trees. Almost all of those were remove couple of years back. Replace the legacy properties with the unified "wakeup-source" property introduced in the commit 700a38b27eef ("Input: gpio_keys - switch to using generic device properties") and commit 0dbcdc0622ea ("mmc: core: enable support for the standard "wakeup-source" property") Cc: Fabio Estevam Cc: Shawn Guo Cc: NXP Linux Team Signed-off-by: Sudeep Holla Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6q-pistachio.dts | 2 +- arch/arm/boot/dts/imx6sll-evk.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6q-pistachio.dts b/arch/arm/boot/dts/imx6q-pistachio.dts index 5edf858c8b86..a31b17eaf51c 100644 --- a/arch/arm/boot/dts/imx6q-pistachio.dts +++ b/arch/arm/boot/dts/imx6q-pistachio.dts @@ -103,7 +103,7 @@ power { label = "Power Button"; gpios = <&gpio2 12 GPIO_ACTIVE_LOW>; - gpio-key,wakeup; + wakeup-source; linux,code = ; }; }; diff --git a/arch/arm/boot/dts/imx6sll-evk.dts b/arch/arm/boot/dts/imx6sll-evk.dts index d8163705363e..4a31a415f88e 100644 --- a/arch/arm/boot/dts/imx6sll-evk.dts +++ b/arch/arm/boot/dts/imx6sll-evk.dts @@ -309,7 +309,7 @@ pinctrl-2 = <&pinctrl_usdhc3_200mhz>; cd-gpios = <&gpio3 22 GPIO_ACTIVE_LOW>; keep-power-in-suspend; - enable-sdio-wakeup; + wakeup-source; vmmc-supply = <®_sd3_vmmc>; status = "okay"; }; From 0808831dc62e90023ad14ff8da4804c7846e904b Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Sun, 30 Dec 2018 19:07:01 -0800 Subject: [PATCH 0056/1436] iio: chemical: atlas-ph-sensor: correct IIO_TEMP values to millicelsius IIO_TEMP scale value for temperature was incorrect and not in millicelsius as required by the ABI documentation. Signed-off-by: Matt Ranostay Fixes: 27dec00ecf2d (iio: chemical: add Atlas pH-SM sensor support) Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/atlas-ph-sensor.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/iio/chemical/atlas-ph-sensor.c b/drivers/iio/chemical/atlas-ph-sensor.c index a406ad31b096..3a20cb5d9bff 100644 --- a/drivers/iio/chemical/atlas-ph-sensor.c +++ b/drivers/iio/chemical/atlas-ph-sensor.c @@ -444,9 +444,8 @@ static int atlas_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_SCALE: switch (chan->type) { case IIO_TEMP: - *val = 1; /* 0.01 */ - *val2 = 100; - break; + *val = 10; + return IIO_VAL_INT; case IIO_PH: *val = 1; /* 0.001 */ *val2 = 1000; @@ -477,7 +476,7 @@ static int atlas_write_raw(struct iio_dev *indio_dev, int val, int val2, long mask) { struct atlas_data *data = iio_priv(indio_dev); - __be32 reg = cpu_to_be32(val); + __be32 reg = cpu_to_be32(val / 10); if (val2 != 0 || val < 0 || val > 20000) return -EINVAL; From 9bcf15f75cac3c6a00d8f8083a635de9c8537799 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 5 Jan 2019 19:36:18 +0100 Subject: [PATCH 0057/1436] iio: adc: axp288: Fix TS-pin handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prior to this commit there were 3 issues with our handling of the TS-pin: 1) There are 2 ways how the firmware can disable monitoring of the TS-pin for designs which do not have a temperature-sensor for the battery: a) Clearing bit 0 of the AXP20X_ADC_EN1 register b) Setting bit 2 of the AXP288_ADC_TS_PIN_CTRL monitoring Prior to this commit we were unconditionally setting both bits to the value used on devices with a TS. This causes the temperature protection to kick in on devices without a TS, such as the Jumper ezbook v2, causing them to not charge under Linux. This commit fixes this by using regmap_update_bits when updating these 2 registers, leaving the 2 mentioned bits alone. The next 2 problems are related to our handling of the current-source for the TS-pin. The current-source used for the battery temp-sensor (TS) is shared with the GPADC. For proper fuel-gauge and charger operation the TS current-source needs to be permanently on. But to read the GPADC we need to temporary switch the TS current-source to ondemand, so that the GPADC can use it, otherwise we will always read an all 0 value. 2) Problem 2 is we were writing hardcoded values to the ADC TS pin-ctrl register, overwriting various other unrelated bits. Specifically we were overwriting the current-source setting for the TS and GPIO0 pins, forcing it to 80ųA independent of its original setting. On a Chuwi Vi10 tablet this was causing us to get a too high adc value (due to a too high current-source) resulting in the following errors being logged: ACPI Error: AE_ERROR, Returned by Handler for [UserDefinedRegion] ACPI Error: Method parse/execution failed \_SB.SXP1._TMP, AE_ERROR This commit fixes this by using regmap_update_bits to change only the relevant bits. 3) After reading the GPADC channel we were unconditionally enabling the TS current-source even on devices where the TS-pin is not used and the current-source thus was off before axp288_adc_read_raw call. This commit fixes this by making axp288_adc_set_ts a nop on devices where the ADC is not enabled for the TS-pin. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1610545 Fixes: 3091141d7803 ("iio: adc: axp288: Fix the GPADC pin ...") Signed-off-by: Hans de Goede Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/axp288_adc.c | 76 ++++++++++++++++++++++++++++-------- 1 file changed, 60 insertions(+), 16 deletions(-) diff --git a/drivers/iio/adc/axp288_adc.c b/drivers/iio/adc/axp288_adc.c index 031d568b4972..4e339cfd0c54 100644 --- a/drivers/iio/adc/axp288_adc.c +++ b/drivers/iio/adc/axp288_adc.c @@ -27,9 +27,18 @@ #include #include -#define AXP288_ADC_EN_MASK 0xF1 -#define AXP288_ADC_TS_PIN_GPADC 0xF2 -#define AXP288_ADC_TS_PIN_ON 0xF3 +/* + * This mask enables all ADCs except for the battery temp-sensor (TS), that is + * left as-is to avoid breaking charging on devices without a temp-sensor. + */ +#define AXP288_ADC_EN_MASK 0xF0 +#define AXP288_ADC_TS_ENABLE 0x01 + +#define AXP288_ADC_TS_CURRENT_ON_OFF_MASK GENMASK(1, 0) +#define AXP288_ADC_TS_CURRENT_OFF (0 << 0) +#define AXP288_ADC_TS_CURRENT_ON_WHEN_CHARGING (1 << 0) +#define AXP288_ADC_TS_CURRENT_ON_ONDEMAND (2 << 0) +#define AXP288_ADC_TS_CURRENT_ON (3 << 0) enum axp288_adc_id { AXP288_ADC_TS, @@ -44,6 +53,7 @@ enum axp288_adc_id { struct axp288_adc_info { int irq; struct regmap *regmap; + bool ts_enabled; }; static const struct iio_chan_spec axp288_adc_channels[] = { @@ -115,21 +125,33 @@ static int axp288_adc_read_channel(int *val, unsigned long address, return IIO_VAL_INT; } -static int axp288_adc_set_ts(struct regmap *regmap, unsigned int mode, - unsigned long address) +/* + * The current-source used for the battery temp-sensor (TS) is shared + * with the GPADC. For proper fuel-gauge and charger operation the TS + * current-source needs to be permanently on. But to read the GPADC we + * need to temporary switch the TS current-source to ondemand, so that + * the GPADC can use it, otherwise we will always read an all 0 value. + */ +static int axp288_adc_set_ts(struct axp288_adc_info *info, + unsigned int mode, unsigned long address) { int ret; - /* channels other than GPADC do not need to switch TS pin */ + /* No need to switch the current-source if the TS pin is disabled */ + if (!info->ts_enabled) + return 0; + + /* Channels other than GPADC do not need the current source */ if (address != AXP288_GP_ADC_H) return 0; - ret = regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, mode); + ret = regmap_update_bits(info->regmap, AXP288_ADC_TS_PIN_CTRL, + AXP288_ADC_TS_CURRENT_ON_OFF_MASK, mode); if (ret) return ret; /* When switching to the GPADC pin give things some time to settle */ - if (mode == AXP288_ADC_TS_PIN_GPADC) + if (mode == AXP288_ADC_TS_CURRENT_ON_ONDEMAND) usleep_range(6000, 10000); return 0; @@ -145,14 +167,14 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev, mutex_lock(&indio_dev->mlock); switch (mask) { case IIO_CHAN_INFO_RAW: - if (axp288_adc_set_ts(info->regmap, AXP288_ADC_TS_PIN_GPADC, + if (axp288_adc_set_ts(info, AXP288_ADC_TS_CURRENT_ON_ONDEMAND, chan->address)) { dev_err(&indio_dev->dev, "GPADC mode\n"); ret = -EINVAL; break; } ret = axp288_adc_read_channel(val, chan->address, info->regmap); - if (axp288_adc_set_ts(info->regmap, AXP288_ADC_TS_PIN_ON, + if (axp288_adc_set_ts(info, AXP288_ADC_TS_CURRENT_ON, chan->address)) dev_err(&indio_dev->dev, "TS pin restore\n"); break; @@ -164,13 +186,35 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev, return ret; } -static int axp288_adc_set_state(struct regmap *regmap) +static int axp288_adc_initialize(struct axp288_adc_info *info) { - /* ADC should be always enabled for internal FG to function */ - if (regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, AXP288_ADC_TS_PIN_ON)) - return -EIO; + int ret, adc_enable_val; - return regmap_write(regmap, AXP20X_ADC_EN1, AXP288_ADC_EN_MASK); + /* + * Determine if the TS pin is enabled and set the TS current-source + * accordingly. + */ + ret = regmap_read(info->regmap, AXP20X_ADC_EN1, &adc_enable_val); + if (ret) + return ret; + + if (adc_enable_val & AXP288_ADC_TS_ENABLE) { + info->ts_enabled = true; + ret = regmap_update_bits(info->regmap, AXP288_ADC_TS_PIN_CTRL, + AXP288_ADC_TS_CURRENT_ON_OFF_MASK, + AXP288_ADC_TS_CURRENT_ON); + } else { + info->ts_enabled = false; + ret = regmap_update_bits(info->regmap, AXP288_ADC_TS_PIN_CTRL, + AXP288_ADC_TS_CURRENT_ON_OFF_MASK, + AXP288_ADC_TS_CURRENT_OFF); + } + if (ret) + return ret; + + /* Turn on the ADC for all channels except TS, leave TS as is */ + return regmap_update_bits(info->regmap, AXP20X_ADC_EN1, + AXP288_ADC_EN_MASK, AXP288_ADC_EN_MASK); } static const struct iio_info axp288_adc_iio_info = { @@ -200,7 +244,7 @@ static int axp288_adc_probe(struct platform_device *pdev) * Set ADC to enabled state at all time, including system suspend. * otherwise internal fuel gauge functionality may be affected. */ - ret = axp288_adc_set_state(axp20x->regmap); + ret = axp288_adc_initialize(info); if (ret) { dev_err(&pdev->dev, "unable to enable ADC device\n"); return ret; From ee17e5d6201c66492a0e8053190fca2ed2b8457d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 12 Jan 2019 11:48:20 -0600 Subject: [PATCH 0058/1436] signal: Make siginmask safe when passed a signal of 0 Eric Biggers reported: > The following commit, which went into v4.20, introduced undefined behavior when > sys_rt_sigqueueinfo() is called with sig=0: > > commit 4ce5f9c9e7546915c559ffae594e6d73f918db00 > Author: Eric W. Biederman > Date: Tue Sep 25 12:59:31 2018 +0200 > > signal: Use a smaller struct siginfo in the kernel > > In sig_specific_sicodes(), used from known_siginfo_layout(), the expression > '1ULL << ((sig)-1)' is undefined as it evaluates to 1ULL << 4294967295. > > Reproducer: > > #include > #include > #include > > int main(void) > { > siginfo_t si = { .si_code = 1 }; > syscall(__NR_rt_sigqueueinfo, 0, 0, &si); > } > > UBSAN report for v5.0-rc1: > > UBSAN: Undefined behaviour in kernel/signal.c:2946:7 > shift exponent 4294967295 is too large for 64-bit type 'long unsigned int' > CPU: 2 PID: 346 Comm: syz_signal Not tainted 5.0.0-rc1 #25 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 > Call Trace: > __dump_stack lib/dump_stack.c:77 [inline] > dump_stack+0x70/0xa5 lib/dump_stack.c:113 > ubsan_epilogue+0xd/0x40 lib/ubsan.c:159 > __ubsan_handle_shift_out_of_bounds+0x12c/0x170 lib/ubsan.c:425 > known_siginfo_layout+0xae/0xe0 kernel/signal.c:2946 > post_copy_siginfo_from_user kernel/signal.c:3009 [inline] > __copy_siginfo_from_user+0x35/0x60 kernel/signal.c:3035 > __do_sys_rt_sigqueueinfo kernel/signal.c:3553 [inline] > __se_sys_rt_sigqueueinfo kernel/signal.c:3549 [inline] > __x64_sys_rt_sigqueueinfo+0x31/0x70 kernel/signal.c:3549 > do_syscall_64+0x4c/0x1b0 arch/x86/entry/common.c:290 > entry_SYSCALL_64_after_hwframe+0x49/0xbe > RIP: 0033:0x433639 > Code: c4 18 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b 27 00 00 c3 66 2e 0f 1f 84 00 00 00 00 > RSP: 002b:00007fffcb289fc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000081 > RAX: ffffffffffffffda RBX: 00000000004002e0 RCX: 0000000000433639 > RDX: 00007fffcb289fd0 RSI: 0000000000000000 RDI: 0000000000000000 > RBP: 00000000006b2018 R08: 000000000000004d R09: 0000000000000000 > R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000401560 > R13: 00000000004015f0 R14: 0000000000000000 R15: 0000000000000000 I have looked at the other callers of siginmask and they all appear to in locations where sig can not be zero. I have looked at the code generation of adding an extra test against zero and gcc was able with a simple decrement instruction to combine the two tests together. So the at most adding this test cost a single cpu cycle. In practice that decrement instruction was already present as part of the mask comparison, so the only change was when the instruction was executed. So given that it is cheap, and obviously correct to update siginmask to verify the signal is not zero. Fix this issue there to avoid any future problems. Reported-by: Eric Biggers Fixes: 4ce5f9c9e754 ("signal: Use a smaller struct siginfo in the kernel") Signed-off-by: "Eric W. Biederman" --- include/linux/signal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/signal.h b/include/linux/signal.h index cc7e2c1cd444..9702016734b1 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -392,7 +392,7 @@ extern bool unhandled_signal(struct task_struct *tsk, int sig); #endif #define siginmask(sig, mask) \ - ((sig) < SIGRTMIN && (rt_sigmask(sig) & (mask))) + ((sig) > 0 && (sig) < SIGRTMIN && (rt_sigmask(sig) & (mask))) #define SIG_KERNEL_ONLY_MASK (\ rt_sigmask(SIGKILL) | rt_sigmask(SIGSTOP)) From b119d3bc328e7a9574861ebe0c2110e2776c2de1 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Fri, 11 Jan 2019 23:13:09 +0000 Subject: [PATCH 0059/1436] tools: iio: iio_generic_buffer: make num_loops signed Currently, num_loops is unsigned, but it's set by strtoll, which returns a (signed) long long int. This could lead to overflow, and it also makes the check "num_loops < 0" always be false, since num_loops is unsigned. Setting num_loops to -1 to loop forever is almost working because num_loops is getting set to a very high number, but it's technically still incorrect. Fix this issue by making num_loops signed. This also fixes an error found by Smatch. Signed-off-by: Martin Kelly Reported-by: Dan Carpenter Fixes: 55dda0abcf9d ("tools: iio: iio_generic_buffer: allow continuous looping") Cc: Signed-off-by: Jonathan Cameron --- tools/iio/iio_generic_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c index 3040830d7797..84545666a09c 100644 --- a/tools/iio/iio_generic_buffer.c +++ b/tools/iio/iio_generic_buffer.c @@ -330,7 +330,7 @@ static const struct option longopts[] = { int main(int argc, char **argv) { - unsigned long long num_loops = 2; + long long num_loops = 2; unsigned long timedelay = 1000000; unsigned long buf_len = 128; From f214ff521fb1f861c8d7f7d0af98b06bf61b3369 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Fri, 11 Jan 2019 13:57:07 -0600 Subject: [PATCH 0060/1436] iio: ti-ads8688: Update buffer allocation for timestamps Per Jonathan Cameron, the buffer needs to allocate room for a 64 bit timestamp as well as the channels. Change the buffer to allocate this additional space. Fixes: 2a86487786b5c ("iio: adc: ti-ads8688: add trigger and buffer support") Signed-off-by: Dan Murphy Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti-ads8688.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/ti-ads8688.c b/drivers/iio/adc/ti-ads8688.c index 184d686ebd99..8b4568edd5cb 100644 --- a/drivers/iio/adc/ti-ads8688.c +++ b/drivers/iio/adc/ti-ads8688.c @@ -41,6 +41,7 @@ #define ADS8688_VREF_MV 4096 #define ADS8688_REALBITS 16 +#define ADS8688_MAX_CHANNELS 8 /* * enum ads8688_range - ADS8688 reference voltage range @@ -385,7 +386,7 @@ static irqreturn_t ads8688_trigger_handler(int irq, void *p) { struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; - u16 buffer[8]; + u16 buffer[ADS8688_MAX_CHANNELS + sizeof(s64)/sizeof(u16)]; int i, j = 0; for (i = 0; i < indio_dev->masklength; i++) { From ba0f4560526ba19300c07ed5a3c1df7592815dc6 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Sat, 29 Dec 2018 10:01:18 +0000 Subject: [PATCH 0061/1436] ARM: dts: imx6sx: correct backward compatible of gpt i.MX6SX has same GPT type as i.MX6DL, in GPT driver, it uses below TIMER_OF_DECLARE, so the backward compatible should be "fsl,imx6dl-gpt", correct it. TIMER_OF_DECLARE(imx6sx_timer, "fsl,imx6sx-gpt", imx6dl_timer_init_dt); Signed-off-by: Anson Huang Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6sx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index 272ff6133ec1..d1375d3650fd 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -467,7 +467,7 @@ }; gpt: gpt@2098000 { - compatible = "fsl,imx6sx-gpt", "fsl,imx31-gpt"; + compatible = "fsl,imx6sx-gpt", "fsl,imx6dl-gpt"; reg = <0x02098000 0x4000>; interrupts = ; clocks = <&clks IMX6SX_CLK_GPT_BUS>, From dc14455841773df52b15a17b36d01a26745f212b Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 13 Jan 2019 17:57:22 +0800 Subject: [PATCH 0062/1436] pinctrl: sunxi: Fix and simplify pin bank regulator handling The new per-pin-bank regulator handling code in the sunxi pinctrl driver has mismatched conditions for enabling and disabling the regulator: it is enabled each time a pin is requested, but only disabled when the pin-bank's reference count reaches zero. Since we are doing reference counting already, there's no need to enable the regulator each time a pin is requested. Instead we can just do it for the first requested pin of each pin-bank. Thus we can reverse the test and bail out early if it's not the first occurrence. Fixes: 9a2a566adb00 ("pinctrl: sunxi: Deal with per-bank regulators") Acked-by: Maxime Ripard Signed-off-by: Chen-Yu Tsai Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-sunxi.c | 36 ++++++++++++--------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index 5d9184d18c16..9ad6e9c2adab 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -699,25 +699,21 @@ static int sunxi_pmx_request(struct pinctrl_dev *pctldev, unsigned offset) struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev); unsigned short bank = offset / PINS_PER_BANK; struct sunxi_pinctrl_regulator *s_reg = &pctl->regulators[bank]; - struct regulator *reg; + struct regulator *reg = s_reg->regulator; + char supply[16]; int ret; - reg = s_reg->regulator; - if (!reg) { - char supply[16]; - - snprintf(supply, sizeof(supply), "vcc-p%c", 'a' + bank); - reg = regulator_get(pctl->dev, supply); - if (IS_ERR(reg)) { - dev_err(pctl->dev, "Couldn't get bank P%c regulator\n", - 'A' + bank); - return PTR_ERR(reg); - } - - s_reg->regulator = reg; - refcount_set(&s_reg->refcount, 1); - } else { + if (reg) { refcount_inc(&s_reg->refcount); + return 0; + } + + snprintf(supply, sizeof(supply), "vcc-p%c", 'a' + bank); + reg = regulator_get(pctl->dev, supply); + if (IS_ERR(reg)) { + dev_err(pctl->dev, "Couldn't get bank P%c regulator\n", + 'A' + bank); + return PTR_ERR(reg); } ret = regulator_enable(reg); @@ -727,13 +723,13 @@ static int sunxi_pmx_request(struct pinctrl_dev *pctldev, unsigned offset) goto out; } + s_reg->regulator = reg; + refcount_set(&s_reg->refcount, 1); + return 0; out: - if (refcount_dec_and_test(&s_reg->refcount)) { - regulator_put(s_reg->regulator); - s_reg->regulator = NULL; - } + regulator_put(s_reg->regulator); return ret; } From ca4438442ef263cbaa3ae62a712143132cf508c1 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 13 Jan 2019 17:57:23 +0800 Subject: [PATCH 0063/1436] pinctrl: sunxi: Consider pin_base when calculating regulator array index On most newer Allwinner SoCs, there are two pinctrl devices, the PIO and R_PIO. PIO covers pin-banks PA to PI (PJ and PK have not been seen), while R_PIO covers PL to PN. The regulator array only has space for 12 entries, which was designed to cover PA to PL. On the A80, the pin banks go up to PN, which would be the 14th entry in the regulator array. However since the driver only needs to track regulators for its own pin banks, the array only needs to have 9 entries, and also take in to account the value of pin_base, such that the regulator for the first pin-bank of the pinctrl device, be it "PA" or "PL" uses the first entry of the array. Base the regulator array index on pin_base, such that "PA" for PIO and "PL" for R_PIO both take the first element within their respective device's regulator array. Also decrease the size of the regulator array to 9, just enough to cover "PA" to "PI". Fixes: 9a2a566adb00 ("pinctrl: sunxi: Deal with per-bank regulators") Signed-off-by: Chen-Yu Tsai Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-sunxi.c | 8 ++++++-- drivers/pinctrl/sunxi/pinctrl-sunxi.h | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index 9ad6e9c2adab..0e7fa69e93df 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -698,7 +698,9 @@ static int sunxi_pmx_request(struct pinctrl_dev *pctldev, unsigned offset) { struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev); unsigned short bank = offset / PINS_PER_BANK; - struct sunxi_pinctrl_regulator *s_reg = &pctl->regulators[bank]; + unsigned short bank_offset = bank - pctl->desc->pin_base / + PINS_PER_BANK; + struct sunxi_pinctrl_regulator *s_reg = &pctl->regulators[bank_offset]; struct regulator *reg = s_reg->regulator; char supply[16]; int ret; @@ -738,7 +740,9 @@ static int sunxi_pmx_free(struct pinctrl_dev *pctldev, unsigned offset) { struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev); unsigned short bank = offset / PINS_PER_BANK; - struct sunxi_pinctrl_regulator *s_reg = &pctl->regulators[bank]; + unsigned short bank_offset = bank - pctl->desc->pin_base / + PINS_PER_BANK; + struct sunxi_pinctrl_regulator *s_reg = &pctl->regulators[bank_offset]; if (!refcount_dec_and_test(&s_reg->refcount)) return 0; diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.h b/drivers/pinctrl/sunxi/pinctrl-sunxi.h index e340d2a24b44..034c0317c8d6 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.h +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.h @@ -136,7 +136,7 @@ struct sunxi_pinctrl { struct gpio_chip *chip; const struct sunxi_pinctrl_desc *desc; struct device *dev; - struct sunxi_pinctrl_regulator regulators[12]; + struct sunxi_pinctrl_regulator regulators[9]; struct irq_domain *domain; struct sunxi_pinctrl_function *functions; unsigned nfunctions; From f29200c8b1e267b109242677ae7f2146c7682d14 Mon Sep 17 00:00:00 2001 From: Loys Ollivier Date: Mon, 14 Jan 2019 15:44:21 +0100 Subject: [PATCH 0064/1436] arm64: dts: meson: Fix mmc cd-gpios polarity Commit 89a5e15bcba8 ("gpio/mmc/of: Respect polarity in the device tree") changed the behavior of "cd-inverted" to follow the device tree bindings specification: According to SDHCI standard, CD lines are specified as "active low". Using the "cd-inverted" property means that the CD line is "active high". Fix the SD card description for meson by setting the cd-gpios as "active low", according to the boards specifications, and dropping the "cd-inverted" property. Fixes: 89a5e15bcba8 ("gpio/mmc/of: Respect polarity in the device tree") Signed-off-by: Loys Ollivier Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi | 3 +-- arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts | 3 +-- arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts | 3 +-- arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts | 3 +-- arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi | 3 +-- arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi | 3 +-- arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi | 3 +-- arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts | 3 +-- arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts | 3 +-- arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts | 3 +-- arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi | 3 +-- arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts | 3 +-- arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts | 3 +-- arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts | 3 +-- 14 files changed, 14 insertions(+), 28 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi index e14e0ce7e89f..016641a41694 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi @@ -187,8 +187,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vddio_boot>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts index 8cd50b75171d..ade2ee09ae96 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts @@ -305,8 +305,7 @@ max-frequency = <200000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddio_ao3v3>; vqmmc-supply = <&vddio_tf>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts index 4cf7f6e80c6a..25105ac96d55 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts @@ -238,8 +238,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vddio_card>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts index 2e1cd5e3a246..1cc9dc68ef00 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts @@ -258,8 +258,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&tflash_vdd>; vqmmc-supply = <&tf_io>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi index ce862266b9aa..0be0f2a5d2fe 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi @@ -196,8 +196,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vddio_card>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi index 93a4acf2c46c..ad4d50bd9d77 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi @@ -154,8 +154,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vcc_3v3>; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi index ec09bb5792b7..2d2db783c44c 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi @@ -211,8 +211,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vcc_3v3>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts index f1c410e2da2b..796baea7a0bf 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts @@ -131,8 +131,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vddio_card>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts index db293440e4ca..255cede7b447 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts @@ -238,8 +238,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vcc_3v3>; vqmmc-supply = <&vcc_card>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts index 6739697be1de..9cbdb85fb591 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts @@ -183,8 +183,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vddio_card>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi index a1b31013ab6e..bc811a2faf42 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi @@ -137,8 +137,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vddio_boot>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts index 3c3a667a8df8..3f086ed7de05 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts @@ -356,8 +356,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vddio_boot>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts index f7a1cffab4a8..8acfd40090d2 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts @@ -147,8 +147,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vddio_boot>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts index 7212dc4531e4..7fa20a8ede17 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts @@ -170,8 +170,7 @@ max-frequency = <100000000>; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vddao_3v3>; vqmmc-supply = <&vddio_boot>; From 73f5a82bb3c9fce550da4a74a32b8cb064b50663 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 13 Jan 2019 15:57:04 +0200 Subject: [PATCH 0065/1436] RDMA/mad: Reduce MAD scope to mlx5_ib only Management Datagram Interface (MAD) is applicable only when physical port is Infiniband. It makes MAD command logic to be completely unrelated to eth/core parts of mlx5. Signed-off-by: Leon Romanovsky Acked-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cmd.c | 37 +++++++++ drivers/infiniband/hw/mlx5/cmd.h | 2 + drivers/infiniband/hw/mlx5/mad.c | 11 ++- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 - .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/mad.c | 75 ------------------- include/linux/mlx5/driver.h | 2 - 7 files changed, 47 insertions(+), 85 deletions(-) delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/mad.c diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 356bccc715ee..6bcc63aaa50b 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -345,3 +345,40 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, counter_set_id); return err; } + +int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, + u16 opmod, u8 port) +{ + int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out); + int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in); + int err = -ENOMEM; + void *data; + void *resp; + u32 *out; + u32 *in; + + in = kzalloc(inlen, GFP_KERNEL); + out = kzalloc(outlen, GFP_KERNEL); + if (!in || !out) + goto out; + + MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC); + MLX5_SET(mad_ifc_in, in, op_mod, opmod); + MLX5_SET(mad_ifc_in, in, port, port); + + data = MLX5_ADDR_OF(mad_ifc_in, in, mad); + memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); + + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); + if (err) + goto out; + + resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet); + memcpy(outb, resp, + MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet)); + +out: + kfree(out); + kfree(in); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 1e76dc67a369..923a7b93f507 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -63,4 +63,6 @@ int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid); int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid); int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, u16 uid); +int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, + u16 opmod, u8 port); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 558638468edb..6c529e6f3a01 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -36,6 +36,7 @@ #include #include #include "mlx5_ib.h" +#include "cmd.h" enum { MLX5_IB_VENDOR_CLASS1 = 0x9, @@ -51,9 +52,10 @@ static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num, return dev->mdev->port_caps[port_num - 1].has_smi; } -int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const void *in_mad, void *response_mad) +static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, + int ignore_bkey, u8 port, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const void *in_mad, + void *response_mad) { u8 op_modifier = 0; @@ -68,7 +70,8 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, if (ignore_bkey || !in_wc) op_modifier |= 0x2; - return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port); + return mlx5_cmd_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, + port); } static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b06d3b1efea8..efe383c0ac86 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1038,9 +1038,6 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db) void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); -int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const void *in_mad, void *response_mad); struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9de9abacf7f6..0257731e6d42 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o # mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ + transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c deleted file mode 100644 index 3a3b0005fd2b..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include "mlx5_core.h" - -int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, - u16 opmod, u8 port) -{ - int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out); - int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in); - int err = -ENOMEM; - void *data; - void *resp; - u32 *out; - u32 *in; - - in = kzalloc(inlen, GFP_KERNEL); - out = kzalloc(outlen, GFP_KERNEL); - if (!in || !out) - goto out; - - MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC); - MLX5_SET(mad_ifc_in, in, op_mod, opmod); - MLX5_SET(mad_ifc_in, in, port, port); - - data = MLX5_ADDR_OF(mad_ifc_in, in, mad); - memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); - - err = mlx5_cmd_exec(dev, in, inlen, out, outlen); - if (err) - goto out; - - resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet); - memcpy(outb, resp, - MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet)); - -out: - kfree(out); - kfree(in); - return err; -} -EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 54299251d40d..4e444863054a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -897,8 +897,6 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *out, int outlen); int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); -int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, - u16 opmod, u8 port); int mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); void mlx5_pagealloc_start(struct mlx5_core_dev *dev); From 97e1532ef81acb31c30f9e75bf00306c33a77812 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 16 Jan 2019 10:27:59 +0100 Subject: [PATCH 0066/1436] fuse: handle zero sized retrieve correctly Dereferencing req->page_descs[0] will Oops if req->max_pages is zero. Reported-by: syzbot+c1e36d30ee3416289cc0@syzkaller.appspotmail.com Tested-by: syzbot+c1e36d30ee3416289cc0@syzkaller.appspotmail.com Fixes: b2430d7567a3 ("fuse: add per-page descriptor to fuse_req") Cc: # v3.9 Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index a5e516a40e7a..fc29264011a6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1742,7 +1742,6 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, req->in.h.nodeid = outarg->nodeid; req->in.numargs = 2; req->in.argpages = 1; - req->page_descs[0].offset = offset; req->end = fuse_retrieve_end; index = outarg->offset >> PAGE_SHIFT; @@ -1757,6 +1756,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, this_num = min_t(unsigned, num, PAGE_SIZE - offset); req->pages[req->num_pages] = page; + req->page_descs[req->num_pages].offset = offset; req->page_descs[req->num_pages].length = this_num; req->num_pages++; From 8a3177db59cd644fde05ba9efee29392dfdec8aa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 16 Jan 2019 10:27:59 +0100 Subject: [PATCH 0067/1436] cuse: fix ioctl cuse_process_init_reply() doesn't initialize fc->max_pages and thus all cuse bases ioctls fail with ENOMEM. Reported-by: Andreas Steinmetz Fixes: 5da784cce430 ("fuse: add max_pages to init_out") Cc: # v4.20 Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 76baaa6be393..c2d4099429be 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -628,6 +628,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns) get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); fc->user_ns = get_user_ns(user_ns); + fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; } EXPORT_SYMBOL_GPL(fuse_conn_init); @@ -1162,7 +1163,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fc->user_id = d.user_id; fc->group_id = d.group_id; fc->max_read = max_t(unsigned, 4096, d.max_read); - fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; /* Used by get_root_inode() */ sb->s_fs_info = fc; From 9509941e9c534920ccc4771ae70bd6cbbe79df1c Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sat, 12 Jan 2019 02:39:05 +0100 Subject: [PATCH 0068/1436] fuse: call pipe_buf_release() under pipe lock Some of the pipe_buf_release() handlers seem to assume that the pipe is locked - in particular, anon_pipe_buf_release() accesses pipe->tmp_page without taking any extra locks. From a glance through the callers of pipe_buf_release(), it looks like FUSE is the only one that calls pipe_buf_release() without having the pipe locked. This bug should only lead to a memory leak, nothing terrible. Fixes: dd3bb14f44a6 ("fuse: support splice() writing to fuse device") Cc: stable@vger.kernel.org Signed-off-by: Jann Horn Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index fc29264011a6..809c0f2f9942 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2077,8 +2077,10 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, ret = fuse_dev_do_write(fud, &cs, len); + pipe_lock(pipe); for (idx = 0; idx < nbuf; idx++) pipe_buf_release(pipe, &bufs[idx]); + pipe_unlock(pipe); out: kvfree(bufs); From a2ebba824106dabe79937a9f29a875f837e1b6d4 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 16 Jan 2019 10:27:59 +0100 Subject: [PATCH 0069/1436] fuse: decrement NR_WRITEBACK_TEMP on the right page NR_WRITEBACK_TEMP is accounted on the temporary page in the request, not the page cache page. Fixes: 8b284dc47291 ("fuse: writepages: handle same page rewrites") Cc: # v3.13 Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ffaffe18352a..a59c16bd90ac 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1782,7 +1782,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, spin_unlock(&fc->lock); dec_wb_stat(&bdi->wb, WB_WRITEBACK); - dec_node_page_state(page, NR_WRITEBACK_TEMP); + dec_node_page_state(new_req->pages[0], NR_WRITEBACK_TEMP); wb_writeout_inc(&bdi->wb); fuse_writepage_free(fc, new_req); fuse_request_free(new_req); From c5b11ee9f1c22a8d6b5cab2099904f7adbe79a7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guido=20G=C3=BCnther?= Date: Wed, 16 Jan 2019 09:41:22 +0100 Subject: [PATCH 0070/1436] dt-bindings: imx8mq: Number clocks consecutively MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a duplicate use of 232 and numbers the clocks without holes. Fixes: 1cf3817bf1f5 ("dt-bindings: Add binding for i.MX8MQ CCM") Signed-off-by: Guido Günther Reviewed-by: Lucas Stach Signed-off-by: Shawn Guo --- include/dt-bindings/clock/imx8mq-clock.h | 26 ++++++++++++------------ 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/include/dt-bindings/clock/imx8mq-clock.h b/include/dt-bindings/clock/imx8mq-clock.h index b53be41929be..04f7ac345984 100644 --- a/include/dt-bindings/clock/imx8mq-clock.h +++ b/include/dt-bindings/clock/imx8mq-clock.h @@ -350,7 +350,7 @@ #define IMX8MQ_CLK_VPU_G2_ROOT 241 /* SCCG PLL GATE */ -#define IMX8MQ_SYS1_PLL_OUT 232 +#define IMX8MQ_SYS1_PLL_OUT 242 #define IMX8MQ_SYS2_PLL_OUT 243 #define IMX8MQ_SYS3_PLL_OUT 244 #define IMX8MQ_DRAM_PLL_OUT 245 @@ -372,24 +372,24 @@ /* txesc clock */ #define IMX8MQ_CLK_DSI_IPG_DIV 256 -#define IMX8MQ_CLK_TMU_ROOT 265 +#define IMX8MQ_CLK_TMU_ROOT 257 /* Display root clocks */ -#define IMX8MQ_CLK_DISP_AXI_ROOT 266 -#define IMX8MQ_CLK_DISP_APB_ROOT 267 -#define IMX8MQ_CLK_DISP_RTRM_ROOT 268 +#define IMX8MQ_CLK_DISP_AXI_ROOT 258 +#define IMX8MQ_CLK_DISP_APB_ROOT 259 +#define IMX8MQ_CLK_DISP_RTRM_ROOT 260 -#define IMX8MQ_CLK_OCOTP_ROOT 269 +#define IMX8MQ_CLK_OCOTP_ROOT 261 -#define IMX8MQ_CLK_DRAM_ALT_ROOT 270 -#define IMX8MQ_CLK_DRAM_CORE 271 +#define IMX8MQ_CLK_DRAM_ALT_ROOT 262 +#define IMX8MQ_CLK_DRAM_CORE 263 -#define IMX8MQ_CLK_MU_ROOT 272 -#define IMX8MQ_VIDEO2_PLL_OUT 273 +#define IMX8MQ_CLK_MU_ROOT 264 +#define IMX8MQ_VIDEO2_PLL_OUT 265 -#define IMX8MQ_CLK_CLKO2 274 +#define IMX8MQ_CLK_CLKO2 266 -#define IMX8MQ_CLK_NAND_USDHC_BUS_RAWNAND_CLK 275 +#define IMX8MQ_CLK_NAND_USDHC_BUS_RAWNAND_CLK 267 -#define IMX8MQ_CLK_END 276 +#define IMX8MQ_CLK_END 268 #endif /* __DT_BINDINGS_CLOCK_IMX8MQ_H */ From ec5aecc0b227f5509d25853537f989ca303e2be1 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 14 Jan 2019 13:00:23 +0200 Subject: [PATCH 0071/1436] iwlwifi: make IWLWIFI depend on CFG80211 Since IWLWIFI doesn't depend on MAC80211 anymore, it needs to depend on CFG80211, because it uses a few symbols from it. Add the dependency on CFG80211 accordingly. Additionally, make IWLWIFI_LEDS depend on IWLMVM or IWLDVM, since it doesn't need mac80211 but must be used for these. Fixes: aca432f06b8a ("iwlwifi: make MVM and DVM depend on MAC80211") Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/Kconfig b/drivers/net/wireless/intel/iwlwifi/Kconfig index 491ca3c8b43c..83d5bceea08f 100644 --- a/drivers/net/wireless/intel/iwlwifi/Kconfig +++ b/drivers/net/wireless/intel/iwlwifi/Kconfig @@ -1,6 +1,6 @@ config IWLWIFI tristate "Intel Wireless WiFi Next Gen AGN - Wireless-N/Advanced-N/Ultimate-N (iwlwifi) " - depends on PCI && HAS_IOMEM + depends on PCI && HAS_IOMEM && CFG80211 select FW_LOADER ---help--- Select to build the driver supporting the: @@ -47,6 +47,7 @@ if IWLWIFI config IWLWIFI_LEDS bool depends on LEDS_CLASS=y || LEDS_CLASS=IWLWIFI + depends on IWLMVM || IWLDVM select LEDS_TRIGGERS select MAC80211_LEDS default y From e3966a766865da7ced1dece663697861dd5cf103 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 11 Jan 2019 18:21:18 +0100 Subject: [PATCH 0072/1436] ARM: dts: da850: fix interrupt numbers for clocksource The timer interrupts specified in commit 3652e2741f42 ("ARM: dts: da850: Add clocks") are wrong but since the current timer code hard-codes them, the bug was never spotted. This patch must go into stable since, once we introduce a proper clocksource driver, devices with buggy device tree will stop booting. Fixes: 3652e2741f42 ("ARM: dts: da850: Add clocks") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/boot/dts/da850.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/da850.dtsi b/arch/arm/boot/dts/da850.dtsi index 47aa53ba6b92..559659b399d0 100644 --- a/arch/arm/boot/dts/da850.dtsi +++ b/arch/arm/boot/dts/da850.dtsi @@ -476,7 +476,7 @@ clocksource: timer@20000 { compatible = "ti,da830-timer"; reg = <0x20000 0x1000>; - interrupts = <12>, <13>; + interrupts = <21>, <22>; interrupt-names = "tint12", "tint34"; clocks = <&pll0_auxclk>; }; From 2bb7b675248c3ad11ada0ce3d0f6d480ec8cc87b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 10 Jan 2019 14:39:13 +0100 Subject: [PATCH 0073/1436] arm64: dts: renesas: r8a774a1: Enable DMA for SCIF2 SCIF2 on RZ/G2M can be used with both DMAC1 and DMAC2. Fixes: 3a3933a4fa36430a ("arm64: dts: renesas: r8a774a1: Add SCIF and HSCIF nodes") Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- arch/arm64/boot/dts/renesas/r8a774a1.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi index 20745a8528c5..719ed9d9067d 100644 --- a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi @@ -1011,6 +1011,9 @@ <&cpg CPG_CORE R8A774A1_CLK_S3D1>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; + dmas = <&dmac1 0x13>, <&dmac1 0x12>, + <&dmac2 0x13>, <&dmac2 0x12>; + dma-names = "tx", "rx", "tx", "rx"; power-domains = <&sysc R8A774A1_PD_ALWAYS_ON>; resets = <&cpg 310>; status = "disabled"; From 97f26702bc95b5c3a72671d5c6675e4d6ee0a2f4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 10 Jan 2019 14:39:15 +0100 Subject: [PATCH 0074/1436] arm64: dts: renesas: r8a7796: Enable DMA for SCIF2 SCIF2 on R-Car M3-W can be used with both DMAC1 and DMAC2. Fixes: dbcae5ea4bd27409 ("arm64: dts: r8a7796: Enable SCIF DMA") Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- arch/arm64/boot/dts/renesas/r8a7796.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/renesas/r8a7796.dtsi b/arch/arm64/boot/dts/renesas/r8a7796.dtsi index afedbf5728ec..0648d12778ed 100644 --- a/arch/arm64/boot/dts/renesas/r8a7796.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a7796.dtsi @@ -1262,6 +1262,9 @@ <&cpg CPG_CORE R8A7796_CLK_S3D1>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; + dmas = <&dmac1 0x13>, <&dmac1 0x12>, + <&dmac2 0x13>, <&dmac2 0x12>; + dma-names = "tx", "rx", "tx", "rx"; power-domains = <&sysc R8A7796_PD_ALWAYS_ON>; resets = <&cpg 310>; status = "disabled"; From 05c8478abd485507c25aa565afab604af8d8fe46 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 10 Jan 2019 14:39:16 +0100 Subject: [PATCH 0075/1436] arm64: dts: renesas: r8a77965: Enable DMA for SCIF2 SCIF2 on R-Car M3-N can be used with both DMAC1 and DMAC2. Fixes: 0ea5b2fd38db56aa ("arm64: dts: renesas: r8a77965: Add SCIF device nodes") Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- arch/arm64/boot/dts/renesas/r8a77965.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/renesas/r8a77965.dtsi b/arch/arm64/boot/dts/renesas/r8a77965.dtsi index 6dc9b1fef830..4b3730f640ef 100644 --- a/arch/arm64/boot/dts/renesas/r8a77965.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77965.dtsi @@ -1068,6 +1068,9 @@ <&cpg CPG_CORE R8A77965_CLK_S3D1>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; + dmas = <&dmac1 0x13>, <&dmac1 0x12>, + <&dmac2 0x13>, <&dmac2 0x12>; + dma-names = "tx", "rx", "tx", "rx"; power-domains = <&sysc R8A77965_PD_ALWAYS_ON>; resets = <&cpg 310>; status = "disabled"; From 013b2dff8153b4c01d2179dbefec6b9108383802 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 11 Jan 2019 13:37:43 +0100 Subject: [PATCH 0076/1436] mt76: fix tx status reporting for non-probing frames On MT76x2, the hardware does not report tx status in the FIFO register, if the packet id is 0. Change the allocation of packet IDs to use 0 for no-ack packets, 1 for non-probing packets and 2-255 for packets with tx status requested. Fixes rate control issues Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76.h | 4 +++- drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76x02_txrx.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c | 3 +-- drivers/net/wireless/mediatek/mt76/tx.c | 9 +++++---- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 8ef430d8afc4..8d56ab97abe3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -239,7 +239,9 @@ struct mt76_rx_tid { #define MT_TX_CB_TXS_FAILED BIT(2) #define MT_PACKET_ID_MASK GENMASK(7, 0) -#define MT_PACKET_ID_NO_ACK MT_PACKET_ID_MASK +#define MT_PACKET_ID_NO_ACK 0 +#define MT_PACKET_ID_NO_SKB 1 +#define MT_PACKET_ID_FIRST 2 #define MT_TX_STATUS_SKB_TIMEOUT HZ diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index 6adea0278284..38cd77d30f5c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -433,7 +433,7 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, } if (wcid) { - if (stat->pktid) + if (stat->pktid >= MT_PACKET_ID_FIRST) status.skb = mt76_tx_status_skb_get(mdev, wcid, stat->pktid, &list); if (status.skb) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_txrx.c b/drivers/net/wireless/mediatek/mt76/mt76x02_txrx.c index 4598cb2cc3ff..a5413a309a0a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_txrx.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_txrx.c @@ -177,7 +177,7 @@ int mt76x02_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, if (ret < 0) return ret; - if (pid && pid != MT_PACKET_ID_NO_ACK) + if (pid >= MT_PACKET_ID_FIRST) qsel = MT_QSEL_MGMT; *tx_info = FIELD_PREP(MT_TXD_INFO_QSEL, qsel) | diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c index 81970cf777c0..098d05e109e7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c @@ -87,8 +87,7 @@ int mt76x02u_tx_prepare_skb(struct mt76_dev *mdev, void *data, pid = mt76_tx_status_skb_add(mdev, wcid, skb); txwi->pktid = pid; - if ((pid && pid != MT_PACKET_ID_NO_ACK) || - q2ep(q->hw_idx) == MT_EP_OUT_HCCA) + if (pid >= MT_PACKET_ID_FIRST || q2ep(q->hw_idx) == MT_EP_OUT_HCCA) qsel = MT_QSEL_MGMT; else qsel = MT_QSEL_EDCA; diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index f08509c1670d..1ea08ce62713 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -170,21 +170,22 @@ mt76_tx_status_skb_add(struct mt76_dev *dev, struct mt76_wcid *wcid, int pid; if (!wcid) - return 0; + return MT_PACKET_ID_NO_ACK; if (info->flags & IEEE80211_TX_CTL_NO_ACK) return MT_PACKET_ID_NO_ACK; if (!(info->flags & (IEEE80211_TX_CTL_REQ_TX_STATUS | IEEE80211_TX_CTL_RATE_CTRL_PROBE))) - return 0; + return MT_PACKET_ID_NO_SKB; spin_lock_bh(&dev->status_list.lock); memset(cb, 0, sizeof(*cb)); wcid->packet_id = (wcid->packet_id + 1) & MT_PACKET_ID_MASK; - if (!wcid->packet_id || wcid->packet_id == MT_PACKET_ID_NO_ACK) - wcid->packet_id = 1; + if (wcid->packet_id == MT_PACKET_ID_NO_ACK || + wcid->packet_id == MT_PACKET_ID_NO_SKB) + wcid->packet_id = MT_PACKET_ID_FIRST; pid = wcid->packet_id; cb->wcid = wcid->idx; From f545540d5f6007def13cf235a914891b67693828 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 16 Jan 2019 22:33:18 +0100 Subject: [PATCH 0077/1436] mt76: set IEEE80211_HW_NEEDS_UNIQUE_STA_ADDR flag The hardware does not deal with multiple WCID entries for the same station properly. Set IEEE80211_HW_NEEDS_UNIQUE_STA_ADDR to avoid those cases Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mac80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 7b926dfa6b97..3d976ac4cb41 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -328,6 +328,7 @@ int mt76_register_device(struct mt76_dev *dev, bool vht, ieee80211_hw_set(hw, MFP_CAPABLE); ieee80211_hw_set(hw, AP_LINK_PS); ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS); + ieee80211_hw_set(hw, NEEDS_UNIQUE_STA_ADDR); wiphy->flags |= WIPHY_FLAG_IBSS_RSN; From 9313faacbb4eeb03799a86cd013e97409a40b970 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 6 Dec 2018 14:27:34 +0100 Subject: [PATCH 0078/1436] mt76: move mt76x02_get_txpower to mt76 core It will be reused by mt7603 later Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mac80211.c | 18 ++++++++++++++++++ drivers/net/wireless/mediatek/mt76/mt76.h | 3 +++ .../net/wireless/mediatek/mt76/mt76x0/pci.c | 2 +- .../net/wireless/mediatek/mt76/mt76x0/usb.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76x02.h | 2 -- .../net/wireless/mediatek/mt76/mt76x02_util.c | 18 ------------------ .../wireless/mediatek/mt76/mt76x2/pci_main.c | 2 +- .../wireless/mediatek/mt76/mt76x2/usb_main.c | 2 +- 8 files changed, 25 insertions(+), 24 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 3d976ac4cb41..467b5a692ed1 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -710,3 +710,21 @@ int mt76_sta_state(struct ieee80211_hw *hw, struct ieee80211_vif *vif, return 0; } EXPORT_SYMBOL_GPL(mt76_sta_state); + +int mt76_get_txpower(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + int *dbm) +{ + struct mt76_dev *dev = hw->priv; + int n_chains = __sw_hweight8(dev->antenna_mask); + + *dbm = dev->txpower_cur / 2; + + /* convert from per-chain power to combined + * output on 2x2 devices + */ + if (n_chains > 1) + *dbm += 3; + + return 0; +} +EXPORT_SYMBOL_GPL(mt76_get_txpower); diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 8d56ab97abe3..16b342f63d29 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -680,6 +680,9 @@ int mt76_sta_state(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *mt76_rx_convert(struct sk_buff *skb); +int mt76_get_txpower(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + int *dbm); + /* internal */ void mt76_tx_free(struct mt76_dev *dev); struct mt76_txwi_cache *mt76_get_txwi(struct mt76_dev *dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c index 720b05e325a5..1472c8699b29 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c @@ -99,7 +99,7 @@ static const struct ieee80211_ops mt76x0e_ops = { .sta_rate_tbl_update = mt76x02_sta_rate_tbl_update, .wake_tx_queue = mt76_wake_tx_queue, .get_survey = mt76_get_survey, - .get_txpower = mt76x02_get_txpower, + .get_txpower = mt76_get_txpower, .flush = mt76x0e_flush, .set_tim = mt76x0e_set_tim, .release_buffered_frames = mt76_release_buffered_frames, diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c index 0e6b43bb4678..f66e1b2f0980 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c @@ -155,7 +155,7 @@ static const struct ieee80211_ops mt76x0u_ops = { .sta_rate_tbl_update = mt76x02_sta_rate_tbl_update, .set_rts_threshold = mt76x02_set_rts_threshold, .wake_tx_queue = mt76_wake_tx_queue, - .get_txpower = mt76x02_get_txpower, + .get_txpower = mt76_get_txpower, }; static int mt76x0u_register_device(struct mt76x02_dev *dev) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02.h b/drivers/net/wireless/mediatek/mt76/mt76x02.h index d8bd0509a790..6d96766a6ed3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02.h @@ -172,8 +172,6 @@ void mt76x02_sw_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const u8 *mac); void mt76x02_sw_scan_complete(struct ieee80211_hw *hw, struct ieee80211_vif *vif); -int mt76x02_get_txpower(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, int *dbm); void mt76x02_sta_ps(struct mt76_dev *dev, struct ieee80211_sta *sta, bool ps); void mt76x02_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c index d80c8eb5a0ec..59f50a957cd5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c @@ -607,24 +607,6 @@ void mt76x02_sw_scan_complete(struct ieee80211_hw *hw, } EXPORT_SYMBOL_GPL(mt76x02_sw_scan_complete); -int mt76x02_get_txpower(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, int *dbm) -{ - struct mt76x02_dev *dev = hw->priv; - u8 nstreams = dev->mt76.chainmask & 0xf; - - *dbm = dev->mt76.txpower_cur / 2; - - /* convert from per-chain power to combined - * output on 2x2 devices - */ - if (nstreams > 1) - *dbm += 3; - - return 0; -} -EXPORT_SYMBOL_GPL(mt76x02_get_txpower); - void mt76x02_sta_ps(struct mt76_dev *mdev, struct ieee80211_sta *sta, bool ps) { diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c index 6a87b88eb036..06a26a152ba4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c @@ -191,7 +191,7 @@ const struct ieee80211_ops mt76x2_ops = { .sw_scan_complete = mt76x02_sw_scan_complete, .flush = mt76x2_flush, .ampdu_action = mt76x02_ampdu_action, - .get_txpower = mt76x02_get_txpower, + .get_txpower = mt76_get_txpower, .wake_tx_queue = mt76_wake_tx_queue, .sta_rate_tbl_update = mt76x02_sta_rate_tbl_update, .release_buffered_frames = mt76_release_buffered_frames, diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c index 2b48cc51a30d..286c7f451090 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c @@ -138,5 +138,5 @@ const struct ieee80211_ops mt76x2u_ops = { .sw_scan_start = mt76x02_sw_scan, .sw_scan_complete = mt76x02_sw_scan_complete, .sta_rate_tbl_update = mt76x02_sta_rate_tbl_update, - .get_txpower = mt76x02_get_txpower, + .get_txpower = mt76_get_txpower, }; From ef13edc0070818f5cddb3a1d31957cc618e25e9d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 5 Dec 2018 17:06:58 +0100 Subject: [PATCH 0079/1436] mt76: move mt76x02_phy_get_min_avg_rssi to mt76 core This will be used by mt7603 as well Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mac80211.c | 8 +++- drivers/net/wireless/mediatek/mt76/mt76.h | 8 ++++ .../net/wireless/mediatek/mt76/mt76x0/phy.c | 4 +- .../net/wireless/mediatek/mt76/mt76x02_mac.c | 5 -- .../net/wireless/mediatek/mt76/mt76x02_mac.h | 6 --- .../net/wireless/mediatek/mt76/mt76x02_phy.c | 47 ------------------- .../net/wireless/mediatek/mt76/mt76x02_phy.h | 1 - .../net/wireless/mediatek/mt76/mt76x02_util.c | 2 - .../net/wireless/mediatek/mt76/mt76x2/phy.c | 4 +- drivers/net/wireless/mediatek/mt76/util.c | 42 +++++++++++++++++ 10 files changed, 62 insertions(+), 65 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 467b5a692ed1..226f98fff6ef 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -548,7 +548,7 @@ mt76_check_ccmp_pn(struct sk_buff *skb) } static void -mt76_check_ps(struct mt76_dev *dev, struct sk_buff *skb) +mt76_check_sta(struct mt76_dev *dev, struct sk_buff *skb) { struct mt76_rx_status *status = (struct mt76_rx_status *) skb->cb; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; @@ -567,6 +567,9 @@ mt76_check_ps(struct mt76_dev *dev, struct sk_buff *skb) sta = container_of((void *) wcid, struct ieee80211_sta, drv_priv); + ewma_signal_add(&wcid->rssi, status->signal); + wcid->inactive_count = 0; + if (!test_bit(MT_WCID_FLAG_CHECK_PS, &wcid->flags)) return; @@ -626,7 +629,7 @@ void mt76_rx_poll_complete(struct mt76_dev *dev, enum mt76_rxq_id q, __skb_queue_head_init(&frames); while ((skb = __skb_dequeue(&dev->rx_skb[q])) != NULL) { - mt76_check_ps(dev, skb); + mt76_check_sta(dev, skb); mt76_rx_aggr_reorder(skb, &frames); } @@ -660,6 +663,7 @@ mt76_sta_add(struct mt76_dev *dev, struct ieee80211_vif *vif, mt76_txq_init(dev, sta->txq[i]); } + ewma_signal_init(&wcid->rssi); rcu_assign_pointer(dev->wcid[wcid->idx], wcid); out: diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 16b342f63d29..2267163d2445 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "util.h" @@ -174,6 +175,8 @@ enum mt76_wcid_flags { #define MT76_N_WCIDS 128 +DECLARE_EWMA(signal, 10, 8); + struct mt76_wcid { struct mt76_rx_tid __rcu *aggr[IEEE80211_NUM_TIDS]; @@ -181,6 +184,9 @@ struct mt76_wcid { unsigned long flags; + struct ewma_signal rssi; + int inactive_count; + u8 idx; u8 hw_key_idx; @@ -680,6 +686,8 @@ int mt76_sta_state(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *mt76_rx_convert(struct sk_buff *skb); +int mt76_get_min_avg_rssi(struct mt76_dev *dev); + int mt76_get_txpower(struct ieee80211_hw *hw, struct ieee80211_vif *vif, int *dbm); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c b/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c index b2b38b9fcac7..5a4c6f34267e 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c @@ -1077,7 +1077,9 @@ mt76x0_phy_update_channel_gain(struct mt76x02_dev *dev) u8 gain_delta; int low_gain; - dev->cal.avg_rssi_all = mt76x02_phy_get_min_avg_rssi(dev); + dev->cal.avg_rssi_all = mt76_get_min_avg_rssi(&dev->mt76); + if (!dev->cal.avg_rssi_all) + dev->cal.avg_rssi_all = -75; low_gain = (dev->cal.avg_rssi_all > mt76x02_get_rssi_gain_thresh(dev)) + (dev->cal.avg_rssi_all > mt76x02_get_low_rssi_gain_thresh(dev)); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index 38cd77d30f5c..68d01e136fa0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -656,11 +656,6 @@ int mt76x02_mac_process_rx(struct mt76x02_dev *dev, struct sk_buff *skb, status->tid = FIELD_GET(MT_RXWI_TID, tid_sn); status->seqno = FIELD_GET(MT_RXWI_SN, tid_sn); - if (sta) { - ewma_signal_add(&sta->rssi, status->signal); - sta->inactive_count = 0; - } - return mt76x02_mac_process_rate(status, rate); } diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h index 735fe96440ba..940c07f665cd 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h @@ -18,8 +18,6 @@ #ifndef __MT76X02_MAC_H #define __MT76X02_MAC_H -#include - struct mt76x02_dev; struct mt76x02_tx_status { @@ -41,8 +39,6 @@ struct mt76x02_vif { u8 idx; }; -DECLARE_EWMA(signal, 10, 8); - struct mt76x02_sta { struct mt76_wcid wcid; /* must be first */ @@ -50,8 +46,6 @@ struct mt76x02_sta { struct mt76x02_tx_status status; int n_frames; - struct ewma_signal rssi; - int inactive_count; }; #define MT_RXINFO_BA BIT(0) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_phy.c b/drivers/net/wireless/mediatek/mt76/mt76x02_phy.c index 977a8e7e26df..a020c757ba5c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_phy.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_phy.c @@ -132,53 +132,6 @@ void mt76x02_phy_set_txpower(struct mt76x02_dev *dev, int txp_0, int txp_1) } EXPORT_SYMBOL_GPL(mt76x02_phy_set_txpower); -int mt76x02_phy_get_min_avg_rssi(struct mt76x02_dev *dev) -{ - struct mt76x02_sta *sta; - struct mt76_wcid *wcid; - int i, j, min_rssi = 0; - s8 cur_rssi; - - local_bh_disable(); - rcu_read_lock(); - - for (i = 0; i < ARRAY_SIZE(dev->mt76.wcid_mask); i++) { - unsigned long mask = dev->mt76.wcid_mask[i]; - - if (!mask) - continue; - - for (j = i * BITS_PER_LONG; mask; j++, mask >>= 1) { - if (!(mask & 1)) - continue; - - wcid = rcu_dereference(dev->mt76.wcid[j]); - if (!wcid) - continue; - - sta = container_of(wcid, struct mt76x02_sta, wcid); - spin_lock(&dev->mt76.rx_lock); - if (sta->inactive_count++ < 5) - cur_rssi = ewma_signal_read(&sta->rssi); - else - cur_rssi = 0; - spin_unlock(&dev->mt76.rx_lock); - - if (cur_rssi < min_rssi) - min_rssi = cur_rssi; - } - } - - rcu_read_unlock(); - local_bh_enable(); - - if (!min_rssi) - return -75; - - return min_rssi; -} -EXPORT_SYMBOL_GPL(mt76x02_phy_get_min_avg_rssi); - void mt76x02_phy_set_bw(struct mt76x02_dev *dev, int width, u8 ctrl) { int core_val, agc_val; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_phy.h b/drivers/net/wireless/mediatek/mt76/mt76x02_phy.h index 2b316cf7c70c..d2971db06f13 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_phy.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_phy.h @@ -51,7 +51,6 @@ void mt76x02_limit_rate_power(struct mt76_rate_power *r, int limit); int mt76x02_get_max_rate_power(struct mt76_rate_power *r); void mt76x02_phy_set_rxpath(struct mt76x02_dev *dev); void mt76x02_phy_set_txdac(struct mt76x02_dev *dev); -int mt76x02_phy_get_min_avg_rssi(struct mt76x02_dev *dev); void mt76x02_phy_set_bw(struct mt76x02_dev *dev, int width, u8 ctrl); void mt76x02_phy_set_band(struct mt76x02_dev *dev, int band, bool primary_upper); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c index 59f50a957cd5..0bd23d64db24 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c @@ -250,8 +250,6 @@ int mt76x02_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif, if (vif->type == NL80211_IFTYPE_AP) set_bit(MT_WCID_FLAG_CHECK_PS, &msta->wcid.flags); - ewma_signal_init(&msta->rssi); - return 0; } EXPORT_SYMBOL_GPL(mt76x02_sta_add); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c b/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c index c9634a774705..e2ee5e498da7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c @@ -284,7 +284,9 @@ void mt76x2_phy_update_channel_gain(struct mt76x02_dev *dev) int low_gain; u32 val; - dev->cal.avg_rssi_all = mt76x02_phy_get_min_avg_rssi(dev); + dev->cal.avg_rssi_all = mt76_get_min_avg_rssi(&dev->mt76); + if (!dev->cal.avg_rssi_all) + dev->cal.avg_rssi_all = -75; low_gain = (dev->cal.avg_rssi_all > mt76x02_get_rssi_gain_thresh(dev)) + (dev->cal.avg_rssi_all > mt76x02_get_low_rssi_gain_thresh(dev)); diff --git a/drivers/net/wireless/mediatek/mt76/util.c b/drivers/net/wireless/mediatek/mt76/util.c index 0c35b8db58cd..6242421c6011 100644 --- a/drivers/net/wireless/mediatek/mt76/util.c +++ b/drivers/net/wireless/mediatek/mt76/util.c @@ -75,4 +75,46 @@ int mt76_wcid_alloc(unsigned long *mask, int size) } EXPORT_SYMBOL_GPL(mt76_wcid_alloc); +int mt76_get_min_avg_rssi(struct mt76_dev *dev) +{ + struct mt76_wcid *wcid; + int i, j, min_rssi = 0; + s8 cur_rssi; + + local_bh_disable(); + rcu_read_lock(); + + for (i = 0; i < ARRAY_SIZE(dev->wcid_mask); i++) { + unsigned long mask = dev->wcid_mask[i]; + + if (!mask) + continue; + + for (j = i * BITS_PER_LONG; mask; j++, mask >>= 1) { + if (!(mask & 1)) + continue; + + wcid = rcu_dereference(dev->wcid[j]); + if (!wcid) + continue; + + spin_lock(&dev->rx_lock); + if (wcid->inactive_count++ < 5) + cur_rssi = ewma_signal_read(&wcid->rssi); + else + cur_rssi = 0; + spin_unlock(&dev->rx_lock); + + if (cur_rssi < min_rssi) + min_rssi = cur_rssi; + } + } + + rcu_read_unlock(); + local_bh_enable(); + + return min_rssi; +} +EXPORT_SYMBOL_GPL(mt76_get_min_avg_rssi); + MODULE_LICENSE("Dual BSD/GPL"); From 02e5a769c0a48e4e145b765329bc9d22a70261e7 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 11 Jan 2019 14:17:30 +0100 Subject: [PATCH 0080/1436] mt76: fix rssi ewma tracking The generic EWMA code cannot deal with negative numbers, so convert signal to a positive number before adding it Fixes mt76x2 AGC tuning Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mac80211.c | 4 +++- drivers/net/wireless/mediatek/mt76/util.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 226f98fff6ef..3b09d4560ae2 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -567,7 +567,9 @@ mt76_check_sta(struct mt76_dev *dev, struct sk_buff *skb) sta = container_of((void *) wcid, struct ieee80211_sta, drv_priv); - ewma_signal_add(&wcid->rssi, status->signal); + if (status->signal <= 0) + ewma_signal_add(&wcid->rssi, -status->signal); + wcid->inactive_count = 0; if (!test_bit(MT_WCID_FLAG_CHECK_PS, &wcid->flags)) diff --git a/drivers/net/wireless/mediatek/mt76/util.c b/drivers/net/wireless/mediatek/mt76/util.c index 6242421c6011..69270c1a9091 100644 --- a/drivers/net/wireless/mediatek/mt76/util.c +++ b/drivers/net/wireless/mediatek/mt76/util.c @@ -100,7 +100,7 @@ int mt76_get_min_avg_rssi(struct mt76_dev *dev) spin_lock(&dev->rx_lock); if (wcid->inactive_count++ < 5) - cur_rssi = ewma_signal_read(&wcid->rssi); + cur_rssi = -ewma_signal_read(&wcid->rssi); else cur_rssi = 0; spin_unlock(&dev->rx_lock); From 9cf67ec7fd8077fdf7909449534e32b8438b5edc Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 29 Dec 2018 13:01:31 +0100 Subject: [PATCH 0081/1436] mt76: fix signedness of rx status signal field It is usually negative, so it needs to be signed. Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 2267163d2445..0435d623e9ad 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -497,7 +497,7 @@ struct mt76_rx_status { u8 rate_idx; u8 nss; u8 band; - u8 signal; + s8 signal; u8 chains; s8 chain_signal[IEEE80211_MAX_CHAINS]; }; From e7173858c78a981788bde18930d1e16fd5203151 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 15 Jan 2019 14:26:53 +0100 Subject: [PATCH 0082/1436] mt76: add channel switch announcement support Use the appropriate mac80211 callbacks after beacon transmission Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mac80211.c | 39 +++++++++++++++++++ drivers/net/wireless/mediatek/mt76/mt76.h | 5 +++ .../net/wireless/mediatek/mt76/mt76x02_mmio.c | 13 ++++++- .../net/wireless/mediatek/mt76/mt76x02_util.c | 2 + 4 files changed, 57 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 3b09d4560ae2..ee3b65a14870 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -734,3 +734,42 @@ int mt76_get_txpower(struct ieee80211_hw *hw, struct ieee80211_vif *vif, return 0; } EXPORT_SYMBOL_GPL(mt76_get_txpower); + +static void +__mt76_csa_finish(void *priv, u8 *mac, struct ieee80211_vif *vif) +{ + if (vif->csa_active && ieee80211_csa_is_complete(vif)) + ieee80211_csa_finish(vif); +} + +void mt76_csa_finish(struct mt76_dev *dev) +{ + if (!dev->csa_complete) + return; + + ieee80211_iterate_active_interfaces_atomic(dev->hw, + IEEE80211_IFACE_ITER_RESUME_ALL, + __mt76_csa_finish, dev); + + dev->csa_complete = 0; +} +EXPORT_SYMBOL_GPL(mt76_csa_finish); + +static void +__mt76_csa_check(void *priv, u8 *mac, struct ieee80211_vif *vif) +{ + struct mt76_dev *dev = priv; + + if (!vif->csa_active) + return; + + dev->csa_complete |= ieee80211_csa_is_complete(vif); +} + +void mt76_csa_check(struct mt76_dev *dev) +{ + ieee80211_iterate_active_interfaces_atomic(dev->hw, + IEEE80211_IFACE_ITER_RESUME_ALL, + __mt76_csa_check, dev); +} +EXPORT_SYMBOL_GPL(mt76_csa_check); diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 0435d623e9ad..2bb9db4ed80a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -463,6 +463,8 @@ struct mt76_dev { bool led_al; u8 led_pin; + u8 csa_complete; + u32 rxfilter; union { @@ -691,6 +693,9 @@ int mt76_get_min_avg_rssi(struct mt76_dev *dev); int mt76_get_txpower(struct ieee80211_hw *hw, struct ieee80211_vif *vif, int *dbm); +void mt76_csa_check(struct mt76_dev *dev); +void mt76_csa_finish(struct mt76_dev *dev); + /* internal */ void mt76_tx_free(struct mt76_dev *dev); struct mt76_txwi_cache *mt76_get_txwi(struct mt76_dev *dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c index 6236c6121c01..374bc9d91f12 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c @@ -116,6 +116,11 @@ static void mt76x02_pre_tbtt_tasklet(unsigned long arg) IEEE80211_IFACE_ITER_RESUME_ALL, mt76x02_update_beacon_iter, dev); + mt76_csa_check(&dev->mt76); + + if (dev->mt76.csa_complete) + return; + do { nframes = skb_queue_len(&data.q); ieee80211_iterate_active_interfaces_atomic(mt76_hw(dev), @@ -309,8 +314,12 @@ irqreturn_t mt76x02_irq_handler(int irq, void *dev_instance) tasklet_schedule(&dev->pre_tbtt_tasklet); /* send buffered multicast frames now */ - if (intr & MT_INT_TBTT) - mt76_queue_kick(dev, &dev->mt76.q_tx[MT_TXQ_PSD]); + if (intr & MT_INT_TBTT) { + if (dev->mt76.csa_complete) + mt76_csa_finish(&dev->mt76); + else + mt76_queue_kick(dev, &dev->mt76.q_tx[MT_TXQ_PSD]); + } if (intr & MT_INT_TX_STAT) { mt76x02_mac_poll_tx_status(dev, true); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c index 0bd23d64db24..062614ad0d51 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c @@ -160,6 +160,8 @@ void mt76x02_init_device(struct mt76x02_dev *dev) #endif BIT(NL80211_IFTYPE_ADHOC); + wiphy->flags |= WIPHY_FLAG_HAS_CHANNEL_SWITCH; + wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_VHT_IBSS); /* init led callbacks */ From 2c0408dd0d8906b26fe8023889af7adf5e68b2c2 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Thu, 20 Dec 2018 11:06:38 +0300 Subject: [PATCH 0083/1436] gpu: ipu-v3: Fix i.MX51 CSI control registers offset The CSI0/CSI1 registers offset is at +0xe030000/+0xe038000 relative to the control module registers on IPUv3EX. This patch fixes wrong values for i.MX51 CSI0/CSI1. Fixes: 2ffd48f2e7 ("gpu: ipu-v3: Add Camera Sensor Interface unit") Signed-off-by: Alexander Shiyan Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 474b00e19697..5b7cdbfe062f 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -898,8 +898,8 @@ static struct ipu_devtype ipu_type_imx51 = { .cpmem_ofs = 0x1f000000, .srm_ofs = 0x1f040000, .tpm_ofs = 0x1f060000, - .csi0_ofs = 0x1f030000, - .csi1_ofs = 0x1f038000, + .csi0_ofs = 0x1e030000, + .csi1_ofs = 0x1e038000, .ic_ofs = 0x1e020000, .disp0_ofs = 0x1e040000, .disp1_ofs = 0x1e048000, From aa3312012f103f91f123600bbf768b11c8f431bc Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 13 Jan 2019 09:47:42 +0100 Subject: [PATCH 0084/1436] drm/imx: imx-ldb: add missing of_node_puts The device node iterators perform an of_node_get on each iteration, so a jump out of the loop requires an of_node_put. Move the initialization channel->child = child; down to just before the call to imx_ldb_register so that intervening failures don't need to clear it. Add a label at the end of the function to do all the of_node_puts. The semantic patch that finds part of this problem is as follows (http://coccinelle.lip6.fr): // @@ expression root,e; local idexpression child; iterator name for_each_child_of_node; @@ for_each_child_of_node(root, child) { ... when != of_node_put(child) when != e = child ( return child; | * return ...; ) ... } // Signed-off-by: Julia Lawall Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/imx-ldb.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index 2c5bbe317353..e31e263cf86b 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -643,8 +643,10 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) int bus_format; ret = of_property_read_u32(child, "reg", &i); - if (ret || i < 0 || i > 1) - return -EINVAL; + if (ret || i < 0 || i > 1) { + ret = -EINVAL; + goto free_child; + } if (!of_device_is_available(child)) continue; @@ -657,7 +659,6 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) channel = &imx_ldb->channel[i]; channel->ldb = imx_ldb; channel->chno = i; - channel->child = child; /* * The output port is port@4 with an external 4-port mux or @@ -667,13 +668,13 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) imx_ldb->lvds_mux ? 4 : 2, 0, &channel->panel, &channel->bridge); if (ret && ret != -ENODEV) - return ret; + goto free_child; /* panel ddc only if there is no bridge */ if (!channel->bridge) { ret = imx_ldb_panel_ddc(dev, channel, child); if (ret) - return ret; + goto free_child; } bus_format = of_get_bus_format(dev, child); @@ -689,18 +690,26 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) if (bus_format < 0) { dev_err(dev, "could not determine data mapping: %d\n", bus_format); - return bus_format; + ret = bus_format; + goto free_child; } channel->bus_format = bus_format; + channel->child = child; ret = imx_ldb_register(drm, channel); - if (ret) - return ret; + if (ret) { + channel->child = NULL; + goto free_child; + } } dev_set_drvdata(dev, imx_ldb); return 0; + +free_child: + of_node_put(child); + return ret; } static void imx_ldb_unbind(struct device *dev, struct device *master, From bb867d219fda7fbaabea3314702474c4eac2b91d Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Tue, 16 Oct 2018 17:31:40 -0700 Subject: [PATCH 0085/1436] gpu: ipu-v3: Fix CSI offsets for imx53 The CSI offsets are wrong for both CSI0 and CSI1. They are at physical address 0x1e030000 and 0x1e038000 respectively. Fixes: 2ffd48f2e7 ("gpu: ipu-v3: Add Camera Sensor Interface unit") Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 5b7cdbfe062f..0a7d4395d427 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -914,8 +914,8 @@ static struct ipu_devtype ipu_type_imx53 = { .cpmem_ofs = 0x07000000, .srm_ofs = 0x07040000, .tpm_ofs = 0x07060000, - .csi0_ofs = 0x07030000, - .csi1_ofs = 0x07038000, + .csi0_ofs = 0x06030000, + .csi1_ofs = 0x06038000, .ic_ofs = 0x06020000, .disp0_ofs = 0x06040000, .disp1_ofs = 0x06048000, From 99d86c8b88393e29cf07c020585f2c8afbcdd97d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 17 Jan 2019 12:30:17 +0100 Subject: [PATCH 0086/1436] perf ordered_events: Fix crash in ordered_events__free Song Liu reported crash in 'perf record': > #0 0x0000000000500055 in ordered_events(float, long double,...)(...) () > #1 0x0000000000500196 in ordered_events.reinit () > #2 0x00000000004fe413 in perf_session.process_events () > #3 0x0000000000440431 in cmd_record () > #4 0x00000000004a439f in run_builtin () > #5 0x000000000042b3e5 in main ()" This can happen when we get out of buffers during event processing. The subsequent ordered_events__free() call assumes oe->buffer != NULL and crashes. Add a check to prevent that. Reported-by: Song Liu Signed-off-by: Jiri Olsa Reviewed-by: Song Liu Tested-by: Song Liu Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190117113017.12977-1-jolsa@kernel.org Fixes: d5ceb62b3654 ("perf ordered_events: Add 'struct ordered_events_buffer' layer") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/ordered-events.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 897589507d97..ea523d3b248f 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -391,8 +391,10 @@ void ordered_events__free(struct ordered_events *oe) * Current buffer might not have all the events allocated * yet, we need to free only allocated ones ... */ - list_del(&oe->buffer->list); - ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe); + if (oe->buffer) { + list_del(&oe->buffer->list); + ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe); + } /* ... and continue with the rest */ list_for_each_entry_safe(buffer, tmp, &oe->to_free, list) { From ea6eb5e7d15e1838de335609994b4546e2abcaaf Mon Sep 17 00:00:00 2001 From: Andreas Ziegler Date: Thu, 17 Jan 2019 14:30:23 +0100 Subject: [PATCH 0087/1436] tracing: uprobes: Fix typo in pr_fmt string The subsystem-specific message prefix for uprobes was also "trace_kprobe: " instead of "trace_uprobe: " as described in the original commit message. Link: http://lkml.kernel.org/r/20190117133023.19292-1-andreas.ziegler@fau.de Cc: Ingo Molnar Cc: stable@vger.kernel.org Acked-by: Masami Hiramatsu Fixes: 7257634135c24 ("tracing/probe: Show subsystem name in messages") Signed-off-by: Andreas Ziegler Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_uprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index e335576b9411..19a1a8e19062 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -5,7 +5,7 @@ * Copyright (C) IBM Corporation, 2010-2012 * Author: Srikar Dronamraju */ -#define pr_fmt(fmt) "trace_kprobe: " fmt +#define pr_fmt(fmt) "trace_uprobe: " fmt #include #include From 0722069a5374b904ec1a67f91249f90e1cfae259 Mon Sep 17 00:00:00 2001 From: Andreas Ziegler Date: Wed, 16 Jan 2019 15:16:29 +0100 Subject: [PATCH 0088/1436] tracing/uprobes: Fix output for multiple string arguments When printing multiple uprobe arguments as strings the output for the earlier arguments would also include all later string arguments. This is best explained in an example: Consider adding a uprobe to a function receiving two strings as parameters which is at offset 0xa0 in strlib.so and we want to print both parameters when the uprobe is hit (on x86_64): $ echo 'p:func /lib/strlib.so:0xa0 +0(%di):string +0(%si):string' > \ /sys/kernel/debug/tracing/uprobe_events When the function is called as func("foo", "bar") and we hit the probe, the trace file shows a line like the following: [...] func: (0x7f7e683706a0) arg1="foobar" arg2="bar" Note the extra "bar" printed as part of arg1. This behaviour stacks up for additional string arguments. The strings are stored in a dynamically growing part of the uprobe buffer by fetch_store_string() after copying them from userspace via strncpy_from_user(). The return value of strncpy_from_user() is then directly used as the required size for the string. However, this does not take the terminating null byte into account as the documentation for strncpy_from_user() cleary states that it "[...] returns the length of the string (not including the trailing NUL)" even though the null byte will be copied to the destination. Therefore, subsequent calls to fetch_store_string() will overwrite the terminating null byte of the most recently fetched string with the first character of the current string, leading to the "accumulation" of strings in earlier arguments in the output. Fix this by incrementing the return value of strncpy_from_user() by one if we did not hit the maximum buffer size. Link: http://lkml.kernel.org/r/20190116141629.5752-1-andreas.ziegler@fau.de Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 5baaa59ef09e ("tracing/probes: Implement 'memory' fetch method for uprobes") Acked-by: Masami Hiramatsu Signed-off-by: Andreas Ziegler Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_uprobe.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 19a1a8e19062..9bde07c06362 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -160,6 +160,13 @@ fetch_store_string(unsigned long addr, void *dest, void *base) if (ret >= 0) { if (ret == maxlen) dst[ret - 1] = '\0'; + else + /* + * Include the terminating null byte. In this case it + * was copied by strncpy_from_user but not accounted + * for in ret. + */ + ret++; *(u32 *)dest = make_data_loc(ret, (void *)dst - base); } From 96167167b6e17b25c0e05ecc31119b73baeab094 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 17 Jan 2019 11:48:34 -0800 Subject: [PATCH 0089/1436] perf script: Fix crash with printing mixed trace point and other events 'perf script' crashes currently when printing mixed trace points and other events because the trace format does not handle events without trace meta data. Add a simple check to avoid that. % cat > test.c main() { printf("Hello world\n"); } ^D % gcc -g -o test test.c % sudo perf probe -x test 'test.c:3' % perf record -e '{cpu/cpu-cycles,period=10000/,probe_test:main}:S' ./test % perf script Committer testing: Before: # perf probe -x /lib64/libc-2.28.so malloc Added new event: probe_libc:malloc (on malloc in /usr/lib64/libc-2.28.so) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc -aR sleep 1 # perf probe -l probe_libc:malloc (on __libc_malloc@malloc/malloc.c in /usr/lib64/libc-2.28.so) # perf record -e '{cpu/cpu-cycles,period=10000/,probe_libc:*}:S' sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.023 MB perf.data (40 samples) ] # perf script Segmentation fault (core dumped) ^C # After: # perf script | head -6 sleep 2888 94796.944981: 16198 cpu/cpu-cycles,period=10000/: ffffffff925dc04f get_random_u32+0x1f (/lib/modules/5.0.0-rc2+/build/vmlinux) sleep 2888 [-01] 94796.944981: probe_libc:malloc: sleep 2888 94796.944983: 4713 cpu/cpu-cycles,period=10000/: ffffffff922763af change_protection+0xcf (/lib/modules/5.0.0-rc2+/build/vmlinux) sleep 2888 [-01] 94796.944983: probe_libc:malloc: sleep 2888 94796.944986: 9934 cpu/cpu-cycles,period=10000/: ffffffff922777e0 move_page_tables+0x0 (/lib/modules/5.0.0-rc2+/build/vmlinux) sleep 2888 [-01] 94796.944986: probe_libc:malloc: # Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190117194834.21940-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index d079f36d342d..357906ed1898 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1794,7 +1794,7 @@ static void process_event(struct perf_script *script, return; } - if (PRINT_FIELD(TRACE)) { + if (PRINT_FIELD(TRACE) && sample->raw_data) { event_format__fprintf(evsel->tp_format, sample->cpu, sample->raw_data, sample->raw_size, fp); } From 1a51c5da5acc6c188c917ba572eebac5f8793432 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 10 Jan 2019 17:17:16 -0800 Subject: [PATCH 0090/1436] perf core: Fix perf_proc_update_handler() bug The perf_proc_update_handler() handles /proc/sys/kernel/perf_event_max_sample_rate syctl variable. When the PMU IRQ handler timing monitoring is disabled, i.e, when /proc/sys/kernel/perf_cpu_time_max_percent is equal to 0 or 100, then no modification to sysctl_perf_event_sample_rate is allowed to prevent possible hang from wrong values. The problem is that the test to prevent modification is made after the sysctl variable is modified in perf_proc_update_handler(). You get an error: $ echo 10001 >/proc/sys/kernel/perf_event_max_sample_rate echo: write error: invalid argument But the value is still modified causing all sorts of inconsistencies: $ cat /proc/sys/kernel/perf_event_max_sample_rate 10001 This patch fixes the problem by moving the parsing of the value after the test. Committer testing: # echo 100 > /proc/sys/kernel/perf_cpu_time_max_percent # echo 10001 > /proc/sys/kernel/perf_event_max_sample_rate -bash: echo: write error: Invalid argument # cat /proc/sys/kernel/perf_event_max_sample_rate 10001 # Signed-off-by: Stephane Eranian Reviewed-by: Andi Kleen Reviewed-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1547169436-6266-1-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 3cd13a30f732..e5ede6918050 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -436,18 +436,18 @@ int perf_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - - if (ret || !write) - return ret; - + int ret; + int perf_cpu = sysctl_perf_cpu_time_max_percent; /* * If throttling is disabled don't allow the write: */ - if (sysctl_perf_cpu_time_max_percent == 100 || - sysctl_perf_cpu_time_max_percent == 0) + if (write && (perf_cpu == 100 || perf_cpu == 0)) return -EINVAL; + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (ret || !write) + return ret; + max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ); perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate; update_perf_cpu_limits(); From 94ec1eb711db69be1414b56b3160b816e86a5c5b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 18 Jan 2019 11:34:15 -0300 Subject: [PATCH 0091/1436] perf python: Remove -fstack-clash-protection when building with some clang versions These options are not present in some (all?) clang versions, so when we build for a distro that has a gcc new enough to have these options and that the distro python build config settings use them but clang doesn't support, b00m. This is the case with fedora rawhide (now gearing towards f30), so check if clang has the and remove the missing ones from CFLAGS. Cc: Eduardo Habkost Cc: Thiago Macieira Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-5q50q9w458yawgxf9ez54jbp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 63f758c655d5..64d1f36dee99 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -17,6 +17,8 @@ if cc == "clang": vars[var] = sub("-mcet", "", vars[var]) if not clang_has_option("-fcf-protection"): vars[var] = sub("-fcf-protection", "", vars[var]) + if not clang_has_option("-fstack-clash-protection"): + vars[var] = sub("-fstack-clash-protection", "", vars[var]) from distutils.core import setup, Extension From 0ce23d6d42147a692768e6baaaa3db75c44f4235 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 17 Jan 2019 17:32:05 +0000 Subject: [PATCH 0092/1436] ASoC: hdmi-codec: fix oops on re-probe hdmi-codec oopses the kernel when it is unbound from a successfully bound audio subsystem, and is then rebound: Unable to handle kernel NULL pointer dereference at virtual address 0000001c pgd = ee3f0000 [0000001c] *pgd=3cc59831 Internal error: Oops: 817 [#1] PREEMPT ARM Modules linked in: ext2 snd_soc_spdif_tx vmeta dove_thermal snd_soc_kirkwood ofpart marvell_cesa m25p80 orion_wdt mtd spi_nor des_generic gpio_ir_recv snd_soc_kirkwood_spdif bmm_dmabuf auth_rpcgss nfsd autofs4 etnaviv thermal_sys hwmon gpu_sched tda9950 CPU: 0 PID: 1005 Comm: bash Not tainted 4.20.0+ #1762 Hardware name: Marvell Dove (Cubox) PC is at hdmi_dai_probe+0x68/0x80 LR is at find_held_lock+0x20/0x94 pc : [] lr : [] psr: 600f0013 sp : ee15bd28 ip : eebd8b1c fp : c093b488 r10: ee048000 r9 : eebdab18 r8 : ee048600 r7 : 00000001 r6 : 00000000 r5 : 00000000 r4 : ee82c100 r3 : 00000006 r2 : 00000001 r1 : c067e38c r0 : ee82c100 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none[ 297.318599] Control: 10c5387d Table: 2e3f0019 DAC: 00000051 Process bash (pid: 1005, stack limit = 0xee15a248) ... [] (hdmi_dai_probe) from [] (soc_probe_dai.part.9+0x34/0x70) [] (soc_probe_dai.part.9) from [] (snd_soc_instantiate_card+0x734/0xc9c) [] (snd_soc_instantiate_card) from [] (snd_soc_add_component+0x29c/0x378) [] (snd_soc_add_component) from [] (snd_soc_register_component+0x44/0x54) [] (snd_soc_register_component) from [] (devm_snd_soc_register_component+0x48/0x84) [] (devm_snd_soc_register_component) from [] (hdmi_codec_probe+0x150/0x260) [] (hdmi_codec_probe) from [] (platform_drv_probe+0x48/0x98) This happens because hdmi_dai_probe() attempts to access the HDMI codec private data, but this has not been assigned by hdmi_dai_probe() before it calls devm_snd_soc_register_component(). Move the call to dev_set_drvdata() before devm_snd_soc_register_component() to avoid this oops. Signed-off-by: Russell King Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/hdmi-codec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index d00734d31e04..e5b6769b9797 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -795,6 +795,8 @@ static int hdmi_codec_probe(struct platform_device *pdev) if (hcd->spdif) hcp->daidrv[i] = hdmi_spdif_dai; + dev_set_drvdata(dev, hcp); + ret = devm_snd_soc_register_component(dev, &hdmi_driver, hcp->daidrv, dai_count); if (ret) { @@ -802,8 +804,6 @@ static int hdmi_codec_probe(struct platform_device *pdev) __func__, ret); return ret; } - - dev_set_drvdata(dev, hcp); return 0; } From 89857a8a5c89a406b967ab2be7bd2ccdbe75e73d Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Fri, 21 Dec 2018 16:41:42 +0200 Subject: [PATCH 0093/1436] soc: fsl: qbman: avoid race in clearing QMan interrupt By clearing all interrupt sources, not only those that already occurred, the existing code may acknowledge by mistake interrupts that occurred after the code checks for them. Signed-off-by: Madalin Bucur Signed-off-by: Roy Pledge Signed-off-by: Li Yang --- drivers/soc/fsl/qbman/qman.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 52c153cd795a..636f83f781f5 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -1143,18 +1143,19 @@ static void qm_mr_process_task(struct work_struct *work); static irqreturn_t portal_isr(int irq, void *ptr) { struct qman_portal *p = ptr; - - u32 clear = QM_DQAVAIL_MASK | p->irq_sources; u32 is = qm_in(&p->p, QM_REG_ISR) & p->irq_sources; + u32 clear = 0; if (unlikely(!is)) return IRQ_NONE; /* DQRR-handling if it's interrupt-driven */ - if (is & QM_PIRQ_DQRI) + if (is & QM_PIRQ_DQRI) { __poll_portal_fast(p, QMAN_POLL_LIMIT); + clear = QM_DQAVAIL_MASK | QM_PIRQ_DQRI; + } /* Handling of anything else that's interrupt-driven */ - clear |= __poll_portal_slow(p, is); + clear |= __poll_portal_slow(p, is) & QM_PIRQ_SLOW; qm_out(&p->p, QM_REG_ISR, clear); return IRQ_HANDLED; } From e3f72b749da2bf63bed7409e416f160418d475b6 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 14 Jan 2019 19:38:36 -0800 Subject: [PATCH 0094/1436] pinctrl: cherryview: fix Strago DMI workaround Well, hopefully 3rd time is a charm. We tried making that check DMI_BIOS_VERSION and DMI_BOARD_VERSION, but the real one is DMI_PRODUCT_VERSION. Fixes: 86c5dd6860a6 ("pinctrl: cherryview: limit Strago DMI workarounds to version 1.0") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=197953 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1631930 Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Reviewed-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-cherryview.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 05044e323ea5..03ec7a5d9d0b 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1513,7 +1513,7 @@ static const struct dmi_system_id chv_no_valid_mask[] = { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), DMI_MATCH(DMI_PRODUCT_FAMILY, "Intel_Strago"), - DMI_MATCH(DMI_BOARD_VERSION, "1.0"), + DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), }, }, { @@ -1521,7 +1521,7 @@ static const struct dmi_system_id chv_no_valid_mask[] = { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "HP"), DMI_MATCH(DMI_PRODUCT_NAME, "Setzer"), - DMI_MATCH(DMI_BOARD_VERSION, "1.0"), + DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), }, }, { @@ -1529,7 +1529,7 @@ static const struct dmi_system_id chv_no_valid_mask[] = { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), DMI_MATCH(DMI_PRODUCT_NAME, "Cyan"), - DMI_MATCH(DMI_BOARD_VERSION, "1.0"), + DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), }, }, { @@ -1537,7 +1537,7 @@ static const struct dmi_system_id chv_no_valid_mask[] = { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), DMI_MATCH(DMI_PRODUCT_NAME, "Celes"), - DMI_MATCH(DMI_BOARD_VERSION, "1.0"), + DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), }, }, {} From f165988b77ef849eb0c1aebd94fe778024f88314 Mon Sep 17 00:00:00 2001 From: Jason Kridner Date: Fri, 11 Jan 2019 10:02:13 -0500 Subject: [PATCH 0095/1436] pinctrl: mcp23s08: spi: Fix regmap allocation for mcp23s18 Fixes issue created by 9b3e4207661e67f04c72af15e29f74cd944f5964. It wasn't possible for one_regmap_config to be non-NULL at the point it was tested for mcp23s18 devices. Applied the same pattern of allocating one_regmap_config using devm_kmemdump() and then initializing the local regmap structure from that. Signed-off-by: Jason Kridner Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-mcp23s08.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c index b03481ef99a1..98905d4a79ca 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08.c +++ b/drivers/pinctrl/pinctrl-mcp23s08.c @@ -832,8 +832,13 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, break; case MCP_TYPE_S18: + one_regmap_config = + devm_kmemdup(dev, &mcp23x17_regmap, + sizeof(struct regmap_config), GFP_KERNEL); + if (!one_regmap_config) + return -ENOMEM; mcp->regmap = devm_regmap_init(dev, &mcp23sxx_spi_regmap, mcp, - &mcp23x17_regmap); + one_regmap_config); mcp->reg_shift = 1; mcp->chip.ngpio = 16; mcp->chip.label = "mcp23s18"; From 1497e804d1a6e2bd9107ddf64b0310449f4673eb Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Sat, 19 Jan 2019 00:12:39 -0800 Subject: [PATCH 0096/1436] perf tools: Handle TOPOLOGY headers with no CPU This patch fixes an issue in cpumap.c when used with the TOPOLOGY header. In some configurations, some NUMA nodes may have no CPU (empty cpulist). Yet a cpumap map must be created otherwise perf abort with an error. This patch handles this case by creating a dummy map. Before: $ perf record -o - -e cycles noploop 2 | perf script -i - 0x6e8 [0x6c]: failed to process type: 80 After: $ perf record -o - -e cycles noploop 2 | perf script -i - noploop for 2 seconds Signed-off-by: Stephane Eranian Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1547885559-1657-1-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 1ccbd3342069..383674f448fc 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -134,7 +134,12 @@ struct cpu_map *cpu_map__new(const char *cpu_list) if (!cpu_list) return cpu_map__read_all_cpu_map(); - if (!isdigit(*cpu_list)) + /* + * must handle the case of empty cpumap to cover + * TOPOLOGY header for NUMA nodes with no CPU + * ( e.g., because of CPU hotplug) + */ + if (!isdigit(*cpu_list) && *cpu_list != '\0') goto out; while (isdigit(*cpu_list)) { @@ -181,8 +186,10 @@ struct cpu_map *cpu_map__new(const char *cpu_list) if (nr_cpus > 0) cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else + else if (*cpu_list != '\0') cpus = cpu_map__default_new(); + else + cpus = cpu_map__dummy_new(); invalid: free(tmp_cpus); out: From da06d568386877809532e8ec678f4a5e300f0951 Mon Sep 17 00:00:00 2001 From: He Kuang Date: Mon, 21 Jan 2019 00:05:22 +0800 Subject: [PATCH 0097/1436] perf top: Fix wrong hottest instruction highlighted The annotation line percentage is compared and inserted into the rbtree, but the percent field of 'struct annotation_data' is an array, the comparison result between them is the address difference. This patch compares the right slot of percent array according to opts->percent_type and makes things right. The problem can be reproduced by pressing 'H' in perf top annotation view. It should highlight the instruction line which has the highest sampling percentage. Signed-off-by: He Kuang Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190120160523.4391-1-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 1d00e5ec7906..82e16bf84466 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -224,20 +224,24 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) return ret; } -static int disasm__cmp(struct annotation_line *a, struct annotation_line *b) +static double disasm__cmp(struct annotation_line *a, struct annotation_line *b, + int percent_type) { int i; for (i = 0; i < a->data_nr; i++) { - if (a->data[i].percent == b->data[i].percent) + if (a->data[i].percent[percent_type] == b->data[i].percent[percent_type]) continue; - return a->data[i].percent < b->data[i].percent; + return a->data[i].percent[percent_type] - + b->data[i].percent[percent_type]; } return 0; } -static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line *al) +static void disasm_rb_tree__insert(struct annotate_browser *browser, + struct annotation_line *al) { + struct rb_root *root = &browser->entries; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; struct annotation_line *l; @@ -246,7 +250,7 @@ static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line parent = *p; l = rb_entry(parent, struct annotation_line, rb_node); - if (disasm__cmp(al, l)) + if (disasm__cmp(al, l, browser->opts->percent_type) < 0) p = &(*p)->rb_left; else p = &(*p)->rb_right; @@ -329,7 +333,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, RB_CLEAR_NODE(&pos->al.rb_node); continue; } - disasm_rb_tree__insert(&browser->entries, &pos->al); + disasm_rb_tree__insert(browser, &pos->al); } pthread_mutex_unlock(¬es->lock); From 8bf8c6da53c2265aea365a1de6038f118f522113 Mon Sep 17 00:00:00 2001 From: Tony Jones Date: Sun, 20 Jan 2019 11:14:14 -0800 Subject: [PATCH 0098/1436] perf script: Fix crash when processing recorded stat data While updating perf to work with Python3 and Python2 I noticed that the stat-cpi script was dumping core. $ perf stat -e cycles,instructions record -o /tmp/perf.data /bin/false Performance counter stats for '/bin/false': 802,148 cycles 604,622 instructions 802,148 cycles 604,622 instructions 0.001445842 seconds time elapsed $ perf script -i /tmp/perf.data -s scripts/python/stat-cpi.py Segmentation fault (core dumped) ... ... rblist=rblist@entry=0xb2a200 , new_entry=new_entry@entry=0x7ffcb755c310) at util/rblist.c:33 ctx=, type=, create=, cpu=, evsel=) at util/stat-shadow.c:118 ctx=, type=, st=) at util/stat-shadow.c:196 count=count@entry=727442, cpu=cpu@entry=0, st=0xb2a200 ) at util/stat-shadow.c:239 config=config@entry=0xafeb40 , counter=counter@entry=0x133c6e0) at util/stat.c:372 ... ... The issue is that since 1fcd03946b52 perf_stat__update_shadow_stats now calls update_runtime_stat passing rt_stat rather than calling update_stats but perf_stat__init_shadow_stats has never been called to initialize rt_stat in the script path processing recorded stat data. Since I can't see any reason why perf_stat__init_shadow_stats() is presently initialized like it is in builtin-script.c::perf_sample__fprint_metric() [4bd1bef8bba2f] I'm proposing it instead be initialized once in __cmd_script Committer testing: After applying the patch: # perf script -i /tmp/perf.data -s tools/perf/scripts/python/stat-cpi.py 0.001970: cpu -1, thread -1 -> cpi 1.709079 (1075684/629394) # No segfault. Signed-off-by: Tony Jones Reviewed-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: Ravi Bangoria Cc: Andi Kleen Cc: Jin Yao Fixes: 1fcd03946b52 ("perf stat: Update per-thread shadow stats") Link: http://lkml.kernel.org/r/20190120191414.12925-1-tonyj@suse.de Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 357906ed1898..ac221f137ed2 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1681,13 +1681,8 @@ static void perf_sample__fprint_metric(struct perf_script *script, .force_header = false, }; struct perf_evsel *ev2; - static bool init; u64 val; - if (!init) { - perf_stat__init_shadow_stats(); - init = true; - } if (!evsel->stats) perf_evlist__alloc_stats(script->session->evlist, false); if (evsel_script(evsel->leader)->gnum++ == 0) @@ -2359,6 +2354,8 @@ static int __cmd_script(struct perf_script *script) signal(SIGINT, sig_handler); + perf_stat__init_shadow_stats(); + /* override event processing functions */ if (script->show_task_events) { script->tool.comm = process_comm_event; From 78ddc9b4417dacfbababb1c02f9987ebcc75c786 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 21 Jan 2019 14:27:11 -0200 Subject: [PATCH 0099/1436] ASoC: MAINTAINERS: fsl: Change Fabio's email address I prefer to use my personal email address for kernel related work. Signed-off-by: Fabio Estevam Signed-off-by: Mark Brown --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 5b9c6af98283..9cd09c593c34 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6069,7 +6069,7 @@ FREESCALE SOC SOUND DRIVERS M: Timur Tabi M: Nicolin Chen M: Xiubo Li -R: Fabio Estevam +R: Fabio Estevam L: alsa-devel@alsa-project.org (moderated for non-subscribers) L: linuxppc-dev@lists.ozlabs.org S: Maintained @@ -10748,7 +10748,7 @@ F: include/linux/nvmem-consumer.h F: include/linux/nvmem-provider.h NXP SGTL5000 DRIVER -M: Fabio Estevam +M: Fabio Estevam L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/sound/sgtl5000.txt From f832898dff55f243081697b6e5773f7318c8a344 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 19 Jan 2019 16:00:26 +0100 Subject: [PATCH 0100/1436] mt76: do not report out-of-range rx nss Take into account device rx stream in mt76x02_mac_process_rate in order to not report wrong number of rx spatial stream to mac80211. The issue has been reported on mt76x0 device which is 1x1:1 however the hw sometimes reports rx nss equal to 2 Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index 68d01e136fa0..ce9bb702045f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -476,7 +476,9 @@ out: } static int -mt76x02_mac_process_rate(struct mt76_rx_status *status, u16 rate) +mt76x02_mac_process_rate(struct mt76x02_dev *dev, + struct mt76_rx_status *status, + u16 rate) { u8 idx = FIELD_GET(MT_RXWI_RATE_INDEX, rate); @@ -508,11 +510,15 @@ mt76x02_mac_process_rate(struct mt76_rx_status *status, u16 rate) status->encoding = RX_ENC_HT; status->rate_idx = idx; break; - case MT_PHY_TYPE_VHT: + case MT_PHY_TYPE_VHT: { + u8 n_rxstream = dev->mt76.chainmask & 0xf; + status->encoding = RX_ENC_VHT; status->rate_idx = FIELD_GET(MT_RATE_INDEX_VHT_IDX, idx); - status->nss = FIELD_GET(MT_RATE_INDEX_VHT_NSS, idx) + 1; + status->nss = min_t(u8, n_rxstream, + FIELD_GET(MT_RATE_INDEX_VHT_NSS, idx) + 1); break; + } default: return -EINVAL; } @@ -656,7 +662,7 @@ int mt76x02_mac_process_rx(struct mt76x02_dev *dev, struct sk_buff *skb, status->tid = FIELD_GET(MT_RXWI_TID, tid_sn); status->seqno = FIELD_GET(MT_RXWI_SN, tid_sn); - return mt76x02_mac_process_rate(status, rate); + return mt76x02_mac_process_rate(dev, status, rate); } void mt76x02_mac_poll_tx_status(struct mt76x02_dev *dev, bool irq) From 8af526e035357d2df1d7cacc51bcde0a5d9f9f9f Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 15 Jan 2019 16:45:32 +0200 Subject: [PATCH 0101/1436] RDMA/mlx5: Fix check for supported user flags when creating a QP When the flags verification was added two flags were missed from the check: * MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC * MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC This causes user applications that were using these flags to break. Fixes: 2e43bb31b8df ("IB/mlx5: Verify that driver supports user flags") Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index dd2ae640bc84..7db778d96ef5 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1912,14 +1912,16 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, } if (!check_flags_mask(ucmd.flags, + MLX5_QP_FLAG_ALLOW_SCATTER_CQE | + MLX5_QP_FLAG_BFREG_INDEX | + MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE | + MLX5_QP_FLAG_SCATTER_CQE | MLX5_QP_FLAG_SIGNATURE | - MLX5_QP_FLAG_SCATTER_CQE | - MLX5_QP_FLAG_TUNNEL_OFFLOADS | - MLX5_QP_FLAG_BFREG_INDEX | - MLX5_QP_FLAG_TYPE_DCT | - MLX5_QP_FLAG_TYPE_DCI | - MLX5_QP_FLAG_ALLOW_SCATTER_CQE | - MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE)) + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC | + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC | + MLX5_QP_FLAG_TUNNEL_OFFLOADS | + MLX5_QP_FLAG_TYPE_DCI | + MLX5_QP_FLAG_TYPE_DCT)) return -EINVAL; err = get_qp_user_index(to_mucontext(pd->uobject->context), From f45f8edbe1d345273ba0d8d8a9872bb30966869b Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 15 Jan 2019 16:45:48 +0200 Subject: [PATCH 0102/1436] IB/mlx4: Fix using wrong function to destroy sqp AHs under SRIOV The commit cited below replaced rdma_create_ah with mlx4_ib_create_slave_ah when creating AHs for the paravirtualized special QPs. However, this change also required replacing rdma_destroy_ah with mlx4_ib_destroy_ah in the affected flows. The commit missed 3 places where rdma_destroy_ah should have been replaced with mlx4_ib_destroy_ah. As a result, the pd usecount was decremented when the ah was destroyed -- although the usecount was NOT incremented when the ah was created. This caused the pd usecount to become negative, and resulted in the WARN_ON stack trace below when the mlx4_ib.ko module was unloaded: WARNING: CPU: 3 PID: 25303 at drivers/infiniband/core/verbs.c:329 ib_dealloc_pd+0x6d/0x80 [ib_core] Modules linked in: rdma_ucm rdma_cm iw_cm ib_cm ib_umad mlx4_ib(-) ib_uverbs ib_core mlx4_en mlx4_core nfsv3 nfs fscache configfs xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c ipt_REJECT nf_reject_ipv4 tun ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter bridge stp llc dm_mirror dm_region_hash dm_log dm_mod dax rndis_wlan rndis_host coretemp kvm_intel cdc_ether kvm usbnet iTCO_wdt iTCO_vendor_support cfg80211 irqbypass lpc_ich ipmi_si i2c_i801 mii pcspkr i2c_core mfd_core ipmi_devintf i7core_edac ipmi_msghandler ioatdma pcc_cpufreq dca acpi_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables ext4 mbcache jbd2 sr_mod cdrom ata_generic pata_acpi mptsas scsi_transport_sas mptscsih crc32c_intel ata_piix bnx2 mptbase ipv6 crc_ccitt autofs4 [last unloaded: mlx4_core] CPU: 3 PID: 25303 Comm: modprobe Tainted: G W I 5.0.0-rc1-net-mlx4+ #1 Hardware name: IBM -[7148ZV6]-/Node 1, System Card, BIOS -[MLE170CUS-1.70]- 09/23/2011 RIP: 0010:ib_dealloc_pd+0x6d/0x80 [ib_core] Code: 00 00 85 c0 75 02 5b c3 80 3d aa 87 03 00 00 75 f5 48 c7 c7 88 d7 8f a0 31 c0 c6 05 98 87 03 00 01 e8 07 4c 79 e0 0f 0b 5b c3 <0f> 0b eb be 0f 0b eb ab 90 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 RSP: 0018:ffffc90005347e30 EFLAGS: 00010282 RAX: 00000000ffffffea RBX: ffff8888589e9540 RCX: 0000000000000006 RDX: 0000000000000006 RSI: ffff88885d57ad40 RDI: 0000000000000000 RBP: ffff88885b029c00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000004 R12: ffff8887f06c0000 R13: ffff8887f06c13e8 R14: 0000000000000000 R15: 0000000000000000 FS: 00007fd6743c6740(0000) GS:ffff88887fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000ed1038 CR3: 00000007e3156000 CR4: 00000000000006e0 Call Trace: mlx4_ib_close_sriov+0x125/0x180 [mlx4_ib] mlx4_ib_remove+0x57/0x1f0 [mlx4_ib] mlx4_remove_device+0x92/0xa0 [mlx4_core] mlx4_unregister_interface+0x39/0x90 [mlx4_core] mlx4_ib_cleanup+0xc/0xd7 [mlx4_ib] __x64_sys_delete_module+0x17d/0x290 ? trace_hardirqs_off_thunk+0x1a/0x1c ? do_syscall_64+0x12/0x180 do_syscall_64+0x4a/0x180 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 5e62d5ff1b9a ("IB/mlx4: Create slave AH's directly") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/mad.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 25439da8976c..936ee1314bcd 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1411,7 +1411,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr); if (sqp->tx_ring[wire_tx_ix].ah) - rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0); + mlx4_ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0); sqp->tx_ring[wire_tx_ix].ah = ah; ib_dma_sync_single_for_cpu(&dev->ib_dev, sqp->tx_ring[wire_tx_ix].buf.map, @@ -1902,7 +1902,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) if (wc.status == IB_WC_SUCCESS) { switch (wc.opcode) { case IB_WC_SEND: - rdma_destroy_ah(sqp->tx_ring[wc.wr_id & + mlx4_ib_destroy_ah(sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; @@ -1931,7 +1931,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) " status = %d, wrid = 0x%llx\n", ctx->slave, wc.status, wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { - rdma_destroy_ah(sqp->tx_ring[wc.wr_id & + mlx4_ib_destroy_ah(sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; From 904bba211acc2112fdf866e5a2bc6cd9ecd0de1b Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Thu, 17 Jan 2019 12:41:32 -0800 Subject: [PATCH 0103/1436] IB/{hfi1, qib}: Fix WC.byte_len calculation for UD_SEND_WITH_IMM The work completion length for a receiving a UD send with immediate is short by 4 bytes causing application using this opcode to fail. The UD receive logic incorrectly subtracts 4 bytes for immediate value. These bytes are already included in header length and are used to calculate header/payload split, so the result is these 4 bytes are subtracted twice, once when the header length subtracted from the overall length and once again in the UD opcode specific path. Remove the extra subtraction when handling the opcode. Fixes: 7724105686e7 ("IB/hfi1: add driver files") Reviewed-by: Michael J. Ruhl Signed-off-by: Brian Welty Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/ud.c | 1 - drivers/infiniband/hw/qib/qib_ud.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 88242fe95eaa..bf96067876c9 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -987,7 +987,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { wc.ex.imm_data = packet->ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; - tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { wc.ex.imm_data = 0; wc.wc_flags = 0; diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 868da0ece7ba..445ea19a2ec8 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -512,7 +512,6 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { wc.ex.imm_data = ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; - tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { wc.ex.imm_data = 0; wc.wc_flags = 0; From 7709b0dc265f28695487712c45f02bbd1f98415d Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 17 Jan 2019 12:42:04 -0800 Subject: [PATCH 0104/1436] IB/hfi1: Remove overly conservative VM_EXEC flag check Applications that use the stack for execution purposes cause userspace PSM jobs to fail during mmap(). Both Fortran (non-standard format parsing) and C (callback functions located in the stack) applications can be written such that stack execution is required. The linker notes this via the gnu_stack ELF flag. This causes READ_IMPLIES_EXEC to be set which forces all PROT_READ mmaps to have PROT_EXEC for the process. Checking for VM_EXEC bit and failing the request with EPERM is overly conservative and will break any PSM application using executable stacks. Cc: #v4.14+ Fixes: 12220267645c ("IB/hfi: Protect against writable mmap") Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/file_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index c22ebc774a6a..f9a7e9d29c8b 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -488,7 +488,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) vmf = 1; break; case STATUS: - if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) { + if (flags & VM_WRITE) { ret = -EPERM; goto done; } From 09ce351dff8e7636af0beb72cd4a86c3904a0500 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 17 Jan 2019 12:42:16 -0800 Subject: [PATCH 0105/1436] IB/hfi1: Add limit test for RC/UC send via loopback Fix potential memory corruption and panic in loopback for IB_WR_SEND variants. The code blindly assumes the posted length will fit in the fetched rwqe, which is not a valid assumption. Fix by adding a limit test, and triggering the appropriate send completion and putting the QP in an error state. This mimics the handling for non-loopback QPs. Fixes: 15703461533a ("IB/{hfi1, qib, rdmavt}: Move ruc_loopback to rdmavt") Cc: #v4.20+ Reviewed-by: Michael J. Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/qp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index a1bd8cfc2c25..c6cc3e4ab71d 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -2910,6 +2910,8 @@ send: goto op_err; if (!ret) goto rnr_nak; + if (wqe->length > qp->r_len) + goto inv_err; break; case IB_WR_RDMA_WRITE_WITH_IMM: @@ -3078,7 +3080,10 @@ op_err: goto err; inv_err: - send_status = IB_WC_REM_INV_REQ_ERR; + send_status = + sqp->ibqp.qp_type == IB_QPT_RC ? + IB_WC_REM_INV_REQ_ERR : + IB_WC_SUCCESS; wc.status = IB_WC_LOC_QP_OP_ERR; goto err; From d79af7242bb237d00e40092810e6828fbb929d2d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 10 Jan 2019 14:02:24 -0700 Subject: [PATCH 0106/1436] RDMA/device: Expose ib_device_try_get(() It turns out future patches need this capability quite widely now, not just for netlink, so provide two global functions to manage the registration lock refcount. This also moves the point the lock becomes 1 to within ib_register_device() so that the semantics of the public API are very sane and clear. Calling ib_device_try_get() will fail on devices that are only allocated but not yet registered. Signed-off-by: Jason Gunthorpe Reviewed-by: Steve Wise Reviewed-by: Parav Pandit --- drivers/infiniband/core/core_priv.h | 1 - drivers/infiniband/core/device.c | 13 ++++++++++--- include/rdma/ib_verbs.h | 24 ++++++++++++++++++++++-- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 3cd830d52967..616734313f0c 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -267,7 +267,6 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map, #endif struct ib_device *ib_device_get_by_index(u32 ifindex); -void ib_device_put(struct ib_device *device); /* RDMA device netlink */ void nldev_init(void); void nldev_exit(void); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 8872453e26c0..238ec42778ef 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -156,19 +156,26 @@ struct ib_device *ib_device_get_by_index(u32 index) down_read(&lists_rwsem); device = __ib_device_get_by_index(index); if (device) { - /* Do not return a device if unregistration has started. */ - if (!refcount_inc_not_zero(&device->refcount)) + if (!ib_device_try_get(device)) device = NULL; } up_read(&lists_rwsem); return device; } +/** + * ib_device_put - Release IB device reference + * @device: device whose reference to be released + * + * ib_device_put() releases reference to the IB device to allow it to be + * unregistered and eventually free. + */ void ib_device_put(struct ib_device *device) { if (refcount_dec_and_test(&device->refcount)) complete(&device->unreg_completion); } +EXPORT_SYMBOL(ib_device_put); static struct ib_device *__ib_device_get_by_name(const char *name) { @@ -303,7 +310,6 @@ struct ib_device *ib_alloc_device(size_t size) rwlock_init(&device->client_data_lock); INIT_LIST_HEAD(&device->client_data_list); INIT_LIST_HEAD(&device->port_list); - refcount_set(&device->refcount, 1); init_completion(&device->unreg_completion); return device; @@ -620,6 +626,7 @@ int ib_register_device(struct ib_device *device, const char *name, goto cg_cleanup; } + refcount_set(&device->refcount, 1); device->reg_state = IB_DEV_REGISTERED; list_for_each_entry(client, &client_list, list) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index a3ceed3a040a..80debf5982ac 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2579,9 +2579,10 @@ struct ib_device { const struct uapi_definition *driver_def; enum rdma_driver_id driver_id; + /* - * Provides synchronization between device unregistration and netlink - * commands on a device. To be used only by core. + * Positive refcount indicates that the device is currently + * registered and cannot be unregistered. */ refcount_t refcount; struct completion unreg_completion; @@ -3926,6 +3927,25 @@ static inline bool ib_access_writable(int access_flags) int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); +/** + * ib_device_try_get: Hold a registration lock + * device: The device to lock + * + * A device under an active registration lock cannot become unregistered. It + * is only possible to obtain a registration lock on a device that is fully + * registered, otherwise this function returns false. + * + * The registration lock is only necessary for actions which require the + * device to still be registered. Uses that only require the device pointer to + * be valid should use get_device(&ibdev->dev) to hold the memory. + * + */ +static inline bool ib_device_try_get(struct ib_device *dev) +{ + return refcount_inc_not_zero(&dev->refcount); +} + +void ib_device_put(struct ib_device *device); struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr); From 951d01b96f174ded6180e7e4e14929ef22e7da7e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 11 Jan 2019 19:31:24 -0700 Subject: [PATCH 0107/1436] IB/mlx5: Fix how advise_mr() launches async work Work must hold a kref on the ib_device otherwise the dev pointer can become free before the work runs. This can happen because the work is being pushed onto the system work queue which is not flushed during driver unregister. Remove the bogus use of 'reg_state': - While in uverbs the reg_state is guaranteed to always be REGISTERED - Testing reg_state with no locking is bogus. Use ib_device_try_get() to get back into a region that prevents unregistration. For now continue with a flow that is similar to the existing code. Fixes: 813e90b1aeaa ("IB/mlx5: Add advise_mr() support") Signed-off-by: Jason Gunthorpe Reviewed-by: Moni Shoua --- drivers/infiniband/hw/mlx5/odp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 01e0f6200631..4ee32964e1dd 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1595,10 +1595,12 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *work) struct prefetch_mr_work *w = container_of(work, struct prefetch_mr_work, work); - if (w->dev->ib_dev.reg_state == IB_DEV_REGISTERED) + if (ib_device_try_get(&w->dev->ib_dev)) { mlx5_ib_prefetch_sg_list(w->dev, w->pf_flags, w->sg_list, w->num_sge); - + ib_device_put(&w->dev->ib_dev); + } + put_device(&w->dev->ib_dev.dev); kfree(w); } @@ -1617,15 +1619,13 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, return mlx5_ib_prefetch_sg_list(dev, pf_flags, sg_list, num_sge); - if (dev->ib_dev.reg_state != IB_DEV_REGISTERED) - return -ENODEV; - work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL); if (!work) return -ENOMEM; memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge)); + get_device(&dev->ib_dev.dev); work->dev = dev; work->pf_flags = pf_flags; work->num_sge = num_sge; From 10098709b4ee6f6f19f25ba81d9c6f83518c584c Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 15 Jan 2019 10:45:43 +0800 Subject: [PATCH 0108/1436] pinctrl: sunxi: Correct number of IRQ banks on H6 main pin controller The H6 main pin controller has four banks of interrupt-triggering pins. The driver as originally submitted only specified three, but had pin descriptions referencing a fourth bank. This results in a out-of-bounds access into .irq_array of struct sunxi_pinctrl. This however did not result in a crash until v4.20, with commit a66d972465d1 ("devres: Align data[] to ARCH_KMALLOC_MINALIGN"), which changed the alignment of memory region returned by devm_kcalloc(). The increase likely moved the out-of-bounds access into the next, unmapped page. With KASAN on, the bug is quite clear: BUG: KASAN: slab-out-of-bounds in sunxi_pinctrl_init_with_variant+0x49c/0x12b8 Write of size 4 at addr ffff80002c680280 by task swapper/0/1 CPU: 2 PID: 1 Comm: swapper/0 Not tainted 5.0.0-rc1-00016-gc480a5e6a077 #3 Hardware name: OrangePi Lite2 (DT) Call trace: dump_backtrace+0x0/0x220 show_stack+0x14/0x20 dump_stack+0xac/0xd4 print_address_description+0x60/0x25c kasan_report+0x14c/0x1ac __asan_store4+0x80/0xa0 sunxi_pinctrl_init_with_variant+0x49c/0x12b8 h6_pinctrl_probe+0x18/0x20 platform_drv_probe+0x6c/0xc8 really_probe+0x244/0x4b0 driver_probe_device.part.4+0x11c/0x164 __driver_attach+0x120/0x190 bus_for_each_dev+0xe8/0x158 driver_attach+0x30/0x40 bus_add_driver+0x308/0x318 driver_register+0xbc/0x1d0 __platform_driver_register+0x7c/0x88 h6_pinctrl_driver_init+0x18/0x20 do_one_initcall+0xd4/0x208 kernel_init_freeable+0x230/0x2c8 kernel_init+0x10/0x108 ret_from_fork+0x10/0x1c Allocated by task 1: kasan_kmalloc.part.0+0x4c/0x100 kasan_kmalloc+0xc4/0xe8 kasan_slab_alloc+0x14/0x20 __kmalloc_track_caller+0x130/0x238 devm_kmalloc+0x34/0xd0 sunxi_pinctrl_init_with_variant+0x1d8/0x12b8 h6_pinctrl_probe+0x18/0x20 platform_drv_probe+0x6c/0xc8 really_probe+0x244/0x4b0 driver_probe_device.part.4+0x11c/0x164 __driver_attach+0x120/0x190 bus_for_each_dev+0xe8/0x158 driver_attach+0x30/0x40 bus_add_driver+0x308/0x318 driver_register+0xbc/0x1d0 __platform_driver_register+0x7c/0x88 h6_pinctrl_driver_init+0x18/0x20 do_one_initcall+0xd4/0x208 kernel_init_freeable+0x230/0x2c8 kernel_init+0x10/0x108 ret_from_fork+0x10/0x1c Freed by task 0: (stack is not available) The buggy address belongs to the object at ffff80002c680080 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 0 bytes to the right of 512-byte region [ffff80002c680080, ffff80002c680280) The buggy address belongs to the page: page:ffff7e0000b1a000 count:1 mapcount:0 mapping:ffff80002e00c780 index:0xffff80002c683c80 compound_mapcount: 0 flags: 0x10200(slab|head) raw: 0000000000010200 ffff80002e003a10 ffff80002e003a10 ffff80002e00c780 raw: ffff80002c683c80 0000000000100001 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff80002c680180: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff80002c680200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffff80002c680280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff80002c680300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff80002c680380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc Correct the number of IRQ banks so there are no more mismatches. Fixes: c8a830904991 ("pinctrl: sunxi: add support for the Allwinner H6 main pin controller") Cc: Signed-off-by: Chen-Yu Tsai Tested-by: Neil Armstrong Acked-by: Maxime Ripard Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-sun50i-h6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sun50i-h6.c b/drivers/pinctrl/sunxi/pinctrl-sun50i-h6.c index aa8b58125568..ef4268cc6227 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun50i-h6.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun50i-h6.c @@ -588,7 +588,7 @@ static const unsigned int h6_irq_bank_map[] = { 1, 5, 6, 7 }; static const struct sunxi_pinctrl_desc h6_pinctrl_data = { .pins = h6_pins, .npins = ARRAY_SIZE(h6_pins), - .irq_banks = 3, + .irq_banks = 4, .irq_bank_map = h6_irq_bank_map, .irq_read_needs_mux = true, }; From 51d8838d66d3249508940d8f59b07701f2129723 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Thu, 17 Jan 2019 12:29:02 -0700 Subject: [PATCH 0109/1436] iommu/amd: Call free_iova_fast with pfn in map_sg In the error path of map_sg, free_iova_fast is being called with address instead of the pfn. This results in a bad value getting into the rcache, and can result in hitting a BUG_ON when iova_magazine_free_pfns is called. Cc: Joerg Roedel Cc: Suravee Suthikulpanit Signed-off-by: Jerry Snitselaar Fixes: 80187fd39dcb ("iommu/amd: Optimize map_sg and unmap_sg") Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 87ba23a75b38..418df8ff3e50 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2623,7 +2623,7 @@ out_unmap: } out_free_iova: - free_iova_fast(&dma_dom->iovad, address, npages); + free_iova_fast(&dma_dom->iovad, address >> PAGE_SHIFT, npages); out_err: return 0; From f1724c0883bb0ce93b8dcb94b53dcca3b75ac9a7 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Sat, 19 Jan 2019 10:38:05 -0700 Subject: [PATCH 0110/1436] iommu/amd: Unmap all mapped pages in error path of map_sg In the error path of map_sg there is an incorrect if condition for breaking out of the loop that searches the scatterlist for mapped pages to unmap. Instead of breaking out of the loop once all the pages that were mapped have been unmapped, it will break out of the loop after it has unmapped 1 page. Fix the condition, so it breaks out of the loop only after all the mapped pages have been unmapped. Fixes: 80187fd39dcb ("iommu/amd: Optimize map_sg and unmap_sg") Cc: Joerg Roedel Signed-off-by: Jerry Snitselaar Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 418df8ff3e50..843b825cc1c4 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2617,7 +2617,7 @@ out_unmap: bus_addr = address + s->dma_address + (j << PAGE_SHIFT); iommu_unmap_page(domain, bus_addr, PAGE_SIZE); - if (--mapped_pages) + if (--mapped_pages == 0) goto out_free_iova; } } From 3a34c986324c07dde32903f7bb262e6138e77c2a Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 19 Dec 2018 16:16:03 +0800 Subject: [PATCH 0111/1436] cacheinfo: Keep the old value if of_property_read_u32 fails Commit 448a5a552f336bd7b847b1951 ("drivers: base: cacheinfo: use OF property_read_u32 instead of get_property,read_number") makes cache size and number_of_sets be 0 if DT doesn't provide there values. I think this is unreasonable so make them keep the old values, which is the same as old kernels. Fixes: 448a5a552f33 ("drivers: base: cacheinfo: use OF property_read_u32 instead of get_property,read_number") Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen Reviewed-by: Sudeep Holla Signed-off-by: Greg Kroah-Hartman --- drivers/base/cacheinfo.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index cf78fa6d470d..a7359535caf5 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -79,8 +79,7 @@ static void cache_size(struct cacheinfo *this_leaf, struct device_node *np) ct_idx = get_cacheinfo_idx(this_leaf->type); propname = cache_type_info[ct_idx].size_prop; - if (of_property_read_u32(np, propname, &this_leaf->size)) - this_leaf->size = 0; + of_property_read_u32(np, propname, &this_leaf->size); } /* not cache_line_size() because that's a macro in include/linux/cache.h */ @@ -114,8 +113,7 @@ static void cache_nr_sets(struct cacheinfo *this_leaf, struct device_node *np) ct_idx = get_cacheinfo_idx(this_leaf->type); propname = cache_type_info[ct_idx].nr_sets_prop; - if (of_property_read_u32(np, propname, &this_leaf->number_of_sets)) - this_leaf->number_of_sets = 0; + of_property_read_u32(np, propname, &this_leaf->number_of_sets); } static void cache_associativity(struct cacheinfo *this_leaf) From 198bc3252ea3a45b0c5d500e6a5b91cfdd08f001 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 16 Jan 2019 20:11:44 +0100 Subject: [PATCH 0112/1436] iommu/vt-d: Fix memory leak in intel_iommu_put_resv_regions() Commit 9d3a4de4cb8d ("iommu: Disambiguate MSI region types") changed the reserved region type in intel_iommu_get_resv_regions() from IOMMU_RESV_RESERVED to IOMMU_RESV_MSI, but it forgot to also change the type in intel_iommu_put_resv_regions(). This leads to a memory leak, because now the check in intel_iommu_put_resv_regions() for IOMMU_RESV_RESERVED will never be true, and no allocated regions will be freed. Fix this by changing the region type in intel_iommu_put_resv_regions() to IOMMU_RESV_MSI, matching the type of the allocated regions. Fixes: 9d3a4de4cb8d ("iommu: Disambiguate MSI region types") Cc: # v4.11+ Signed-off-by: Gerald Schaefer Reviewed-by: Eric Auger Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 2bd9ac285c0d..1457f931218e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5294,7 +5294,7 @@ static void intel_iommu_put_resv_regions(struct device *dev, struct iommu_resv_region *entry, *next; list_for_each_entry_safe(entry, next, head, list) { - if (entry->type == IOMMU_RESV_RESERVED) + if (entry->type == IOMMU_RESV_MSI) kfree(entry); } } From 68f7030f2d5f9592fcae405296376073eff69cbd Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 22 Jan 2019 16:33:53 +0100 Subject: [PATCH 0113/1436] mt76: fix per-chain signal strength reporting Fix an off-by-one error that resulted in not reporting the signal strength for the first chain Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index ce9bb702045f..63fa27d2c404 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -648,7 +648,7 @@ int mt76x02_mac_process_rx(struct mt76x02_dev *dev, struct sk_buff *skb, status->chains = BIT(0); signal = mt76x02_mac_get_rssi(dev, rxwi->rssi[0], 0); - for (i = 1; i < nstreams; i++) { + for (i = 0; i < nstreams; i++) { status->chains |= BIT(i); status->chain_signal[i] = mt76x02_mac_get_rssi(dev, rxwi->rssi[i], From d225581df3147060bc99e931b11f7cf2dcb1b2ca Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 21 Jan 2019 17:33:38 +0100 Subject: [PATCH 0114/1436] mt76: avoid scheduling tx queues for powersave stations In case a tx queue wake call arrives after a client has transitioned to powersave, make sure that the queue is not kept active until the client has left powersave mode Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/tx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index 1ea08ce62713..ef38e8626da9 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -396,6 +396,11 @@ mt76_txq_send_burst(struct mt76_dev *dev, struct mt76_queue *hwq, bool probe; int idx; + if (test_bit(MT_WCID_FLAG_PS, &wcid->flags)) { + *empty = true; + return 0; + } + skb = mt76_txq_dequeue(dev, mtxq, false); if (!skb) { *empty = true; From 8077ec011b1ea26abb7ca786f28ecccfb352717f Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Tue, 22 Jan 2019 15:50:09 +0800 Subject: [PATCH 0115/1436] ASoC: rt5682: Correct the setting while select ASRC clk for AD/DA filter AD/DA ASRC function control two ASRC clock sources separately. Whether AD/DA filter select which clock source, we enable AD/DA ASRC function for all cases. Signed-off-by: Shuming Fan Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 89c43b26c379..a9b91bcfcc09 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -1778,7 +1778,9 @@ static const struct snd_soc_dapm_route rt5682_dapm_routes[] = { {"ADC Stereo1 Filter", NULL, "ADC STO1 ASRC", is_using_asrc}, {"DAC Stereo1 Filter", NULL, "DAC STO1 ASRC", is_using_asrc}, {"ADC STO1 ASRC", NULL, "AD ASRC"}, + {"ADC STO1 ASRC", NULL, "DA ASRC"}, {"ADC STO1 ASRC", NULL, "CLKDET"}, + {"DAC STO1 ASRC", NULL, "AD ASRC"}, {"DAC STO1 ASRC", NULL, "DA ASRC"}, {"DAC STO1 ASRC", NULL, "CLKDET"}, From 13e62626c578d9889ebbda7c521be5adff9bef8e Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 16 Jan 2019 12:37:23 +0100 Subject: [PATCH 0116/1436] wlcore: sdio: Fixup power on/off sequence During "wlan-up", we are programming the FW into the WiFi-chip. However, re-programming the FW doesn't work, unless a power cycle of the WiFi-chip is made in-between the programmings. To conform to this requirement and to fix the regression in a simple way, let's start by allowing that the SDIO card (WiFi-chip) may stay powered on (runtime resumed) when wl12xx_sdio_power_off() returns. The intent with the current code is to treat this scenario as an error, but unfortunate this doesn't work as expected, so let's fix this. The other part is to guarantee that a power cycle of the SDIO card has been completed when wl12xx_sdio_power_on() returns, as to allow the FW programming to succeed. However, relying solely on runtime PM to deal with this isn't sufficient. For example, userspace may prevent runtime suspend via sysfs for the device that represents the SDIO card, leading to that the mmc core also keeps it powered on. For this reason, let's instead do a brute force power cycle in wl12xx_sdio_power_on(). Fixes: 728a9dc61f13 ("wlcore: sdio: Fix flakey SDIO runtime PM handling") Signed-off-by: Ulf Hansson Tested-by: Tony Lindgren Tested-by: Anders Roxell Signed-off-by: Ulf Hansson Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wlcore/sdio.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c index bd10165d7eec..4d4b07701149 100644 --- a/drivers/net/wireless/ti/wlcore/sdio.c +++ b/drivers/net/wireless/ti/wlcore/sdio.c @@ -164,6 +164,12 @@ static int wl12xx_sdio_power_on(struct wl12xx_sdio_glue *glue) } sdio_claim_host(func); + /* + * To guarantee that the SDIO card is power cycled, as required to make + * the FW programming to succeed, let's do a brute force HW reset. + */ + mmc_hw_reset(card->host); + sdio_enable_func(func); sdio_release_host(func); @@ -174,20 +180,13 @@ static int wl12xx_sdio_power_off(struct wl12xx_sdio_glue *glue) { struct sdio_func *func = dev_to_sdio_func(glue->dev); struct mmc_card *card = func->card; - int error; sdio_claim_host(func); sdio_disable_func(func); sdio_release_host(func); /* Let runtime PM know the card is powered off */ - error = pm_runtime_put(&card->dev); - if (error < 0 && error != -EBUSY) { - dev_err(&card->dev, "%s failed: %i\n", __func__, error); - - return error; - } - + pm_runtime_put(&card->dev); return 0; } From 983a5a43ec254cd5ddf3254db80ca96e8f8bb2a4 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 22 Jan 2019 09:03:08 -0800 Subject: [PATCH 0117/1436] clocksource: timer-ti-dm: Fix pwm dmtimer usage of fck reparenting Commit 84badc5ec5fc ("ARM: dts: omap4: Move l4 child devices to probe them with ti-sysc") moved some omap4 timers to probe with ti-sysc interconnect target module. Turns out this broke pwm-omap-dmtimer where we now try to reparent the clock to itself with the following: omap_dm_timer_of_set_source: failed to set parent With ti-sysc, we can now configure the clock sources in the dts with assigned-clocks and assigned-clock-parents. So we should be able to remove omap_dm_timer_of_set_source with clean-up patches later on. But for now, let's just fix it first by checking if parent and fck are the same and bail out of so. Fixes: 84badc5ec5fc ("ARM: dts: omap4: Move l4 child devices to probe them with ti-sysc") Cc: Bartosz Golaszewski Cc: Daniel Lezcano Cc: H. Nikolaus Schaller Cc: Keerthy Cc: Ladislav Michl Cc: Pavel Machek Cc: Sebastian Reichel Cc: Tero Kristo Cc: Thierry Reding Cc: Thomas Gleixner Reported-by: H. Nikolaus Schaller Tested-By: Andreas Kemnade Tested-By: H. Nikolaus Schaller Signed-off-by: Tony Lindgren --- drivers/clocksource/timer-ti-dm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c index 4cce6b224b87..3ecf84706640 100644 --- a/drivers/clocksource/timer-ti-dm.c +++ b/drivers/clocksource/timer-ti-dm.c @@ -154,6 +154,10 @@ static int omap_dm_timer_of_set_source(struct omap_dm_timer *timer) if (IS_ERR(parent)) return -ENODEV; + /* Bail out if both clocks point to fck */ + if (clk_is_match(parent, timer->fclk)) + return 0; + ret = clk_set_parent(timer->fclk, parent); if (ret < 0) pr_err("%s: failed to set parent\n", __func__); From 752bcf80f5549c9901b2e8bc77b2138de55b1026 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 18 Jan 2019 13:58:17 +0100 Subject: [PATCH 0118/1436] bpftool: Fix prog dump by tag Lance reported an issue with bpftool not being able to dump program if there are more programs loaded and you want to dump any but the first program, like: # bpftool prog 28: kprobe name trace_req_start tag 1dfc28ba8b3dd597 gpl loaded_at 2019-01-18T17:02:40+1100 uid 0 xlated 112B jited 109B memlock 4096B map_ids 13 29: kprobe name trace_req_compl tag 5b6a5ecc6030a683 gpl loaded_at 2019-01-18T17:02:40+1100 uid 0 xlated 928B jited 575B memlock 4096B map_ids 13,14 # bpftool prog dum jited tag 1dfc28ba8b3dd597 0: push %rbp 1: mov %rsp,%rbp ... # bpftool prog dum jited tag 5b6a5ecc6030a683 Error: can't get prog info (29): Bad address The problem is in the prog_fd_by_tag function not cleaning the struct bpf_prog_info before another request, so the previous program length is still in there and kernel assumes it needs to dump the program, which fails because there's no user pointer set. Moving the struct bpf_prog_info declaration into the loop, so it gets cleaned before each query. Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool") Reported-by: Lance Digby Signed-off-by: Jiri Olsa Reviewed-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/prog.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 2d1bb7d6ff51..b54ed82b9589 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -78,13 +78,14 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) static int prog_fd_by_tag(unsigned char *tag) { - struct bpf_prog_info info = {}; - __u32 len = sizeof(info); unsigned int id = 0; int err; int fd; while (true) { + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + err = bpf_prog_get_next_id(id, &id); if (err) { p_err("%s", strerror(errno)); From b0ca5ecb8e2279d706261f525f1bd0ba9e3fe800 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 21 Jan 2019 12:36:12 +0100 Subject: [PATCH 0119/1436] bpftool: fix percpu maps updating When updating a percpu map, bpftool currently copies the provided value only into the first per CPU copy of the specified value, all others instances are left zeroed. This change explicitly copies the user-provided bytes to all the per CPU instances, keeping the sub-command syntax unchanged. v2 -> v3: - drop unused argument, as per Quentin's suggestion v1 -> v2: - rename the helper as per Quentin's suggestion Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool") Signed-off-by: Paolo Abeni Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/map.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 2037e3dc864b..29a3468c6cf6 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -347,6 +347,20 @@ static char **parse_bytes(char **argv, const char *name, unsigned char *val, return argv + i; } +/* on per cpu maps we must copy the provided value on all value instances */ +static void fill_per_cpu_value(struct bpf_map_info *info, void *value) +{ + unsigned int i, n, step; + + if (!map_is_per_cpu(info->type)) + return; + + n = get_possible_cpus(); + step = round_up(info->value_size, 8); + for (i = 1; i < n; i++) + memcpy(value + i * step, value, info->value_size); +} + static int parse_elem(char **argv, struct bpf_map_info *info, void *key, void *value, __u32 key_size, __u32 value_size, __u32 *flags, __u32 **value_fd) @@ -426,6 +440,8 @@ static int parse_elem(char **argv, struct bpf_map_info *info, argv = parse_bytes(argv, "value", value, value_size); if (!argv) return -1; + + fill_per_cpu_value(info, value); } return parse_elem(argv, info, key, NULL, key_size, value_size, From 6a6a797625b5fe855bf60c798561896e311d58ce Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 22 Jan 2019 15:25:45 +0000 Subject: [PATCH 0120/1436] ARM: dts: r8a7743: Convert to new LVDS DT bindings The internal LVDS encoder now has DT bindings separate from the DU. Port the device tree over to the new model. Fixes: c6a27fa41fab ("drm: rcar-du: Convert LVDS encoder code to bridge driver") Signed-off-by: Biju Das Reviewed-by: Laurent Pinchart Signed-off-by: Simon Horman --- arch/arm/boot/dts/r8a7743.dtsi | 36 ++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/r8a7743.dtsi b/arch/arm/boot/dts/r8a7743.dtsi index 3cc33f7ff7fe..3adc158a40bb 100644 --- a/arch/arm/boot/dts/r8a7743.dtsi +++ b/arch/arm/boot/dts/r8a7743.dtsi @@ -1681,15 +1681,12 @@ du: display@feb00000 { compatible = "renesas,du-r8a7743"; - reg = <0 0xfeb00000 0 0x40000>, - <0 0xfeb90000 0 0x1c>; - reg-names = "du", "lvds.0"; + reg = <0 0xfeb00000 0 0x40000>; interrupts = , ; clocks = <&cpg CPG_MOD 724>, - <&cpg CPG_MOD 723>, - <&cpg CPG_MOD 726>; - clock-names = "du.0", "du.1", "lvds.0"; + <&cpg CPG_MOD 723>; + clock-names = "du.0", "du.1"; status = "disabled"; ports { @@ -1704,6 +1701,33 @@ port@1 { reg = <1>; du_out_lvds0: endpoint { + remote-endpoint = <&lvds0_in>; + }; + }; + }; + }; + + lvds0: lvds@feb90000 { + compatible = "renesas,r8a7743-lvds"; + reg = <0 0xfeb90000 0 0x1c>; + clocks = <&cpg CPG_MOD 726>; + power-domains = <&sysc R8A7743_PD_ALWAYS_ON>; + resets = <&cpg 726>; + status = "disabled"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + lvds0_in: endpoint { + remote-endpoint = <&du_out_lvds0>; + }; + }; + port@1 { + reg = <1>; + lvds0_out: endpoint { }; }; }; From eb0200a4357da100064971689d3a0e9e3cf57f33 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Tue, 18 Dec 2018 16:46:29 +0100 Subject: [PATCH 0121/1436] gpu: ipu-v3: pre: don't trigger update if buffer address doesn't change On a NOP double buffer update where current buffer address is the same as the next buffer address, the SDW_UPDATE bit clears too late. As we are now using this bit to determine when it is safe to signal flip completion to userspace this will delay completion of atomic commits where one plane doesn't change the buffer by a whole frame period. Fix this by remembering the last buffer address and just skip the double buffer update if it would not change the buffer address. Signed-off-by: Lucas Stach [p.zabel@pengutronix.de: initialize last_bufaddr in ipu_pre_configure] Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-pre.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-pre.c b/drivers/gpu/ipu-v3/ipu-pre.c index 2f8db9d62551..4a28f3fbb0a2 100644 --- a/drivers/gpu/ipu-v3/ipu-pre.c +++ b/drivers/gpu/ipu-v3/ipu-pre.c @@ -106,6 +106,7 @@ struct ipu_pre { void *buffer_virt; bool in_use; unsigned int safe_window_end; + unsigned int last_bufaddr; }; static DEFINE_MUTEX(ipu_pre_list_mutex); @@ -185,6 +186,7 @@ void ipu_pre_configure(struct ipu_pre *pre, unsigned int width, writel(bufaddr, pre->regs + IPU_PRE_CUR_BUF); writel(bufaddr, pre->regs + IPU_PRE_NEXT_BUF); + pre->last_bufaddr = bufaddr; val = IPU_PRE_PREF_ENG_CTRL_INPUT_PIXEL_FORMAT(0) | IPU_PRE_PREF_ENG_CTRL_INPUT_ACTIVE_BPP(active_bpp) | @@ -242,7 +244,11 @@ void ipu_pre_update(struct ipu_pre *pre, unsigned int bufaddr) unsigned short current_yblock; u32 val; + if (bufaddr == pre->last_bufaddr) + return; + writel(bufaddr, pre->regs + IPU_PRE_NEXT_BUF); + pre->last_bufaddr = bufaddr; do { if (time_after(jiffies, timeout)) { From c9e4576743eeda8d24dedc164d65b78877f9a98c Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 23 Jan 2019 12:37:19 +0800 Subject: [PATCH 0122/1436] bpf: sock recvbuff must be limited by rmem_max in bpf_setsockopt() When sock recvbuff is set by bpf_setsockopt(), the value must by limited by rmem_max. It is the same with sendbuff. Fixes: 8c4b4c7e9ff0 ("bpf: Add setsockopt helper function to bpf") Signed-off-by: Yafang Shao Acked-by: Martin KaFai Lau Acked-by: Lawrence Brakmo Signed-off-by: Daniel Borkmann --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index 7559d6835ecb..7a54dc11ac2d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4112,10 +4112,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, /* Only some socketops are supported */ switch (optname) { case SO_RCVBUF: + val = min_t(u32, val, sysctl_rmem_max); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); break; case SO_SNDBUF: + val = min_t(u32, val, sysctl_wmem_max); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); break; From da5d2748e4a4512237764d2a53bdf686eccee18b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 23 Jan 2019 08:46:29 +0100 Subject: [PATCH 0123/1436] iommu/mediatek: Use correct fwspec in mtk_iommu_add_device() The mtk_iommu_add_device() function keeps the fwspec in an on-stack pointer and calls mtk_iommu_create_mapping(), which might change its source, dev->iommu_fwspec. This causes the on-stack pointer to be obsoleted and the device initialization to fail. Update the on-stack fwspec pointer after mtk_iommu_create_mapping() has been called. Reported-by: Frank Wunderlich Fixes: a9bf2eec5a6f ('iommu/mediatek: Use helper functions to access dev->iommu_fwspec') Tested-by: Frank Wunderlich Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu_v1.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 6ede4286b835..f60bdb85c4c0 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -442,6 +442,10 @@ static int mtk_iommu_add_device(struct device *dev) iommu_spec.args_count = count; mtk_iommu_create_mapping(dev, &iommu_spec); + + /* dev->iommu_fwspec might have changed */ + fwspec = dev_iommu_fwspec_get(dev); + of_node_put(iommu_spec.np); } From 5fa98c2eda35fcd1a1d91a9785c390f54688240c Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 22 Jan 2019 09:10:10 -0600 Subject: [PATCH 0124/1436] dt-bindings: Fix dt_binding_check target for in tree builds On in tree builds, subsequent builds will incorrectly include the intermediate file 'processed-schema.yaml' with the input schema files resulting in a build error. Update the find command to ignore processed-schema.yaml. Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/Makefile b/Documentation/devicetree/bindings/Makefile index 6e5cef0ed6fb..50daa0b3b032 100644 --- a/Documentation/devicetree/bindings/Makefile +++ b/Documentation/devicetree/bindings/Makefile @@ -17,7 +17,11 @@ extra-y += $(DT_TMP_SCHEMA) quiet_cmd_mk_schema = SCHEMA $@ cmd_mk_schema = $(DT_MK_SCHEMA) $(DT_MK_SCHEMA_FLAGS) -o $@ $(filter-out FORCE, $^) -DT_DOCS = $(shell cd $(srctree)/$(src) && find * -name '*.yaml') +DT_DOCS = $(shell \ + cd $(srctree)/$(src) && \ + find * \( -name '*.yaml' ! -name $(DT_TMP_SCHEMA) \) \ + ) + DT_SCHEMA_FILES ?= $(addprefix $(src)/,$(DT_DOCS)) extra-y += $(patsubst $(src)/%.yaml,%.example.dts, $(DT_SCHEMA_FILES)) From 2486e67374aa8b7854c2de32869642c2873b3d53 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 9 Jan 2019 11:11:24 +0200 Subject: [PATCH 0125/1436] gpio: pcf857x: Fix interrupts on multiple instances When multiple instances of pcf857x chips are present, a fix up message [1] is printed during the probe of the 2nd and later instances. The issue is that the driver is using the same irq_chip data structure between multiple instances. Fix this by allocating the irq_chip data structure per instance. [1] fix up message addressed by this patch [ 1.212100] gpio gpiochip9: (pcf8575): detected irqchip that is shared with multiple gpiochips: please fix the driver. Cc: Stable Signed-off-by: Roger Quadros Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pcf857x.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/gpio/gpio-pcf857x.c b/drivers/gpio/gpio-pcf857x.c index adf72dda25a2..68a35b65925a 100644 --- a/drivers/gpio/gpio-pcf857x.c +++ b/drivers/gpio/gpio-pcf857x.c @@ -84,6 +84,7 @@ MODULE_DEVICE_TABLE(of, pcf857x_of_table); */ struct pcf857x { struct gpio_chip chip; + struct irq_chip irqchip; struct i2c_client *client; struct mutex lock; /* protect 'out' */ unsigned out; /* software latch */ @@ -252,18 +253,6 @@ static void pcf857x_irq_bus_sync_unlock(struct irq_data *data) mutex_unlock(&gpio->lock); } -static struct irq_chip pcf857x_irq_chip = { - .name = "pcf857x", - .irq_enable = pcf857x_irq_enable, - .irq_disable = pcf857x_irq_disable, - .irq_ack = noop, - .irq_mask = noop, - .irq_unmask = noop, - .irq_set_wake = pcf857x_irq_set_wake, - .irq_bus_lock = pcf857x_irq_bus_lock, - .irq_bus_sync_unlock = pcf857x_irq_bus_sync_unlock, -}; - /*-------------------------------------------------------------------------*/ static int pcf857x_probe(struct i2c_client *client, @@ -376,8 +365,17 @@ static int pcf857x_probe(struct i2c_client *client, /* Enable irqchip if we have an interrupt */ if (client->irq) { + gpio->irqchip.name = "pcf857x", + gpio->irqchip.irq_enable = pcf857x_irq_enable, + gpio->irqchip.irq_disable = pcf857x_irq_disable, + gpio->irqchip.irq_ack = noop, + gpio->irqchip.irq_mask = noop, + gpio->irqchip.irq_unmask = noop, + gpio->irqchip.irq_set_wake = pcf857x_irq_set_wake, + gpio->irqchip.irq_bus_lock = pcf857x_irq_bus_lock, + gpio->irqchip.irq_bus_sync_unlock = pcf857x_irq_bus_sync_unlock, status = gpiochip_irqchip_add_nested(&gpio->chip, - &pcf857x_irq_chip, + &gpio->irqchip, 0, handle_level_irq, IRQ_TYPE_NONE); if (status) { @@ -392,7 +390,7 @@ static int pcf857x_probe(struct i2c_client *client, if (status) goto fail; - gpiochip_set_nested_irqchip(&gpio->chip, &pcf857x_irq_chip, + gpiochip_set_nested_irqchip(&gpio->chip, &gpio->irqchip, client->irq); gpio->irq_parent = client->irq; } From 1033be58992f818dc564196ded2bcc3f360bc297 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 4 Jan 2019 11:24:20 +0100 Subject: [PATCH 0126/1436] gpiolib: fix line event timestamps for nested irqs Nested interrupts run inside the calling thread's context and the top half handler is never called which means that we never read the timestamp. This issue came up when trying to read line events from a gpiochip using regmap_irq_chip for interrupts. Fix it by reading the timestamp from the irq thread function if it's still 0 by the time the second handler is called. Fixes: d58f2bf261fd ("gpio: Timestamp events in hardirq handler") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 1651d7f0a303..d1adfdf50fb3 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -828,7 +828,14 @@ static irqreturn_t lineevent_irq_thread(int irq, void *p) /* Do not leak kernel stack to userspace */ memset(&ge, 0, sizeof(ge)); - ge.timestamp = le->timestamp; + /* + * We may be running from a nested threaded interrupt in which case + * we didn't get the timestamp from lineevent_irq_handler(). + */ + if (!le->timestamp) + ge.timestamp = ktime_get_real_ns(); + else + ge.timestamp = le->timestamp; if (le->eflags & GPIOEVENT_REQUEST_RISING_EDGE && le->eflags & GPIOEVENT_REQUEST_FALLING_EDGE) { From 09d158d52d2bceda736797a61b6c13d7fc83707b Mon Sep 17 00:00:00 2001 From: Neo Hou Date: Wed, 16 Jan 2019 13:06:13 +0800 Subject: [PATCH 0127/1436] gpio: sprd: Fix the incorrect data register Since differnt type EICs have its own data register to read, thus fix the incorrect data register. Fixes: 25518e024e3a ("gpio: Add Spreadtrum EIC driver support") Cc: Signed-off-by: Neo Hou Signed-off-by: Baolin Wang Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-eic-sprd.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c index e0d6a0a7bc69..257df59815c9 100644 --- a/drivers/gpio/gpio-eic-sprd.c +++ b/drivers/gpio/gpio-eic-sprd.c @@ -180,7 +180,18 @@ static void sprd_eic_free(struct gpio_chip *chip, unsigned int offset) static int sprd_eic_get(struct gpio_chip *chip, unsigned int offset) { - return sprd_eic_read(chip, offset, SPRD_EIC_DBNC_DATA); + struct sprd_eic *sprd_eic = gpiochip_get_data(chip); + + switch (sprd_eic->type) { + case SPRD_EIC_DEBOUNCE: + return sprd_eic_read(chip, offset, SPRD_EIC_DBNC_DATA); + case SPRD_EIC_ASYNC: + return sprd_eic_read(chip, offset, SPRD_EIC_ASYNC_DATA); + case SPRD_EIC_SYNC: + return sprd_eic_read(chip, offset, SPRD_EIC_SYNC_DATA); + default: + return -ENOTSUPP; + } } static int sprd_eic_direction_input(struct gpio_chip *chip, unsigned int offset) From f785ffb61605734b518afa766d1b5445e9f38c8d Mon Sep 17 00:00:00 2001 From: Neo Hou Date: Wed, 16 Jan 2019 13:06:14 +0800 Subject: [PATCH 0128/1436] gpio: sprd: Fix incorrect irq type setting for the async EIC When setting async EIC as IRQ_TYPE_EDGE_BOTH type, we missed to set the SPRD_EIC_ASYNC_INTMODE register to 0, which means detecting edge signals. Thus this patch fixes the issue. Fixes: 25518e024e3a ("gpio: Add Spreadtrum EIC driver support") Cc: Signed-off-by: Neo Hou Signed-off-by: Baolin Wang Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-eic-sprd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c index 257df59815c9..e41223c05f6e 100644 --- a/drivers/gpio/gpio-eic-sprd.c +++ b/drivers/gpio/gpio-eic-sprd.c @@ -379,6 +379,7 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_BOTH: + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 1); irq_set_handler_locked(data, handle_edge_irq); break; From 2095a45e345e669ea77a9b34bdd7de5ceb422f93 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 23 Jan 2019 08:00:57 +0800 Subject: [PATCH 0129/1436] gpio: altera-a10sr: Set proper output level for direction_output The altr_a10sr_gpio_direction_output should set proper output level based on the value argument. Fixes: 26a48c4cc2f1 ("gpio: altera-a10sr: Add A10 System Resource Chip GPIO support.") Cc: Signed-off-by: Axel Lin Tested by: Thor Thayer Reviewed by: Thor Thayer Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-altera-a10sr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-altera-a10sr.c b/drivers/gpio/gpio-altera-a10sr.c index 6b11f1314248..7f9e0304b510 100644 --- a/drivers/gpio/gpio-altera-a10sr.c +++ b/drivers/gpio/gpio-altera-a10sr.c @@ -66,8 +66,10 @@ static int altr_a10sr_gpio_direction_input(struct gpio_chip *gc, static int altr_a10sr_gpio_direction_output(struct gpio_chip *gc, unsigned int nr, int value) { - if (nr <= (ALTR_A10SR_OUT_VALID_RANGE_HI - ALTR_A10SR_LED_VALID_SHIFT)) + if (nr <= (ALTR_A10SR_OUT_VALID_RANGE_HI - ALTR_A10SR_LED_VALID_SHIFT)) { + altr_a10sr_gpio_set(gc, nr, value); return 0; + } return -EINVAL; } From ba16adeb346387eb2d1ada69003588be96f098fa Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Sat, 29 Dec 2018 13:10:06 +0800 Subject: [PATCH 0130/1436] ARM: pxa: ssp: unneeded to free devm_ allocated data devm_ allocated data will be automatically freed. The free of devm_ allocated data is invalid. Fixes: 1c459de1e645 ("ARM: pxa: ssp: use devm_ functions") Signed-off-by: Peng Hao [title's prefix changed] Signed-off-by: Robert Jarzmik --- arch/arm/plat-pxa/ssp.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c index ed36dcab80f1..f51919974183 100644 --- a/arch/arm/plat-pxa/ssp.c +++ b/arch/arm/plat-pxa/ssp.c @@ -190,8 +190,6 @@ static int pxa_ssp_remove(struct platform_device *pdev) if (ssp == NULL) return -ENODEV; - iounmap(ssp->mmio_base); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); release_mem_region(res->start, resource_size(res)); @@ -201,7 +199,6 @@ static int pxa_ssp_remove(struct platform_device *pdev) list_del(&ssp->node); mutex_unlock(&ssp_lock); - kfree(ssp); return 0; } From 99fd6e875d0c24448a5e2c241422a691be46b241 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Thu, 3 Jan 2019 11:32:33 +0800 Subject: [PATCH 0131/1436] RISC-V: Add _TIF_NEED_RESCHED check for kernel thread when CONFIG_PREEMPT=y The cond_resched() can be used to yield the CPU resource if CONFIG_PREEMPT is not defined. Otherwise, cond_resched() is a dummy function. In order to avoid kernel thread occupying entire CPU, when CONFIG_PREEMPT=y, the kernel thread needs to follow the rescheduling mechanism like a user thread. Signed-off-by: Vincent Chen Tested-by: Guenter Roeck Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/asm-offsets.c | 1 + arch/riscv/kernel/entry.S | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index 6a92a2fe198e..dac98348c6a3 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -39,6 +39,7 @@ void asm_offsets(void) OFFSET(TASK_STACK, task_struct, stack); OFFSET(TASK_TI, task_struct, thread_info); OFFSET(TASK_TI_FLAGS, task_struct, thread_info.flags); + OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count); OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp); OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp); OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 355166f57205..fd9b57c8b4ce 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -144,6 +144,10 @@ _save_context: REG_L x2, PT_SP(sp) .endm +#if !IS_ENABLED(CONFIG_PREEMPT) +.set resume_kernel, restore_all +#endif + ENTRY(handle_exception) SAVE_ALL @@ -228,7 +232,7 @@ ret_from_exception: REG_L s0, PT_SSTATUS(sp) csrc sstatus, SR_SIE andi s0, s0, SR_SPP - bnez s0, restore_all + bnez s0, resume_kernel resume_userspace: /* Interrupts must be disabled here so flags are checked atomically */ @@ -250,6 +254,18 @@ restore_all: RESTORE_ALL sret +#if IS_ENABLED(CONFIG_PREEMPT) +resume_kernel: + REG_L s0, TASK_TI_PREEMPT_COUNT(tp) + bnez s0, restore_all +need_resched: + REG_L s0, TASK_TI_FLAGS(tp) + andi s0, s0, _TIF_NEED_RESCHED + beqz s0, restore_all + call preempt_schedule_irq + j need_resched +#endif + work_pending: /* Enter slow path for supplementary processing */ la ra, ret_from_exception From 2bb10639f12c925faeed8b4c3678fb92f97b90b7 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Mon, 7 Jan 2019 15:16:35 +0100 Subject: [PATCH 0132/1436] RISC-V: fix bad use of of_node_put of_find_node_by_type already calls of_node_put, don't call it again. Fixes: 94f9bf118f ("RISC-V: Fix of_node_* refcount") Signed-off-by: Andreas Schwab Reviewed-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/smpboot.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index fc185ecabb0a..18cda0e8cf94 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -57,15 +57,12 @@ void __init setup_smp(void) while ((dn = of_find_node_by_type(dn, "cpu"))) { hart = riscv_of_processor_hartid(dn); - if (hart < 0) { - of_node_put(dn); + if (hart < 0) continue; - } if (hart == cpuid_to_hartid_map(0)) { BUG_ON(found_boot_cpu); found_boot_cpu = 1; - of_node_put(dn); continue; } @@ -73,7 +70,6 @@ void __init setup_smp(void) set_cpu_possible(cpuid, true); set_cpu_present(cpuid, true); cpuid++; - of_node_put(dn); } BUG_ON(!found_boot_cpu); From 8581f38742cf98f55e53a384a2612a1171b3600a Mon Sep 17 00:00:00 2001 From: Antony Pavlov Date: Tue, 8 Jan 2019 22:45:38 +0300 Subject: [PATCH 0133/1436] RISC-V: asm/page.h: fix spelling mistake "CONFIG_64BITS" -> "CONFIG_64BIT" There is no CONFIG_64BITS Kconfig macro. Please see arch/riscv/Kconfig for details, e.g. linux$ git grep -HnA 1 "config 64BIT" arch/riscv/Kconfig arch/riscv/Kconfig:6:config 64BIT arch/riscv/Kconfig-7- bool Signed-off-by: Antony Pavlov Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/page.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 06cfbb3aacbb..2a546a52f02a 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -80,7 +80,7 @@ typedef struct page *pgtable_t; #define __pgd(x) ((pgd_t) { (x) }) #define __pgprot(x) ((pgprot_t) { (x) }) -#ifdef CONFIG_64BITS +#ifdef CONFIG_64BIT #define PTE_FMT "%016lx" #else #define PTE_FMT "%08lx" From 86cca81a31cdc6f1f292b5b0c6a4e7e06863cdc9 Mon Sep 17 00:00:00 2001 From: Antony Pavlov Date: Tue, 8 Jan 2019 22:45:57 +0300 Subject: [PATCH 0134/1436] RISC-V: Kconfig: fix spelling mistake "traget" -> "target" Signed-off-by: Antony Pavlov Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index feeeaa60697c..515fc3cc9687 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -103,7 +103,7 @@ choice prompt "Base ISA" default ARCH_RV64I help - This selects the base ISA that this kernel will traget and must match + This selects the base ISA that this kernel will target and must match the target platform. config ARCH_RV32I From a37ead8f2efb1595c15079eddac2672630f750ea Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Wed, 23 Jan 2019 12:59:17 -0800 Subject: [PATCH 0135/1436] RISC-V: defconfig: Move CONFIG_PCI{,E_XILINX} eb01d42a7778 ("PCI: consolidate PCI config entry in drivers/pci") reorganized the PCI-related Kconfig entries and resulted in a diff in our defconfig. This simply removes the diff. Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index f399659d3b8d..658be23fa2c6 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -13,8 +13,6 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_BPF_SYSCALL=y CONFIG_SMP=y -CONFIG_PCI=y -CONFIG_PCIE_XILINX=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_NET=y @@ -28,6 +26,8 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NETLINK_DIAG=y +CONFIG_PCI=y +CONFIG_PCIE_XILINX=y CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=y CONFIG_VIRTIO_BLK=y From e4cf9e47ab244977d12513bb4610a44f88ad5e68 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Sat, 12 Jan 2019 00:41:26 +0000 Subject: [PATCH 0136/1436] RISC-V: defconfig: Enable Generic PCIE by default Enable generic PCIe by default in the RISC-V defconfig, this allows us to use QEMU's PCIe support out of the box. CONFIG_RAS=y is automatically selected by generic PCIe, so it has been dropped from the defconfig. Signed-off-by: Alistair Francis [Palmer: Split out PCIE_XILINX and CRYPTO_DEV_VIRTIO] Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 658be23fa2c6..01fe50707d98 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -27,6 +27,8 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NETLINK_DIAG=y CONFIG_PCI=y +CONFIG_PCIEPORTBUS=y +CONFIG_PCI_HOST_GENERIC=y CONFIG_PCIE_XILINX=y CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=y @@ -63,7 +65,6 @@ CONFIG_USB_STORAGE=y CONFIG_USB_UAS=y CONFIG_VIRTIO_MMIO=y CONFIG_SIFIVE_PLIC=y -CONFIG_RAS=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS4_FS=y From 2a200fb9fb124e2889516c4668c5483bf7107044 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Wed, 23 Jan 2019 14:09:01 -0800 Subject: [PATCH 0137/1436] RISC-V: defconfig: Add CRYPTO_DEV_VIRTIO=y This allows acceleration of cryptography inside QEMU. Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 01fe50707d98..2fd3461e50ab 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -78,5 +78,6 @@ CONFIG_NFS_V4_1=y CONFIG_NFS_V4_2=y CONFIG_ROOT_NFS=y CONFIG_CRYPTO_USER_API_HASH=y +CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_PRINTK_TIME=y # CONFIG_RCU_TRACE is not set From a19f74708ef693d7ddc3ee577a458b6807d43e5b Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Thu, 10 Jan 2019 18:11:39 +0100 Subject: [PATCH 0138/1436] tty/serial: use uart_console_write in the RISC-V SBL early console This enables proper NLCR processing. Suggested-by: Anup Patel Signed-off-by: Andreas Schwab Reviewed-by: Anup Patel Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- drivers/tty/serial/earlycon-riscv-sbi.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/earlycon-riscv-sbi.c b/drivers/tty/serial/earlycon-riscv-sbi.c index e1a551aae336..ce81523c3113 100644 --- a/drivers/tty/serial/earlycon-riscv-sbi.c +++ b/drivers/tty/serial/earlycon-riscv-sbi.c @@ -10,13 +10,16 @@ #include #include -static void sbi_console_write(struct console *con, - const char *s, unsigned int n) +static void sbi_putc(struct uart_port *port, int c) { - int i; + sbi_console_putchar(c); +} - for (i = 0; i < n; ++i) - sbi_console_putchar(s[i]); +static void sbi_console_write(struct console *con, + const char *s, unsigned n) +{ + struct earlycon_device *dev = con->data; + uart_console_write(&dev->port, s, n, sbi_putc); } static int __init early_sbi_setup(struct earlycon_device *device, From 5b90df44fd9b415d8c5d11b92746212a63d3c47f Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 9 Jan 2019 20:01:56 +0100 Subject: [PATCH 0139/1436] ARM: dts: omap3-gta04: Fix graph_port warning We're currently getting a warning with make dtbs: arch/arm/boot/dts/omap3-gta04.dtsi:720.7-727.4: Warning (graph_port): /ocp@68000000/dss@48050000/encoder@48050c0 0/port: graph node unit address error, expected "0" Tested-by: H. Nikolaus Schaller Acked-by: Rob Herring Signed-off-by: H. Nikolaus Schaller Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-gta04.dtsi | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index e53d32691308..93b420934e8e 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -714,11 +714,7 @@ vdda-supply = <&vdac>; - #address-cells = <1>; - #size-cells = <0>; - port { - reg = <0>; venc_out: endpoint { remote-endpoint = <&opa_in>; ti,channels = <1>; From ac9c908eecde3ed252cb1d67fc79b3c1346f76bc Mon Sep 17 00:00:00 2001 From: Arthur Demchenkov Date: Mon, 21 Jan 2019 06:21:09 +0300 Subject: [PATCH 0140/1436] ARM: dts: n900: fix mmc1 card detect gpio polarity Wrong polarity of card detect GPIO pin leads to the system not booting from external mmc, if the back cover of N900 is closed. When the cover is open the system boots fine. This wasn't noticed before, because of a bug, which was fixed by commit e63201f19 (mmc: omap_hsmmc: Delete platform data GPIO CD and WP). Kernels up to 4.19 ignored the card detect GPIO from DT. Fixes: e63201f19438 ("mmc: omap_hsmmc: Delete platform data GPIO CD and WP") Signed-off-by: Arthur Demchenkov Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-n900.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 182a53991c90..826920e6b878 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -814,7 +814,7 @@ /* For debugging, it is often good idea to remove this GPIO. It means you can remove back cover (to reboot by removing battery) and still use the MMC card. */ - cd-gpios = <&gpio6 0 GPIO_ACTIVE_HIGH>; /* 160 */ + cd-gpios = <&gpio6 0 GPIO_ACTIVE_LOW>; /* 160 */ }; /* most boards use vaux3, only some old versions use vmmc2 instead */ From 063c20e12f8bbbc10cabc2413606b140085beb62 Mon Sep 17 00:00:00 2001 From: Heiko Schocher Date: Tue, 22 Jan 2019 06:26:23 +0100 Subject: [PATCH 0141/1436] ARM: dts: am335x-shc.dts: fix wrong cd pin level cd pin on mmc1 is GPIO_ACTIVE_LOW not GPIO_ACTIVE_HIGH Fixes: e63201f19438 ("mmc: omap_hsmmc: Delete platform data GPIO CD and WP") Signed-off-by: Heiko Schocher Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-shc.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/am335x-shc.dts b/arch/arm/boot/dts/am335x-shc.dts index d0fd68873689..5b250060f6dd 100644 --- a/arch/arm/boot/dts/am335x-shc.dts +++ b/arch/arm/boot/dts/am335x-shc.dts @@ -215,7 +215,7 @@ pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins>; bus-width = <0x4>; - cd-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio0 6 GPIO_ACTIVE_LOW>; cd-inverted; max-frequency = <26000000>; vmmc-supply = <&vmmcsd_fixed>; From d0243693fbf6fbd48b4efb2ba7210765983b03e3 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 15 Jan 2019 10:09:38 -0800 Subject: [PATCH 0142/1436] ARM: OMAP5+: Fix inverted nirq pin interrupts with irq_set_type Commit 83a86fbb5b56 ("irqchip/gic: Loudly complain about the use of IRQ_TYPE_NONE") started warning about incorrect dts usage for irqs. ARM GIC only supports active-high interrupts for SPI (Shared Peripheral Interrupts), and the Palmas PMIC by default is active-low. Palmas PMIC allows changing the interrupt polarity using register PALMAS_POLARITY_CTRL_INT_POLARITY, but configuring sys_nirq1 with a pull-down and setting PALMAS_POLARITY_CTRL_INT_POLARITY made the Palmas RTC interrupts stop working. This can be easily tested with kernel tools rtctest.c. Turns out the SoC inverts the sys_nirq pins for GIC as they do not go through a peripheral device but go directly to the MPUSS wakeupgen. I've verified this by muxing the interrupt line temporarily to gpio_wk16 instead of sys_nirq1. with a gpio, the interrupt works fine both active-low and active-high with the SoC internal pull configured and palmas polarity configured. But as sys_nirq1, the interrupt only works when configured ACTIVE_LOW for palmas, and ACTIVE_HIGH for GIC. Note that there was a similar issue earlier with tegra114 and palmas interrupt polarity that got fixed by commit df545d1cd01a ("mfd: palmas: Provide irq flags through DT/platform data"). However, the difference between omap5 and tegra114 is that tegra inverts the palmas interrupt twice, once when entering tegra PMC, and again when exiting tegra PMC to GIC. Let's fix the issue by adding a custom wakeupgen_irq_set_type() for wakeupgen and invert any interrupts with wrong polarity. Let's also warn about any non-sysnirq pins using wrong polarity. Note that we also need to update the dts for the level as IRQ_TYPE_NONE never has irq_set_type() called, and let's add some comments and use proper pin nameing to avoid more confusion later on. Cc: Belisko Marek Cc: Dmitry Lifshitz Cc: "Dr. H. Nikolaus Schaller" Cc: Jon Hunter Cc: Keerthy Cc: Laxman Dewangan Cc: Nishanth Menon Cc: Peter Ujfalusi Cc: Richard Woodruff Cc: Santosh Shilimkar Cc: Tero Kristo Cc: Thierry Reding Cc: stable@vger.kernel.org # v4.17+ Reported-by: Belisko Marek Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-board-common.dtsi | 9 ++++-- arch/arm/boot/dts/omap5-cm-t54.dts | 12 +++++++- arch/arm/mach-omap2/omap-wakeupgen.c | 36 ++++++++++++++++++++++- 3 files changed, 52 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index bc853ebeda22..61a06f6add3c 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -317,7 +317,8 @@ palmas_sys_nirq_pins: pinmux_palmas_sys_nirq_pins { pinctrl-single,pins = < - OMAP5_IOPAD(0x068, PIN_INPUT_PULLUP | MUX_MODE0) /* sys_nirq1 */ + /* sys_nirq1 is pulled down as the SoC is inverting it for GIC */ + OMAP5_IOPAD(0x068, PIN_INPUT_PULLUP | MUX_MODE0) >; }; @@ -385,7 +386,8 @@ palmas: palmas@48 { compatible = "ti,palmas"; - interrupts = ; /* IRQ_SYS_1N */ + /* sys_nirq/ext_sys_irq pins get inverted at mpuss wakeupgen */ + interrupts = ; reg = <0x48>; interrupt-controller; #interrupt-cells = <2>; @@ -651,7 +653,8 @@ pinctrl-names = "default"; pinctrl-0 = <&twl6040_pins>; - interrupts = ; /* IRQ_SYS_2N cascaded to gic */ + /* sys_nirq/ext_sys_irq pins get inverted at mpuss wakeupgen */ + interrupts = ; /* audpwron gpio defined in the board specific dts */ diff --git a/arch/arm/boot/dts/omap5-cm-t54.dts b/arch/arm/boot/dts/omap5-cm-t54.dts index 5e21fb430a65..e78d3718f145 100644 --- a/arch/arm/boot/dts/omap5-cm-t54.dts +++ b/arch/arm/boot/dts/omap5-cm-t54.dts @@ -181,6 +181,13 @@ OMAP5_IOPAD(0x0042, PIN_INPUT_PULLDOWN | MUX_MODE6) /* llib_wakereqin.gpio1_wk15 */ >; }; + + palmas_sys_nirq_pins: pinmux_palmas_sys_nirq_pins { + pinctrl-single,pins = < + /* sys_nirq1 is pulled down as the SoC is inverting it for GIC */ + OMAP5_IOPAD(0x068, PIN_INPUT_PULLUP | MUX_MODE0) + >; + }; }; &omap5_pmx_core { @@ -414,8 +421,11 @@ palmas: palmas@48 { compatible = "ti,palmas"; - interrupts = ; /* IRQ_SYS_1N */ reg = <0x48>; + pinctrl-0 = <&palmas_sys_nirq_pins>; + pinctrl-names = "default"; + /* sys_nirq/ext_sys_irq pins get inverted at mpuss wakeupgen */ + interrupts = ; interrupt-controller; #interrupt-cells = <2>; ti,system-power-controller; diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c index fc5fb776a710..17558be4bf0a 100644 --- a/arch/arm/mach-omap2/omap-wakeupgen.c +++ b/arch/arm/mach-omap2/omap-wakeupgen.c @@ -50,6 +50,9 @@ #define OMAP4_NR_BANKS 4 #define OMAP4_NR_IRQS 128 +#define SYS_NIRQ1_EXT_SYS_IRQ_1 7 +#define SYS_NIRQ2_EXT_SYS_IRQ_2 119 + static void __iomem *wakeupgen_base; static void __iomem *sar_base; static DEFINE_RAW_SPINLOCK(wakeupgen_lock); @@ -153,6 +156,37 @@ static void wakeupgen_unmask(struct irq_data *d) irq_chip_unmask_parent(d); } +/* + * The sys_nirq pins bypass peripheral modules and are wired directly + * to MPUSS wakeupgen. They get automatically inverted for GIC. + */ +static int wakeupgen_irq_set_type(struct irq_data *d, unsigned int type) +{ + bool inverted = false; + + switch (type) { + case IRQ_TYPE_LEVEL_LOW: + type &= ~IRQ_TYPE_LEVEL_MASK; + type |= IRQ_TYPE_LEVEL_HIGH; + inverted = true; + break; + case IRQ_TYPE_EDGE_FALLING: + type &= ~IRQ_TYPE_EDGE_BOTH; + type |= IRQ_TYPE_EDGE_RISING; + inverted = true; + break; + default: + break; + } + + if (inverted && d->hwirq != SYS_NIRQ1_EXT_SYS_IRQ_1 && + d->hwirq != SYS_NIRQ2_EXT_SYS_IRQ_2) + pr_warn("wakeupgen: irq%li polarity inverted in dts\n", + d->hwirq); + + return irq_chip_set_type_parent(d, type); +} + #ifdef CONFIG_HOTPLUG_CPU static DEFINE_PER_CPU(u32 [MAX_NR_REG_BANKS], irqmasks); @@ -446,7 +480,7 @@ static struct irq_chip wakeupgen_chip = { .irq_mask = wakeupgen_mask, .irq_unmask = wakeupgen_unmask, .irq_retrigger = irq_chip_retrigger_hierarchy, - .irq_set_type = irq_chip_set_type_parent, + .irq_set_type = wakeupgen_irq_set_type, .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, From 28198c4639b39899a728ac89aea29d2a7a72562f Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Sat, 12 Jan 2019 16:16:27 +0800 Subject: [PATCH 0143/1436] riscv: fixup max_low_pfn with PFN_DOWN. max_low_pfn should be pfn_size not byte_size. Signed-off-by: Guo Ren Signed-off-by: Mao Han Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 2 +- arch/riscv/mm/init.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 6e079e94b638..77564310235f 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -181,7 +181,7 @@ static void __init setup_bootmem(void) BUG_ON(mem_size == 0); set_max_mapnr(PFN_DOWN(mem_size)); - max_low_pfn = memblock_end_of_DRAM(); + max_low_pfn = PFN_DOWN(memblock_end_of_DRAM()); #ifdef CONFIG_BLK_DEV_INITRD setup_initrd(); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 1d9bfaff60bc..658ebf645f42 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -28,7 +28,8 @@ static void __init zone_sizes_init(void) unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, }; #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G, max_low_pfn)); + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G, + (unsigned long) PFN_PHYS(max_low_pfn))); #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; From a2fcd5c84f7a7825e028381b10182439067aa90d Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Wed, 23 Jan 2019 15:28:59 +0800 Subject: [PATCH 0144/1436] drm/modes: Prevent division by zero htotal This patch prevents division by zero htotal. In a follow-up mail Tina writes: > > How did you manage to get here with htotal == 0? This needs backtraces (or if > > this is just about static checkers, a mention of that). > > -Daniel > > In GVT-g, we are trying to enable a virtual display w/o setting timings for a pipe > (a.k.a htotal=0), then we met the following kernel panic: > > [ 32.832048] divide error: 0000 [#1] SMP PTI > [ 32.833614] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.18.0-rc4-sriov+ #33 > [ 32.834438] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.10.1-0-g8891697-dirty-20180511_165818-tinazhang-linux-1 04/01/2014 > [ 32.835901] RIP: 0010:drm_mode_hsync+0x1e/0x40 > [ 32.836004] Code: 31 c0 c3 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 8b 87 d8 00 00 00 85 c0 75 22 8b 4f 68 85 c9 78 1b 69 47 58 e8 03 00 00 99 f9 b9 d3 4d 62 10 05 f4 01 00 00 f7 e1 89 d0 c1 e8 06 f3 c3 66 > [ 32.836004] RSP: 0000:ffffc900000ebb90 EFLAGS: 00010206 > [ 32.836004] RAX: 0000000000000000 RBX: ffff88001c67c8a0 RCX: 0000000000000000 > [ 32.836004] RDX: 0000000000000000 RSI: ffff88001c67c000 RDI: ffff88001c67c8a0 > [ 32.836004] RBP: ffff88001c7d03a0 R08: ffff88001c67c8a0 R09: ffff88001c7d0330 > [ 32.836004] R10: ffffffff822c3a98 R11: 0000000000000001 R12: ffff88001c67c000 > [ 32.836004] R13: ffff88001c7d0370 R14: ffffffff8207eb78 R15: ffff88001c67c800 > [ 32.836004] FS: 0000000000000000(0000) GS:ffff88001da00000(0000) knlGS:0000000000000000 > [ 32.836004] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ 32.836004] CR2: 0000000000000000 CR3: 000000000220a000 CR4: 00000000000006f0 > [ 32.836004] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > [ 32.836004] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 > [ 32.836004] Call Trace: > [ 32.836004] intel_mode_from_pipe_config+0x72/0x90 > [ 32.836004] intel_modeset_setup_hw_state+0x569/0xf90 > [ 32.836004] intel_modeset_init+0x905/0x1db0 > [ 32.836004] i915_driver_load+0xb8c/0x1120 > [ 32.836004] i915_pci_probe+0x4d/0xb0 > [ 32.836004] local_pci_probe+0x44/0xa0 > [ 32.836004] ? pci_assign_irq+0x27/0x130 > [ 32.836004] pci_device_probe+0x102/0x1c0 > [ 32.836004] driver_probe_device+0x2b8/0x480 > [ 32.836004] __driver_attach+0x109/0x110 > [ 32.836004] ? driver_probe_device+0x480/0x480 > [ 32.836004] bus_for_each_dev+0x67/0xc0 > [ 32.836004] ? klist_add_tail+0x3b/0x70 > [ 32.836004] bus_add_driver+0x1e8/0x260 > [ 32.836004] driver_register+0x5b/0xe0 > [ 32.836004] ? mipi_dsi_bus_init+0x11/0x11 > [ 32.836004] do_one_initcall+0x4d/0x1eb > [ 32.836004] kernel_init_freeable+0x197/0x237 > [ 32.836004] ? rest_init+0xd0/0xd0 > [ 32.836004] kernel_init+0xa/0x110 > [ 32.836004] ret_from_fork+0x35/0x40 > [ 32.836004] Modules linked in: > [ 32.859183] ---[ end trace 525608b0ed0e8665 ]--- > [ 32.859722] RIP: 0010:drm_mode_hsync+0x1e/0x40 > [ 32.860287] Code: 31 c0 c3 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 8b 87 d8 00 00 00 85 c0 75 22 8b 4f 68 85 c9 78 1b 69 47 58 e8 03 00 00 99 f9 b9 d3 4d 62 10 05 f4 01 00 00 f7 e1 89 d0 c1 e8 06 f3 c3 66 > [ 32.862680] RSP: 0000:ffffc900000ebb90 EFLAGS: 00010206 > [ 32.863309] RAX: 0000000000000000 RBX: ffff88001c67c8a0 RCX: 0000000000000000 > [ 32.864182] RDX: 0000000000000000 RSI: ffff88001c67c000 RDI: ffff88001c67c8a0 > [ 32.865206] RBP: ffff88001c7d03a0 R08: ffff88001c67c8a0 R09: ffff88001c7d0330 > [ 32.866359] R10: ffffffff822c3a98 R11: 0000000000000001 R12: ffff88001c67c000 > [ 32.867213] R13: ffff88001c7d0370 R14: ffffffff8207eb78 R15: ffff88001c67c800 > [ 32.868075] FS: 0000000000000000(0000) GS:ffff88001da00000(0000) knlGS:0000000000000000 > [ 32.868983] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ 32.869659] CR2: 0000000000000000 CR3: 000000000220a000 CR4: 00000000000006f0 > [ 32.870599] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > [ 32.871598] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 > [ 32.872549] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b > > Since drm_mode_hsync() has the logic to check mode->htotal, I just extend it to cover the case htotal==0. Signed-off-by: Tina Zhang Cc: Adam Jackson Cc: Dave Airlie Cc: Daniel Vetter [danvet: Add additional explanations + cc: stable.] Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1548228539-3061-1-git-send-email-tina.zhang@intel.com --- drivers/gpu/drm/drm_modes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 24a750436559..f91e02c87fd8 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -758,7 +758,7 @@ int drm_mode_hsync(const struct drm_display_mode *mode) if (mode->hsync) return mode->hsync; - if (mode->htotal < 0) + if (mode->htotal <= 0) return 0; calc_val = (mode->clock * 1000) / mode->htotal; /* hsync in Hz */ From e355477ed9e4f401e3931043df97325d38552d54 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 18 Jan 2019 16:33:10 -0800 Subject: [PATCH 0145/1436] net/mlx5: Make mlx5_cmd_exec_cb() a safe API APIs that have deferred callbacks should have some kind of cleanup function that callers can use to fence the callbacks. Otherwise things like module unloading can lead to dangling function pointers, or worse. The IB MR code is the only place that calls this function and had a really poor attempt at creating this fence. Provide a good version in the core code as future patches will add more places that need this fence. Signed-off-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 + drivers/infiniband/hw/mlx5/mr.c | 39 +++---------- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 55 +++++++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 11 ++-- include/linux/mlx5/driver.h | 32 +++++++++-- 5 files changed, 91 insertions(+), 48 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index efe383c0ac86..eedba0d2ec4b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -587,6 +587,7 @@ struct mlx5_ib_mr { struct mlx5_ib_mr *parent; atomic_t num_leaf_free; wait_queue_head_t q_leaf_free; + struct mlx5_async_work cb_work; }; struct mlx5_ib_mw { @@ -944,6 +945,7 @@ struct mlx5_ib_dev { struct mlx5_memic memic; u16 devx_whitelist_uid; struct mlx5_srq_table srq_table; + struct mlx5_async_ctx async_ctx; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index fd6ea1f75085..bf2b6ea23851 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -123,9 +123,10 @@ static void update_odp_mr(struct mlx5_ib_mr *mr) } #endif -static void reg_mr_callback(int status, void *context) +static void reg_mr_callback(int status, struct mlx5_async_work *context) { - struct mlx5_ib_mr *mr = context; + struct mlx5_ib_mr *mr = + container_of(context, struct mlx5_ib_mr, cb_work); struct mlx5_ib_dev *dev = mr->dev; struct mlx5_mr_cache *cache = &dev->cache; int c = order2idx(dev, mr->order); @@ -216,9 +217,9 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) ent->pending++; spin_unlock_irq(&ent->lock); err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey, - in, inlen, + &dev->async_ctx, in, inlen, mr->out, sizeof(mr->out), - reg_mr_callback, mr); + reg_mr_callback, &mr->cb_work); if (err) { spin_lock_irq(&ent->lock); ent->pending--; @@ -679,6 +680,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) return -ENOMEM; } + mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); timer_setup(&dev->delay_timer, delay_time_func, 0); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { ent = &cache->ent[i]; @@ -725,33 +727,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) return 0; } -static void wait_for_async_commands(struct mlx5_ib_dev *dev) -{ - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent; - int total = 0; - int i; - int j; - - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - ent = &cache->ent[i]; - for (j = 0 ; j < 1000; j++) { - if (!ent->pending) - break; - msleep(50); - } - } - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - ent = &cache->ent[i]; - total += ent->pending; - } - - if (total) - mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total); - else - mlx5_ib_warn(dev, "done with all pending requests\n"); -} - int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) { int i; @@ -763,12 +738,12 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) flush_workqueue(dev->cache.wq); mlx5_mr_cache_debugfs_cleanup(dev); + mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) clean_keys(dev, i); destroy_workqueue(dev->cache.wq); - wait_for_async_commands(dev); del_timer_sync(&dev->delay_timer); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 3e0fa8a8077b..a25a8c6f938e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1711,12 +1711,57 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, } EXPORT_SYMBOL(mlx5_cmd_exec); -int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, - void *out, int out_size, mlx5_cmd_cbk_t callback, - void *context) +void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, + struct mlx5_async_ctx *ctx) { - return cmd_exec(dev, in, in_size, out, out_size, callback, context, - false); + ctx->dev = dev; + /* Starts at 1 to avoid doing wake_up if we are not cleaning up */ + atomic_set(&ctx->num_inflight, 1); + init_waitqueue_head(&ctx->wait); +} +EXPORT_SYMBOL(mlx5_cmd_init_async_ctx); + +/** + * mlx5_cmd_cleanup_async_ctx - Clean up an async_ctx + * @ctx: The ctx to clean + * + * Upon return all callbacks given to mlx5_cmd_exec_cb() have been called. The + * caller must ensure that mlx5_cmd_exec_cb() is not called during or after + * the call mlx5_cleanup_async_ctx(). + */ +void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx) +{ + atomic_dec(&ctx->num_inflight); + wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0); +} +EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx); + +static void mlx5_cmd_exec_cb_handler(int status, void *_work) +{ + struct mlx5_async_work *work = _work; + struct mlx5_async_ctx *ctx = work->ctx; + + work->user_callback(status, work); + if (atomic_dec_and_test(&ctx->num_inflight)) + wake_up(&ctx->wait); +} + +int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, + void *out, int out_size, mlx5_async_cbk_t callback, + struct mlx5_async_work *work) +{ + int ret; + + work->ctx = ctx; + work->user_callback = callback; + if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) + return -EIO; + ret = cmd_exec(ctx->dev, in, in_size, out, out_size, + mlx5_cmd_exec_cb_handler, work, false); + if (ret && atomic_dec_and_test(&ctx->num_inflight)) + wake_up(&ctx->wait); + + return ret; } EXPORT_SYMBOL(mlx5_cmd_exec_cb); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 0670165afd5f..ea744d8466ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -51,9 +51,10 @@ void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - u32 *in, int inlen, - u32 *out, int outlen, - mlx5_cmd_cbk_t callback, void *context) + struct mlx5_async_ctx *async_ctx, u32 *in, + int inlen, u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context) { struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; @@ -71,7 +72,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, MLX5_SET(mkc, mkc, mkey_7_0, key); if (callback) - return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen, + return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, callback, context); err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); @@ -105,7 +106,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen) { - return mlx5_core_create_mkey_cb(dev, mkey, in, inlen, + return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen, NULL, 0, NULL, NULL); } EXPORT_SYMBOL(mlx5_core_create_mkey); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 4e444863054a..039c9398614c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -850,11 +850,30 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); +struct mlx5_async_ctx { + struct mlx5_core_dev *dev; + atomic_t num_inflight; + struct wait_queue_head wait; +}; + +struct mlx5_async_work; + +typedef void (*mlx5_async_cbk_t)(int status, struct mlx5_async_work *context); + +struct mlx5_async_work { + struct mlx5_async_ctx *ctx; + mlx5_async_cbk_t user_callback; +}; + +void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, + struct mlx5_async_ctx *ctx); +void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx); +int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, + void *out, int out_size, mlx5_async_cbk_t callback, + struct mlx5_async_work *work); + int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); -int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, - void *out, int out_size, mlx5_cmd_cbk_t callback, - void *context); int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); @@ -885,9 +904,10 @@ void mlx5_init_mkey_table(struct mlx5_core_dev *dev); void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev); int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - u32 *in, int inlen, - u32 *out, int outlen, - mlx5_cmd_cbk_t callback, void *context); + struct mlx5_async_ctx *async_ctx, u32 *in, + int inlen, u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen); From ce4eee5340a966f633fee1e8603aafd223ca394a Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Fri, 18 Jan 2019 16:33:11 -0800 Subject: [PATCH 0146/1436] net/mlx5: Add pci AtomicOps request Calling pci_enable_atomic_ops_to_root enables AtomicOp requests to pci root port. AtomicOp requests will be enabled only if the completer and all intermediate pci bridges support PCI atomic operations. This, together with appropriate settings in the NVCONFIG should enable PCI atomic operations on the device. PCI atomic operations were first introduced in PCI Express Base Specification 2.1. The Supported operations are Swap (Unconditional Swap), CAS (Compare and Swap) and FetchAdd (Fetch and Add). Unlike other atomic operation modes PCI atomic operations gives the user the option to do atomic operations on local memory, without involving verbs api, without it compromising the operation's atomicity. Signed-off-by: Michael Guralnik Reviewed-by: Majd Dibbiny Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index be81b319b0dc..085e1133b8d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -693,6 +693,11 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_clr_master; } + if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) && + pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) && + pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128)) + mlx5_core_dbg(dev, "Enabling pci atomics failed\n"); + dev->iseg_base = pci_resource_start(dev->pdev, 0); dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg)); if (!dev->iseg) { From 8fa3adb8c6beee4af079ac90b9575ab92951de3f Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Mon, 7 Jan 2019 15:06:15 +0000 Subject: [PATCH 0147/1436] KVM: arm/arm64: vgic: Make vgic_irq->irq_lock a raw_spinlock vgic_irq->irq_lock must always be taken with interrupts disabled as it is used in interrupt context. For configurations such as PREEMPT_RT_FULL, this means that it should be a raw_spinlock since RT spinlocks are interruptible. Signed-off-by: Julien Thierry Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 2 +- virt/kvm/arm/vgic/vgic-debug.c | 4 +- virt/kvm/arm/vgic/vgic-init.c | 4 +- virt/kvm/arm/vgic/vgic-its.c | 14 +++---- virt/kvm/arm/vgic/vgic-mmio-v2.c | 14 +++---- virt/kvm/arm/vgic/vgic-mmio-v3.c | 12 +++--- virt/kvm/arm/vgic/vgic-mmio.c | 34 +++++++-------- virt/kvm/arm/vgic/vgic-v2.c | 4 +- virt/kvm/arm/vgic/vgic-v3.c | 8 ++-- virt/kvm/arm/vgic/vgic.c | 71 ++++++++++++++++---------------- 10 files changed, 83 insertions(+), 84 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 4f31f96bbfab..b5426052152e 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -100,7 +100,7 @@ enum vgic_irq_config { }; struct vgic_irq { - spinlock_t irq_lock; /* Protects the content of the struct */ + raw_spinlock_t irq_lock; /* Protects the content of the struct */ struct list_head lpi_list; /* Used to link all LPIs together */ struct list_head ap_list; diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c index 07aa900bac56..1f62f2b8065d 100644 --- a/virt/kvm/arm/vgic/vgic-debug.c +++ b/virt/kvm/arm/vgic/vgic-debug.c @@ -251,9 +251,9 @@ static int vgic_debug_show(struct seq_file *s, void *v) return 0; } - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); print_irq_state(s, irq, vcpu); - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(kvm, irq); return 0; diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index c0c0b88af1d5..1128e97406cf 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -171,7 +171,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) irq->intid = i + VGIC_NR_PRIVATE_IRQS; INIT_LIST_HEAD(&irq->ap_list); - spin_lock_init(&irq->irq_lock); + raw_spin_lock_init(&irq->irq_lock); irq->vcpu = NULL; irq->target_vcpu = vcpu0; kref_init(&irq->refcount); @@ -216,7 +216,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; INIT_LIST_HEAD(&irq->ap_list); - spin_lock_init(&irq->irq_lock); + raw_spin_lock_init(&irq->irq_lock); irq->intid = i; irq->vcpu = NULL; irq->target_vcpu = vcpu; diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index eb2a390a6c86..911ba61505ee 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -65,7 +65,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, INIT_LIST_HEAD(&irq->lpi_list); INIT_LIST_HEAD(&irq->ap_list); - spin_lock_init(&irq->irq_lock); + raw_spin_lock_init(&irq->irq_lock); irq->config = VGIC_CONFIG_EDGE; kref_init(&irq->refcount); @@ -287,7 +287,7 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, if (ret) return ret; - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); if (!filter_vcpu || filter_vcpu == irq->target_vcpu) { irq->priority = LPI_PROP_PRIORITY(prop); @@ -299,7 +299,7 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, } } - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); if (irq->hw) return its_prop_update_vlpi(irq->host_irq, prop, needs_inv); @@ -352,9 +352,9 @@ static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu) int ret = 0; unsigned long flags; - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->target_vcpu = vcpu; - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); if (irq->hw) { struct its_vlpi_map map; @@ -455,7 +455,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) } irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = pendmask & (1U << bit_nr); vgic_queue_irq_unlock(vcpu->kvm, irq, flags); vgic_put_irq(vcpu->kvm, irq); @@ -612,7 +612,7 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, return irq_set_irqchip_state(irq->host_irq, IRQCHIP_STATE_PENDING, true); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = true; vgic_queue_irq_unlock(kvm, irq, flags); diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index 738b65d2d0e7..b535fffc7400 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -147,7 +147,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = true; irq->source |= 1U << source_vcpu->vcpu_id; @@ -191,13 +191,13 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu, struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid + i); int target; - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->targets = (val >> (i * 8)) & cpu_mask; target = irq->targets ? __ffs(irq->targets) : 0; irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target); - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } } @@ -230,13 +230,13 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu, for (i = 0; i < len; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->source &= ~((val >> (i * 8)) & 0xff); if (!irq->source) irq->pending_latch = false; - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } } @@ -252,7 +252,7 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, for (i = 0; i < len; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->source |= (val >> (i * 8)) & 0xff; @@ -260,7 +260,7 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, irq->pending_latch = true; vgic_queue_irq_unlock(vcpu->kvm, irq, flags); } else { - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); } vgic_put_irq(vcpu->kvm, irq); } diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index b3d1f0985117..4a12322bf7df 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -169,13 +169,13 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, if (!irq) return; - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); /* We only care about and preserve Aff0, Aff1 and Aff2. */ irq->mpidr = val & GENMASK(23, 0); irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr); - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } @@ -281,7 +281,7 @@ static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, for (i = 0; i < len * 8; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); if (test_bit(i, &val)) { /* * pending_latch is set irrespective of irq type @@ -292,7 +292,7 @@ static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, vgic_queue_irq_unlock(vcpu->kvm, irq, flags); } else { irq->pending_latch = false; - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); } vgic_put_irq(vcpu->kvm, irq); @@ -957,7 +957,7 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1) irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); /* * An access targetting Group0 SGIs can only generate @@ -968,7 +968,7 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1) irq->pending_latch = true; vgic_queue_irq_unlock(vcpu->kvm, irq, flags); } else { - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); } vgic_put_irq(vcpu->kvm, irq); diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index ceeda7e04a4d..7de42fba05b5 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -77,7 +77,7 @@ void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr, for (i = 0; i < len * 8; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->group = !!(val & BIT(i)); vgic_queue_irq_unlock(vcpu->kvm, irq, flags); @@ -120,7 +120,7 @@ void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->enabled = true; vgic_queue_irq_unlock(vcpu->kvm, irq, flags); @@ -139,11 +139,11 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->enabled = false; - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } } @@ -160,10 +160,10 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); unsigned long flags; - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq_is_pending(irq)) value |= (1U << i); - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } @@ -215,7 +215,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->hw) vgic_hw_irq_spending(vcpu, irq, is_uaccess); else @@ -262,14 +262,14 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->hw) vgic_hw_irq_cpending(vcpu, irq, is_uaccess); else irq->pending_latch = false; - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } } @@ -311,7 +311,7 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, unsigned long flags; struct kvm_vcpu *requester_vcpu = vgic_get_mmio_requester_vcpu(); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->hw) { vgic_hw_irq_change_active(vcpu, irq, active, !requester_vcpu); @@ -342,7 +342,7 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, if (irq->active) vgic_queue_irq_unlock(vcpu->kvm, irq, flags); else - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); } /* @@ -485,10 +485,10 @@ void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, for (i = 0; i < len; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); /* Narrow the priority range to what we actually support */ irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS); - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } @@ -534,14 +534,14 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, continue; irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); if (test_bit(i * 2 + 1, &val)) irq->config = VGIC_CONFIG_EDGE; else irq->config = VGIC_CONFIG_LEVEL; - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } } @@ -590,12 +590,12 @@ void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, * restore irq config before line level. */ new_level = !!(val & (1U << i)); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->line_level = new_level; if (new_level) vgic_queue_irq_unlock(vcpu->kvm, irq, flags); else - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 69b892abd7dc..d91a8938aa7c 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -84,7 +84,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) irq = vgic_get_irq(vcpu->kvm, vcpu, intid); - spin_lock(&irq->irq_lock); + raw_spin_lock(&irq->irq_lock); /* Always preserve the active bit */ irq->active = !!(val & GICH_LR_ACTIVE_BIT); @@ -127,7 +127,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) vgic_irq_set_phys_active(irq, false); } - spin_unlock(&irq->irq_lock); + raw_spin_unlock(&irq->irq_lock); vgic_put_irq(vcpu->kvm, irq); } diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 9c0dd234ebe8..4ee0aeb9a905 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -76,7 +76,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) if (!irq) /* An LPI could have been unmapped. */ continue; - spin_lock(&irq->irq_lock); + raw_spin_lock(&irq->irq_lock); /* Always preserve the active bit */ irq->active = !!(val & ICH_LR_ACTIVE_BIT); @@ -119,7 +119,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) vgic_irq_set_phys_active(irq, false); } - spin_unlock(&irq->irq_lock); + raw_spin_unlock(&irq->irq_lock); vgic_put_irq(vcpu->kvm, irq); } @@ -347,9 +347,9 @@ retry: status = val & (1 << bit_nr); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->target_vcpu != vcpu) { - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); goto retry; } irq->pending_latch = status; diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 870b1185173b..bc36f2e68f5a 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -244,8 +244,8 @@ static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b) bool penda, pendb; int ret; - spin_lock(&irqa->irq_lock); - spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING); + raw_spin_lock(&irqa->irq_lock); + raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING); if (irqa->active || irqb->active) { ret = (int)irqb->active - (int)irqa->active; @@ -263,8 +263,8 @@ static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b) /* Both pending and enabled, sort by priority */ ret = irqa->priority - irqb->priority; out: - spin_unlock(&irqb->irq_lock); - spin_unlock(&irqa->irq_lock); + raw_spin_unlock(&irqb->irq_lock); + raw_spin_unlock(&irqa->irq_lock); return ret; } @@ -325,7 +325,7 @@ retry: * not need to be inserted into an ap_list and there is also * no more work for us to do. */ - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); /* * We have to kick the VCPU here, because we could be @@ -347,12 +347,12 @@ retry: * We must unlock the irq lock to take the ap_list_lock where * we are going to insert this new pending interrupt. */ - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); /* someone can do stuff here, which we re-check below */ spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags); - spin_lock(&irq->irq_lock); + raw_spin_lock(&irq->irq_lock); /* * Did something change behind our backs? @@ -367,10 +367,10 @@ retry: */ if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) { - spin_unlock(&irq->irq_lock); + raw_spin_unlock(&irq->irq_lock); spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); goto retry; } @@ -382,7 +382,7 @@ retry: list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head); irq->vcpu = vcpu; - spin_unlock(&irq->irq_lock); + raw_spin_unlock(&irq->irq_lock); spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags); kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); @@ -430,11 +430,11 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, if (!irq) return -EINVAL; - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); if (!vgic_validate_injection(irq, level, owner)) { /* Nothing to see here, move along... */ - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(kvm, irq); return 0; } @@ -494,9 +494,9 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, BUG_ON(!irq); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level); - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); return ret; @@ -519,11 +519,11 @@ void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid) if (!irq->hw) goto out; - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->active = false; irq->pending_latch = false; irq->line_level = false; - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); out: vgic_put_irq(vcpu->kvm, irq); } @@ -539,9 +539,9 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); BUG_ON(!irq); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); kvm_vgic_unmap_irq(irq); - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); return 0; @@ -571,12 +571,12 @@ int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner) return -EINVAL; irq = vgic_get_irq(vcpu->kvm, vcpu, intid); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->owner && irq->owner != owner) ret = -EEXIST; else irq->owner = owner; - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); return ret; } @@ -603,7 +603,7 @@ retry: struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB; bool target_vcpu_needs_kick = false; - spin_lock(&irq->irq_lock); + raw_spin_lock(&irq->irq_lock); BUG_ON(vcpu != irq->vcpu); @@ -616,7 +616,7 @@ retry: */ list_del(&irq->ap_list); irq->vcpu = NULL; - spin_unlock(&irq->irq_lock); + raw_spin_unlock(&irq->irq_lock); /* * This vgic_put_irq call matches the @@ -631,13 +631,13 @@ retry: if (target_vcpu == vcpu) { /* We're on the right CPU */ - spin_unlock(&irq->irq_lock); + raw_spin_unlock(&irq->irq_lock); continue; } /* This interrupt looks like it has to be migrated. */ - spin_unlock(&irq->irq_lock); + raw_spin_unlock(&irq->irq_lock); spin_unlock(&vgic_cpu->ap_list_lock); /* @@ -655,7 +655,7 @@ retry: spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock); spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock, SINGLE_DEPTH_NESTING); - spin_lock(&irq->irq_lock); + raw_spin_lock(&irq->irq_lock); /* * If the affinity has been preserved, move the @@ -675,7 +675,7 @@ retry: target_vcpu_needs_kick = true; } - spin_unlock(&irq->irq_lock); + raw_spin_unlock(&irq->irq_lock); spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock); spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock); @@ -741,10 +741,10 @@ static int compute_ap_list_depth(struct kvm_vcpu *vcpu, list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { int w; - spin_lock(&irq->irq_lock); + raw_spin_lock(&irq->irq_lock); /* GICv2 SGIs can count for more than one... */ w = vgic_irq_get_lr_count(irq); - spin_unlock(&irq->irq_lock); + raw_spin_unlock(&irq->irq_lock); count += w; *multi_sgi |= (w > 1); @@ -770,7 +770,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) count = 0; list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { - spin_lock(&irq->irq_lock); + raw_spin_lock(&irq->irq_lock); /* * If we have multi-SGIs in the pipeline, we need to @@ -780,7 +780,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) * the AP list has been sorted already. */ if (multi_sgi && irq->priority > prio) { - spin_unlock(&irq->irq_lock); + _raw_spin_unlock(&irq->irq_lock); break; } @@ -791,7 +791,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) prio = irq->priority; } - spin_unlock(&irq->irq_lock); + raw_spin_unlock(&irq->irq_lock); if (count == kvm_vgic_global_state.nr_lr) { if (!list_is_last(&irq->ap_list, @@ -921,11 +921,11 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { - spin_lock(&irq->irq_lock); + raw_spin_lock(&irq->irq_lock); pending = irq_is_pending(irq) && irq->enabled && !irq->active && irq->priority < vmcr.pmr; - spin_unlock(&irq->irq_lock); + raw_spin_unlock(&irq->irq_lock); if (pending) break; @@ -963,11 +963,10 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid) return false; irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); - spin_lock_irqsave(&irq->irq_lock, flags); + raw_spin_lock_irqsave(&irq->irq_lock, flags); map_is_active = irq->hw && irq->active; - spin_unlock_irqrestore(&irq->irq_lock, flags); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); return map_is_active; } - From fc3bc475231e12e9c0142f60100cf84d077c79e1 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Mon, 7 Jan 2019 15:06:16 +0000 Subject: [PATCH 0148/1436] KVM: arm/arm64: vgic: Make vgic_dist->lpi_list_lock a raw_spinlock vgic_dist->lpi_list_lock must always be taken with interrupts disabled as it is used in interrupt context. For configurations such as PREEMPT_RT_FULL, this means that it should be a raw_spinlock since RT spinlocks are interruptible. Signed-off-by: Julien Thierry Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 2 +- virt/kvm/arm/vgic/vgic-init.c | 2 +- virt/kvm/arm/vgic/vgic-its.c | 8 ++++---- virt/kvm/arm/vgic/vgic.c | 10 +++++----- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index b5426052152e..32954e115796 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -256,7 +256,7 @@ struct vgic_dist { u64 propbaser; /* Protects the lpi_list and the count value below. */ - spinlock_t lpi_list_lock; + raw_spinlock_t lpi_list_lock; struct list_head lpi_list_head; int lpi_list_count; diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 1128e97406cf..330c1ada7326 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -64,7 +64,7 @@ void kvm_vgic_early_init(struct kvm *kvm) struct vgic_dist *dist = &kvm->arch.vgic; INIT_LIST_HEAD(&dist->lpi_list_head); - spin_lock_init(&dist->lpi_list_lock); + raw_spin_lock_init(&dist->lpi_list_lock); } /* CREATION */ diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 911ba61505ee..ab3f47745d9c 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -73,7 +73,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, irq->target_vcpu = vcpu; irq->group = 1; - spin_lock_irqsave(&dist->lpi_list_lock, flags); + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); /* * There could be a race with another vgic_add_lpi(), so we need to @@ -101,7 +101,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, dist->lpi_list_count++; out_unlock: - spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); /* * We "cache" the configuration table entries in our struct vgic_irq's. @@ -332,7 +332,7 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr) if (!intids) return -ENOMEM; - spin_lock_irqsave(&dist->lpi_list_lock, flags); + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { if (i == irq_count) break; @@ -341,7 +341,7 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr) continue; intids[i++] = irq->intid; } - spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); *intid_ptr = intids; return i; diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index bc36f2e68f5a..ea54a1923c4f 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -72,7 +72,7 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid) struct vgic_irq *irq = NULL; unsigned long flags; - spin_lock_irqsave(&dist->lpi_list_lock, flags); + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { if (irq->intid != intid) @@ -88,7 +88,7 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid) irq = NULL; out_unlock: - spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); return irq; } @@ -138,15 +138,15 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) if (irq->intid < VGIC_MIN_LPI) return; - spin_lock_irqsave(&dist->lpi_list_lock, flags); + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); if (!kref_put(&irq->refcount, vgic_irq_release)) { - spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); return; }; list_del(&irq->lpi_list); dist->lpi_list_count--; - spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); kfree(irq); } From e08d8d296079e8fd7eefd53f73dcafebd3a5bf9f Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Mon, 7 Jan 2019 15:06:17 +0000 Subject: [PATCH 0149/1436] KVM: arm/arm64: vgic: Make vgic_cpu->ap_list_lock a raw_spinlock vgic_cpu->ap_list_lock must always be taken with interrupts disabled as it is used in interrupt context. For configurations such as PREEMPT_RT_FULL, this means that it should be a raw_spinlock since RT spinlocks are interruptible. Signed-off-by: Julien Thierry Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 2 +- virt/kvm/arm/vgic/vgic-init.c | 2 +- virt/kvm/arm/vgic/vgic.c | 37 ++++++++++++++++++----------------- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 32954e115796..c36c86f1ec9a 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -307,7 +307,7 @@ struct vgic_cpu { unsigned int used_lrs; struct vgic_irq private_irqs[VGIC_NR_PRIVATE_IRQS]; - spinlock_t ap_list_lock; /* Protects the ap_list */ + raw_spinlock_t ap_list_lock; /* Protects the ap_list */ /* * List of IRQs that this VCPU should consider because they are either diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 330c1ada7326..dfbfcb1fe933 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -206,7 +206,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) vgic_cpu->sgi_iodev.base_addr = VGIC_ADDR_UNDEF; INIT_LIST_HEAD(&vgic_cpu->ap_list_head); - spin_lock_init(&vgic_cpu->ap_list_lock); + raw_spin_lock_init(&vgic_cpu->ap_list_lock); /* * Enable and configure all SGIs to be edge-triggered and diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index ea54a1923c4f..abd9c7352677 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -54,11 +54,11 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { * When taking more than one ap_list_lock at the same time, always take the * lowest numbered VCPU's ap_list_lock first, so: * vcpuX->vcpu_id < vcpuY->vcpu_id: - * spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock); - * spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock); + * raw_spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock); + * raw_spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock); * * Since the VGIC must support injecting virtual interrupts from ISRs, we have - * to use the spin_lock_irqsave/spin_unlock_irqrestore versions of outer + * to use the raw_spin_lock_irqsave/raw_spin_unlock_irqrestore versions of outer * spinlocks for any lock that may be taken while injecting an interrupt. */ @@ -351,7 +351,7 @@ retry: /* someone can do stuff here, which we re-check below */ - spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags); + raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags); raw_spin_lock(&irq->irq_lock); /* @@ -368,7 +368,8 @@ retry: if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) { raw_spin_unlock(&irq->irq_lock); - spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags); + raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, + flags); raw_spin_lock_irqsave(&irq->irq_lock, flags); goto retry; @@ -383,7 +384,7 @@ retry: irq->vcpu = vcpu; raw_spin_unlock(&irq->irq_lock); - spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags); + raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags); kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); @@ -597,7 +598,7 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu) DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); retry: - spin_lock(&vgic_cpu->ap_list_lock); + raw_spin_lock(&vgic_cpu->ap_list_lock); list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) { struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB; @@ -638,7 +639,7 @@ retry: /* This interrupt looks like it has to be migrated. */ raw_spin_unlock(&irq->irq_lock); - spin_unlock(&vgic_cpu->ap_list_lock); + raw_spin_unlock(&vgic_cpu->ap_list_lock); /* * Ensure locking order by always locking the smallest @@ -652,9 +653,9 @@ retry: vcpuB = vcpu; } - spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock); - spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock, - SINGLE_DEPTH_NESTING); + raw_spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock); + raw_spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock, + SINGLE_DEPTH_NESTING); raw_spin_lock(&irq->irq_lock); /* @@ -676,8 +677,8 @@ retry: } raw_spin_unlock(&irq->irq_lock); - spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock); - spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock); + raw_spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock); + raw_spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock); if (target_vcpu_needs_kick) { kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu); @@ -687,7 +688,7 @@ retry: goto retry; } - spin_unlock(&vgic_cpu->ap_list_lock); + raw_spin_unlock(&vgic_cpu->ap_list_lock); } static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu) @@ -872,9 +873,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); - spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); + raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); vgic_flush_lr_state(vcpu); - spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); + raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); if (can_access_vgic_from_kernel()) vgic_restore_state(vcpu); @@ -918,7 +919,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) vgic_get_vmcr(vcpu, &vmcr); - spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); + raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { raw_spin_lock(&irq->irq_lock); @@ -931,7 +932,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) break; } - spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); + raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); return pending; } From 9825bd94e3a2baae1f4874767ae3a7d4c049720e Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 24 Jan 2019 04:16:45 +0000 Subject: [PATCH 0150/1436] iommu/amd: Fix IOMMU page flush when detach device from a domain When a VM is terminated, the VFIO driver detaches all pass-through devices from VFIO domain by clearing domain id and page table root pointer from each device table entry (DTE), and then invalidates the DTE. Then, the VFIO driver unmap pages and invalidate IOMMU pages. Currently, the IOMMU driver keeps track of which IOMMU and how many devices are attached to the domain. When invalidate IOMMU pages, the driver checks if the IOMMU is still attached to the domain before issuing the invalidate page command. However, since VFIO has already detached all devices from the domain, the subsequent INVALIDATE_IOMMU_PAGES commands are being skipped as there is no IOMMU attached to the domain. This results in data corruption and could cause the PCI device to end up in indeterministic state. Fix this by invalidate IOMMU pages when detach a device, and before decrementing the per-domain device reference counts. Cc: Boris Ostrovsky Suggested-by: Joerg Roedel Co-developed-by: Brijesh Singh Signed-off-by: Brijesh Singh Signed-off-by: Suravee Suthikulpanit Fixes: 6de8ad9b9ee0 ('x86/amd-iommu: Make iommu_flush_pages aware of multiple IOMMUs') Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 843b825cc1c4..2a7b78bb98b4 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1991,16 +1991,13 @@ static void do_attach(struct iommu_dev_data *dev_data, static void do_detach(struct iommu_dev_data *dev_data) { + struct protection_domain *domain = dev_data->domain; struct amd_iommu *iommu; u16 alias; iommu = amd_iommu_rlookup_table[dev_data->devid]; alias = dev_data->alias; - /* decrease reference counters */ - dev_data->domain->dev_iommu[iommu->index] -= 1; - dev_data->domain->dev_cnt -= 1; - /* Update data structures */ dev_data->domain = NULL; list_del(&dev_data->list); @@ -2010,6 +2007,16 @@ static void do_detach(struct iommu_dev_data *dev_data) /* Flush the DTE entry */ device_flush_dte(dev_data); + + /* Flush IOTLB */ + domain_flush_tlb_pde(domain); + + /* Wait for the flushes to finish */ + domain_flush_complete(domain); + + /* decrease reference counters - needs to happen after the flushes */ + domain->dev_iommu[iommu->index] -= 1; + domain->dev_cnt -= 1; } /* From 45383fb0f42db3945ac6cc658704706cdae19528 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 23 Jan 2019 17:50:26 +0800 Subject: [PATCH 0151/1436] virtio: support VIRTIO_F_ORDER_PLATFORM This patch introduces the support for VIRTIO_F_ORDER_PLATFORM. If this feature is negotiated, the driver must use the barriers suitable for hardware devices. Otherwise, the device and driver are assumed to be implemented in software, that is they can be assumed to run on identical CPUs in an SMP configuration. Thus a weaker form of memory barriers is sufficient to yield better performance. It is recommended that an add-in card based PCI device offers this feature for portability. The device will fail to operate further or will operate in a slower emulation mode if this feature is offered but not accepted. Signed-off-by: Tiwei Bie Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 8 ++++++++ include/uapi/linux/virtio_config.h | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index cd7e755484e3..27d3f057493e 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1609,6 +1609,9 @@ static struct virtqueue *vring_create_virtqueue_packed( !context; vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); + if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) + vq->weak_barriers = false; + vq->packed.ring_dma_addr = ring_dma_addr; vq->packed.driver_event_dma_addr = driver_event_dma_addr; vq->packed.device_event_dma_addr = device_event_dma_addr; @@ -2079,6 +2082,9 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, !context; vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); + if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) + vq->weak_barriers = false; + vq->split.queue_dma_addr = 0; vq->split.queue_size_in_bytes = 0; @@ -2213,6 +2219,8 @@ void vring_transport_features(struct virtio_device *vdev) break; case VIRTIO_F_RING_PACKED: break; + case VIRTIO_F_ORDER_PLATFORM: + break; default: /* We don't understand this bit. */ __virtio_clear_bit(vdev, i); diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index 1196e1c1d4f6..ff8e7dc9d4dd 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -78,6 +78,12 @@ /* This feature indicates support for the packed virtqueue layout. */ #define VIRTIO_F_RING_PACKED 34 +/* + * This feature indicates that memory accesses by the driver and the + * device are ordered in a way described by the platform. + */ +#define VIRTIO_F_ORDER_PLATFORM 36 + /* * Does the device support Single Root I/O Virtualization? */ From 0113613faf0214b5e04ccf9149c330ee67f9779c Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 21 Jan 2019 07:22:54 +0100 Subject: [PATCH 0152/1436] Revert "Input: olpc_apsp - enable the SP clock" Turns out this is not such a great idea. Once the SP clock is disabled, it's not sufficient to just enable in order to bring the SP core back up. It seems that the kernel has no business managing this clock. Just let the firmware keep it enabled. This reverts commit ed22cee91a88c47e564478b012fdbcb079653499. Link: https://lore.kernel.org/lkml/154783267051.169631.3197836544646625747@swboyd.mtv.corp.google.com/ Signed-off-by: Lubomir Rintel Acked-by: Dmitry Torokhov Signed-off-by: Stephen Boyd --- .../devicetree/bindings/serio/olpc,ap-sp.txt | 4 ---- drivers/input/serio/olpc_apsp.c | 14 -------------- 2 files changed, 18 deletions(-) diff --git a/Documentation/devicetree/bindings/serio/olpc,ap-sp.txt b/Documentation/devicetree/bindings/serio/olpc,ap-sp.txt index 36603419d6f8..0e72183f52bc 100644 --- a/Documentation/devicetree/bindings/serio/olpc,ap-sp.txt +++ b/Documentation/devicetree/bindings/serio/olpc,ap-sp.txt @@ -4,14 +4,10 @@ Required properties: - compatible : "olpc,ap-sp" - reg : base address and length of SoC's WTM registers - interrupts : SP-AP interrupt -- clocks : phandle + clock-specifier for the clock that drives the WTM -- clock-names: should be "sp" Example: ap-sp@d4290000 { compatible = "olpc,ap-sp"; reg = <0xd4290000 0x1000>; interrupts = <40>; - clocks = <&soc_clocks MMP2_CLK_SP>; - clock-names = "sp"; } diff --git a/drivers/input/serio/olpc_apsp.c b/drivers/input/serio/olpc_apsp.c index b36084710f69..698003309d2c 100644 --- a/drivers/input/serio/olpc_apsp.c +++ b/drivers/input/serio/olpc_apsp.c @@ -23,7 +23,6 @@ #include #include #include -#include /* * The OLPC XO-1.75 and XO-4 laptops do not have a hardware PS/2 controller. @@ -75,7 +74,6 @@ struct olpc_apsp { struct serio *kbio; struct serio *padio; void __iomem *base; - struct clk *clk; int open_count; int irq; }; @@ -148,17 +146,11 @@ static int olpc_apsp_open(struct serio *port) struct olpc_apsp *priv = port->port_data; unsigned int tmp; unsigned long l; - int error; if (priv->open_count++ == 0) { - error = clk_prepare_enable(priv->clk); - if (error) - return error; - l = readl(priv->base + COMMAND_FIFO_STATUS); if (!(l & CMD_STS_MASK)) { dev_err(priv->dev, "SP cannot accept commands.\n"); - clk_disable_unprepare(priv->clk); return -EIO; } @@ -179,8 +171,6 @@ static void olpc_apsp_close(struct serio *port) /* Disable interrupt 0 */ tmp = readl(priv->base + PJ_INTERRUPT_MASK); writel(tmp | INT_0, priv->base + PJ_INTERRUPT_MASK); - - clk_disable_unprepare(priv->clk); } } @@ -206,10 +196,6 @@ static int olpc_apsp_probe(struct platform_device *pdev) if (priv->irq < 0) return priv->irq; - priv->clk = devm_clk_get(&pdev->dev, "sp"); - if (IS_ERR(priv->clk)) - return PTR_ERR(priv->clk); - /* KEYBOARD */ kb_serio = kzalloc(sizeof(struct serio), GFP_KERNEL); if (!kb_serio) From 0acb69e7b904135ff540bc483942c0dba40ecdb9 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 21 Jan 2019 07:22:55 +0100 Subject: [PATCH 0153/1436] Revert "clk: mmp2: add SP clock" It seems that the kernel has no business managing this clock: once the SP clock is disabled, it's not sufficient to just enable in order to bring the SP core back up. Just let the firmware keep it enabled and don't expose it to drivers. This reverts commit fc27c2394d96fd19854b7e2d3f0e60df0d86fc90. Link: https://lore.kernel.org/lkml/154783267051.169631.3197836544646625747@swboyd.mtv.corp.google.com/ Signed-off-by: Lubomir Rintel Signed-off-by: Stephen Boyd --- drivers/clk/mmp/clk-of-mmp2.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/clk/mmp/clk-of-mmp2.c b/drivers/clk/mmp/clk-of-mmp2.c index 61fefc046ec5..d083b860f083 100644 --- a/drivers/clk/mmp/clk-of-mmp2.c +++ b/drivers/clk/mmp/clk-of-mmp2.c @@ -53,7 +53,6 @@ #define APMU_DISP1 0x110 #define APMU_CCIC0 0x50 #define APMU_CCIC1 0xf4 -#define APMU_SP 0x68 #define MPMU_UART_PLL 0x14 struct mmp2_clk_unit { @@ -210,8 +209,6 @@ static struct mmp_clk_mix_config ccic1_mix_config = { .reg_info = DEFINE_MIX_REG_INFO(4, 16, 2, 6, 32), }; -static DEFINE_SPINLOCK(sp_lock); - static struct mmp_param_mux_clk apmu_mux_clks[] = { {MMP2_CLK_DISP0_MUX, "disp0_mux", disp_parent_names, ARRAY_SIZE(disp_parent_names), CLK_SET_RATE_PARENT, APMU_DISP0, 6, 2, 0, &disp0_lock}, {MMP2_CLK_DISP1_MUX, "disp1_mux", disp_parent_names, ARRAY_SIZE(disp_parent_names), CLK_SET_RATE_PARENT, APMU_DISP1, 6, 2, 0, &disp1_lock}, @@ -242,7 +239,6 @@ static struct mmp_param_gate_clk apmu_gate_clks[] = { {MMP2_CLK_CCIC1, "ccic1_clk", "ccic1_mix_clk", CLK_SET_RATE_PARENT, APMU_CCIC1, 0x1b, 0x1b, 0x0, 0, &ccic1_lock}, {MMP2_CLK_CCIC1_PHY, "ccic1_phy_clk", "ccic1_mix_clk", CLK_SET_RATE_PARENT, APMU_CCIC1, 0x24, 0x24, 0x0, 0, &ccic1_lock}, {MMP2_CLK_CCIC1_SPHY, "ccic1_sphy_clk", "ccic1_sphy_div", CLK_SET_RATE_PARENT, APMU_CCIC1, 0x300, 0x300, 0x0, 0, &ccic1_lock}, - {MMP2_CLK_SP, "sp_clk", NULL, CLK_SET_RATE_PARENT, APMU_SP, 0x1b, 0x1b, 0x0, 0, &sp_lock}, }; static void mmp2_axi_periph_clk_init(struct mmp2_clk_unit *pxa_unit) From 401fbb34f53e20a8d4734136a2d02ff1ee76f86e Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 21 Jan 2019 07:22:56 +0100 Subject: [PATCH 0154/1436] Revert "dt-bindings: marvell,mmp2: Add clock id for the SP clock" It seems that the kernel has no business managing this clock: once the SP clock is disabled, it's not sufficient to just enable it in order to bring the SP core back up. Pretty sure nothing ever used this and it's safe to remove. This reverts commit e8a2c779141415105825e65a4715f1130bba61b1. Signed-off-by: Lubomir Rintel Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/marvell,mmp2.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/dt-bindings/clock/marvell,mmp2.h b/include/dt-bindings/clock/marvell,mmp2.h index 7b24fc791146..228a5e234af0 100644 --- a/include/dt-bindings/clock/marvell,mmp2.h +++ b/include/dt-bindings/clock/marvell,mmp2.h @@ -71,7 +71,6 @@ #define MMP2_CLK_CCIC1_MIX 117 #define MMP2_CLK_CCIC1_PHY 118 #define MMP2_CLK_CCIC1_SPHY 119 -#define MMP2_CLK_SP 120 #define MMP2_NR_CLKS 200 #endif From ede77858473ae4cab6f8f147efcaa76989761535 Mon Sep 17 00:00:00 2001 From: Derek Basehore Date: Thu, 20 Dec 2018 16:31:00 -0800 Subject: [PATCH 0155/1436] clk: Remove global clk traversal on fetch parent index It's not required to traverse the entire clk tree when the parents array contains a NULL value. We already have the parent clk_core pointer, so we can just compare the parent->name and parent_names[i] pointers. This can be a substantial power improvement in cases where the parent clk isn't known and that clk is never registered, because a mux having an unregistered parent name may traverse the clk tree on every clk_set_rate() call in clk_mux_determine_rate_flags(). This can happen hundreds of times a second for CPU clks. This patch is the combination of reverting commit 470b5e2f97cf ("clk: simplify clk_fetch_parent_index() function") and optimizing the resulting code to never call __clk_lookup() because we already have the clk_core pointer we're looking for. That optimization went unnoticed even after commit da0f0b2c3ad2 ("clk: Correct lookup logic in clk_fetch_parent_index()") tried to optimize this path. Signed-off-by: Derek Basehore [sboyd@kernel.org: More description in commit text] Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 6ccdbedb02f3..d2477a5058ac 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1513,10 +1513,20 @@ static int clk_fetch_parent_index(struct clk_core *core, if (!parent) return -EINVAL; - for (i = 0; i < core->num_parents; i++) - if (clk_core_get_parent_by_index(core, i) == parent) + for (i = 0; i < core->num_parents; i++) { + if (core->parents[i] == parent) return i; + if (core->parents[i]) + continue; + + /* Fallback to comparing globally unique names */ + if (!strcmp(parent->name, core->parent_names[i])) { + core->parents[i] = parent; + return i; + } + } + return -EINVAL; } From a64a9c088b75cba5840320d57e0bbfb36739c3b5 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Fri, 18 Jan 2019 12:54:13 +0000 Subject: [PATCH 0156/1436] clk: imx: Fix fractional clock set rate computation Before multiplying by PLL_FRAC_DENOM, the temp64 needs to be temp64 = rate * 2 - divfi * parent_rate * 8, instead of: temp64 = (rate * 2 - divfi) * parent_rate Fixes: 6209624b9a5c1e ("clk: imx: Add fractional PLL output clock") Signed-off-by: Abel Vesa Signed-off-by: Stephen Boyd --- drivers/clk/imx/clk-frac-pll.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/clk/imx/clk-frac-pll.c b/drivers/clk/imx/clk-frac-pll.c index 0026c3969b1e..76b9eb15604e 100644 --- a/drivers/clk/imx/clk-frac-pll.c +++ b/drivers/clk/imx/clk-frac-pll.c @@ -155,13 +155,14 @@ static int clk_pll_set_rate(struct clk_hw *hw, unsigned long rate, { struct clk_frac_pll *pll = to_clk_frac_pll(hw); u32 val, divfi, divff; - u64 temp64 = parent_rate; + u64 temp64; int ret; parent_rate *= 8; rate *= 2; divfi = rate / parent_rate; - temp64 *= rate - divfi; + temp64 = parent_rate * divfi; + temp64 = rate - temp64; temp64 *= PLL_FRAC_DENOM; do_div(temp64, parent_rate); divff = temp64; From 303aef8b84272d73999a3207dd05bbe10ed89dc5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 15 Jan 2019 22:46:25 +0300 Subject: [PATCH 0157/1436] clk: ti: Fix error handling in ti_clk_parse_divider_data() The ti_clk_parse_divider_data() function is only called from _get_div_table_from_setup(). That function doesn't look at the return value but instead looks at the "*table" pointer. In this case, if the kcalloc() fails then *table is NULL (which means success). It should instead be an error pointer. The ti_clk_parse_divider_data() function has two callers. One checks for errors and the other doesn't. I have fixed it so now both handle errors. Fixes: 4f6be5655dc9 ("clk: ti: divider: add driver internal API for parsing divider data") Signed-off-by: Dan Carpenter Acked-by: Tero Kristo Signed-off-by: Stephen Boyd --- drivers/clk/ti/divider.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/clk/ti/divider.c b/drivers/clk/ti/divider.c index 8d77090ad94a..0241450f3eb3 100644 --- a/drivers/clk/ti/divider.c +++ b/drivers/clk/ti/divider.c @@ -403,8 +403,10 @@ int ti_clk_parse_divider_data(int *div_table, int num_dividers, int max_div, num_dividers = i; tmp = kcalloc(valid_div + 1, sizeof(*tmp), GFP_KERNEL); - if (!tmp) + if (!tmp) { + *table = ERR_PTR(-ENOMEM); return -ENOMEM; + } valid_div = 0; *width = 0; @@ -439,6 +441,7 @@ struct clk_hw *ti_clk_build_component_div(struct ti_clk_divider *setup) { struct clk_omap_divider *div; struct clk_omap_reg *reg; + int ret; if (!setup) return NULL; @@ -458,6 +461,12 @@ struct clk_hw *ti_clk_build_component_div(struct ti_clk_divider *setup) div->flags |= CLK_DIVIDER_POWER_OF_TWO; div->table = _get_div_table_from_setup(setup, &div->width); + if (IS_ERR(div->table)) { + ret = PTR_ERR(div->table); + kfree(div); + return ERR_PTR(ret); + } + div->shift = setup->bit_shift; div->latch = -EINVAL; From 9ff1a3b4912528f853048ccd9757ba6a2cc75557 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Tue, 18 Dec 2018 23:49:41 +0530 Subject: [PATCH 0158/1436] clk: qcom: gcc: Use active only source for CPUSS clocks The clocks of the CPUSS such as "gcc_cpuss_ahb_clk_src" is a CRITICAL clock and needs to vote on the active only source of XO, so as to keep the vote as long as CPUSS is active. Similar rbcpr_clk_src is also has the same requirement. Signed-off-by: Taniya Das Fixes: 06391eddb60a ("clk: qcom: Add Global Clock controller (GCC) driver for SDM845") Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gcc-sdm845.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/clk/qcom/gcc-sdm845.c b/drivers/clk/qcom/gcc-sdm845.c index c782e62dd98b..58fa5c247af1 100644 --- a/drivers/clk/qcom/gcc-sdm845.c +++ b/drivers/clk/qcom/gcc-sdm845.c @@ -115,8 +115,8 @@ static const char * const gcc_parent_names_6[] = { "core_bi_pll_test_se", }; -static const char * const gcc_parent_names_7[] = { - "bi_tcxo", +static const char * const gcc_parent_names_7_ao[] = { + "bi_tcxo_ao", "gpll0", "gpll0_out_even", "core_bi_pll_test_se", @@ -128,6 +128,12 @@ static const char * const gcc_parent_names_8[] = { "core_bi_pll_test_se", }; +static const char * const gcc_parent_names_8_ao[] = { + "bi_tcxo_ao", + "gpll0", + "core_bi_pll_test_se", +}; + static const struct parent_map gcc_parent_map_10[] = { { P_BI_TCXO, 0 }, { P_GPLL0_OUT_MAIN, 1 }, @@ -210,7 +216,7 @@ static struct clk_rcg2 gcc_cpuss_ahb_clk_src = { .freq_tbl = ftbl_gcc_cpuss_ahb_clk_src, .clkr.hw.init = &(struct clk_init_data){ .name = "gcc_cpuss_ahb_clk_src", - .parent_names = gcc_parent_names_7, + .parent_names = gcc_parent_names_7_ao, .num_parents = 4, .ops = &clk_rcg2_ops, }, @@ -229,7 +235,7 @@ static struct clk_rcg2 gcc_cpuss_rbcpr_clk_src = { .freq_tbl = ftbl_gcc_cpuss_rbcpr_clk_src, .clkr.hw.init = &(struct clk_init_data){ .name = "gcc_cpuss_rbcpr_clk_src", - .parent_names = gcc_parent_names_8, + .parent_names = gcc_parent_names_8_ao, .num_parents = 3, .ops = &clk_rcg2_ops, }, From 4fe8713b873fc881284722ce4ac47995de7cf62c Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 29 Jan 2018 09:09:41 -0800 Subject: [PATCH 0159/1436] xtensa: SMP: fix ccount_timer_shutdown ccount_timer_shutdown is called from the atomic context in the secondary_start_kernel, resulting in the following BUG: BUG: sleeping function called from invalid context in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: swapper/1 Preemption disabled at: secondary_start_kernel+0xa1/0x130 Call Trace: ___might_sleep+0xe7/0xfc __might_sleep+0x41/0x44 synchronize_irq+0x24/0x64 disable_irq+0x11/0x14 ccount_timer_shutdown+0x12/0x20 clockevents_switch_state+0x82/0xb4 clockevents_exchange_device+0x54/0x60 tick_check_new_device+0x46/0x70 clockevents_register_device+0x8c/0xc8 clockevents_config_and_register+0x1d/0x2c local_timer_setup+0x75/0x7c secondary_start_kernel+0xb4/0x130 should_never_return+0x32/0x35 Use disable_irq_nosync instead of disable_irq to avoid it. This is safe because the ccount timer IRQ is per-CPU, and once IRQ is masked the ISR will not be called. Signed-off-by: Max Filippov --- arch/xtensa/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index fd524a54d2ab..378186b5eb40 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -89,7 +89,7 @@ static int ccount_timer_shutdown(struct clock_event_device *evt) container_of(evt, struct ccount_timer, evt); if (timer->irq_enabled) { - disable_irq(evt->irq); + disable_irq_nosync(evt->irq); timer->irq_enabled = 0; } return 0; From 2a81efb0de0e33f2d2c83154af0bd3ce389b3269 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 10 Dec 2018 13:56:33 +0000 Subject: [PATCH 0160/1436] arm64: dts: add msm8996 compatible to gicv3 Add compatible to gicv3 node to enable quirk required to restrict writing to GICR_WAKER register which is restricted on msm8996 SoC in Hypervisor. With this quirk MSM8996 can at least boot out of mainline, which can help community to work with boards based on MSM8996. Without this patch Qualcomm DB820c board reboots on mainline. Signed-off-by: Srinivas Kandagatla Signed-off-by: Andy Gross --- arch/arm64/boot/dts/qcom/msm8996.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 99b7495455a6..838e32cc14c9 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -404,7 +404,7 @@ }; intc: interrupt-controller@9bc0000 { - compatible = "arm,gic-v3"; + compatible = "qcom,msm8996-gic-v3", "arm,gic-v3"; #interrupt-cells = <3>; interrupt-controller; #redistributor-regions = <1>; From 093c61b6a0d5768f48442ec59d671496314cc47d Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 25 Jan 2019 07:40:32 +0000 Subject: [PATCH 0161/1436] i3c: fix missing detach if failed to retrieve i3c dev If we failed to retrieve the i3c dev, we should detach the i3c dev I.E i3c_master_detach_i3c_dev(). Signed-off-by: Jisheng Zhang Signed-off-by: Boris Brezillon --- drivers/i3c/master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index c39f89d2deba..2dc628d4f1ae 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -1828,7 +1828,7 @@ int i3c_master_add_i3c_dev_locked(struct i3c_master_controller *master, ret = i3c_master_retrieve_dev_info(newdev); if (ret) - goto err_free_dev; + goto err_detach_dev; olddev = i3c_master_search_i3c_dev_duplicate(newdev); if (olddev) { From 7d652669b61d702c6e62a39579d17f6881670ab6 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 25 Jan 2019 08:21:26 +0100 Subject: [PATCH 0162/1436] batman-adv: release station info tidstats With the addition of TXQ stats in the per-tid statistics the struct station_info grew significantly. This resulted in stack size warnings due to the structure itself being above the limit for the warnings. To work around this, the TID array was allocated dynamically. Also a function to free this content was introduced with commit 7ea3e110f2f8 ("cfg80211: release station info tidstats where needed") but the necessary changes were not provided for batman-adv's B.A.T.M.A.N. V implementation. Signed-off-by: Felix Fietkau Fixes: 8689c051a201 ("cfg80211: dynamically allocate per-tid stats for station info") [sven@narfation.org: add commit message] Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v_elp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index e8090f099eb8..ef0dec20c7d8 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -104,6 +104,9 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo); + /* free the TID stats immediately */ + cfg80211_sinfo_release_content(&sinfo); + dev_put(real_netdev); if (ret == -ENOENT) { /* Node is not associated anymore! It would be From 2292552102b0599ab976072e5609eaf6fb6628f6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 5 Nov 2018 15:30:26 +0000 Subject: [PATCH 0163/1436] arm64: KVM: Don't generate UNDEF when LORegion feature is present We currently hide the LORegion feature, and generate an UNDEF if the guest dares using the corresponding registers. This is a bit extreme, as ARMv8.1 guarantees the feature to be present. The guest should check the feature register before doing anything, but we could also give the guest some slack (read "allow the guest to be a bit stupid"). So instead of unconditionnaly deliver an exception, let's only do it when the host doesn't support LORegion at all (or when the feature has been sanitized out), and treat the registers as RAZ/WI otherwise (with the exception of LORID_EL1 being RO). Fixes: cc33c4e20185 ("arm64/kvm: Prohibit guest LOR accesses") Suggested-by: Richard Henderson Acked-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs.c | 42 +++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index e3e37228ae4e..86096774abcd 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -314,12 +314,29 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu, return read_zero(vcpu, p); } -static bool trap_undef(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) +/* + * ARMv8.1 mandates at least a trivial LORegion implementation, where all the + * RW registers are RES0 (which we can implement as RAZ/WI). On an ARMv8.0 + * system, these registers should UNDEF. LORID_EL1 being a RO register, we + * treat it separately. + */ +static bool trap_loregion(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) { - kvm_inject_undefined(vcpu); - return false; + u64 val = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + u32 sr = sys_reg((u32)r->Op0, (u32)r->Op1, + (u32)r->CRn, (u32)r->CRm, (u32)r->Op2); + + if (!(val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT))) { + kvm_inject_undefined(vcpu); + return false; + } + + if (p->is_write && sr == SYS_LORID_EL1) + return write_to_read_only(vcpu, p, r); + + return trap_raz_wi(vcpu, p, r); } static bool trap_oslsr_el1(struct kvm_vcpu *vcpu, @@ -1048,11 +1065,6 @@ static u64 read_id_reg(struct sys_reg_desc const *r, bool raz) if (val & ptrauth_mask) kvm_debug("ptrauth unsupported for guests, suppressing\n"); val &= ~ptrauth_mask; - } else if (id == SYS_ID_AA64MMFR1_EL1) { - if (val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT)) - kvm_debug("LORegions unsupported for guests, suppressing\n"); - - val &= ~(0xfUL << ID_AA64MMFR1_LOR_SHIFT); } return val; @@ -1338,11 +1350,11 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, - { SYS_DESC(SYS_LORSA_EL1), trap_undef }, - { SYS_DESC(SYS_LOREA_EL1), trap_undef }, - { SYS_DESC(SYS_LORN_EL1), trap_undef }, - { SYS_DESC(SYS_LORC_EL1), trap_undef }, - { SYS_DESC(SYS_LORID_EL1), trap_undef }, + { SYS_DESC(SYS_LORSA_EL1), trap_loregion }, + { SYS_DESC(SYS_LOREA_EL1), trap_loregion }, + { SYS_DESC(SYS_LORN_EL1), trap_loregion }, + { SYS_DESC(SYS_LORC_EL1), trap_loregion }, + { SYS_DESC(SYS_LORID_EL1), trap_loregion }, { SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 }, { SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 }, From 356690d029e10f1aadebc49819d3908d5f6389fb Mon Sep 17 00:00:00 2001 From: Nagadheeraj Rottela Date: Mon, 14 Jan 2019 13:52:24 +0000 Subject: [PATCH 0164/1436] crypto: cavium/nitrox - Invoke callback after DMA unmap In process_response_list() invoke the callback handler after unmapping the DMA buffers. It ensures DMA data is synced form device to cpu before the client code access the data from callback handler. Fixes: c9613335bf4f ("crypto: cavium/nitrox - Added AEAD cipher support") Signed-off-by: Nagadheeraj Rottela Signed-off-by: Herbert Xu --- drivers/crypto/cavium/nitrox/nitrox_reqmgr.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c index fe070d75c842..4c97478d44bd 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c +++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c @@ -537,6 +537,8 @@ static void process_response_list(struct nitrox_cmdq *cmdq) struct nitrox_device *ndev = cmdq->ndev; struct nitrox_softreq *sr; int req_completed = 0, err = 0, budget; + completion_t callback; + void *cb_arg; /* check all pending requests */ budget = atomic_read(&cmdq->pending_count); @@ -564,13 +566,13 @@ static void process_response_list(struct nitrox_cmdq *cmdq) smp_mb__after_atomic(); /* remove from response list */ response_list_del(sr, cmdq); - /* ORH error code */ err = READ_ONCE(*sr->resp.orh) & 0xff; - - if (sr->callback) - sr->callback(sr->cb_arg, err); + callback = sr->callback; + cb_arg = sr->cb_arg; softreq_destroy(sr); + if (callback) + callback(cb_arg, err); req_completed++; } From d88c93f090f708c18195553b352b9f205e65418f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 23 Jan 2019 11:27:02 +0100 Subject: [PATCH 0165/1436] debugfs: fix debugfs_rename parameter checking debugfs_rename() needs to check that the dentries passed into it really are valid, as sometimes they are not (i.e. if the return value of another debugfs call is passed into this one.) So fix this up by properly checking if the two parent directories are errors (they are allowed to be NULL), and if the dentry to rename is not NULL or an error. Cc: stable Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 13b01351dd1c..41ef452c1fcf 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -787,6 +787,13 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *dentry = NULL, *trap; struct name_snapshot old_name; + if (IS_ERR(old_dir)) + return old_dir; + if (IS_ERR(new_dir)) + return new_dir; + if (IS_ERR_OR_NULL(old_dentry)) + return old_dentry; + trap = lock_rename(new_dir, old_dir); /* Source or destination directories don't exist? */ if (d_really_is_negative(old_dir) || d_really_is_negative(new_dir)) From 053ff09f1a8f2151339f9fda457c5250929d1c49 Mon Sep 17 00:00:00 2001 From: Sandy Huang Date: Wed, 23 Jan 2019 18:14:39 +0800 Subject: [PATCH 0166/1436] drm/rockchip: rgb: update SPDX license identifier Update SPDX License Identifier from GPL-2.0+ to GPL-2.0 and drop some GPL text. This fixes a mismatch between the existing SPDX headers and GPL boilerplate text. Fixes: 1f0f01515172 ("Add support for Rockchip Soc RGB output interface") Cc: stable@vger.kernel.org Reported-by: Thomas Gleixner Signed-off-by: Sandy Huang Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/1548238479-171491-1-git-send-email-hjc@rock-chips.com --- drivers/gpu/drm/rockchip/rockchip_rgb.c | 11 +---------- drivers/gpu/drm/rockchip/rockchip_rgb.h | 11 +---------- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_rgb.c b/drivers/gpu/drm/rockchip/rockchip_rgb.c index 37f93022a106..c0351abf83a3 100644 --- a/drivers/gpu/drm/rockchip/rockchip_rgb.c +++ b/drivers/gpu/drm/rockchip/rockchip_rgb.c @@ -1,17 +1,8 @@ -//SPDX-License-Identifier: GPL-2.0+ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd * Author: * Sandy Huang - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include diff --git a/drivers/gpu/drm/rockchip/rockchip_rgb.h b/drivers/gpu/drm/rockchip/rockchip_rgb.h index 38b52e63b2b0..27b9635124bc 100644 --- a/drivers/gpu/drm/rockchip/rockchip_rgb.h +++ b/drivers/gpu/drm/rockchip/rockchip_rgb.h @@ -1,17 +1,8 @@ -//SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd * Author: * Sandy Huang - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifdef CONFIG_ROCKCHIP_RGB From 9d9d4ff788845fad1626b80164e43a1f0f17ccbc Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Wed, 23 Jan 2019 10:09:27 +0800 Subject: [PATCH 0167/1436] RDMA/hns: Update the kernel header file of hns The hns_roce_ib_create_srq_resp is used to interact with the user for data, this was open coded to use a u32 directly, instead use a properly sized structure. Fixes: c7bcb13442e1 ("RDMA/hns: Add SRQ support for hip08 kernel mode") Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_srq.c | 10 ++++++++-- include/uapi/rdma/hns-abi.h | 5 +++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 960b1946c365..12deacf442cf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -210,6 +210,7 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); + struct hns_roce_ib_create_srq_resp resp = {}; struct hns_roce_srq *srq; int srq_desc_size; int srq_buf_size; @@ -378,16 +379,21 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, srq->event = hns_roce_ib_srq_event; srq->ibsrq.ext.xrc.srq_num = srq->srqn; + resp.srqn = srq->srqn; if (udata) { - if (ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) { + if (ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp)))) { ret = -EFAULT; - goto err_wrid; + goto err_srqc_alloc; } } return &srq->ibsrq; +err_srqc_alloc: + hns_roce_srq_free(hr_dev, srq); + err_wrid: kvfree(srq->wrid); diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index ef3c7ec793a7..eb76b38a00d4 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -52,6 +52,11 @@ struct hns_roce_ib_create_srq { __aligned_u64 que_addr; }; +struct hns_roce_ib_create_srq_resp { + __u32 srqn; + __u32 reserved; +}; + struct hns_roce_ib_create_qp { __aligned_u64 buf_addr; __aligned_u64 db_addr; From a2093dd35f8cfb28dd7c878ccbd020c1bb20d0d7 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Tue, 22 Jan 2019 09:16:10 +0200 Subject: [PATCH 0168/1436] RDMA/umem: Add missing initialization of owning_mm When allocating a umem leaf for implicit ODP MR during page fault the field owning_mm was not set. Initialize and take a reference on this field to avoid kernel panic when trying to access this field. BUG: unable to handle kernel NULL pointer dereference at 0000000000000058 PGD 800000022dfed067 P4D 800000022dfed067 PUD 22dfcf067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 0 PID: 634 Comm: kworker/u33:0 Not tainted 4.20.0-rc6+ #89 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: mlx5_ib_page_fault mlx5_ib_eqe_pf_action [mlx5_ib] RIP: 0010:ib_umem_odp_map_dma_pages+0xf3/0x710 [ib_core] Code: 45 c0 48 21 f3 48 89 75 b0 31 f6 4a 8d 04 33 48 89 45 a8 49 8b 44 24 60 48 8b 78 10 e8 66 16 a8 c5 49 8b 54 24 08 48 89 45 98 <8b> 42 58 85 c0 0f 84 8e 05 00 00 8d 48 01 48 8d 72 58 f0 0f b1 4a RSP: 0000:ffffb610813a7c20 EFLAGS: 00010202 RAX: ffff95ace6e8ac80 RBX: 0000000000000000 RCX: 000000000000000c RDX: 0000000000000000 RSI: 0000000000000850 RDI: ffff95aceaadae80 RBP: ffffb610813a7ce0 R08: 0000000000000000 R09: 0000000000080c77 R10: ffff95acfffdbd00 R11: 0000000000000000 R12: ffff95aceaa20a00 R13: 0000000000001000 R14: 0000000000001000 R15: 000000000000000c FS: 0000000000000000(0000) GS:ffff95acf7800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000058 CR3: 000000022c834001 CR4: 00000000001606f0 Call Trace: pagefault_single_data_segment+0x1df/0xc60 [mlx5_ib] mlx5_ib_eqe_pf_action+0x7bc/0xa70 [mlx5_ib] ? __switch_to+0xe1/0x470 process_one_work+0x174/0x390 worker_thread+0x4f/0x3e0 kthread+0x102/0x140 ? drain_workqueue+0x130/0x130 ? kthread_stop+0x110/0x110 ret_from_fork+0x1f/0x30 Fixes: f27a0d50a4bc ("RDMA/umem: Use umem->owning_mm inside ODP") Signed-off-by: Artemy Kovalyov Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem_odp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index a4ec43093cb3..acb882f279cb 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -352,6 +352,8 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm, umem->writable = 1; umem->is_odp = 1; odp_data->per_mm = per_mm; + umem->owning_mm = per_mm->mm; + mmgrab(umem->owning_mm); mutex_init(&odp_data->umem_mutex); init_completion(&odp_data->notifier_completion); @@ -384,6 +386,7 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm, out_page_list: vfree(odp_data->page_list); out_odp_data: + mmdrop(umem->owning_mm); kfree(odp_data); return ERR_PTR(ret); } From d0b95e6cd298a785c126e75a085af6dd7b7b1f60 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 25 Jan 2019 16:04:06 +0000 Subject: [PATCH 0169/1436] ASoC: core: Allow soc_find_component lookups to match parent of_node For devices implemented as a MFD it is common to only have a single node in devicetree representing the whole device. As such when looking up components in soc_find_components we should match against both the devices of_node and the devices parent's of_node, as is already done in the rest of the ASoC core. This causes regressions for some DAI links at the moment as soc_find_component was recently added as a check in soc_init_dai_link. Fixes: 8780cf1142a5 ("ASoC: soc-core: defer card probe until all component is added to list") Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index aae450ba4f08..ea16c2b199ce 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -735,12 +735,17 @@ static struct snd_soc_component *soc_find_component( const struct device_node *of_node, const char *name) { struct snd_soc_component *component; + struct device_node *component_of_node; lockdep_assert_held(&client_mutex); for_each_component(component) { if (of_node) { - if (component->dev->of_node == of_node) + component_of_node = component->dev->of_node; + if (!component_of_node && component->dev->parent) + component_of_node = component->dev->parent->of_node; + + if (component_of_node == of_node) return component; } else if (name && strcmp(component->name, name) == 0) { return component; From 9e6966646b6bc5078d579151b90016522d4ff2cb Mon Sep 17 00:00:00 2001 From: Olek Poplavsky Date: Thu, 24 Jan 2019 23:30:03 -0500 Subject: [PATCH 0170/1436] ALSA: usb-audio: Add Opus #3 to quirks for native DSD support This patch adds quirk VID/PID IDs for the Opus #3 DAP (made by 'The Bit') in order to enable Native DSD support. [ NOTE: this could be handled in the generic way with fp->dvd_raw if we add 0x10cb to the vendor whitelist, but since 0x10cb shows a different vendor name (Erantech), put to the individual entry at this time -- tiwai ] Signed-off-by: Olek Poplavsky Cc: Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index ebbadb3a7094..bb8372833fc2 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1492,6 +1492,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, return SNDRV_PCM_FMTBIT_DSD_U32_BE; break; + case USB_ID(0x10cb, 0x0103): /* The Bit Opus #3; with fp->dsd_raw */ case USB_ID(0x152a, 0x85de): /* SMSL D1 DAC */ case USB_ID(0x16d0, 0x09dd): /* Encore mDSD */ case USB_ID(0x0d8c, 0x0316): /* Hegel HD12 DSD */ From e190161f96b88ffae870405fd6c3fdd1d2e7f98d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 25 Jan 2019 17:11:32 +0100 Subject: [PATCH 0171/1436] ALSA: pcm: Fix tight loop of OSS capture stream When the trigger=off is passed for a PCM OSS stream, it sets the start_threshold of the given substream to the boundary size, so that it won't be automatically started. This can be problematic for a capture stream, unfortunately, as detected by syzkaller. The scenario is like the following: - In __snd_pcm_lib_xfer() that is invoked from snd_pcm_oss_read() loop, we have a check whether the stream was already started or the stream can be auto-started. - The function at this check returns 0 with trigger=off since we explicitly disable the auto-start. - The loop continues and repeats calling __snd_pcm_lib_xfer() tightly, which may lead to an RCU stall. This patch fixes the bug by simply allowing the wait for non-started stream in the case of OSS capture. For native usages, it's supposed to be done by the caller side (which is user-space), hence it returns zero like before. (In theory, __snd_pcm_lib_xfer() could wait even for the native API usage cases, too; but I'd like to stay in a safer side for not breaking the existing stuff for now.) Reported-by: syzbot+fbe0496f92a0ce7b786c@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai --- sound/core/pcm_lib.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 40013b26f671..6c99fa8ac5fa 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -2112,6 +2112,13 @@ int pcm_lib_apply_appl_ptr(struct snd_pcm_substream *substream, return 0; } +/* allow waiting for a capture stream that hasn't been started */ +#if IS_ENABLED(CONFIG_SND_PCM_OSS) +#define wait_capture_start(substream) ((substream)->oss.oss) +#else +#define wait_capture_start(substream) false +#endif + /* the common loop for read/write data */ snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream, void *data, bool interleaved, @@ -2182,7 +2189,7 @@ snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream, err = snd_pcm_start(substream); if (err < 0) goto _end_unlock; - } else { + } else if (!wait_capture_start(substream)) { /* nothing to do */ err = 0; goto _end_unlock; From ae662eec8a515ab550524e04c793b5ddf1aae3a1 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 10 Dec 2018 06:21:46 +0000 Subject: [PATCH 0172/1436] riscv: Adjust mmap base address at a third of task size This ratio is the most used among all other architectures and make icache_hygiene libhugetlbfs test pass: this test mmap lots of hugepages whose addresses, without this patch, reach the end of the process user address space. Signed-off-by: Alexandre Ghiti Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 0531f49af5c3..ce70bceb8872 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -22,7 +22,7 @@ * This decides where the kernel will search for a free chunk of vm * space during mmap's. */ -#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE >> 1) +#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3) #define STACK_TOP TASK_SIZE #define STACK_TOP_MAX STACK_TOP From aba1e632c2e810bd685a2c954ebc726e5ca30b2b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 21 Aug 2018 11:21:09 +0100 Subject: [PATCH 0173/1436] iwlwifi: pcie: make array 'prop' static, shrinks object size Don't populate the array prop on the stack but instead make it static. Makes the object code smaller by 30 bytes: Before: text data bss dec hex filename 80138 15382 576 96096 17760 drivers/net/wireless/intel/iwlwifi/pcie/trans.o After: text data bss dec hex filename 79948 15542 576 96066 17742 drivers/net/wireless/intel/iwlwifi/pcie/trans.o (gcc version 8.2.0 x86_64) Signed-off-by: Colin Ian King Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index f97aea5ffc44..dbe62bc3ece4 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1968,7 +1968,7 @@ static void iwl_trans_pcie_removal_wk(struct work_struct *wk) struct iwl_trans_pcie_removal *removal = container_of(wk, struct iwl_trans_pcie_removal, work); struct pci_dev *pdev = removal->pdev; - char *prop[] = {"EVENT=INACCESSIBLE", NULL}; + static char *prop[] = {"EVENT=INACCESSIBLE", NULL}; dev_err(&pdev->dev, "Device gone - attempting removal\n"); kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, prop); From d3561e0ecd75a595cb4a477fa4c65fdb51535364 Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Thu, 13 Sep 2018 14:52:59 +0300 Subject: [PATCH 0174/1436] iwlwifi: wrt: add to dump number of lmacs, lmac1 and umac error id Add to the dump the number of lmacs, the error id of the umac and the error id of lmac1, if supported. In case the reason for the dump trigger is not an assert the error id is zero. Signed-off-by: Shahar S Matityahu Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 8 +++++++- drivers/net/wireless/intel/iwlwifi/fw/dbg.h | 5 ++++- drivers/net/wireless/intel/iwlwifi/fw/error-dump.h | 11 +++++++++-- drivers/net/wireless/intel/iwlwifi/fw/runtime.h | 3 ++- drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 13 ++++++++----- 5 files changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 2a19b178c5e8..d4d6544f8833 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -828,7 +828,13 @@ _iwl_fw_error_dump(struct iwl_fw_runtime *fwrt, sizeof(dump_info->dev_human_readable) - 1); strncpy(dump_info->bus_human_readable, fwrt->dev->bus->name, sizeof(dump_info->bus_human_readable) - 1); - dump_info->rt_status = cpu_to_le32(fwrt->dump.rt_status); + dump_info->num_of_lmacs = fwrt->smem_cfg.num_lmacs; + dump_info->lmac_err_id[0] = + cpu_to_le32(fwrt->dump.lmac_err_id[0]); + if (fwrt->smem_cfg.num_lmacs > 1) + dump_info->lmac_err_id[1] = + cpu_to_le32(fwrt->dump.lmac_err_id[1]); + dump_info->umac_err_id = cpu_to_le32(fwrt->dump.umac_err_id); dump_data = iwl_fw_error_next_data(dump_data); } diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h index 6aabbdd72326..40dcc3deb257 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h @@ -102,7 +102,10 @@ static inline void iwl_fw_free_dump_desc(struct iwl_fw_runtime *fwrt) if (fwrt->dump.desc != &iwl_dump_desc_assert) kfree(fwrt->dump.desc); fwrt->dump.desc = NULL; - fwrt->dump.rt_status = 0; + fwrt->dump.lmac_err_id[0] = 0; + if (fwrt->smem_cfg.num_lmacs > 1) + fwrt->dump.lmac_err_id[1] = 0; + fwrt->dump.umac_err_id = 0; } void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h index 65faecf552cd..c02425a1e64f 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h @@ -180,6 +180,8 @@ enum iwl_fw_error_dump_family { IWL_FW_ERROR_DUMP_FAMILY_8 = 8, }; +#define MAX_NUM_LMAC 2 + /** * struct iwl_fw_error_dump_info - info on the device / firmware * @device_family: the family of the device (7 / 8) @@ -187,7 +189,10 @@ enum iwl_fw_error_dump_family { * @fw_human_readable: human readable FW version * @dev_human_readable: name of the device * @bus_human_readable: name of the bus used - * @rt_status: the error_id/rt_status that that triggered the latest dump + * @num_of_lmacs: the number of lmacs + * @lmac_err_id: the lmac 0/1 error_id/rt_status that triggered the latest dump + * if the dump collection was not initiated by an assert, the value is 0 + * @umac_err_id: the umac error_id/rt_status that triggered the latest dump * if the dump collection was not initiated by an assert, the value is 0 */ struct iwl_fw_error_dump_info { @@ -196,7 +201,9 @@ struct iwl_fw_error_dump_info { u8 fw_human_readable[FW_VER_HUMAN_READABLE_SZ]; u8 dev_human_readable[64]; u8 bus_human_readable[8]; - __le32 rt_status; + u8 num_of_lmacs; + __le32 umac_err_id; + __le32 lmac_err_id[MAX_NUM_LMAC]; } __packed; /** diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h index 4f7090f88cb0..a0fcbb28114b 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h @@ -142,7 +142,8 @@ struct iwl_fw_runtime { u32 *d3_debug_data; struct iwl_fw_ini_active_regs active_regs[IWL_FW_INI_MAX_REGION_ID]; struct iwl_fw_ini_active_triggers active_trigs[IWL_FW_TRIGGER_ID_NUM]; - u32 rt_status; + u32 lmac_err_id[MAX_NUM_LMAC]; + u32 umac_err_id; } dump; #ifdef CONFIG_IWLWIFI_DEBUGFS struct { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index d116c6ae18ff..e8659bf50890 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -463,6 +463,9 @@ static void iwl_mvm_dump_umac_error_log(struct iwl_mvm *mvm) iwl_trans_read_mem_bytes(trans, mvm->umac_error_event_table, &table, sizeof(table)); + if (table.valid) + mvm->fwrt.dump.umac_err_id = table.error_id; + if (ERROR_START_OFFSET <= table.valid * ERROR_ELEM_SIZE) { IWL_ERR(trans, "Start IWL Error Log Dump:\n"); IWL_ERR(trans, "Status: 0x%08lX, count: %d\n", @@ -486,11 +489,11 @@ static void iwl_mvm_dump_umac_error_log(struct iwl_mvm *mvm) IWL_ERR(mvm, "0x%08X | isr status reg\n", table.nic_isr_pref); } -static void iwl_mvm_dump_lmac_error_log(struct iwl_mvm *mvm, u32 base) +static void iwl_mvm_dump_lmac_error_log(struct iwl_mvm *mvm, u8 lmac_num) { struct iwl_trans *trans = mvm->trans; struct iwl_error_event_table table; - u32 val; + u32 val, base = mvm->error_event_table[lmac_num]; if (mvm->fwrt.cur_fw_img == IWL_UCODE_INIT) { if (!base) @@ -541,7 +544,7 @@ static void iwl_mvm_dump_lmac_error_log(struct iwl_mvm *mvm, u32 base) iwl_trans_read_mem_bytes(trans, base, &table, sizeof(table)); if (table.valid) - mvm->fwrt.dump.rt_status = table.error_id; + mvm->fwrt.dump.lmac_err_id[lmac_num] = table.error_id; if (ERROR_START_OFFSET <= table.valid * ERROR_ELEM_SIZE) { IWL_ERR(trans, "Start IWL Error Log Dump:\n"); @@ -598,10 +601,10 @@ void iwl_mvm_dump_nic_error_log(struct iwl_mvm *mvm) return; } - iwl_mvm_dump_lmac_error_log(mvm, mvm->error_event_table[0]); + iwl_mvm_dump_lmac_error_log(mvm, 0); if (mvm->error_event_table[1]) - iwl_mvm_dump_lmac_error_log(mvm, mvm->error_event_table[1]); + iwl_mvm_dump_lmac_error_log(mvm, 1); iwl_mvm_dump_umac_error_log(mvm); } From 7360f99e0b99e325ae945eb39a3407215403d4d2 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 12 Sep 2018 14:10:56 +0300 Subject: [PATCH 0175/1436] iwlwifi: mvm: advertise support for TWT in the Extended Capability IE We want to advertise support for TWT in the Extended Capability IE. Since we don't want to set the bits for all the interface types, define an interface specific configuration. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/iwl-nvm-parse.c | 3 +-- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index d9afedc3d1d9..173ade96f119 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -569,8 +569,7 @@ static struct ieee80211_sband_iftype_data iwl_he_capa[] = { .has_he = true, .he_cap_elem = { .mac_cap_info[0] = - IEEE80211_HE_MAC_CAP0_HTC_HE | - IEEE80211_HE_MAC_CAP0_TWT_RES, + IEEE80211_HE_MAC_CAP0_HTC_HE, .mac_cap_info[1] = IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US | IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 97dc464379d2..310a36dd33cf 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -395,6 +395,21 @@ int iwl_mvm_init_fw_regd(struct iwl_mvm *mvm) return ret; } +const static u8 he_if_types_ext_capa_sta[] = { + [0] = WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING, + [7] = WLAN_EXT_CAPA8_OPMODE_NOTIF, + [9] = WLAN_EXT_CAPA10_TWT_REQUESTER_SUPPORT, +}; + +const static struct wiphy_iftype_ext_capab he_iftypes_ext_capa[] = { + { + .iftype = NL80211_IFTYPE_STATION, + .extended_capabilities = he_if_types_ext_capa_sta, + .extended_capabilities_mask = he_if_types_ext_capa_sta, + .extended_capabilities_len = sizeof(he_if_types_ext_capa_sta), + }, +}; + int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) { struct ieee80211_hw *hw = mvm->hw; @@ -673,6 +688,13 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE); } + if (mvm->nvm_data->sku_cap_11ax_enable && + !iwlwifi_mod_params.disable_11ax) { + hw->wiphy->iftype_ext_capab = he_iftypes_ext_capa; + hw->wiphy->num_iftype_ext_capab = + ARRAY_SIZE(he_iftypes_ext_capa); + } + mvm->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD; #ifdef CONFIG_PM_SLEEP From 866a6a85d4680403a79b347b9e81a35d85f57471 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 12 Sep 2018 16:25:27 +0300 Subject: [PATCH 0176/1436] iwlwifi: mvm: rely on mac80211 to configure TWT support Mac80211 will check both the HE Capability IE and the Extended Capability IE, so set the TWT support bit when mac80211 tells us to. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 22 ++----------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 7779951a9533..eb66269ddf73 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -778,27 +778,9 @@ static int iwl_mvm_mac_ctxt_cmd_sta(struct iwl_mvm *mvm, if (vif->bss_conf.assoc && vif->bss_conf.he_support && !iwlwifi_mod_params.disable_11ax) { - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - u8 sta_id = mvmvif->ap_sta_id; - cmd.filter_flags |= cpu_to_le32(MAC_FILTER_IN_11AX); - if (sta_id != IWL_MVM_INVALID_STA) { - struct ieee80211_sta *sta; - - sta = rcu_dereference_protected(mvm->fw_id_to_mac_id[sta_id], - lockdep_is_held(&mvm->mutex)); - - /* - * TODO: we should check the ext cap IE but it is - * unclear why the spec requires two bits (one in HE - * cap IE, and one in the ext cap IE). In the meantime - * rely on the HE cap IE only. - */ - if (sta && (sta->he_cap.he_cap_elem.mac_cap_info[0] & - IEEE80211_HE_MAC_CAP0_TWT_RES)) - ctxt_sta->data_policy |= - cpu_to_le32(TWT_SUPPORTED); - } + if (vif->bss_conf.twt_requester) + ctxt_sta->data_policy |= cpu_to_le32(TWT_SUPPORTED); } From 8093bb6d4feee04aefaf6abef4179193b54f4b8a Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 25 Sep 2018 11:21:50 +0300 Subject: [PATCH 0177/1436] iwlwifi: add PCI IDs for the 22260 device series Add new structs and PCI IDs for 22260 devices. Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/cfg/22000.c | 40 +++++++++++++++++++ .../net/wireless/intel/iwlwifi/iwl-config.h | 3 ++ drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 10 +++++ .../net/wireless/intel/iwlwifi/pcie/trans.c | 3 +- 4 files changed, 55 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index 7e65073834b7..588433da5ab1 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -83,6 +83,7 @@ #define IWL_22000_HR_A0_FW_PRE "iwlwifi-QuQnj-a0-hr-a0-" #define IWL_22000_SU_Z0_FW_PRE "iwlwifi-su-z0-" #define IWL_QU_B_JF_B_FW_PRE "iwlwifi-Qu-b0-jf-b0-" +#define IWL_CC_A_FW_PRE "iwlwifi-cc-a0-" #define IWL_22000_HR_MODULE_FIRMWARE(api) \ IWL_22000_HR_FW_PRE __stringify(api) ".ucode" @@ -104,6 +105,8 @@ IWL_22000_SU_Z0_FW_PRE __stringify(api) ".ucode" #define IWL_QU_B_JF_B_MODULE_FIRMWARE(api) \ IWL_QU_B_JF_B_FW_PRE __stringify(api) ".ucode" +#define IWL_CC_A_MODULE_FIRMWARE(api) \ + IWL_CC_A_FW_PRE __stringify(api) ".ucode" static const struct iwl_base_params iwl_22000_base_params = { .eeprom_size = OTP_LOW_IMAGE_SIZE_32K, @@ -207,6 +210,42 @@ const struct iwl_cfg iwl22000_2ax_cfg_hr = { .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, }; +const struct iwl_cfg iwl22260_2ax_cfg = { + .name = "Intel(R) Wireless-AX 22260", + .fw_name_pre = IWL_CC_A_FW_PRE, + IWL_DEVICE_22500, + /* + * This device doesn't support receiving BlockAck with a large bitmap + * so we need to restrict the size of transmitted aggregation to the + * HT size; mac80211 would otherwise pick the HE max (256) by default. + */ + .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, +}; + +const struct iwl_cfg killer1650x_2ax_cfg = { + .name = "Killer(R) Wireless-AX 1650x Wireless Network Adapter (22260NGW)", + .fw_name_pre = IWL_CC_A_FW_PRE, + IWL_DEVICE_22500, + /* + * This device doesn't support receiving BlockAck with a large bitmap + * so we need to restrict the size of transmitted aggregation to the + * HT size; mac80211 would otherwise pick the HE max (256) by default. + */ + .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, +}; + +const struct iwl_cfg killer1650w_2ax_cfg = { + .name = "Killer(R) Wireless-AX 1650w Wireless Network Adapter (22260D2W)", + .fw_name_pre = IWL_CC_A_FW_PRE, + IWL_DEVICE_22500, + /* + * This device doesn't support receiving BlockAck with a large bitmap + * so we need to restrict the size of transmitted aggregation to the + * HT size; mac80211 would otherwise pick the HE max (256) by default. + */ + .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, +}; + /* * All JF radio modules are part of the 9000 series, but the MAC part * looks more like 22000. That's why this device is here, but called @@ -324,3 +363,4 @@ MODULE_FIRMWARE(IWL_22000_JF_B0_QNJ_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_22000_HR_A0_QNJ_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_22000_SU_Z0_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_QU_B_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); +MODULE_FIRMWARE(IWL_CC_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 91861a9cbe57..1261a29f3b40 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -571,6 +571,9 @@ extern const struct iwl_cfg iwl22000_2ac_cfg_hr; extern const struct iwl_cfg iwl22000_2ac_cfg_hr_cdb; extern const struct iwl_cfg iwl22000_2ac_cfg_jf; extern const struct iwl_cfg iwl22000_2ax_cfg_hr; +extern const struct iwl_cfg iwl22260_2ax_cfg; +extern const struct iwl_cfg killer1650x_2ax_cfg; +extern const struct iwl_cfg killer1650w_2ax_cfg; extern const struct iwl_cfg iwl9461_2ac_cfg_qu_b0_jf_b0; extern const struct iwl_cfg iwl9462_2ac_cfg_qu_b0_jf_b0; extern const struct iwl_cfg iwl9560_2ac_cfg_qu_b0_jf_b0; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 353581ccc01e..4462133c12ba 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -898,6 +898,16 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0xA0F0, 0x00B0, iwl22000_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0xA0F0, 0x0A10, iwl22000_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x2723, 0x0080, iwl22260_2ax_cfg)}, + {IWL_PCI_DEVICE(0x2723, 0x0084, iwl22260_2ax_cfg)}, + {IWL_PCI_DEVICE(0x2723, 0x0088, iwl22260_2ax_cfg)}, + {IWL_PCI_DEVICE(0x2723, 0x008C, iwl22260_2ax_cfg)}, + {IWL_PCI_DEVICE(0x2723, 0x4080, iwl22260_2ax_cfg)}, + {IWL_PCI_DEVICE(0x2723, 0x4088, iwl22260_2ax_cfg)}, + + {IWL_PCI_DEVICE(0x1a56, 0x1653, killer1650w_2ax_cfg)}, + {IWL_PCI_DEVICE(0x1a56, 0x1654, killer1650x_2ax_cfg)}, + #endif /* CONFIG_IWLMVM */ {0} diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index dbe62bc3ece4..fc2f98f2d516 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3602,7 +3602,8 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, goto out_no_pci; } } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == - CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR)) { + CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR) && + trans->cfg != &iwl22260_2ax_cfg) { u32 hw_status; hw_status = iwl_read_prph(trans, UMAG_GEN_HW_STATUS); From 99be6166a4a0179b3a7e7986ea022136b812f95a Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 2 Oct 2018 11:42:28 +0300 Subject: [PATCH 0178/1436] iwlwifi: pcie: recognize NICs with hw_rev 0x364 correctly Some devices with PCI ID 0x2723, which is supposed to be 22260, are actually not. So we need to differentiate them by checking the hw_rev and change the cfg accordingly. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-csr.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h index caa5806acd81..42af421bbc3c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h @@ -325,6 +325,7 @@ enum { #define CSR_HW_REV_TYPE_7265D (0x0000210) #define CSR_HW_REV_TYPE_NONE (0x00001F0) #define CSR_HW_REV_TYPE_QNJ (0x0000360) +#define CSR_HW_REV_TYPE_QNJ_B0 (0x0000364) #define CSR_HW_REV_TYPE_HR_CDB (0x0000340) /* RF_ID value */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index fc2f98f2d516..633444d2f930 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3569,6 +3569,8 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, } } + IWL_DEBUG_INFO(trans, "HW REV: 0x%0x\n", trans->hw_rev); + /* * 9000-series integrated A-step has a problem with suspend/resume * and sometimes even causes the whole platform to get stuck. This @@ -3603,7 +3605,8 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, } } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR) && - trans->cfg != &iwl22260_2ax_cfg) { + (trans->cfg != &iwl22260_2ax_cfg || + trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0)) { u32 hw_status; hw_status = iwl_read_prph(trans, UMAG_GEN_HW_STATUS); From 7703238ef7e56b96eb0d23e629245b1d56241224 Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Sun, 16 Sep 2018 09:58:15 +0300 Subject: [PATCH 0179/1436] iwlwifi: mvm: fix wrong DCM TLC config When configuring TLC DCM flag: 1. check the peer's RX DCM capabilities (since we TX) 2. do not set DCM_NSS_2 since we do not support it Signed-off-by: Shaul Triebitz Fixes: 423584dd8060 ("iwlwifi: rs-fw: support dcm") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c index dabbc04853ac..a0ea0c160fd6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c @@ -149,14 +149,9 @@ static u16 rs_fw_set_config_flags(struct iwl_mvm *mvm, if (he_cap && he_cap->has_he && (he_cap->he_cap_elem.phy_cap_info[3] & - IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_MASK)) { + IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_MASK)) flags |= IWL_TLC_MNG_CFG_FLAGS_HE_DCM_NSS_1_MSK; - if (he_cap->he_cap_elem.phy_cap_info[3] & - IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_2) - flags |= IWL_TLC_MNG_CFG_FLAGS_HE_DCM_NSS_2_MSK; - } - return flags; } From a98e2802a654e240d9020546fd29e5632da5f848 Mon Sep 17 00:00:00 2001 From: Ihab Zhaika Date: Sun, 5 Aug 2018 15:05:45 +0300 Subject: [PATCH 0180/1436] iwlwifi: correct one of the PCI struct names One of the cfg struct names is mistakenly "iwl22000", when it should be "iwl22560". Chage-Id: If9fbfa4bceef81d028c90c98d47115fbe39da547 Signed-off-by: Ihab Zhaika Fixes: 2f7a3863191a ("iwlwifi: rename the temporary name of A000 to the official 22000") Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/cfg/22000.c | 4 +-- .../net/wireless/intel/iwlwifi/iwl-config.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 34 +++++++++---------- .../net/wireless/intel/iwlwifi/pcie/trans.c | 4 +-- 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index 588433da5ab1..310bef797a43 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -198,8 +198,8 @@ const struct iwl_cfg iwl22000_2ac_cfg_jf = { IWL_DEVICE_22500, }; -const struct iwl_cfg iwl22000_2ax_cfg_hr = { - .name = "Intel(R) Dual Band Wireless AX 22000", +const struct iwl_cfg iwl22560_2ax_cfg_hr = { + .name = "Intel(R) Wireless-AX 22560", .fw_name_pre = IWL_22000_QU_B_HR_B_FW_PRE, IWL_DEVICE_22500, /* diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 1261a29f3b40..6695aa8fbfba 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -570,6 +570,7 @@ extern const struct iwl_cfg iwl9560_killer_s_2ac_cfg_shared_clk; extern const struct iwl_cfg iwl22000_2ac_cfg_hr; extern const struct iwl_cfg iwl22000_2ac_cfg_hr_cdb; extern const struct iwl_cfg iwl22000_2ac_cfg_jf; +extern const struct iwl_cfg iwl22560_2ax_cfg_hr; extern const struct iwl_cfg iwl22000_2ax_cfg_hr; extern const struct iwl_cfg iwl22260_2ax_cfg; extern const struct iwl_cfg killer1650x_2ax_cfg; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 4462133c12ba..6694efc6a431 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -871,32 +871,32 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0xA370, 0x42A4, iwl9462_2ac_cfg_soc)}, /* 22000 Series */ - {IWL_PCI_DEVICE(0x2720, 0x0000, iwl22000_2ax_cfg_hr)}, - {IWL_PCI_DEVICE(0x2720, 0x0040, iwl22000_2ax_cfg_hr)}, - {IWL_PCI_DEVICE(0x2720, 0x0078, iwl22000_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x2720, 0x0000, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x2720, 0x0040, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x2720, 0x0078, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x2720, 0x0070, iwl22000_2ac_cfg_hr_cdb)}, {IWL_PCI_DEVICE(0x2720, 0x0030, iwl22000_2ac_cfg_hr_cdb)}, - {IWL_PCI_DEVICE(0x2720, 0x1080, iwl22000_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x2720, 0x1080, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x2720, 0x0090, iwl22000_2ac_cfg_hr_cdb)}, {IWL_PCI_DEVICE(0x2720, 0x0310, iwl22000_2ac_cfg_hr_cdb)}, - {IWL_PCI_DEVICE(0x34F0, 0x0040, iwl22000_2ax_cfg_hr)}, - {IWL_PCI_DEVICE(0x34F0, 0x0070, iwl22000_2ax_cfg_hr)}, - {IWL_PCI_DEVICE(0x34F0, 0x0078, iwl22000_2ax_cfg_hr)}, - {IWL_PCI_DEVICE(0x34F0, 0x0310, iwl22000_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x34F0, 0x0040, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x34F0, 0x0070, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x34F0, 0x0078, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x34F0, 0x0310, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x40C0, 0x0000, iwl22560_2ax_cfg_su_cdb)}, {IWL_PCI_DEVICE(0x40C0, 0x0010, iwl22560_2ax_cfg_su_cdb)}, {IWL_PCI_DEVICE(0x40c0, 0x0090, iwl22560_2ax_cfg_su_cdb)}, {IWL_PCI_DEVICE(0x40C0, 0x0310, iwl22560_2ax_cfg_su_cdb)}, {IWL_PCI_DEVICE(0x40C0, 0x0A10, iwl22560_2ax_cfg_su_cdb)}, - {IWL_PCI_DEVICE(0x43F0, 0x0040, iwl22000_2ax_cfg_hr)}, - {IWL_PCI_DEVICE(0x43F0, 0x0070, iwl22000_2ax_cfg_hr)}, - {IWL_PCI_DEVICE(0x43F0, 0x0078, iwl22000_2ax_cfg_hr)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0000, iwl22000_2ax_cfg_hr)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0040, iwl22000_2ax_cfg_hr)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0070, iwl22000_2ax_cfg_hr)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0078, iwl22000_2ax_cfg_hr)}, - {IWL_PCI_DEVICE(0xA0F0, 0x00B0, iwl22000_2ax_cfg_hr)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0A10, iwl22000_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x43F0, 0x0040, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x43F0, 0x0070, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x43F0, 0x0078, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0000, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0040, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0070, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0078, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0xA0F0, 0x00B0, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0A10, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x2723, 0x0080, iwl22260_2ax_cfg)}, {IWL_PCI_DEVICE(0x2723, 0x0084, iwl22260_2ax_cfg)}, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 633444d2f930..55ccfa22b3c0 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3585,10 +3585,10 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, #if IS_ENABLED(CONFIG_IWLMVM) trans->hw_rf_id = iwl_read32(trans, CSR_HW_RF_ID); - if (cfg == &iwl22000_2ax_cfg_hr) { + if (cfg == &iwl22560_2ax_cfg_hr) { if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR)) { - trans->cfg = &iwl22000_2ax_cfg_hr; + trans->cfg = &iwl22560_2ax_cfg_hr; } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_JF)) { trans->cfg = &iwl22000_2ax_cfg_jf; From 5b74a9368958d9d424425f6e89b59fed49df6f96 Mon Sep 17 00:00:00 2001 From: Ihab Zhaika Date: Sun, 5 Aug 2018 16:05:25 +0300 Subject: [PATCH 0181/1436] iwlwifi: add new cards for 22560, 9260 and killer series add few PCI ID'S for 22560, 9260 and killer series. Signed-off-by: Ihab Zhaika Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/cfg/22000.c | 24 ++++++++++ .../net/wireless/intel/iwlwifi/iwl-config.h | 2 + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 48 +++++++++++++++++-- 3 files changed, 70 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index 310bef797a43..285e02e1e7b7 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -281,6 +281,30 @@ const struct iwl_cfg killer1550s_2ac_cfg_qu_b0_jf_b0 = { IWL_DEVICE_22500, }; +const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0 = { + .name = "Killer(R) Wireless-AX 1650i Wireless Network Adapter (22560NGW)", + .fw_name_pre = IWL_22000_QU_B_HR_B_FW_PRE, + IWL_DEVICE_22500, + /* + * This device doesn't support receiving BlockAck with a large bitmap + * so we need to restrict the size of transmitted aggregation to the + * HT size; mac80211 would otherwise pick the HE max (256) by default. + */ + .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, +}; + +const struct iwl_cfg killer1650i_2ax_cfg_qu_b0_hr_b0 = { + .name = "Killer(R) Wireless-AX 1650s Wireless Network Adapter (22560D2W)", + .fw_name_pre = IWL_22000_QU_B_HR_B_FW_PRE, + IWL_DEVICE_22500, + /* + * This device doesn't support receiving BlockAck with a large bitmap + * so we need to restrict the size of transmitted aggregation to the + * HT size; mac80211 would otherwise pick the HE max (256) by default. + */ + .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, +}; + const struct iwl_cfg iwl22000_2ax_cfg_jf = { .name = "Intel(R) Dual Band Wireless AX 22000", .fw_name_pre = IWL_QU_B_JF_B_FW_PRE, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 6695aa8fbfba..e50cf8c88dfd 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -573,6 +573,8 @@ extern const struct iwl_cfg iwl22000_2ac_cfg_jf; extern const struct iwl_cfg iwl22560_2ax_cfg_hr; extern const struct iwl_cfg iwl22000_2ax_cfg_hr; extern const struct iwl_cfg iwl22260_2ax_cfg; +extern const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0; +extern const struct iwl_cfg killer1650i_2ax_cfg_qu_b0_hr_b0; extern const struct iwl_cfg killer1650x_2ax_cfg; extern const struct iwl_cfg killer1650w_2ax_cfg; extern const struct iwl_cfg iwl9461_2ac_cfg_qu_b0_jf_b0; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 6694efc6a431..461135f8cef1 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -566,6 +566,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2526, 0x0010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0014, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0018, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x001C, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0030, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0034, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0038, iwl9560_2ac_cfg)}, @@ -596,12 +597,14 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2526, 0x2030, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x2034, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x4010, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x401C, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x4030, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x4034, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x40A4, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x4234, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x42A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x8014, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x8010, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0xA014, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x271B, 0x0010, iwl9160_2ac_cfg)}, {IWL_PCI_DEVICE(0x271B, 0x0014, iwl9160_2ac_cfg)}, @@ -871,18 +874,45 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0xA370, 0x42A4, iwl9462_2ac_cfg_soc)}, /* 22000 Series */ + {IWL_PCI_DEVICE(0x02F0, 0x0070, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x02F0, 0x0074, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x02F0, 0x0078, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x02F0, 0x007C, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x02F0, 0x0310, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x02F0, 0x1651, killer1650s_2ax_cfg_qu_b0_hr_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x4070, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x06F0, 0x0070, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x06F0, 0x0074, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x06F0, 0x0078, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x06F0, 0x007C, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x06F0, 0x0310, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x06F0, 0x1651, killer1650s_2ax_cfg_qu_b0_hr_b0)}, + {IWL_PCI_DEVICE(0x06F0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0)}, + {IWL_PCI_DEVICE(0x06F0, 0x4070, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x2720, 0x0000, iwl22560_2ax_cfg_hr)}, - {IWL_PCI_DEVICE(0x2720, 0x0040, iwl22560_2ax_cfg_hr)}, - {IWL_PCI_DEVICE(0x2720, 0x0078, iwl22560_2ax_cfg_hr)}, - {IWL_PCI_DEVICE(0x2720, 0x0070, iwl22000_2ac_cfg_hr_cdb)}, {IWL_PCI_DEVICE(0x2720, 0x0030, iwl22000_2ac_cfg_hr_cdb)}, - {IWL_PCI_DEVICE(0x2720, 0x1080, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x2720, 0x0040, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x2720, 0x0070, iwl22000_2ac_cfg_hr_cdb)}, + {IWL_PCI_DEVICE(0x2720, 0x0074, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x2720, 0x0078, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x2720, 0x007C, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x2720, 0x0090, iwl22000_2ac_cfg_hr_cdb)}, {IWL_PCI_DEVICE(0x2720, 0x0310, iwl22000_2ac_cfg_hr_cdb)}, + {IWL_PCI_DEVICE(0x2720, 0x0A10, iwl22000_2ac_cfg_hr_cdb)}, + {IWL_PCI_DEVICE(0x2720, 0x1080, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x2720, 0x1651, killer1650s_2ax_cfg_qu_b0_hr_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0)}, + {IWL_PCI_DEVICE(0x2720, 0x4070, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x34F0, 0x0040, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x34F0, 0x0070, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x34F0, 0x0074, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x34F0, 0x0078, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x34F0, 0x007C, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x34F0, 0x0310, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x34F0, 0x1651, killer1650s_2ax_cfg_qu_b0_hr_b0)}, + {IWL_PCI_DEVICE(0x34F0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0)}, + {IWL_PCI_DEVICE(0x34F0, 0x4070, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x40C0, 0x0000, iwl22560_2ax_cfg_su_cdb)}, {IWL_PCI_DEVICE(0x40C0, 0x0010, iwl22560_2ax_cfg_su_cdb)}, {IWL_PCI_DEVICE(0x40c0, 0x0090, iwl22560_2ax_cfg_su_cdb)}, @@ -890,13 +920,23 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x40C0, 0x0A10, iwl22560_2ax_cfg_su_cdb)}, {IWL_PCI_DEVICE(0x43F0, 0x0040, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x43F0, 0x0070, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x43F0, 0x0074, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x43F0, 0x0078, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x43F0, 0x007C, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0x43F0, 0x1651, killer1650s_2ax_cfg_qu_b0_hr_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x4070, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0xA0F0, 0x0000, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0xA0F0, 0x0040, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0xA0F0, 0x0070, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0074, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0xA0F0, 0x0078, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0xA0F0, 0x007C, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0xA0F0, 0x00B0, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0xA0F0, 0x0A10, iwl22560_2ax_cfg_hr)}, + {IWL_PCI_DEVICE(0xA0F0, 0x1651, killer1650s_2ax_cfg_qu_b0_hr_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x4070, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x2723, 0x0080, iwl22260_2ax_cfg)}, {IWL_PCI_DEVICE(0x2723, 0x0084, iwl22260_2ax_cfg)}, From 606b9ab67784b02572269d0c76f28bbef9328c6b Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 4 Oct 2018 10:41:45 +0300 Subject: [PATCH 0182/1436] iwlwifi: mvm: fix firmware statistics usage The new (CDB) statistics API is used by non-CDB devices as well. Look at the right TLV flag to know which version of the statistics notification to use. To avoid confusion, remove the _cdb suffix from the structure name. While at it, remove a structure that was never used. Signed-off-by: Emmanuel Grumbach Fixes: 678d9b6dddea ("iwlwifi: mvm: update rx statistics cmd api") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/api/stats.h | 15 +++++---------- drivers/net/wireless/intel/iwlwifi/mvm/rx.c | 10 +++++----- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/stats.h b/drivers/net/wireless/intel/iwlwifi/fw/api/stats.h index 53cb622aa9ab..318843138490 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/stats.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/stats.h @@ -8,6 +8,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright (C) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -29,6 +30,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright (C) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -363,14 +365,7 @@ struct mvm_statistics_general_v8 { u8 reserved[4 - (NUM_MAC_INDEX % 4)]; } __packed; /* STATISTICS_GENERAL_API_S_VER_8 */ -struct mvm_statistics_general_cdb_v9 { - struct mvm_statistics_general_common_v19 common; - __le32 beacon_counter[NUM_MAC_INDEX_CDB]; - u8 beacon_average_energy[NUM_MAC_INDEX_CDB]; - u8 reserved[4 - (NUM_MAC_INDEX_CDB % 4)]; -} __packed; /* STATISTICS_GENERAL_API_S_VER_9 */ - -struct mvm_statistics_general_cdb { +struct mvm_statistics_general { struct mvm_statistics_general_common common; __le32 beacon_counter[MAC_INDEX_AUX]; u8 beacon_average_energy[MAC_INDEX_AUX]; @@ -435,11 +430,11 @@ struct iwl_notif_statistics_v11 { struct mvm_statistics_load_v1 load_stats; } __packed; /* STATISTICS_NTFY_API_S_VER_11 */ -struct iwl_notif_statistics_cdb { +struct iwl_notif_statistics { __le32 flag; struct mvm_statistics_rx rx; struct mvm_statistics_tx tx; - struct mvm_statistics_general_cdb general; + struct mvm_statistics_general general; struct mvm_statistics_load load_stats; } __packed; /* STATISTICS_NTFY_API_S_VER_13 */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c index 6653a238f32e..235ab26ca429 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c @@ -599,8 +599,8 @@ static void iwl_mvm_stat_iterator(void *_data, u8 *mac, * data copied into the "data" struct, but rather the data from * the notification directly. */ - if (iwl_mvm_is_cdb_supported(mvm)) { - struct mvm_statistics_general_cdb *general = + if (iwl_mvm_has_new_rx_stats_api(mvm)) { + struct mvm_statistics_general *general = data->general; mvmvif->beacon_stats.num_beacons = @@ -723,7 +723,7 @@ void iwl_mvm_handle_rx_statistics(struct iwl_mvm *mvm, else expected_size = sizeof(struct iwl_notif_statistics_v10); } else { - expected_size = sizeof(struct iwl_notif_statistics_cdb); + expected_size = sizeof(struct iwl_notif_statistics); } if (WARN_ONCE(iwl_rx_packet_payload_len(pkt) != expected_size, @@ -753,7 +753,7 @@ void iwl_mvm_handle_rx_statistics(struct iwl_mvm *mvm, flags = stats->flag; } else { - struct iwl_notif_statistics_cdb *stats = (void *)&pkt->data; + struct iwl_notif_statistics *stats = (void *)&pkt->data; data.mac_id = stats->rx.general.mac_id; data.beacon_filter_average_energy = @@ -792,7 +792,7 @@ void iwl_mvm_handle_rx_statistics(struct iwl_mvm *mvm, bytes = (void *)&v11->load_stats.byte_count; air_time = (void *)&v11->load_stats.air_time; } else { - struct iwl_notif_statistics_cdb *stats = (void *)&pkt->data; + struct iwl_notif_statistics *stats = (void *)&pkt->data; energy = (void *)&stats->load_stats.avg_energy; bytes = (void *)&stats->load_stats.byte_count; From b7226f104cef442635de488cdfa95b715ae4c6d3 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 4 Oct 2018 14:28:02 +0300 Subject: [PATCH 0183/1436] iwlwifi: remove support for 9000 A-step devices We don't support 9000 A-step devices anymore, so we can remove support for loading both the a0/a0 and a0/b0 FWs. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/cfg/9000.c | 110 ++++++------------ .../net/wireless/intel/iwlwifi/iwl-config.h | 6 - drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 27 ++--- 3 files changed, 45 insertions(+), 98 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c index f2114137c13f..09618f176f4f 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c @@ -73,21 +73,12 @@ #define IWL9000_SMEM_OFFSET 0x400000 #define IWL9000_SMEM_LEN 0x68000 -#define IWL9000A_FW_PRE "iwlwifi-9000-pu-a0-jf-a0-" -#define IWL9000B_FW_PRE "iwlwifi-9000-pu-b0-jf-b0-" -#define IWL9000RFB_FW_PRE "iwlwifi-9000-pu-a0-jf-b0-" -#define IWL9260A_FW_PRE "iwlwifi-9260-th-a0-jf-a0-" -#define IWL9260B_FW_PRE "iwlwifi-9260-th-b0-jf-b0-" -#define IWL9000A_MODULE_FIRMWARE(api) \ - IWL9000A_FW_PRE __stringify(api) ".ucode" -#define IWL9000B_MODULE_FIRMWARE(api) \ - IWL9000B_FW_PRE __stringify(api) ".ucode" -#define IWL9000RFB_MODULE_FIRMWARE(api) \ - IWL9000RFB_FW_PRE __stringify(api) ".ucode" -#define IWL9260A_MODULE_FIRMWARE(api) \ - IWL9260A_FW_PRE __stringify(api) ".ucode" -#define IWL9260B_MODULE_FIRMWARE(api) \ - IWL9260B_FW_PRE __stringify(api) ".ucode" +#define IWL9000_FW_PRE "iwlwifi-9000-pu-b0-jf-b0-" +#define IWL9260_FW_PRE "iwlwifi-9260-th-b0-jf-b0-" +#define IWL9000_MODULE_FIRMWARE(api) \ + IWL9000_FW_PRE __stringify(api) ".ucode" +#define IWL9260_MODULE_FIRMWARE(api) \ + IWL9260_FW_PRE __stringify(api) ".ucode" static const struct iwl_base_params iwl9000_base_params = { .eeprom_size = OTP_LOW_IMAGE_SIZE_32K, @@ -162,81 +153,67 @@ static const struct iwl_tt_params iwl9000_tt_params = { const struct iwl_cfg iwl9160_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9160", - .fw_name_pre = IWL9260A_FW_PRE, - .fw_name_pre_b_or_c_step = IWL9260B_FW_PRE, + .fw_name_pre = IWL9260_FW_PRE, IWL_DEVICE_9000, }; const struct iwl_cfg iwl9260_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9260", - .fw_name_pre = IWL9260A_FW_PRE, - .fw_name_pre_b_or_c_step = IWL9260B_FW_PRE, + .fw_name_pre = IWL9260_FW_PRE, IWL_DEVICE_9000, }; const struct iwl_cfg iwl9260_killer_2ac_cfg = { .name = "Killer (R) Wireless-AC 1550 Wireless Network Adapter (9260NGW)", - .fw_name_pre = IWL9260A_FW_PRE, - .fw_name_pre_b_or_c_step = IWL9260B_FW_PRE, + .fw_name_pre = IWL9260_FW_PRE, IWL_DEVICE_9000, }; const struct iwl_cfg iwl9270_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9270", - .fw_name_pre = IWL9260A_FW_PRE, - .fw_name_pre_b_or_c_step = IWL9260B_FW_PRE, + .fw_name_pre = IWL9260_FW_PRE, IWL_DEVICE_9000, }; const struct iwl_cfg iwl9460_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9460", - .fw_name_pre = IWL9260A_FW_PRE, - .fw_name_pre_b_or_c_step = IWL9260B_FW_PRE, + .fw_name_pre = IWL9260_FW_PRE, IWL_DEVICE_9000, }; const struct iwl_cfg iwl9460_2ac_cfg_soc = { .name = "Intel(R) Dual Band Wireless AC 9460", - .fw_name_pre = IWL9000A_FW_PRE, - .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, - .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + .fw_name_pre = IWL9000_FW_PRE, IWL_DEVICE_9000, .integrated = true, .soc_latency = 5000, }; const struct iwl_cfg iwl9461_2ac_cfg_soc = { - .name = "Intel(R) Dual Band Wireless AC 9461", - .fw_name_pre = IWL9000A_FW_PRE, - .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, - .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, - IWL_DEVICE_9000, - .integrated = true, - .soc_latency = 5000, + .name = "Intel(R) Dual Band Wireless AC 9461", + .fw_name_pre = IWL9000_FW_PRE, + IWL_DEVICE_9000, + .integrated = true, + .soc_latency = 5000, }; const struct iwl_cfg iwl9462_2ac_cfg_soc = { - .name = "Intel(R) Dual Band Wireless AC 9462", - .fw_name_pre = IWL9000A_FW_PRE, - .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, - .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, - IWL_DEVICE_9000, - .integrated = true, - .soc_latency = 5000, + .name = "Intel(R) Dual Band Wireless AC 9462", + .fw_name_pre = IWL9000_FW_PRE, + IWL_DEVICE_9000, + .integrated = true, + .soc_latency = 5000, }; const struct iwl_cfg iwl9560_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9560", - .fw_name_pre = IWL9260A_FW_PRE, - .fw_name_pre_b_or_c_step = IWL9260B_FW_PRE, + .fw_name_pre = IWL9260_FW_PRE, IWL_DEVICE_9000, }; const struct iwl_cfg iwl9560_2ac_cfg_soc = { .name = "Intel(R) Dual Band Wireless AC 9560", - .fw_name_pre = IWL9000A_FW_PRE, - .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, - .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + .fw_name_pre = IWL9000_FW_PRE, IWL_DEVICE_9000, .integrated = true, .soc_latency = 5000, @@ -244,9 +221,7 @@ const struct iwl_cfg iwl9560_2ac_cfg_soc = { const struct iwl_cfg iwl9560_killer_2ac_cfg_soc = { .name = "Killer (R) Wireless-AC 1550i Wireless Network Adapter (9560NGW)", - .fw_name_pre = IWL9000A_FW_PRE, - .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, - .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + .fw_name_pre = IWL9000_FW_PRE, IWL_DEVICE_9000, .integrated = true, .soc_latency = 5000, @@ -254,9 +229,7 @@ const struct iwl_cfg iwl9560_killer_2ac_cfg_soc = { const struct iwl_cfg iwl9560_killer_s_2ac_cfg_soc = { .name = "Killer (R) Wireless-AC 1550s Wireless Network Adapter (9560NGW)", - .fw_name_pre = IWL9000A_FW_PRE, - .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, - .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + .fw_name_pre = IWL9000_FW_PRE, IWL_DEVICE_9000, .integrated = true, .soc_latency = 5000, @@ -264,9 +237,7 @@ const struct iwl_cfg iwl9560_killer_s_2ac_cfg_soc = { const struct iwl_cfg iwl9460_2ac_cfg_shared_clk = { .name = "Intel(R) Dual Band Wireless AC 9460", - .fw_name_pre = IWL9000A_FW_PRE, - .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, - .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + .fw_name_pre = IWL9000_FW_PRE, IWL_DEVICE_9000, .integrated = true, .soc_latency = 5000, @@ -275,9 +246,7 @@ const struct iwl_cfg iwl9460_2ac_cfg_shared_clk = { const struct iwl_cfg iwl9461_2ac_cfg_shared_clk = { .name = "Intel(R) Dual Band Wireless AC 9461", - .fw_name_pre = IWL9000A_FW_PRE, - .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, - .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + .fw_name_pre = IWL9000_FW_PRE, IWL_DEVICE_9000, .integrated = true, .soc_latency = 5000, @@ -286,9 +255,7 @@ const struct iwl_cfg iwl9461_2ac_cfg_shared_clk = { const struct iwl_cfg iwl9462_2ac_cfg_shared_clk = { .name = "Intel(R) Dual Band Wireless AC 9462", - .fw_name_pre = IWL9000A_FW_PRE, - .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, - .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + .fw_name_pre = IWL9000_FW_PRE, IWL_DEVICE_9000, .integrated = true, .soc_latency = 5000, @@ -297,9 +264,7 @@ const struct iwl_cfg iwl9462_2ac_cfg_shared_clk = { const struct iwl_cfg iwl9560_2ac_cfg_shared_clk = { .name = "Intel(R) Dual Band Wireless AC 9560", - .fw_name_pre = IWL9000A_FW_PRE, - .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, - .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + .fw_name_pre = IWL9000_FW_PRE, IWL_DEVICE_9000, .integrated = true, .soc_latency = 5000, @@ -308,9 +273,7 @@ const struct iwl_cfg iwl9560_2ac_cfg_shared_clk = { const struct iwl_cfg iwl9560_killer_2ac_cfg_shared_clk = { .name = "Killer (R) Wireless-AC 1550i Wireless Network Adapter (9560NGW)", - .fw_name_pre = IWL9000A_FW_PRE, - .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, - .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + .fw_name_pre = IWL9000_FW_PRE, IWL_DEVICE_9000, .integrated = true, .soc_latency = 5000, @@ -319,17 +282,12 @@ const struct iwl_cfg iwl9560_killer_2ac_cfg_shared_clk = { const struct iwl_cfg iwl9560_killer_s_2ac_cfg_shared_clk = { .name = "Killer (R) Wireless-AC 1550s Wireless Network Adapter (9560NGW)", - .fw_name_pre = IWL9000A_FW_PRE, - .fw_name_pre_b_or_c_step = IWL9000B_FW_PRE, - .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + .fw_name_pre = IWL9000_FW_PRE, IWL_DEVICE_9000, .integrated = true, .soc_latency = 5000, .extra_phy_cfg_flags = FW_PHY_CFG_SHARED_CLK }; -MODULE_FIRMWARE(IWL9000A_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); -MODULE_FIRMWARE(IWL9000B_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); -MODULE_FIRMWARE(IWL9000RFB_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); -MODULE_FIRMWARE(IWL9260A_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); -MODULE_FIRMWARE(IWL9260B_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); +MODULE_FIRMWARE(IWL9000_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); +MODULE_FIRMWARE(IWL9260_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index e50cf8c88dfd..7698cadd4ff9 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -335,10 +335,6 @@ struct iwl_csr_params { * @fw_name_pre: Firmware filename prefix. The api version and extension * (.ucode) will be added to filename before loading from disk. The * filename is constructed as fw_name_pre.ucode. - * @fw_name_pre_b_or_c_step: same as @fw_name_pre, only for b or c steps - * (if supported) - * @fw_name_pre_rf_next_step: same as @fw_name_pre_b_or_c_step, only for rf - * next step. Supported only in integrated solutions. * @ucode_api_max: Highest version of uCode API supported by driver. * @ucode_api_min: Lowest version of uCode API supported by driver. * @max_inst_size: The maximal length of the fw inst section (only DVM) @@ -392,8 +388,6 @@ struct iwl_cfg { /* params specific to an individual device within a device family */ const char *name; const char *fw_name_pre; - const char *fw_name_pre_b_or_c_step; - const char *fw_name_pre_rf_next_step; /* params not likely to change within a device family */ const struct iwl_base_params *base_params; /* params likely to change within a device family */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index bf1be985f36b..2efa1dfe9b4c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -210,18 +210,15 @@ static int iwl_request_firmware(struct iwl_drv *drv, bool first) { const struct iwl_cfg *cfg = drv->trans->cfg; char tag[8]; - const char *fw_pre_name; if (drv->trans->cfg->device_family == IWL_DEVICE_FAMILY_9000 && - (CSR_HW_REV_STEP(drv->trans->hw_rev) == SILICON_B_STEP || - CSR_HW_REV_STEP(drv->trans->hw_rev) == SILICON_C_STEP)) - fw_pre_name = cfg->fw_name_pre_b_or_c_step; - else if (drv->trans->cfg->integrated && - CSR_HW_RFID_STEP(drv->trans->hw_rf_id) == SILICON_B_STEP && - cfg->fw_name_pre_rf_next_step) - fw_pre_name = cfg->fw_name_pre_rf_next_step; - else - fw_pre_name = cfg->fw_name_pre; + (CSR_HW_REV_STEP(drv->trans->hw_rev) != SILICON_B_STEP && + CSR_HW_REV_STEP(drv->trans->hw_rev) != SILICON_C_STEP)) { + IWL_ERR(drv, + "Only HW steps B and C are currently supported (0x%0x)\n", + drv->trans->hw_rev); + return -EINVAL; + } if (first) { drv->fw_index = cfg->ucode_api_max; @@ -235,15 +232,13 @@ static int iwl_request_firmware(struct iwl_drv *drv, bool first) IWL_ERR(drv, "no suitable firmware found!\n"); if (cfg->ucode_api_min == cfg->ucode_api_max) { - IWL_ERR(drv, "%s%d is required\n", fw_pre_name, + IWL_ERR(drv, "%s%d is required\n", cfg->fw_name_pre, cfg->ucode_api_max); } else { IWL_ERR(drv, "minimum version required: %s%d\n", - fw_pre_name, - cfg->ucode_api_min); + cfg->fw_name_pre, cfg->ucode_api_min); IWL_ERR(drv, "maximum version supported: %s%d\n", - fw_pre_name, - cfg->ucode_api_max); + cfg->fw_name_pre, cfg->ucode_api_max); } IWL_ERR(drv, @@ -252,7 +247,7 @@ static int iwl_request_firmware(struct iwl_drv *drv, bool first) } snprintf(drv->firmware_name, sizeof(drv->firmware_name), "%s%s.ucode", - fw_pre_name, tag); + cfg->fw_name_pre, tag); IWL_DEBUG_INFO(drv, "attempting to load firmware '%s'\n", drv->firmware_name); From c688e3964a43c85ed27137c9df75072991140922 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 4 Oct 2018 15:20:38 +0300 Subject: [PATCH 0184/1436] iwlwifi: pcie: remove unnecessary iwl_pcie_enable_rx_wake() function This function was only used by 9000 A-step devices, which we don't support anymore, so it can be removed. Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/pcie/internal.h | 2 -- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 26 ------------------- .../net/wireless/intel/iwlwifi/pcie/trans.c | 4 --- 3 files changed, 32 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index d6fc6ce73e0a..971785b17e09 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -1029,8 +1029,6 @@ static inline int iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans) int iwl_pci_fw_exit_d0i3(struct iwl_trans *trans); int iwl_pci_fw_enter_d0i3(struct iwl_trans *trans); -void iwl_pcie_enable_rx_wake(struct iwl_trans *trans, bool enable); - void iwl_pcie_rx_allocator_work(struct work_struct *data); /* common functions that are used by gen2 transport */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index e965cc588850..541da1d44762 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -868,30 +868,6 @@ static void iwl_pcie_rx_hw_init(struct iwl_trans *trans, struct iwl_rxq *rxq) iwl_set_bit(trans, CSR_INT_COALESCING, IWL_HOST_INT_OPER_MODE); } -void iwl_pcie_enable_rx_wake(struct iwl_trans *trans, bool enable) -{ - if (trans->cfg->device_family != IWL_DEVICE_FAMILY_9000) - return; - - if (CSR_HW_REV_STEP(trans->hw_rev) != SILICON_A_STEP) - return; - - if (!trans->cfg->integrated) - return; - - /* - * Turn on the chicken-bits that cause MAC wakeup for RX-related - * values. - * This costs some power, but needed for W/A 9000 integrated A-step - * bug where shadow registers are not in the retention list and their - * value is lost when NIC powers down - */ - iwl_set_bit(trans, CSR_MAC_SHADOW_REG_CTRL, - CSR_MAC_SHADOW_REG_CTRL_RX_WAKE); - iwl_set_bit(trans, CSR_MAC_SHADOW_REG_CTL2, - CSR_MAC_SHADOW_REG_CTL2_RX_WAKE); -} - static void iwl_pcie_rx_mq_hw_init(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); @@ -979,8 +955,6 @@ static void iwl_pcie_rx_mq_hw_init(struct iwl_trans *trans) /* Set interrupt coalescing timer to default (2048 usecs) */ iwl_write8(trans, CSR_INT_COALESCING, IWL_HOST_INT_TIMEOUT_DEF); - - iwl_pcie_enable_rx_wake(trans, true); } void iwl_pcie_rx_init_rxb_lists(struct iwl_rxq *rxq) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 55ccfa22b3c0..40e59e701273 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1530,8 +1530,6 @@ static void iwl_trans_pcie_d3_suspend(struct iwl_trans *trans, bool test, iwl_clear_bit(trans, CSR_GP_CNTRL, BIT(trans->cfg->csr->flag_init_done)); - iwl_pcie_enable_rx_wake(trans, false); - if (reset) { /* * reset TX queues -- some of their registers reset during S3 @@ -1558,8 +1556,6 @@ static int iwl_trans_pcie_d3_resume(struct iwl_trans *trans, return 0; } - iwl_pcie_enable_rx_wake(trans, true); - iwl_set_bit(trans, CSR_GP_CNTRL, BIT(trans->cfg->csr->flag_mac_access_req)); iwl_set_bit(trans, CSR_GP_CNTRL, From 1122135df03664a4aa0f5129bc5d9c9f30b90f17 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 4 Oct 2018 15:23:37 +0300 Subject: [PATCH 0185/1436] iwlwifi: pcie: remove suspend/resume workaround for 9000A devices We don't support 9000 A-step devices anymore, so we can remove the suspend/resume workaround. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 40e59e701273..e72bd9796b5b 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3567,17 +3567,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, IWL_DEBUG_INFO(trans, "HW REV: 0x%0x\n", trans->hw_rev); - /* - * 9000-series integrated A-step has a problem with suspend/resume - * and sometimes even causes the whole platform to get stuck. This - * workaround makes the hardware not go into the problematic state. - */ - if (trans->cfg->integrated && - trans->cfg->device_family == IWL_DEVICE_FAMILY_9000 && - CSR_HW_REV_STEP(trans->hw_rev) == SILICON_A_STEP) - iwl_set_bit(trans, CSR_HOST_CHICKEN, - CSR_HOST_CHICKEN_PM_IDLE_SRC_DIS_SB_PME); - #if IS_ENABLED(CONFIG_IWLMVM) trans->hw_rf_id = iwl_read32(trans, CSR_HW_RF_ID); From 7e08baeb3c6d57b333a08306875a5cd49b4d95b5 Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Tue, 2 Oct 2018 14:13:48 +0300 Subject: [PATCH 0186/1436] iwlwifi: wrt: add 22000 device familiy prph dump support Add prph dump addresses to support prph dump in 22000 HW. Signed-off-by: Shahar S Matityahu Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 166 +++++++++++++++----- 1 file changed, 130 insertions(+), 36 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index d4d6544f8833..5f16879ab26a 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -469,6 +469,93 @@ static const struct iwl_prph_range iwl_prph_dump_addr_9000[] = { { .start = 0x00a02400, .end = 0x00a02758 }, }; +static const struct iwl_prph_range iwl_prph_dump_addr_22000[] = { + { .start = 0x00a00000, .end = 0x00a00000 }, + { .start = 0x00a0000c, .end = 0x00a00024 }, + { .start = 0x00a0002c, .end = 0x00a00034 }, + { .start = 0x00a0003c, .end = 0x00a0003c }, + { .start = 0x00a00410, .end = 0x00a00418 }, + { .start = 0x00a00420, .end = 0x00a00420 }, + { .start = 0x00a00428, .end = 0x00a00428 }, + { .start = 0x00a00430, .end = 0x00a0043c }, + { .start = 0x00a00444, .end = 0x00a00444 }, + { .start = 0x00a00840, .end = 0x00a00840 }, + { .start = 0x00a00850, .end = 0x00a00858 }, + { .start = 0x00a01004, .end = 0x00a01008 }, + { .start = 0x00a01010, .end = 0x00a01010 }, + { .start = 0x00a01018, .end = 0x00a01018 }, + { .start = 0x00a01024, .end = 0x00a01024 }, + { .start = 0x00a0102c, .end = 0x00a01034 }, + { .start = 0x00a0103c, .end = 0x00a01040 }, + { .start = 0x00a01048, .end = 0x00a01050 }, + { .start = 0x00a01058, .end = 0x00a01058 }, + { .start = 0x00a01060, .end = 0x00a01070 }, + { .start = 0x00a0108c, .end = 0x00a0108c }, + { .start = 0x00a01c20, .end = 0x00a01c28 }, + { .start = 0x00a01d10, .end = 0x00a01d10 }, + { .start = 0x00a01e28, .end = 0x00a01e2c }, + { .start = 0x00a01e60, .end = 0x00a01e60 }, + { .start = 0x00a01e80, .end = 0x00a01e80 }, + { .start = 0x00a01ea0, .end = 0x00a01ea0 }, + { .start = 0x00a02000, .end = 0x00a0201c }, + { .start = 0x00a02024, .end = 0x00a02024 }, + { .start = 0x00a02040, .end = 0x00a02048 }, + { .start = 0x00a020c0, .end = 0x00a020e0 }, + { .start = 0x00a02400, .end = 0x00a02404 }, + { .start = 0x00a0240c, .end = 0x00a02414 }, + { .start = 0x00a0241c, .end = 0x00a0243c }, + { .start = 0x00a02448, .end = 0x00a024bc }, + { .start = 0x00a024c4, .end = 0x00a024cc }, + { .start = 0x00a02508, .end = 0x00a02508 }, + { .start = 0x00a02510, .end = 0x00a02514 }, + { .start = 0x00a0251c, .end = 0x00a0251c }, + { .start = 0x00a0252c, .end = 0x00a0255c }, + { .start = 0x00a02564, .end = 0x00a025a0 }, + { .start = 0x00a025a8, .end = 0x00a025b4 }, + { .start = 0x00a025c0, .end = 0x00a025c0 }, + { .start = 0x00a025e8, .end = 0x00a025f4 }, + { .start = 0x00a02c08, .end = 0x00a02c18 }, + { .start = 0x00a02c2c, .end = 0x00a02c38 }, + { .start = 0x00a02c68, .end = 0x00a02c78 }, + { .start = 0x00a03000, .end = 0x00a03000 }, + { .start = 0x00a03010, .end = 0x00a03014 }, + { .start = 0x00a0301c, .end = 0x00a0302c }, + { .start = 0x00a03034, .end = 0x00a03038 }, + { .start = 0x00a03040, .end = 0x00a03044 }, + { .start = 0x00a03060, .end = 0x00a03068 }, + { .start = 0x00a03070, .end = 0x00a03070 }, + { .start = 0x00a0307c, .end = 0x00a03084 }, + { .start = 0x00a0308c, .end = 0x00a03090 }, + { .start = 0x00a03098, .end = 0x00a03098 }, + { .start = 0x00a030a0, .end = 0x00a030a0 }, + { .start = 0x00a030a8, .end = 0x00a030b4 }, + { .start = 0x00a030bc, .end = 0x00a030c0 }, + { .start = 0x00a030c8, .end = 0x00a030f4 }, + { .start = 0x00a03100, .end = 0x00a0312c }, + { .start = 0x00a03c00, .end = 0x00a03c5c }, + { .start = 0x00a04400, .end = 0x00a04454 }, + { .start = 0x00a04460, .end = 0x00a04474 }, + { .start = 0x00a044c0, .end = 0x00a044ec }, + { .start = 0x00a04500, .end = 0x00a04504 }, + { .start = 0x00a04510, .end = 0x00a04538 }, + { .start = 0x00a04540, .end = 0x00a04548 }, + { .start = 0x00a04560, .end = 0x00a04560 }, + { .start = 0x00a04570, .end = 0x00a0457c }, + { .start = 0x00a04590, .end = 0x00a04590 }, + { .start = 0x00a04598, .end = 0x00a04598 }, + { .start = 0x00a045c0, .end = 0x00a045f4 }, + { .start = 0x00a0c000, .end = 0x00a0c018 }, + { .start = 0x00a0c020, .end = 0x00a0c028 }, + { .start = 0x00a0c038, .end = 0x00a0c094 }, + { .start = 0x00a0c0c0, .end = 0x00a0c104 }, + { .start = 0x00a0c10c, .end = 0x00a0c118 }, + { .start = 0x00a0c150, .end = 0x00a0c174 }, + { .start = 0x00a0c17c, .end = 0x00a0c188 }, + { .start = 0x00a0c190, .end = 0x00a0c198 }, + { .start = 0x00a0c1a0, .end = 0x00a0c1a8 }, + { .start = 0x00a0c1b0, .end = 0x00a0c1b8 }, +}; + static void iwl_read_prph_block(struct iwl_trans *trans, u32 start, u32 len_bytes, __le32 *data) { @@ -478,15 +565,20 @@ static void iwl_read_prph_block(struct iwl_trans *trans, u32 start, *data++ = cpu_to_le32(iwl_read_prph_no_grab(trans, start + i)); } -static void iwl_dump_prph(struct iwl_trans *trans, - struct iwl_fw_error_dump_data **data, +static void iwl_dump_prph(struct iwl_fw_runtime *fwrt, const struct iwl_prph_range *iwl_prph_dump_addr, - u32 range_len) + u32 range_len, void *ptr) { struct iwl_fw_error_dump_prph *prph; + struct iwl_trans *trans = fwrt->trans; + struct iwl_fw_error_dump_data **data = + (struct iwl_fw_error_dump_data **)ptr; unsigned long flags; u32 i; + if (!data) + return; + IWL_DEBUG_INFO(trans, "WRT PRPH dump\n"); if (!iwl_trans_grab_nic_access(trans, &flags)) @@ -552,37 +644,47 @@ static struct scatterlist *alloc_sgtable(int size) return table; } -static int iwl_fw_get_prph_len(struct iwl_fw_runtime *fwrt) +static void iwl_fw_get_prph_len(struct iwl_fw_runtime *fwrt, + const struct iwl_prph_range *iwl_prph_dump_addr, + u32 range_len, void *ptr) { - u32 prph_len = 0; - int i; + u32 *prph_len = (u32 *)ptr; + int i, num_bytes_in_chunk; - for (i = 0; i < ARRAY_SIZE(iwl_prph_dump_addr_comm); - i++) { + if (!prph_len) + return; + + for (i = 0; i < range_len; i++) { /* The range includes both boundaries */ - int num_bytes_in_chunk = - iwl_prph_dump_addr_comm[i].end - - iwl_prph_dump_addr_comm[i].start + 4; + num_bytes_in_chunk = + iwl_prph_dump_addr[i].end - + iwl_prph_dump_addr[i].start + 4; - prph_len += sizeof(struct iwl_fw_error_dump_data) + + *prph_len += sizeof(struct iwl_fw_error_dump_data) + sizeof(struct iwl_fw_error_dump_prph) + num_bytes_in_chunk; } +} - if (fwrt->trans->cfg->mq_rx_supported) { - for (i = 0; i < - ARRAY_SIZE(iwl_prph_dump_addr_9000); i++) { - /* The range includes both boundaries */ - int num_bytes_in_chunk = - iwl_prph_dump_addr_9000[i].end - - iwl_prph_dump_addr_9000[i].start + 4; +static void iwl_fw_prph_handler(struct iwl_fw_runtime *fwrt, void *ptr, + void (*handler)(struct iwl_fw_runtime *, + const struct iwl_prph_range *, + u32, void *)) +{ + u32 range_len; - prph_len += sizeof(struct iwl_fw_error_dump_data) + - sizeof(struct iwl_fw_error_dump_prph) + - num_bytes_in_chunk; + if (fwrt->trans->cfg->device_family >= IWL_DEVICE_FAMILY_22000) { + range_len = ARRAY_SIZE(iwl_prph_dump_addr_22000); + handler(fwrt, iwl_prph_dump_addr_22000, range_len, ptr); + } else { + range_len = ARRAY_SIZE(iwl_prph_dump_addr_comm); + handler(fwrt, iwl_prph_dump_addr_comm, range_len, ptr); + + if (fwrt->trans->cfg->mq_rx_supported) { + range_len = ARRAY_SIZE(iwl_prph_dump_addr_9000); + handler(fwrt, iwl_prph_dump_addr_9000, range_len, ptr); } } - return prph_len; } static void iwl_fw_dump_mem(struct iwl_fw_runtime *fwrt, @@ -747,9 +849,9 @@ _iwl_fw_error_dump(struct iwl_fw_runtime *fwrt, fifo_len += iwl_fw_txf_len(fwrt, mem_cfg); /* Make room for PRPH registers */ - if (!fwrt->trans->cfg->gen2 && - iwl_fw_dbg_type_on(fwrt, IWL_FW_ERROR_DUMP_PRPH)) - prph_len += iwl_fw_get_prph_len(fwrt); + if (iwl_fw_dbg_type_on(fwrt, IWL_FW_ERROR_DUMP_PRPH)) + iwl_fw_prph_handler(fwrt, &prph_len, + iwl_fw_get_prph_len); if (fwrt->trans->cfg->device_family == IWL_DEVICE_FAMILY_7000 && iwl_fw_dbg_type_on(fwrt, IWL_FW_ERROR_DUMP_RADIO_REG)) @@ -941,16 +1043,8 @@ _iwl_fw_error_dump(struct iwl_fw_runtime *fwrt, if (iwl_fw_dbg_is_paging_enabled(fwrt)) iwl_dump_paging(fwrt, &dump_data); - if (prph_len) { - iwl_dump_prph(fwrt->trans, &dump_data, - iwl_prph_dump_addr_comm, - ARRAY_SIZE(iwl_prph_dump_addr_comm)); - - if (fwrt->trans->cfg->mq_rx_supported) - iwl_dump_prph(fwrt->trans, &dump_data, - iwl_prph_dump_addr_9000, - ARRAY_SIZE(iwl_prph_dump_addr_9000)); - } + if (prph_len) + iwl_fw_prph_handler(fwrt, &dump_data, iwl_dump_prph); out: dump_file->file_len = cpu_to_le32(file_len); From c281f13792f64372f49b63eab4c1ce88282448d2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 12 Oct 2018 09:35:00 +0200 Subject: [PATCH 0187/1436] iwlwifi: mvm: make NVM access actually fail on failures On any failure, including if we crash the firmware or get garbage data, we currently ignore this and pretend the OTP was empty. Clearly, this isn't typically the case. In cases other than the firmware saying it can't read the requested section, or the section having ended, make the access actually fail and trickle the error up through the layers to fail init. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/nvm.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index 6fc5cc1f2b5b..f08934b03cf3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -179,7 +179,7 @@ static int iwl_nvm_read_chunk(struct iwl_mvm *mvm, u16 section, IWL_DEBUG_EEPROM(mvm->trans->dev, "NVM access command failed with status %d (device: %s)\n", ret, mvm->cfg->name); - ret = -EIO; + ret = -ENODATA; } goto exit; } @@ -380,8 +380,12 @@ int iwl_nvm_init(struct iwl_mvm *mvm) /* we override the constness for initial read */ ret = iwl_nvm_read_section(mvm, section, nvm_buffer, size_read); - if (ret < 0) + if (ret == -ENODATA) { + ret = 0; continue; + } + if (ret < 0) + break; size_read += ret; temp = kmemdup(nvm_buffer, ret, GFP_KERNEL); if (!temp) { @@ -454,7 +458,7 @@ int iwl_nvm_init(struct iwl_mvm *mvm) IWL_DEBUG_EEPROM(mvm->trans->dev, "nvm version = %x\n", mvm->nvm_data->nvm_version); - return 0; + return ret < 0 ? ret : 0; } struct iwl_mcc_update_resp * From cfbc6c4c5b91c7725ef14465b98ac347d31f2334 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 21 Aug 2018 15:23:39 +0300 Subject: [PATCH 0188/1436] iwlwifi: mvm: support mac80211 TXQs model Move to use the new mac80211 TXQs implementation. This has quite a few benefits for us. We can get rid of the awkward mapping of DQA to mac80211 queues. We can stop buffering traffic while waiting for the queue to be allocated. We can also use mac80211 AMSDUs instead of building it ourselves. The usage is pretty simple: Each ieee80211_txq contains iwl_mvm_txq. There is such a queue for each TID, and one for management frames. We keep having static AP queues for probes and non-bufferable MMPDUs, along with broadcast and multicast queues. Those are being used from the "old" TX invocation path - iwl_mvm_mac_tx. When there is a new frame in a TXQ, iwl_mvm_mac_wake_tx is being called, and either invokes the TX path, or allocates the queue if it does not exist. Most of the TX path is left untouched, although we can consider cleaning it up some more, for example get rid of the duplication of txq_id in both iwl_mvm_txq and iwl_mvm_dqa_txq_info. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 8 +- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 5 +- .../net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 72 +--- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 113 ++++-- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 54 ++- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 87 +++-- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 360 +++++++----------- drivers/net/wireless/intel/iwlwifi/mvm/sta.h | 4 - drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 71 +--- 9 files changed, 331 insertions(+), 443 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 01b5338201d6..36ed7d6fc971 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -2125,7 +2125,6 @@ static int iwl_mvm_d3_test_open(struct inode *inode, struct file *file) file->private_data = inode->i_private; - ieee80211_stop_queues(mvm->hw); synchronize_net(); mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_D3; @@ -2140,10 +2139,9 @@ static int iwl_mvm_d3_test_open(struct inode *inode, struct file *file) rtnl_unlock(); if (err > 0) err = -EINVAL; - if (err) { - ieee80211_wake_queues(mvm->hw); + if (err) return err; - } + mvm->d3_test_active = true; mvm->keep_vif = NULL; return 0; @@ -2223,8 +2221,6 @@ static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file) mvm->hw, IEEE80211_IFACE_ITER_NORMAL, iwl_mvm_d3_test_disconn_work_iter, mvm->keep_vif); - ieee80211_wake_queues(mvm->hw); - return 0; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 0d6c313b6669..27582d70d45a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -295,7 +295,7 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm, struct iwl_notification_wait alive_wait; struct iwl_mvm_alive_data alive_data; const struct fw_img *fw; - int ret, i; + int ret; enum iwl_ucode_type old_type = mvm->fwrt.cur_fw_img; static const u16 alive_cmd[] = { MVM_ALIVE }; @@ -373,9 +373,6 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm, mvm->queue_info[IWL_MVM_DQA_CMD_QUEUE].tid_bitmap = BIT(IWL_MAX_TID_COUNT + 2); - for (i = 0; i < IEEE80211_MAX_QUEUES; i++) - atomic_set(&mvm->mac80211_queue_stop_count[i], 0); - set_bit(IWL_MVM_STATUS_FIRMWARE_RUNNING, &mvm->status); #ifdef CONFIG_IWLWIFI_DEBUGFS iwl_fw_set_dbg_rec_on(&mvm->fwrt); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index eb66269ddf73..a79c79701cc4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -97,11 +97,6 @@ struct iwl_mvm_mac_iface_iterator_data { bool found_vif; }; -struct iwl_mvm_hw_queues_iface_iterator_data { - struct ieee80211_vif *exclude_vif; - unsigned long used_hw_queues; -}; - static void iwl_mvm_mac_tsf_id_iter(void *_data, u8 *mac, struct ieee80211_vif *vif) { @@ -208,61 +203,6 @@ static void iwl_mvm_mac_tsf_id_iter(void *_data, u8 *mac, data->preferred_tsf = NUM_TSF_IDS; } -/* - * Get the mask of the queues used by the vif - */ -u32 iwl_mvm_mac_get_queues_mask(struct ieee80211_vif *vif) -{ - u32 qmask = 0, ac; - - if (vif->type == NL80211_IFTYPE_P2P_DEVICE) - return BIT(IWL_MVM_OFFCHANNEL_QUEUE); - - for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { - if (vif->hw_queue[ac] != IEEE80211_INVAL_HW_QUEUE) - qmask |= BIT(vif->hw_queue[ac]); - } - - if (vif->type == NL80211_IFTYPE_AP || - vif->type == NL80211_IFTYPE_ADHOC) - qmask |= BIT(vif->cab_queue); - - return qmask; -} - -static void iwl_mvm_iface_hw_queues_iter(void *_data, u8 *mac, - struct ieee80211_vif *vif) -{ - struct iwl_mvm_hw_queues_iface_iterator_data *data = _data; - - /* exclude the given vif */ - if (vif == data->exclude_vif) - return; - - data->used_hw_queues |= iwl_mvm_mac_get_queues_mask(vif); -} - -unsigned long iwl_mvm_get_used_hw_queues(struct iwl_mvm *mvm, - struct ieee80211_vif *exclude_vif) -{ - struct iwl_mvm_hw_queues_iface_iterator_data data = { - .exclude_vif = exclude_vif, - .used_hw_queues = - BIT(IWL_MVM_OFFCHANNEL_QUEUE) | - BIT(mvm->aux_queue) | - BIT(IWL_MVM_DQA_GCAST_QUEUE), - }; - - lockdep_assert_held(&mvm->mutex); - - /* mark all VIF used hw queues */ - ieee80211_iterate_active_interfaces_atomic( - mvm->hw, IEEE80211_IFACE_ITER_RESUME_ALL, - iwl_mvm_iface_hw_queues_iter, &data); - - return data.used_hw_queues; -} - static void iwl_mvm_mac_iface_iterator(void *_data, u8 *mac, struct ieee80211_vif *vif) { @@ -360,8 +300,6 @@ int iwl_mvm_mac_ctxt_init(struct iwl_mvm *mvm, struct ieee80211_vif *vif) mvm->hw, IEEE80211_IFACE_ITER_RESUME_ALL, iwl_mvm_mac_iface_iterator, &data); - used_hw_queues = iwl_mvm_get_used_hw_queues(mvm, vif); - /* * In the case we're getting here during resume, it's similar to * firmware restart, and with RESUME_ALL the iterator will find @@ -416,9 +354,6 @@ int iwl_mvm_mac_ctxt_init(struct iwl_mvm *mvm, struct ieee80211_vif *vif) * the ones here - no real limit */ queue_limit = IEEE80211_MAX_QUEUES; - BUILD_BUG_ON(IEEE80211_MAX_QUEUES > - BITS_PER_BYTE * - sizeof(mvm->hw_queue_to_mac80211[0])); /* * Find available queues, and allocate them to the ACs. When in @@ -446,9 +381,6 @@ int iwl_mvm_mac_ctxt_init(struct iwl_mvm *mvm, struct ieee80211_vif *vif) * queue value (when queue is enabled). */ mvmvif->cab_queue = IWL_MVM_DQA_GCAST_QUEUE; - vif->cab_queue = IWL_MVM_DQA_GCAST_QUEUE; - } else { - vif->cab_queue = IEEE80211_INVAL_HW_QUEUE; } mvmvif->bcast_sta.sta_id = IWL_MVM_INVALID_STA; @@ -462,8 +394,6 @@ int iwl_mvm_mac_ctxt_init(struct iwl_mvm *mvm, struct ieee80211_vif *vif) exit_fail: memset(mvmvif, 0, sizeof(struct iwl_mvm_vif)); - memset(vif->hw_queue, IEEE80211_INVAL_HW_QUEUE, sizeof(vif->hw_queue)); - vif->cab_queue = IEEE80211_INVAL_HW_QUEUE; return ret; } @@ -1185,7 +1115,7 @@ static void iwl_mvm_mac_ctxt_cmd_fill_ap(struct iwl_mvm *mvm, if (!fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_STA_TYPE)) - ctxt_ap->mcast_qid = cpu_to_le32(vif->cab_queue); + ctxt_ap->mcast_qid = cpu_to_le32(mvmvif->cab_queue); /* * Only set the beacon time when the MAC is being added, when we diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 310a36dd33cf..58bb92b0ef05 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -425,7 +425,6 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) ieee80211_hw_set(hw, SIGNAL_DBM); ieee80211_hw_set(hw, SPECTRUM_MGMT); ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS); - ieee80211_hw_set(hw, QUEUE_CONTROL); ieee80211_hw_set(hw, WANT_MONITOR_VIF); ieee80211_hw_set(hw, SUPPORTS_PS); ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS); @@ -439,6 +438,8 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) ieee80211_hw_set(hw, NEEDS_UNIQUE_STA_ADDR); ieee80211_hw_set(hw, DEAUTH_NEED_MGD_TX_PREP); ieee80211_hw_set(hw, SUPPORTS_VHT_EXT_NSS_BW); + ieee80211_hw_set(hw, BUFF_MMPDU_TXQ); + ieee80211_hw_set(hw, STA_MMPDU_TXQ); if (iwl_mvm_has_tlc_offload(mvm)) { ieee80211_hw_set(hw, TX_AMPDU_SETUP_IN_HW); @@ -549,6 +550,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) hw->sta_data_size = sizeof(struct iwl_mvm_sta); hw->vif_data_size = sizeof(struct iwl_mvm_vif); hw->chanctx_data_size = sizeof(u16); + hw->txq_data_size = sizeof(struct iwl_mvm_txq); hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_P2P_CLIENT) | @@ -798,7 +800,6 @@ static bool iwl_mvm_defer_tx(struct iwl_mvm *mvm, goto out; __skb_queue_tail(&mvm->d0i3_tx, skb); - ieee80211_stop_queues(mvm->hw); /* trigger wakeup */ iwl_mvm_ref(mvm, IWL_MVM_REF_TX); @@ -818,13 +819,15 @@ static void iwl_mvm_mac_tx(struct ieee80211_hw *hw, struct ieee80211_sta *sta = control->sta; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr = (void *)skb->data; + bool offchannel = IEEE80211_SKB_CB(skb)->flags & + IEEE80211_TX_CTL_TX_OFFCHAN; if (iwl_mvm_is_radio_killed(mvm)) { IWL_DEBUG_DROP(mvm, "Dropping - RF/CT KILL\n"); goto drop; } - if (info->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE && + if (offchannel && !test_bit(IWL_MVM_STATUS_ROC_RUNNING, &mvm->status) && !test_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status)) goto drop; @@ -837,8 +840,8 @@ static void iwl_mvm_mac_tx(struct ieee80211_hw *hw, sta = NULL; /* If there is no sta, and it's not offchannel - send through AP */ - if (info->control.vif->type == NL80211_IFTYPE_STATION && - info->hw_queue != IWL_MVM_OFFCHANNEL_QUEUE && !sta) { + if (!sta && info->control.vif->type == NL80211_IFTYPE_STATION && + !offchannel) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(info->control.vif); u8 ap_sta_id = READ_ONCE(mvmvif->ap_sta_id); @@ -866,6 +869,77 @@ static void iwl_mvm_mac_tx(struct ieee80211_hw *hw, ieee80211_free_txskb(hw, skb); } +void iwl_mvm_mac_itxq_xmit(struct ieee80211_hw *hw, struct ieee80211_txq *txq) +{ + struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); + struct iwl_mvm_txq *mvmtxq = iwl_mvm_txq_from_mac80211(txq); + struct sk_buff *skb = NULL; + + spin_lock(&mvmtxq->tx_path_lock); + + rcu_read_lock(); + while (likely(!mvmtxq->stopped && + (mvm->trans->system_pm_mode == + IWL_PLAT_PM_MODE_DISABLED))) { + skb = ieee80211_tx_dequeue(hw, txq); + + if (!skb) + break; + + if (!txq->sta) + iwl_mvm_tx_skb_non_sta(mvm, skb); + else + iwl_mvm_tx_skb(mvm, skb, txq->sta); + } + rcu_read_unlock(); + + spin_unlock(&mvmtxq->tx_path_lock); +} + +static void iwl_mvm_mac_wake_tx_queue(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) +{ + struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); + struct iwl_mvm_txq *mvmtxq = iwl_mvm_txq_from_mac80211(txq); + + /* + * Please note that racing is handled very carefully here: + * mvmtxq->txq_id is updated during allocation, and mvmtxq->list is + * deleted afterwards. + * This means that if: + * mvmtxq->txq_id != INVALID_QUEUE && list_empty(&mvmtxq->list): + * queue is allocated and we can TX. + * mvmtxq->txq_id != INVALID_QUEUE && !list_empty(&mvmtxq->list): + * a race, should defer the frame. + * mvmtxq->txq_id == INVALID_QUEUE && list_empty(&mvmtxq->list): + * need to allocate the queue and defer the frame. + * mvmtxq->txq_id == INVALID_QUEUE && !list_empty(&mvmtxq->list): + * queue is already scheduled for allocation, no need to allocate, + * should defer the frame. + */ + + /* If the queue is allocated TX and return. */ + if (!txq->sta || mvmtxq->txq_id != IWL_MVM_INVALID_QUEUE) { + /* + * Check that list is empty to avoid a race where txq_id is + * already updated, but the queue allocation work wasn't + * finished + */ + if (unlikely(txq->sta && !list_empty(&mvmtxq->list))) + return; + + iwl_mvm_mac_itxq_xmit(hw, txq); + return; + } + + /* The list is being deleted only after the queue is fully allocated. */ + if (!list_empty(&mvmtxq->list)) + return; + + list_add_tail(&mvmtxq->list, &mvm->add_stream_txqs); + schedule_work(&mvm->add_stream_wk); +} + static inline bool iwl_enable_rx_ampdu(const struct iwl_cfg *cfg) { if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_RXAGG) @@ -1107,7 +1181,6 @@ static void iwl_mvm_restart_cleanup(struct iwl_mvm *mvm) iwl_mvm_reset_phy_ctxts(mvm); memset(mvm->fw_key_table, 0, sizeof(mvm->fw_key_table)); - memset(mvm->sta_deferred_frames, 0, sizeof(mvm->sta_deferred_frames)); memset(&mvm->last_bt_notif, 0, sizeof(mvm->last_bt_notif)); memset(&mvm->last_bt_ci_cmd, 0, sizeof(mvm->last_bt_ci_cmd)); @@ -2883,32 +2956,6 @@ iwl_mvm_tdls_check_trigger(struct iwl_mvm *mvm, peer_addr, action); } -static void iwl_mvm_purge_deferred_tx_frames(struct iwl_mvm *mvm, - struct iwl_mvm_sta *mvm_sta) -{ - struct iwl_mvm_tid_data *tid_data; - struct sk_buff *skb; - int i; - - spin_lock_bh(&mvm_sta->lock); - for (i = 0; i <= IWL_MAX_TID_COUNT; i++) { - tid_data = &mvm_sta->tid_data[i]; - - while ((skb = __skb_dequeue(&tid_data->deferred_tx_frames))) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - - /* - * The first deferred frame should've stopped the MAC - * queues, so we should never get a second deferred - * frame for the RA/TID. - */ - iwl_mvm_start_mac_queues(mvm, BIT(info->hw_queue)); - ieee80211_free_txskb(mvm->hw, skb); - } - } - spin_unlock_bh(&mvm_sta->lock); -} - static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, @@ -2942,7 +2989,6 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw, */ if (old_state == IEEE80211_STA_NONE && new_state == IEEE80211_STA_NOTEXIST) { - iwl_mvm_purge_deferred_tx_frames(mvm, mvm_sta); flush_work(&mvm->add_stream_wk); /* @@ -4680,6 +4726,7 @@ static void iwl_mvm_sync_rx_queues(struct ieee80211_hw *hw) const struct ieee80211_ops iwl_mvm_hw_ops = { .tx = iwl_mvm_mac_tx, + .wake_tx_queue = iwl_mvm_mac_wake_tx_queue, .ampdu_action = iwl_mvm_mac_ampdu_action, .start = iwl_mvm_mac_start, .reconfig_complete = iwl_mvm_mac_reconfig_complete, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 1aa690e081ff..39ee3ace59b6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -778,6 +778,40 @@ struct iwl_mvm_geo_profile { u8 values[ACPI_GEO_TABLE_SIZE]; }; +struct iwl_mvm_txq { + struct list_head list; + u16 txq_id; + /* Protects TX path invocation from two places */ + spinlock_t tx_path_lock; + bool stopped; +}; + +static inline struct iwl_mvm_txq * +iwl_mvm_txq_from_mac80211(struct ieee80211_txq *txq) +{ + return (void *)txq->drv_priv; +} + +static inline struct iwl_mvm_txq * +iwl_mvm_txq_from_tid(struct ieee80211_sta *sta, u8 tid) +{ + if (tid == IWL_MAX_TID_COUNT) + tid = IEEE80211_NUM_TIDS; + + return (void *)sta->txq[tid]->drv_priv; +} + +/** + * struct iwl_mvm_tvqm_txq_info - maps TVQM hw queue to tid + * + * @sta_id: sta id + * @txq_tid: txq tid + */ +struct iwl_mvm_tvqm_txq_info { + u8 sta_id; + u8 txq_tid; +}; + struct iwl_mvm_dqa_txq_info { u8 ra_sta_id; /* The RA this queue is mapped to, if exists */ bool reserved; /* Is this the TXQ reserved for a STA */ @@ -843,13 +877,13 @@ struct iwl_mvm { u64 on_time_scan; } radio_stats, accu_radio_stats; - u16 hw_queue_to_mac80211[IWL_MAX_TVQM_QUEUES]; - - struct iwl_mvm_dqa_txq_info queue_info[IWL_MAX_HW_QUEUES]; + struct list_head add_stream_txqs; + union { + struct iwl_mvm_dqa_txq_info queue_info[IWL_MAX_HW_QUEUES]; + struct iwl_mvm_tvqm_txq_info tvqm_info[IWL_MAX_TVQM_QUEUES]; + }; struct work_struct add_stream_wk; /* To add streams to queues */ - atomic_t mac80211_queue_stop_count[IEEE80211_MAX_QUEUES]; - const char *nvm_file_name; struct iwl_nvm_data *nvm_data; /* NVM sections */ @@ -863,7 +897,6 @@ struct iwl_mvm { /* data related to data path */ struct iwl_rx_phy_info last_phy_info; struct ieee80211_sta __rcu *fw_id_to_mac_id[IWL_MVM_STATION_COUNT]; - unsigned long sta_deferred_frames[BITS_TO_LONGS(IWL_MVM_STATION_COUNT)]; u8 rx_ba_sessions; /* configured by mac80211 */ @@ -1470,6 +1503,8 @@ void iwl_mvm_set_tx_cmd(struct iwl_mvm *mvm, struct sk_buff *skb, void iwl_mvm_set_tx_cmd_rate(struct iwl_mvm *mvm, struct iwl_tx_cmd *tx_cmd, struct ieee80211_tx_info *info, struct ieee80211_sta *sta, __le16 fc); +void iwl_mvm_mac_itxq_xmit(struct ieee80211_hw *hw, struct ieee80211_txq *txq); + #ifdef CONFIG_IWLWIFI_DEBUG const char *iwl_mvm_get_tx_fail_reason(u32 status); #else @@ -1599,7 +1634,6 @@ int iwl_mvm_mac_ctxt_add(struct iwl_mvm *mvm, struct ieee80211_vif *vif); int iwl_mvm_mac_ctxt_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif, bool force_assoc_off, const u8 *bssid_override); int iwl_mvm_mac_ctxt_remove(struct iwl_mvm *mvm, struct ieee80211_vif *vif); -u32 iwl_mvm_mac_get_queues_mask(struct ieee80211_vif *vif); int iwl_mvm_mac_ctxt_beacon_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif); void iwl_mvm_rx_beacon_notif(struct iwl_mvm *mvm, @@ -1615,8 +1649,6 @@ void iwl_mvm_window_status_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb); void iwl_mvm_mac_ctxt_recalc_tsf_id(struct iwl_mvm *mvm, struct ieee80211_vif *vif); -unsigned long iwl_mvm_get_used_hw_queues(struct iwl_mvm *mvm, - struct ieee80211_vif *exclude_vif); void iwl_mvm_probe_resp_data_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb); void iwl_mvm_channel_switch_noa_notif(struct iwl_mvm *mvm, @@ -1906,10 +1938,6 @@ static inline void iwl_mvm_stop_device(struct iwl_mvm *mvm) iwl_trans_stop_device(mvm->trans); } -/* Stop/start all mac queues in a given bitmap */ -void iwl_mvm_start_mac_queues(struct iwl_mvm *mvm, unsigned long mq); -void iwl_mvm_stop_mac_queues(struct iwl_mvm *mvm, unsigned long mq); - /* Re-configure the SCD for a queue that has already been configured */ int iwl_mvm_reconfig_scd(struct iwl_mvm *mvm, int queue, int fifo, int sta_id, int tid, int frame_limit, u16 ssn); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 30c5127034a0..d257c143e624 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -685,6 +685,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, INIT_DELAYED_WORK(&mvm->tdls_cs.dwork, iwl_mvm_tdls_ch_switch_work); INIT_DELAYED_WORK(&mvm->scan_timeout_dwork, iwl_mvm_scan_timeout_wk); INIT_WORK(&mvm->add_stream_wk, iwl_mvm_add_new_dqa_stream_wk); + INIT_LIST_HEAD(&mvm->add_stream_txqs); spin_lock_init(&mvm->d0i3_tx_lock); spin_lock_init(&mvm->refs_lock); @@ -1079,24 +1080,6 @@ static void iwl_mvm_rx_mq(struct iwl_op_mode *op_mode, iwl_mvm_rx_common(mvm, rxb, pkt); } -void iwl_mvm_stop_mac_queues(struct iwl_mvm *mvm, unsigned long mq) -{ - int q; - - if (WARN_ON_ONCE(!mq)) - return; - - for_each_set_bit(q, &mq, IEEE80211_MAX_QUEUES) { - if (atomic_inc_return(&mvm->mac80211_queue_stop_count[q]) > 1) { - IWL_DEBUG_TX_QUEUES(mvm, - "mac80211 %d already stopped\n", q); - continue; - } - - ieee80211_stop_queue(mvm->hw, q); - } -} - static void iwl_mvm_async_cb(struct iwl_op_mode *op_mode, const struct iwl_device_cmd *cmd) { @@ -1109,38 +1092,66 @@ static void iwl_mvm_async_cb(struct iwl_op_mode *op_mode, iwl_trans_block_txq_ptrs(mvm->trans, false); } -static void iwl_mvm_stop_sw_queue(struct iwl_op_mode *op_mode, int hw_queue) +static void iwl_mvm_queue_state_change(struct iwl_op_mode *op_mode, + int hw_queue, bool start) { struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode); - unsigned long mq = mvm->hw_queue_to_mac80211[hw_queue]; + struct ieee80211_sta *sta; + struct ieee80211_txq *txq; + struct iwl_mvm_txq *mvmtxq; + int i; + unsigned long tid_bitmap; + struct iwl_mvm_sta *mvmsta; + u8 sta_id; - iwl_mvm_stop_mac_queues(mvm, mq); -} + sta_id = iwl_mvm_has_new_tx_api(mvm) ? + mvm->tvqm_info[hw_queue].sta_id : + mvm->queue_info[hw_queue].ra_sta_id; -void iwl_mvm_start_mac_queues(struct iwl_mvm *mvm, unsigned long mq) -{ - int q; - - if (WARN_ON_ONCE(!mq)) + if (WARN_ON_ONCE(sta_id >= ARRAY_SIZE(mvm->fw_id_to_mac_id))) return; - for_each_set_bit(q, &mq, IEEE80211_MAX_QUEUES) { - if (atomic_dec_return(&mvm->mac80211_queue_stop_count[q]) > 0) { - IWL_DEBUG_TX_QUEUES(mvm, - "mac80211 %d still stopped\n", q); - continue; - } + rcu_read_lock(); - ieee80211_wake_queue(mvm->hw, q); + sta = rcu_dereference(mvm->fw_id_to_mac_id[sta_id]); + if (IS_ERR_OR_NULL(sta)) + goto out; + mvmsta = iwl_mvm_sta_from_mac80211(sta); + + if (iwl_mvm_has_new_tx_api(mvm)) { + int tid = mvm->tvqm_info[hw_queue].txq_tid; + + tid_bitmap = BIT(tid); + } else { + tid_bitmap = mvm->queue_info[hw_queue].tid_bitmap; } + + for_each_set_bit(i, &tid_bitmap, IWL_MAX_TID_COUNT + 1) { + int tid = i; + + if (tid == IWL_MAX_TID_COUNT) + tid = IEEE80211_NUM_TIDS; + + txq = sta->txq[tid]; + mvmtxq = iwl_mvm_txq_from_mac80211(txq); + mvmtxq->stopped = !start; + + if (start && mvmsta->sta_state != IEEE80211_STA_NOTEXIST) + iwl_mvm_mac_itxq_xmit(mvm->hw, txq); + } + +out: + rcu_read_unlock(); +} + +static void iwl_mvm_stop_sw_queue(struct iwl_op_mode *op_mode, int hw_queue) +{ + iwl_mvm_queue_state_change(op_mode, hw_queue, false); } static void iwl_mvm_wake_sw_queue(struct iwl_op_mode *op_mode, int hw_queue) { - struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode); - unsigned long mq = mvm->hw_queue_to_mac80211[hw_queue]; - - iwl_mvm_start_mac_queues(mvm, mq); + iwl_mvm_queue_state_change(op_mode, hw_queue, true); } static void iwl_mvm_set_rfkill_state(struct iwl_mvm *mvm) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index e28009832da0..09f294c466da 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -356,24 +356,16 @@ static int iwl_mvm_invalidate_sta_queue(struct iwl_mvm *mvm, int queue, return ret; } -static int iwl_mvm_disable_txq(struct iwl_mvm *mvm, int queue, - int mac80211_queue, u8 tid, u8 flags) +static int iwl_mvm_disable_txq(struct iwl_mvm *mvm, struct ieee80211_sta *sta, + int queue, u8 tid, u8 flags) { struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, .action = SCD_CFG_DISABLE_QUEUE, }; - bool remove_mac_queue = mac80211_queue != IEEE80211_INVAL_HW_QUEUE; int ret; - if (WARN_ON(remove_mac_queue && mac80211_queue >= IEEE80211_MAX_QUEUES)) - return -EINVAL; - if (iwl_mvm_has_new_tx_api(mvm)) { - if (remove_mac_queue) - mvm->hw_queue_to_mac80211[queue] &= - ~BIT(mac80211_queue); - iwl_trans_txq_free(mvm->trans, queue); return 0; @@ -384,36 +376,15 @@ static int iwl_mvm_disable_txq(struct iwl_mvm *mvm, int queue, mvm->queue_info[queue].tid_bitmap &= ~BIT(tid); - /* - * If there is another TID with the same AC - don't remove the MAC queue - * from the mapping - */ - if (tid < IWL_MAX_TID_COUNT) { - unsigned long tid_bitmap = - mvm->queue_info[queue].tid_bitmap; - int ac = tid_to_mac80211_ac[tid]; - int i; - - for_each_set_bit(i, &tid_bitmap, IWL_MAX_TID_COUNT) { - if (tid_to_mac80211_ac[i] == ac) - remove_mac_queue = false; - } - } - - if (remove_mac_queue) - mvm->hw_queue_to_mac80211[queue] &= - ~BIT(mac80211_queue); - cmd.action = mvm->queue_info[queue].tid_bitmap ? SCD_CFG_ENABLE_QUEUE : SCD_CFG_DISABLE_QUEUE; if (cmd.action == SCD_CFG_DISABLE_QUEUE) mvm->queue_info[queue].status = IWL_MVM_QUEUE_FREE; IWL_DEBUG_TX_QUEUES(mvm, - "Disabling TXQ #%d tids=0x%x (mac80211 map:0x%x)\n", + "Disabling TXQ #%d tids=0x%x\n", queue, - mvm->queue_info[queue].tid_bitmap, - mvm->hw_queue_to_mac80211[queue]); + mvm->queue_info[queue].tid_bitmap); /* If the queue is still enabled - nothing left to do in this func */ if (cmd.action == SCD_CFG_ENABLE_QUEUE) @@ -423,15 +394,19 @@ static int iwl_mvm_disable_txq(struct iwl_mvm *mvm, int queue, cmd.tid = mvm->queue_info[queue].txq_tid; /* Make sure queue info is correct even though we overwrite it */ - WARN(mvm->queue_info[queue].tid_bitmap || - mvm->hw_queue_to_mac80211[queue], - "TXQ #%d info out-of-sync - mac map=0x%x, tids=0x%x\n", - queue, mvm->hw_queue_to_mac80211[queue], - mvm->queue_info[queue].tid_bitmap); + WARN(mvm->queue_info[queue].tid_bitmap, + "TXQ #%d info out-of-sync - tids=0x%x\n", + queue, mvm->queue_info[queue].tid_bitmap); /* If we are here - the queue is freed and we can zero out these vals */ mvm->queue_info[queue].tid_bitmap = 0; - mvm->hw_queue_to_mac80211[queue] = 0; + + if (sta) { + struct iwl_mvm_txq *mvmtxq = + iwl_mvm_txq_from_tid(sta, tid); + + mvmtxq->txq_id = IWL_MVM_INVALID_QUEUE; + } /* Regardless if this is a reserved TXQ for a STA - mark it as false */ mvm->queue_info[queue].reserved = false; @@ -517,9 +492,14 @@ static int iwl_mvm_remove_sta_queue_marking(struct iwl_mvm *mvm, int queue) spin_lock_bh(&mvmsta->lock); /* Unmap MAC queues and TIDs from this queue */ for_each_set_bit(tid, &tid_bitmap, IWL_MAX_TID_COUNT + 1) { + struct iwl_mvm_txq *mvmtxq = + iwl_mvm_txq_from_tid(sta, tid); + if (mvmsta->tid_data[tid].state == IWL_AGG_ON) disable_agg_tids |= BIT(tid); mvmsta->tid_data[tid].txq_id = IWL_MVM_INVALID_QUEUE; + + mvmtxq->txq_id = IWL_MVM_INVALID_QUEUE; } mvmsta->tfd_queue_msk &= ~BIT(queue); /* Don't use this queue anymore */ @@ -541,10 +521,11 @@ static int iwl_mvm_remove_sta_queue_marking(struct iwl_mvm *mvm, int queue) } static int iwl_mvm_free_inactive_queue(struct iwl_mvm *mvm, int queue, + struct ieee80211_sta *old_sta, u8 new_sta_id) { struct iwl_mvm_sta *mvmsta; - u8 txq_curr_ac, sta_id, tid; + u8 sta_id, tid; unsigned long disable_agg_tids = 0; bool same_sta; int ret; @@ -554,7 +535,6 @@ static int iwl_mvm_free_inactive_queue(struct iwl_mvm *mvm, int queue, if (WARN_ON(iwl_mvm_has_new_tx_api(mvm))) return -EINVAL; - txq_curr_ac = mvm->queue_info[queue].mac80211_ac; sta_id = mvm->queue_info[queue].ra_sta_id; tid = mvm->queue_info[queue].txq_tid; @@ -570,9 +550,7 @@ static int iwl_mvm_free_inactive_queue(struct iwl_mvm *mvm, int queue, iwl_mvm_invalidate_sta_queue(mvm, queue, disable_agg_tids, false); - ret = iwl_mvm_disable_txq(mvm, queue, - mvmsta->vif->hw_queue[txq_curr_ac], - tid, 0); + ret = iwl_mvm_disable_txq(mvm, old_sta, queue, tid, 0); if (ret) { IWL_ERR(mvm, "Failed to free inactive queue %d (ret=%d)\n", @@ -662,16 +640,15 @@ static int iwl_mvm_get_shared_queue(struct iwl_mvm *mvm, * in such a case, otherwise - if no redirection required - it does nothing, * unless the %force param is true. */ -static int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, - int ac, int ssn, unsigned int wdg_timeout, - bool force) +static int iwl_mvm_redirect_queue(struct iwl_mvm *mvm, int queue, int tid, + int ac, int ssn, unsigned int wdg_timeout, + bool force, struct iwl_mvm_txq *txq) { struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, .action = SCD_CFG_DISABLE_QUEUE, }; bool shared_queue; - unsigned long mq; int ret; if (WARN_ON(iwl_mvm_has_new_tx_api(mvm))) @@ -695,14 +672,14 @@ static int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, cmd.sta_id = mvm->queue_info[queue].ra_sta_id; cmd.tx_fifo = iwl_mvm_ac_to_tx_fifo[mvm->queue_info[queue].mac80211_ac]; cmd.tid = mvm->queue_info[queue].txq_tid; - mq = mvm->hw_queue_to_mac80211[queue]; shared_queue = hweight16(mvm->queue_info[queue].tid_bitmap) > 1; IWL_DEBUG_TX_QUEUES(mvm, "Redirecting TXQ #%d to FIFO #%d\n", queue, iwl_mvm_ac_to_tx_fifo[ac]); - /* Stop MAC queues and wait for this queue to empty */ - iwl_mvm_stop_mac_queues(mvm, mq); + /* Stop the queue and wait for it to empty */ + txq->stopped = true; + ret = iwl_trans_wait_tx_queues_empty(mvm->trans, BIT(queue)); if (ret) { IWL_ERR(mvm, "Error draining queue %d before reconfig\n", @@ -743,8 +720,8 @@ static int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, iwl_trans_txq_set_shared_mode(mvm->trans, queue, true); out: - /* Continue using the MAC queues */ - iwl_mvm_start_mac_queues(mvm, mq); + /* Continue using the queue */ + txq->stopped = false; return ret; } @@ -769,7 +746,7 @@ static int iwl_mvm_find_free_queue(struct iwl_mvm *mvm, u8 sta_id, return -ENOSPC; } -static int iwl_mvm_tvqm_enable_txq(struct iwl_mvm *mvm, int mac80211_queue, +static int iwl_mvm_tvqm_enable_txq(struct iwl_mvm *mvm, u8 sta_id, u8 tid, unsigned int timeout) { int queue, size = IWL_DEFAULT_QUEUE_SIZE; @@ -792,10 +769,7 @@ static int iwl_mvm_tvqm_enable_txq(struct iwl_mvm *mvm, int mac80211_queue, IWL_DEBUG_TX_QUEUES(mvm, "Enabling TXQ #%d for sta %d tid %d\n", queue, sta_id, tid); - mvm->hw_queue_to_mac80211[queue] |= BIT(mac80211_queue); - IWL_DEBUG_TX_QUEUES(mvm, - "Enabling TXQ #%d (mac80211 map:0x%x)\n", - queue, mvm->hw_queue_to_mac80211[queue]); + IWL_DEBUG_TX_QUEUES(mvm, "Enabling TXQ #%d\n", queue); return queue; } @@ -805,9 +779,10 @@ static int iwl_mvm_sta_alloc_queue_tvqm(struct iwl_mvm *mvm, int tid) { struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); + struct iwl_mvm_txq *mvmtxq = + iwl_mvm_txq_from_tid(sta, tid); unsigned int wdg_timeout = iwl_mvm_get_wd_timeout(mvm, mvmsta->vif, false, false); - u8 mac_queue = mvmsta->vif->hw_queue[ac]; int queue = -1; lockdep_assert_held(&mvm->mutex); @@ -815,11 +790,16 @@ static int iwl_mvm_sta_alloc_queue_tvqm(struct iwl_mvm *mvm, IWL_DEBUG_TX_QUEUES(mvm, "Allocating queue for sta %d on tid %d\n", mvmsta->sta_id, tid); - queue = iwl_mvm_tvqm_enable_txq(mvm, mac_queue, mvmsta->sta_id, tid, - wdg_timeout); + queue = iwl_mvm_tvqm_enable_txq(mvm, mvmsta->sta_id, tid, wdg_timeout); if (queue < 0) return queue; + if (sta) { + mvmtxq->txq_id = queue; + mvm->tvqm_info[queue].txq_tid = tid; + mvm->tvqm_info[queue].sta_id = mvmsta->sta_id; + } + IWL_DEBUG_TX_QUEUES(mvm, "Allocated queue is %d\n", queue); spin_lock_bh(&mvmsta->lock); @@ -829,8 +809,9 @@ static int iwl_mvm_sta_alloc_queue_tvqm(struct iwl_mvm *mvm, return 0; } -static bool iwl_mvm_update_txq_mapping(struct iwl_mvm *mvm, int queue, - int mac80211_queue, u8 sta_id, u8 tid) +static bool iwl_mvm_update_txq_mapping(struct iwl_mvm *mvm, + struct ieee80211_sta *sta, + int queue, u8 sta_id, u8 tid) { bool enable_queue = true; @@ -845,14 +826,6 @@ static bool iwl_mvm_update_txq_mapping(struct iwl_mvm *mvm, int queue, if (mvm->queue_info[queue].tid_bitmap) enable_queue = false; - if (mac80211_queue != IEEE80211_INVAL_HW_QUEUE) { - WARN(mac80211_queue >= - BITS_PER_BYTE * sizeof(mvm->hw_queue_to_mac80211[0]), - "cannot track mac80211 queue %d (queue %d, sta %d, tid %d)\n", - mac80211_queue, queue, sta_id, tid); - mvm->hw_queue_to_mac80211[queue] |= BIT(mac80211_queue); - } - mvm->queue_info[queue].tid_bitmap |= BIT(tid); mvm->queue_info[queue].ra_sta_id = sta_id; @@ -866,16 +839,22 @@ static bool iwl_mvm_update_txq_mapping(struct iwl_mvm *mvm, int queue, mvm->queue_info[queue].txq_tid = tid; } + if (sta) { + struct iwl_mvm_txq *mvmtxq = + iwl_mvm_txq_from_tid(sta, tid); + + mvmtxq->txq_id = queue; + } + IWL_DEBUG_TX_QUEUES(mvm, - "Enabling TXQ #%d tids=0x%x (mac80211 map:0x%x)\n", - queue, mvm->queue_info[queue].tid_bitmap, - mvm->hw_queue_to_mac80211[queue]); + "Enabling TXQ #%d tids=0x%x\n", + queue, mvm->queue_info[queue].tid_bitmap); return enable_queue; } -static bool iwl_mvm_enable_txq(struct iwl_mvm *mvm, int queue, - int mac80211_queue, u16 ssn, +static bool iwl_mvm_enable_txq(struct iwl_mvm *mvm, struct ieee80211_sta *sta, + int queue, u16 ssn, const struct iwl_trans_txq_scd_cfg *cfg, unsigned int wdg_timeout) { @@ -895,8 +874,7 @@ static bool iwl_mvm_enable_txq(struct iwl_mvm *mvm, int queue, return false; /* Send the enabling command if we need to */ - if (!iwl_mvm_update_txq_mapping(mvm, queue, mac80211_queue, - cfg->sta_id, cfg->tid)) + if (!iwl_mvm_update_txq_mapping(mvm, sta, queue, cfg->sta_id, cfg->tid)) return false; inc_ssn = iwl_trans_txq_enable_cfg(mvm->trans, queue, ssn, @@ -989,9 +967,10 @@ static void iwl_mvm_unshare_queue(struct iwl_mvm *mvm, int queue) ssn = IEEE80211_SEQ_TO_SN(mvmsta->tid_data[tid].seq_number); - ret = iwl_mvm_scd_queue_redirect(mvm, queue, tid, - tid_to_mac80211_ac[tid], ssn, - wdg_timeout, true); + ret = iwl_mvm_redirect_queue(mvm, queue, tid, + tid_to_mac80211_ac[tid], ssn, + wdg_timeout, true, + iwl_mvm_txq_from_tid(sta, tid)); if (ret) { IWL_ERR(mvm, "Failed to redirect TXQ %d\n", queue); return; @@ -1068,11 +1047,9 @@ static bool iwl_mvm_remove_inactive_tids(struct iwl_mvm *mvm, * Remove the ones that did. */ for_each_set_bit(tid, &tid_bitmap, IWL_MAX_TID_COUNT + 1) { - int mac_queue = mvmsta->vif->hw_queue[tid_to_mac80211_ac[tid]]; u16 tid_bitmap; mvmsta->tid_data[tid].txq_id = IWL_MVM_INVALID_QUEUE; - mvm->hw_queue_to_mac80211[queue] &= ~BIT(mac_queue); mvm->queue_info[queue].tid_bitmap &= ~BIT(tid); tid_bitmap = mvm->queue_info[queue].tid_bitmap; @@ -1105,10 +1082,6 @@ static bool iwl_mvm_remove_inactive_tids(struct iwl_mvm *mvm, * sure all TIDs have existing corresponding mac queues enabled */ tid_bitmap = mvm->queue_info[queue].tid_bitmap; - for_each_set_bit(tid, &tid_bitmap, IWL_MAX_TID_COUNT + 1) { - mvm->hw_queue_to_mac80211[queue] |= - BIT(mvmsta->vif->hw_queue[tid_to_mac80211_ac[tid]]); - } /* If the queue is marked as shared - "unshare" it */ if (hweight16(mvm->queue_info[queue].tid_bitmap) == 1 && @@ -1136,6 +1109,7 @@ static int iwl_mvm_inactivity_check(struct iwl_mvm *mvm, u8 alloc_for_sta) unsigned long unshare_queues = 0; unsigned long changetid_queues = 0; int i, ret, free_queue = -ENOSPC; + struct ieee80211_sta *queue_owner = NULL; lockdep_assert_held(&mvm->mutex); @@ -1201,13 +1175,14 @@ static int iwl_mvm_inactivity_check(struct iwl_mvm *mvm, u8 alloc_for_sta) inactive_tid_bitmap, &unshare_queues, &changetid_queues); - if (ret >= 0 && free_queue < 0) + if (ret >= 0 && free_queue < 0) { + queue_owner = sta; free_queue = ret; + } /* only unlock sta lock - we still need the queue info lock */ spin_unlock_bh(&mvmsta->lock); } - rcu_read_unlock(); /* Reconfigure queues requiring reconfiguation */ for_each_set_bit(i, &unshare_queues, IWL_MAX_HW_QUEUES) @@ -1216,18 +1191,21 @@ static int iwl_mvm_inactivity_check(struct iwl_mvm *mvm, u8 alloc_for_sta) iwl_mvm_change_queue_tid(mvm, i); if (free_queue >= 0 && alloc_for_sta != IWL_MVM_INVALID_STA) { - ret = iwl_mvm_free_inactive_queue(mvm, free_queue, + ret = iwl_mvm_free_inactive_queue(mvm, free_queue, queue_owner, alloc_for_sta); - if (ret) + if (ret) { + rcu_read_unlock(); return ret; + } } + rcu_read_unlock(); + return free_queue; } static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, - struct ieee80211_sta *sta, u8 ac, int tid, - struct ieee80211_hdr *hdr) + struct ieee80211_sta *sta, u8 ac, int tid) { struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); struct iwl_trans_txq_scd_cfg cfg = { @@ -1238,7 +1216,6 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, }; unsigned int wdg_timeout = iwl_mvm_get_wd_timeout(mvm, mvmsta->vif, false, false); - u8 mac_queue = mvmsta->vif->hw_queue[ac]; int queue = -1; unsigned long disable_agg_tids = 0; enum iwl_mvm_agg_state queue_state; @@ -1257,12 +1234,7 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, ssn = IEEE80211_SEQ_TO_SN(mvmsta->tid_data[tid].seq_number); spin_unlock_bh(&mvmsta->lock); - /* - * Non-QoS, QoS NDP and MGMT frames should go to a MGMT queue, if one - * exists - */ - if (!ieee80211_is_data_qos(hdr->frame_control) || - ieee80211_is_qos_nullfunc(hdr->frame_control)) { + if (tid == IWL_MAX_TID_COUNT) { queue = iwl_mvm_find_free_queue(mvm, mvmsta->sta_id, IWL_MVM_DQA_MIN_MGMT_QUEUE, IWL_MVM_DQA_MAX_MGMT_QUEUE); @@ -1341,8 +1313,7 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, } } - inc_ssn = iwl_mvm_enable_txq(mvm, queue, mac_queue, - ssn, &cfg, wdg_timeout); + inc_ssn = iwl_mvm_enable_txq(mvm, sta, queue, ssn, &cfg, wdg_timeout); /* * Mark queue as shared in transport if shared @@ -1384,8 +1355,9 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, } } else { /* Redirect queue, if needed */ - ret = iwl_mvm_scd_queue_redirect(mvm, queue, tid, ac, ssn, - wdg_timeout, false); + ret = iwl_mvm_redirect_queue(mvm, queue, tid, ac, ssn, + wdg_timeout, false, + iwl_mvm_txq_from_tid(sta, tid)); if (ret) goto out_err; } @@ -1393,7 +1365,7 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, return 0; out_err: - iwl_mvm_disable_txq(mvm, queue, mac_queue, tid, 0); + iwl_mvm_disable_txq(mvm, sta, queue, tid, 0); return ret; } @@ -1406,87 +1378,34 @@ static inline u8 iwl_mvm_tid_to_ac_queue(int tid) return tid_to_mac80211_ac[tid]; } -static void iwl_mvm_tx_deferred_stream(struct iwl_mvm *mvm, - struct ieee80211_sta *sta, int tid) -{ - struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); - struct iwl_mvm_tid_data *tid_data = &mvmsta->tid_data[tid]; - struct sk_buff *skb; - struct ieee80211_hdr *hdr; - struct sk_buff_head deferred_tx; - u8 mac_queue; - bool no_queue = false; /* Marks if there is a problem with the queue */ - u8 ac; - - lockdep_assert_held(&mvm->mutex); - - skb = skb_peek(&tid_data->deferred_tx_frames); - if (!skb) - return; - hdr = (void *)skb->data; - - ac = iwl_mvm_tid_to_ac_queue(tid); - mac_queue = IEEE80211_SKB_CB(skb)->hw_queue; - - if (tid_data->txq_id == IWL_MVM_INVALID_QUEUE && - iwl_mvm_sta_alloc_queue(mvm, sta, ac, tid, hdr)) { - IWL_ERR(mvm, - "Can't alloc TXQ for sta %d tid %d - dropping frame\n", - mvmsta->sta_id, tid); - - /* - * Mark queue as problematic so later the deferred traffic is - * freed, as we can do nothing with it - */ - no_queue = true; - } - - __skb_queue_head_init(&deferred_tx); - - /* Disable bottom-halves when entering TX path */ - local_bh_disable(); - spin_lock(&mvmsta->lock); - skb_queue_splice_init(&tid_data->deferred_tx_frames, &deferred_tx); - mvmsta->deferred_traffic_tid_map &= ~BIT(tid); - spin_unlock(&mvmsta->lock); - - while ((skb = __skb_dequeue(&deferred_tx))) - if (no_queue || iwl_mvm_tx_skb(mvm, skb, sta)) - ieee80211_free_txskb(mvm->hw, skb); - local_bh_enable(); - - /* Wake queue */ - iwl_mvm_start_mac_queues(mvm, BIT(mac_queue)); -} - void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk) { struct iwl_mvm *mvm = container_of(wk, struct iwl_mvm, add_stream_wk); - struct ieee80211_sta *sta; - struct iwl_mvm_sta *mvmsta; - unsigned long deferred_tid_traffic; - int sta_id, tid; mutex_lock(&mvm->mutex); iwl_mvm_inactivity_check(mvm, IWL_MVM_INVALID_STA); - /* Go over all stations with deferred traffic */ - for_each_set_bit(sta_id, mvm->sta_deferred_frames, - IWL_MVM_STATION_COUNT) { - clear_bit(sta_id, mvm->sta_deferred_frames); - sta = rcu_dereference_protected(mvm->fw_id_to_mac_id[sta_id], - lockdep_is_held(&mvm->mutex)); - if (IS_ERR_OR_NULL(sta)) - continue; + while (!list_empty(&mvm->add_stream_txqs)) { + struct iwl_mvm_txq *mvmtxq; + struct ieee80211_txq *txq; + u8 tid; - mvmsta = iwl_mvm_sta_from_mac80211(sta); - deferred_tid_traffic = mvmsta->deferred_traffic_tid_map; + mvmtxq = list_first_entry(&mvm->add_stream_txqs, + struct iwl_mvm_txq, list); - for_each_set_bit(tid, &deferred_tid_traffic, - IWL_MAX_TID_COUNT + 1) - iwl_mvm_tx_deferred_stream(mvm, sta, tid); + txq = container_of((void *)mvmtxq, struct ieee80211_txq, + drv_priv); + tid = txq->tid; + if (tid == IEEE80211_NUM_TIDS) + tid = IWL_MAX_TID_COUNT; + + iwl_mvm_sta_alloc_queue(mvm, txq->sta, txq->ac, tid); + list_del_init(&mvmtxq->list); + local_bh_disable(); + iwl_mvm_mac_itxq_xmit(mvm->hw, txq); + local_bh_enable(); } mutex_unlock(&mvm->mutex); @@ -1542,10 +1461,11 @@ static int iwl_mvm_reserve_sta_stream(struct iwl_mvm *mvm, * Note that re-enabling aggregations isn't done in this function. */ static void iwl_mvm_realloc_queues_after_restart(struct iwl_mvm *mvm, - struct iwl_mvm_sta *mvm_sta) + struct ieee80211_sta *sta) { - unsigned int wdg_timeout = - iwl_mvm_get_wd_timeout(mvm, mvm_sta->vif, false, false); + struct iwl_mvm_sta *mvm_sta = iwl_mvm_sta_from_mac80211(sta); + unsigned int wdg = + iwl_mvm_get_wd_timeout(mvm, mvm_sta->vif, false, false); int i; struct iwl_trans_txq_scd_cfg cfg = { .sta_id = mvm_sta->sta_id, @@ -1561,23 +1481,18 @@ static void iwl_mvm_realloc_queues_after_restart(struct iwl_mvm *mvm, struct iwl_mvm_tid_data *tid_data = &mvm_sta->tid_data[i]; int txq_id = tid_data->txq_id; int ac; - u8 mac_queue; if (txq_id == IWL_MVM_INVALID_QUEUE) continue; - skb_queue_head_init(&tid_data->deferred_tx_frames); - ac = tid_to_mac80211_ac[i]; - mac_queue = mvm_sta->vif->hw_queue[ac]; if (iwl_mvm_has_new_tx_api(mvm)) { IWL_DEBUG_TX_QUEUES(mvm, "Re-mapping sta %d tid %d\n", mvm_sta->sta_id, i); - txq_id = iwl_mvm_tvqm_enable_txq(mvm, mac_queue, - mvm_sta->sta_id, - i, wdg_timeout); + txq_id = iwl_mvm_tvqm_enable_txq(mvm, mvm_sta->sta_id, + i, wdg); tid_data->txq_id = txq_id; /* @@ -1600,8 +1515,7 @@ static void iwl_mvm_realloc_queues_after_restart(struct iwl_mvm *mvm, "Re-mapping sta %d tid %d to queue %d\n", mvm_sta->sta_id, i, txq_id); - iwl_mvm_enable_txq(mvm, txq_id, mac_queue, seq, &cfg, - wdg_timeout); + iwl_mvm_enable_txq(mvm, sta, txq_id, seq, &cfg, wdg); mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_READY; } } @@ -1691,7 +1605,7 @@ int iwl_mvm_add_sta(struct iwl_mvm *mvm, if (ret) goto err; - iwl_mvm_realloc_queues_after_restart(mvm, mvm_sta); + iwl_mvm_realloc_queues_after_restart(mvm, sta); sta_update = true; sta_flags = iwl_mvm_has_new_tx_api(mvm) ? 0 : STA_MODIFY_QUEUES; goto update_fw; @@ -1724,9 +1638,17 @@ int iwl_mvm_add_sta(struct iwl_mvm *mvm, * frames until the queue is allocated */ mvm_sta->tid_data[i].txq_id = IWL_MVM_INVALID_QUEUE; - skb_queue_head_init(&mvm_sta->tid_data[i].deferred_tx_frames); } - mvm_sta->deferred_traffic_tid_map = 0; + + for (i = 0; i < ARRAY_SIZE(sta->txq); i++) { + struct iwl_mvm_txq *mvmtxq = + iwl_mvm_txq_from_mac80211(sta->txq[i]); + + mvmtxq->txq_id = IWL_MVM_INVALID_QUEUE; + INIT_LIST_HEAD(&mvmtxq->list); + spin_lock_init(&mvmtxq->tx_path_lock); + } + mvm_sta->agg_tids = 0; if (iwl_mvm_has_new_rx_api(mvm) && @@ -1861,9 +1783,9 @@ static int iwl_mvm_rm_sta_common(struct iwl_mvm *mvm, u8 sta_id) static void iwl_mvm_disable_sta_queues(struct iwl_mvm *mvm, struct ieee80211_vif *vif, - struct iwl_mvm_sta *mvm_sta) + struct ieee80211_sta *sta) { - int ac; + struct iwl_mvm_sta *mvm_sta = iwl_mvm_sta_from_mac80211(sta); int i; lockdep_assert_held(&mvm->mutex); @@ -1872,11 +1794,17 @@ static void iwl_mvm_disable_sta_queues(struct iwl_mvm *mvm, if (mvm_sta->tid_data[i].txq_id == IWL_MVM_INVALID_QUEUE) continue; - ac = iwl_mvm_tid_to_ac_queue(i); - iwl_mvm_disable_txq(mvm, mvm_sta->tid_data[i].txq_id, - vif->hw_queue[ac], i, 0); + iwl_mvm_disable_txq(mvm, sta, mvm_sta->tid_data[i].txq_id, i, + 0); mvm_sta->tid_data[i].txq_id = IWL_MVM_INVALID_QUEUE; } + + for (i = 0; i < ARRAY_SIZE(sta->txq); i++) { + struct iwl_mvm_txq *mvmtxq = + iwl_mvm_txq_from_mac80211(sta->txq[i]); + + mvmtxq->txq_id = IWL_MVM_INVALID_QUEUE; + } } int iwl_mvm_wait_sta_queues_empty(struct iwl_mvm *mvm, @@ -1938,7 +1866,7 @@ int iwl_mvm_rm_sta(struct iwl_mvm *mvm, ret = iwl_mvm_drain_sta(mvm, mvm_sta, false); - iwl_mvm_disable_sta_queues(mvm, vif, mvm_sta); + iwl_mvm_disable_sta_queues(mvm, vif, sta); /* If there is a TXQ still marked as reserved - free it */ if (mvm_sta->reserved_queue != IEEE80211_INVAL_HW_QUEUE) { @@ -2044,7 +1972,7 @@ static void iwl_mvm_enable_aux_snif_queue(struct iwl_mvm *mvm, u16 *queue, if (iwl_mvm_has_new_tx_api(mvm)) { int tvqm_queue = - iwl_mvm_tvqm_enable_txq(mvm, *queue, sta_id, + iwl_mvm_tvqm_enable_txq(mvm, sta_id, IWL_MAX_TID_COUNT, wdg_timeout); *queue = tvqm_queue; @@ -2057,7 +1985,7 @@ static void iwl_mvm_enable_aux_snif_queue(struct iwl_mvm *mvm, u16 *queue, .frame_limit = IWL_FRAME_LIMIT, }; - iwl_mvm_enable_txq(mvm, *queue, *queue, 0, &cfg, wdg_timeout); + iwl_mvm_enable_txq(mvm, NULL, *queue, 0, &cfg, wdg_timeout); } } @@ -2135,8 +2063,7 @@ int iwl_mvm_rm_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) lockdep_assert_held(&mvm->mutex); - iwl_mvm_disable_txq(mvm, mvm->snif_queue, mvm->snif_queue, - IWL_MAX_TID_COUNT, 0); + iwl_mvm_disable_txq(mvm, NULL, mvm->snif_queue, IWL_MAX_TID_COUNT, 0); ret = iwl_mvm_rm_sta_common(mvm, mvm->snif_sta.sta_id); if (ret) IWL_WARN(mvm, "Failed sending remove station\n"); @@ -2195,8 +2122,7 @@ int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) bsta->tfd_queue_msk |= BIT(queue); - iwl_mvm_enable_txq(mvm, queue, vif->hw_queue[0], 0, - &cfg, wdg_timeout); + iwl_mvm_enable_txq(mvm, NULL, queue, 0, &cfg, wdg_timeout); } if (vif->type == NL80211_IFTYPE_ADHOC) @@ -2215,8 +2141,7 @@ int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) * to firmware so enable queue here - after the station was added */ if (iwl_mvm_has_new_tx_api(mvm)) { - queue = iwl_mvm_tvqm_enable_txq(mvm, vif->hw_queue[0], - bsta->sta_id, + queue = iwl_mvm_tvqm_enable_txq(mvm, bsta->sta_id, IWL_MAX_TID_COUNT, wdg_timeout); @@ -2254,7 +2179,7 @@ static void iwl_mvm_free_bcast_sta_queues(struct iwl_mvm *mvm, return; } - iwl_mvm_disable_txq(mvm, queue, vif->hw_queue[0], IWL_MAX_TID_COUNT, 0); + iwl_mvm_disable_txq(mvm, NULL, queue, IWL_MAX_TID_COUNT, 0); if (iwl_mvm_has_new_tx_api(mvm)) return; @@ -2377,10 +2302,8 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) * Note that this is done here as we want to avoid making DQA * changes in mac80211 layer. */ - if (vif->type == NL80211_IFTYPE_ADHOC) { - vif->cab_queue = IWL_MVM_DQA_GCAST_QUEUE; - mvmvif->cab_queue = vif->cab_queue; - } + if (vif->type == NL80211_IFTYPE_ADHOC) + mvmvif->cab_queue = IWL_MVM_DQA_GCAST_QUEUE; /* * While in previous FWs we had to exclude cab queue from TFD queue @@ -2388,9 +2311,9 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) */ if (!iwl_mvm_has_new_tx_api(mvm) && fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_STA_TYPE)) { - iwl_mvm_enable_txq(mvm, vif->cab_queue, vif->cab_queue, 0, - &cfg, timeout); - msta->tfd_queue_msk |= BIT(vif->cab_queue); + iwl_mvm_enable_txq(mvm, NULL, mvmvif->cab_queue, 0, &cfg, + timeout); + msta->tfd_queue_msk |= BIT(mvmvif->cab_queue); } ret = iwl_mvm_add_int_sta_common(mvm, msta, maddr, mvmvif->id, mvmvif->color); @@ -2407,15 +2330,14 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) * tfd_queue_mask. */ if (iwl_mvm_has_new_tx_api(mvm)) { - int queue = iwl_mvm_tvqm_enable_txq(mvm, vif->cab_queue, - msta->sta_id, + int queue = iwl_mvm_tvqm_enable_txq(mvm, msta->sta_id, 0, timeout); mvmvif->cab_queue = queue; } else if (!fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_STA_TYPE)) - iwl_mvm_enable_txq(mvm, vif->cab_queue, vif->cab_queue, 0, - &cfg, timeout); + iwl_mvm_enable_txq(mvm, NULL, mvmvif->cab_queue, 0, &cfg, + timeout); if (mvmvif->ap_wep_key) { u8 key_offset = iwl_mvm_set_fw_key_idx(mvm); @@ -2446,8 +2368,7 @@ int iwl_mvm_rm_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) iwl_mvm_flush_sta(mvm, &mvmvif->mcast_sta, true, 0); - iwl_mvm_disable_txq(mvm, mvmvif->cab_queue, vif->cab_queue, - 0, 0); + iwl_mvm_disable_txq(mvm, NULL, mvmvif->cab_queue, 0, 0); ret = iwl_mvm_rm_sta_common(mvm, mvmvif->mcast_sta.sta_id); if (ret) @@ -2976,8 +2897,7 @@ int iwl_mvm_sta_tx_agg_oper(struct iwl_mvm *mvm, struct ieee80211_vif *vif, } if (alloc_queue) - iwl_mvm_enable_txq(mvm, queue, - vif->hw_queue[tid_to_mac80211_ac[tid]], ssn, + iwl_mvm_enable_txq(mvm, sta, queue, ssn, &cfg, wdg_timeout); /* Send ADD_STA command to enable aggs only if the queue isn't shared */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index d52cd888f77d..0614296244b3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -297,7 +297,6 @@ enum iwl_mvm_agg_state { /** * struct iwl_mvm_tid_data - holds the states for each RA / TID - * @deferred_tx_frames: deferred TX frames for this RA/TID * @seq_number: the next WiFi sequence number to use * @next_reclaimed: the WiFi sequence number of the next packet to be acked. * This is basically (last acked packet++). @@ -318,7 +317,6 @@ enum iwl_mvm_agg_state { * tpt_meas_start */ struct iwl_mvm_tid_data { - struct sk_buff_head deferred_tx_frames; u16 seq_number; u16 next_reclaimed; /* The rest is Tx AGG related */ @@ -427,8 +425,6 @@ struct iwl_mvm_sta { struct iwl_mvm_key_pn __rcu *ptk_pn[4]; struct iwl_mvm_rxq_dup_data *dup_data; - u16 deferred_traffic_tid_map; - u8 reserved_queue; /* Temporary, until the new TLC will control the Tx protection */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 995fe2a6abbb..dfdfaa9ecfdb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -602,11 +602,12 @@ static void iwl_mvm_skb_prepare_status(struct sk_buff *skb, } static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm, - struct ieee80211_tx_info *info, __le16 fc) + struct ieee80211_tx_info *info, + struct ieee80211_hdr *hdr) { - struct iwl_mvm_vif *mvmvif; - - mvmvif = iwl_mvm_vif_from_mac80211(info->control.vif); + struct iwl_mvm_vif *mvmvif = + iwl_mvm_vif_from_mac80211(info->control.vif); + __le16 fc = hdr->frame_control; switch (info->control.vif->type) { case NL80211_IFTYPE_AP: @@ -625,7 +626,9 @@ static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm, (!ieee80211_is_bufferable_mmpdu(fc) || ieee80211_is_deauth(fc) || ieee80211_is_disassoc(fc))) return mvm->probe_queue; - if (info->hw_queue == info->control.vif->cab_queue) + + if (!ieee80211_has_order(fc) && !ieee80211_is_probe_req(fc) && + is_multicast_ether_addr(hdr->addr1)) return mvmvif->cab_queue; WARN_ONCE(info->control.vif->type != NL80211_IFTYPE_ADHOC, @@ -634,8 +637,6 @@ static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm, case NL80211_IFTYPE_P2P_DEVICE: if (ieee80211_is_mgmt(fc)) return mvm->p2p_dev_queue; - if (info->hw_queue == info->control.vif->cab_queue) - return mvmvif->cab_queue; WARN_ON_ONCE(1); return mvm->p2p_dev_queue; @@ -713,6 +714,8 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) u8 sta_id; int hdrlen = ieee80211_hdrlen(hdr->frame_control); __le16 fc = hdr->frame_control; + bool offchannel = IEEE80211_SKB_CB(skb)->flags & + IEEE80211_TX_CTL_TX_OFFCHAN; int queue = -1; memcpy(&info, skb->cb, sizeof(info)); @@ -720,11 +723,6 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) if (WARN_ON_ONCE(info.flags & IEEE80211_TX_CTL_AMPDU)) return -1; - if (WARN_ON_ONCE(info.flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM && - (!info.control.vif || - info.hw_queue != info.control.vif->cab_queue))) - return -1; - if (info.control.vif) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(info.control.vif); @@ -737,14 +735,12 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) else sta_id = mvmvif->mcast_sta.sta_id; - queue = iwl_mvm_get_ctrl_vif_queue(mvm, &info, - hdr->frame_control); - + queue = iwl_mvm_get_ctrl_vif_queue(mvm, &info, hdr); } else if (info.control.vif->type == NL80211_IFTYPE_MONITOR) { queue = mvm->snif_queue; sta_id = mvm->snif_sta.sta_id; } else if (info.control.vif->type == NL80211_IFTYPE_STATION && - info.hw_queue == IWL_MVM_OFFCHANNEL_QUEUE) { + offchannel) { /* * IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets * that can be used in 2 different types of vifs, P2P & @@ -758,8 +754,10 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) } } - if (queue < 0) + if (queue < 0) { + IWL_ERR(mvm, "No queue was found. Dropping TX\n"); return -1; + } if (unlikely(ieee80211_is_probe_resp(fc))) iwl_mvm_probe_resp_set_noa(mvm, skb); @@ -1002,34 +1000,6 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb, } #endif -static void iwl_mvm_tx_add_stream(struct iwl_mvm *mvm, - struct iwl_mvm_sta *mvm_sta, u8 tid, - struct sk_buff *skb) -{ - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - u8 mac_queue = info->hw_queue; - struct sk_buff_head *deferred_tx_frames; - - lockdep_assert_held(&mvm_sta->lock); - - mvm_sta->deferred_traffic_tid_map |= BIT(tid); - set_bit(mvm_sta->sta_id, mvm->sta_deferred_frames); - - deferred_tx_frames = &mvm_sta->tid_data[tid].deferred_tx_frames; - - skb_queue_tail(deferred_tx_frames, skb); - - /* - * The first deferred frame should've stopped the MAC queues, so we - * should never get a second deferred frame for the RA/TID. - * In case of GSO the first packet may have been split, so don't warn. - */ - if (skb_queue_len(deferred_tx_frames) == 1) { - iwl_mvm_stop_mac_queues(mvm, BIT(mac_queue)); - schedule_work(&mvm->add_stream_wk); - } -} - /* Check if there are any timed-out TIDs on a given shared TXQ */ static bool iwl_mvm_txq_should_update(struct iwl_mvm *mvm, int txq_id) { @@ -1088,7 +1058,7 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, __le16 fc; u16 seq_number = 0; u8 tid = IWL_MAX_TID_COUNT; - u16 txq_id = info->hw_queue; + u16 txq_id; bool is_ampdu = false; int hdrlen; @@ -1152,14 +1122,7 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, WARN_ON_ONCE(info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM); - /* Check if TXQ needs to be allocated or re-activated */ - if (unlikely(txq_id == IWL_MVM_INVALID_QUEUE)) { - iwl_mvm_tx_add_stream(mvm, mvmsta, tid, skb); - - /* - * The frame is now deferred, and the worker scheduled - * will re-allocate it, so we can free it for now. - */ + if (WARN_ON_ONCE(txq_id == IWL_MVM_INVALID_QUEUE)) { iwl_trans_free_tx_cmd(mvm->trans, dev_cmd); spin_unlock(&mvmsta->lock); return 0; From 438af9698b0f161286c6e5d24255c3c231988b39 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 28 Aug 2018 10:08:35 +0300 Subject: [PATCH 0189/1436] iwlwifi: mvm: support mac80211 AMSDU Support getting mac80211 building AMSDUs for us. Remove GSO support from mvm - we don't need it anymore. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 33 +++ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 3 + .../net/wireless/intel/iwlwifi/mvm/rs-fw.c | 14 ++ drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 14 ++ drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 229 +----------------- 5 files changed, 67 insertions(+), 226 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 58bb92b0ef05..3b24cced3f6f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -440,6 +440,8 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) ieee80211_hw_set(hw, SUPPORTS_VHT_EXT_NSS_BW); ieee80211_hw_set(hw, BUFF_MMPDU_TXQ); ieee80211_hw_set(hw, STA_MMPDU_TXQ); + ieee80211_hw_set(hw, TX_AMSDU); + ieee80211_hw_set(hw, TX_FRAG_LIST); if (iwl_mvm_has_tlc_offload(mvm)) { ieee80211_hw_set(hw, TX_AMPDU_SETUP_IN_HW); @@ -485,6 +487,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) hw->uapsd_queues = IWL_MVM_UAPSD_QUEUES; hw->uapsd_max_sp_len = IWL_UAPSD_MAX_SP; + hw->max_tx_fragments = mvm->trans->max_skb_frags; BUILD_BUG_ON(ARRAY_SIZE(mvm->ciphers) < ARRAY_SIZE(mvm_ciphers) + 6); memcpy(mvm->ciphers, mvm_ciphers, sizeof(mvm_ciphers)); @@ -751,6 +754,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) } hw->netdev_features |= mvm->cfg->features; + hw->netdev_features &= ~(NETIF_F_TSO | NETIF_F_TSO6); if (!iwl_mvm_is_csum_supported(mvm)) { hw->netdev_features &= ~(IWL_TX_CSUM_NETIF_FLAGS | NETIF_F_RXCSUM); @@ -3035,6 +3039,8 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw, iwl_mvm_tdls_check_trigger(mvm, vif, sta->addr, NL80211_TDLS_SETUP); } + + sta->max_rc_amsdu_len = 1; } else if (old_state == IEEE80211_STA_NONE && new_state == IEEE80211_STA_AUTH) { /* @@ -4724,6 +4730,32 @@ static void iwl_mvm_sync_rx_queues(struct ieee80211_hw *hw) mutex_unlock(&mvm->mutex); } +static bool iwl_mvm_can_hw_csum(struct sk_buff *skb) +{ + u8 protocol = ip_hdr(skb)->protocol; + + if (!IS_ENABLED(CONFIG_INET)) + return false; + + return protocol == IPPROTO_TCP || protocol == IPPROTO_UDP; +} + +static bool iwl_mvm_mac_can_aggregate(struct ieee80211_hw *hw, + struct sk_buff *head, + struct sk_buff *skb) +{ + struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); + + /* For now don't aggregate IPv6 in AMSDU */ + if (skb->protocol != htons(ETH_P_IP)) + return false; + + if (!iwl_mvm_is_csum_supported(mvm)) + return true; + + return iwl_mvm_can_hw_csum(skb) == iwl_mvm_can_hw_csum(head); +} + const struct ieee80211_ops iwl_mvm_hw_ops = { .tx = iwl_mvm_mac_tx, .wake_tx_queue = iwl_mvm_mac_wake_tx_queue, @@ -4800,6 +4832,7 @@ const struct ieee80211_ops iwl_mvm_hw_ops = { #endif .get_survey = iwl_mvm_mac_get_survey, .sta_statistics = iwl_mvm_mac_sta_statistics, + .can_aggregate_in_amsdu = iwl_mvm_mac_can_aggregate, #ifdef CONFIG_IWLWIFI_DEBUGFS .sta_add_debugfs = iwl_mvm_sta_add_debugfs, #endif diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 39ee3ace59b6..503e51d32e7f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1504,6 +1504,9 @@ void iwl_mvm_set_tx_cmd_rate(struct iwl_mvm *mvm, struct iwl_tx_cmd *tx_cmd, struct ieee80211_tx_info *info, struct ieee80211_sta *sta, __le16 fc); void iwl_mvm_mac_itxq_xmit(struct ieee80211_hw *hw, struct ieee80211_txq *txq); +unsigned int iwl_mvm_max_amsdu_size(struct iwl_mvm *mvm, + struct ieee80211_sta *sta, + unsigned int tid); #ifdef CONFIG_IWLWIFI_DEBUG const char *iwl_mvm_get_tx_fail_reason(u32 status); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c index a0ea0c160fd6..a28283ff7295 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c @@ -315,12 +315,26 @@ void iwl_mvm_tlc_update_notif(struct iwl_mvm *mvm, if (flags & IWL_TLC_NOTIF_FLAG_AMSDU) { u16 size = le32_to_cpu(notif->amsdu_size); + int i; if (WARN_ON(sta->max_amsdu_len < size)) goto out; mvmsta->amsdu_enabled = le32_to_cpu(notif->amsdu_enabled); mvmsta->max_amsdu_len = size; + sta->max_rc_amsdu_len = mvmsta->max_amsdu_len; + + for (i = 0; i < IWL_MAX_TID_COUNT; i++) { + if (mvmsta->amsdu_enabled & BIT(i)) + sta->max_tid_amsdu_len[i] = + iwl_mvm_max_amsdu_size(mvm, sta, i); + else + /* + * Not so elegant, but this will effectively + * prevent AMSDU on this TID + */ + sta->max_tid_amsdu_len[i] = 1; + } IWL_DEBUG_RATE(mvm, "AMSDU update. AMSDU size: %d, AMSDU selected size: %d, AMSDU TID bitmap 0x%X\n", diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 089972280daa..9b5682ce4b39 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -1744,6 +1744,7 @@ static void rs_set_amsdu_len(struct iwl_mvm *mvm, struct ieee80211_sta *sta, enum rs_action scale_action) { struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); + int i; /* * In case TLC offload is not active amsdu_enabled is either 0xFFFF @@ -1757,6 +1758,19 @@ static void rs_set_amsdu_len(struct iwl_mvm *mvm, struct ieee80211_sta *sta, mvmsta->amsdu_enabled = 0xFFFF; mvmsta->max_amsdu_len = sta->max_amsdu_len; + sta->max_rc_amsdu_len = mvmsta->max_amsdu_len; + + for (i = 0; i < IWL_MAX_TID_COUNT; i++) { + if (mvmsta->amsdu_enabled) + sta->max_tid_amsdu_len[i] = + iwl_mvm_max_amsdu_size(mvm, sta, i); + else + /* + * Not so elegant, but this will effectively + * prevent AMSDU on this TID + */ + sta->max_tid_amsdu_len[i] = 1; + } } /* diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index dfdfaa9ecfdb..2f2c355c13b0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -779,78 +779,8 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) return 0; } -#ifdef CONFIG_INET - -static int -iwl_mvm_tx_tso_segment(struct sk_buff *skb, unsigned int num_subframes, - netdev_features_t netdev_flags, - struct sk_buff_head *mpdus_skb) -{ - struct sk_buff *tmp, *next; - struct ieee80211_hdr *hdr = (void *)skb->data; - char cb[sizeof(skb->cb)]; - u16 i = 0; - unsigned int tcp_payload_len; - unsigned int mss = skb_shinfo(skb)->gso_size; - bool ipv4 = (skb->protocol == htons(ETH_P_IP)); - u16 ip_base_id = ipv4 ? ntohs(ip_hdr(skb)->id) : 0; - - skb_shinfo(skb)->gso_size = num_subframes * mss; - memcpy(cb, skb->cb, sizeof(cb)); - - next = skb_gso_segment(skb, netdev_flags); - skb_shinfo(skb)->gso_size = mss; - if (WARN_ON_ONCE(IS_ERR(next))) - return -EINVAL; - else if (next) - consume_skb(skb); - - while (next) { - tmp = next; - next = tmp->next; - - memcpy(tmp->cb, cb, sizeof(tmp->cb)); - /* - * Compute the length of all the data added for the A-MSDU. - * This will be used to compute the length to write in the TX - * command. We have: SNAP + IP + TCP for n -1 subframes and - * ETH header for n subframes. - */ - tcp_payload_len = skb_tail_pointer(tmp) - - skb_transport_header(tmp) - - tcp_hdrlen(tmp) + tmp->data_len; - - if (ipv4) - ip_hdr(tmp)->id = htons(ip_base_id + i * num_subframes); - - if (tcp_payload_len > mss) { - skb_shinfo(tmp)->gso_size = mss; - } else { - if (ieee80211_is_data_qos(hdr->frame_control)) { - u8 *qc; - - if (ipv4) - ip_send_check(ip_hdr(tmp)); - - qc = ieee80211_get_qos_ctl((void *)tmp->data); - *qc &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT; - } - skb_shinfo(tmp)->gso_size = 0; - } - - tmp->prev = NULL; - tmp->next = NULL; - - __skb_queue_tail(mpdus_skb, tmp); - i++; - } - - return 0; -} - -static unsigned int iwl_mvm_max_amsdu_size(struct iwl_mvm *mvm, - struct ieee80211_sta *sta, - unsigned int tid) +unsigned int iwl_mvm_max_amsdu_size(struct iwl_mvm *mvm, + struct ieee80211_sta *sta, unsigned int tid) { struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); enum nl80211_band band = mvmsta->vif->bss_conf.chandef.chan->band; @@ -878,128 +808,6 @@ static unsigned int iwl_mvm_max_amsdu_size(struct iwl_mvm *mvm, mvm->fwrt.smem_cfg.lmac[lmac].txfifo_size[txf] - 256); } -static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb, - struct ieee80211_tx_info *info, - struct ieee80211_sta *sta, - struct sk_buff_head *mpdus_skb) -{ - struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); - struct ieee80211_hdr *hdr = (void *)skb->data; - unsigned int mss = skb_shinfo(skb)->gso_size; - unsigned int num_subframes, tcp_payload_len, subf_len, max_amsdu_len; - u16 snap_ip_tcp, pad; - unsigned int dbg_max_amsdu_len; - netdev_features_t netdev_flags = NETIF_F_CSUM_MASK | NETIF_F_SG; - u8 tid; - - snap_ip_tcp = 8 + skb_transport_header(skb) - skb_network_header(skb) + - tcp_hdrlen(skb); - - dbg_max_amsdu_len = READ_ONCE(mvm->max_amsdu_len); - - if (!mvmsta->max_amsdu_len || - !ieee80211_is_data_qos(hdr->frame_control) || - (!mvmsta->amsdu_enabled && !dbg_max_amsdu_len)) - return iwl_mvm_tx_tso_segment(skb, 1, netdev_flags, mpdus_skb); - - /* - * Do not build AMSDU for IPv6 with extension headers. - * ask stack to segment and checkum the generated MPDUs for us. - */ - if (skb->protocol == htons(ETH_P_IPV6) && - ((struct ipv6hdr *)skb_network_header(skb))->nexthdr != - IPPROTO_TCP) { - netdev_flags &= ~NETIF_F_CSUM_MASK; - return iwl_mvm_tx_tso_segment(skb, 1, netdev_flags, mpdus_skb); - } - - tid = ieee80211_get_tid(hdr); - if (WARN_ON_ONCE(tid >= IWL_MAX_TID_COUNT)) - return -EINVAL; - - /* - * No need to lock amsdu_in_ampdu_allowed since it can't be modified - * during an BA session. - */ - if (info->flags & IEEE80211_TX_CTL_AMPDU && - !mvmsta->tid_data[tid].amsdu_in_ampdu_allowed) - return iwl_mvm_tx_tso_segment(skb, 1, netdev_flags, mpdus_skb); - - if (iwl_mvm_vif_low_latency(iwl_mvm_vif_from_mac80211(mvmsta->vif)) || - !(mvmsta->amsdu_enabled & BIT(tid))) - return iwl_mvm_tx_tso_segment(skb, 1, netdev_flags, mpdus_skb); - - max_amsdu_len = iwl_mvm_max_amsdu_size(mvm, sta, tid); - - if (unlikely(dbg_max_amsdu_len)) - max_amsdu_len = min_t(unsigned int, max_amsdu_len, - dbg_max_amsdu_len); - - /* - * Limit A-MSDU in A-MPDU to 4095 bytes when VHT is not - * supported. This is a spec requirement (IEEE 802.11-2015 - * section 8.7.3 NOTE 3). - */ - if (info->flags & IEEE80211_TX_CTL_AMPDU && - !sta->vht_cap.vht_supported) - max_amsdu_len = min_t(unsigned int, max_amsdu_len, 4095); - - /* Sub frame header + SNAP + IP header + TCP header + MSS */ - subf_len = sizeof(struct ethhdr) + snap_ip_tcp + mss; - pad = (4 - subf_len) & 0x3; - - /* - * If we have N subframes in the A-MSDU, then the A-MSDU's size is - * N * subf_len + (N - 1) * pad. - */ - num_subframes = (max_amsdu_len + pad) / (subf_len + pad); - - if (sta->max_amsdu_subframes && - num_subframes > sta->max_amsdu_subframes) - num_subframes = sta->max_amsdu_subframes; - - tcp_payload_len = skb_tail_pointer(skb) - skb_transport_header(skb) - - tcp_hdrlen(skb) + skb->data_len; - - /* - * Make sure we have enough TBs for the A-MSDU: - * 2 for each subframe - * 1 more for each fragment - * 1 more for the potential data in the header - */ - if ((num_subframes * 2 + skb_shinfo(skb)->nr_frags + 1) > - mvm->trans->max_skb_frags) - num_subframes = 1; - - if (num_subframes > 1) - *ieee80211_get_qos_ctl(hdr) |= IEEE80211_QOS_CTL_A_MSDU_PRESENT; - - /* This skb fits in one single A-MSDU */ - if (num_subframes * mss >= tcp_payload_len) { - __skb_queue_tail(mpdus_skb, skb); - return 0; - } - - /* - * Trick the segmentation function to make it - * create SKBs that can fit into one A-MSDU. - */ - return iwl_mvm_tx_tso_segment(skb, num_subframes, netdev_flags, - mpdus_skb); -} -#else /* CONFIG_INET */ -static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb, - struct ieee80211_tx_info *info, - struct ieee80211_sta *sta, - struct sk_buff_head *mpdus_skb) -{ - /* Impossible to get TSO with CONFIG_INET */ - WARN_ON(1); - - return -1; -} -#endif - /* Check if there are any timed-out TIDs on a given shared TXQ */ static bool iwl_mvm_txq_should_update(struct iwl_mvm *mvm, int txq_id) { @@ -1178,9 +986,6 @@ int iwl_mvm_tx_skb(struct iwl_mvm *mvm, struct sk_buff *skb, { struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); struct ieee80211_tx_info info; - struct sk_buff_head mpdus_skbs; - unsigned int payload_len; - int ret; if (WARN_ON_ONCE(!mvmsta)) return -1; @@ -1190,35 +995,7 @@ int iwl_mvm_tx_skb(struct iwl_mvm *mvm, struct sk_buff *skb, memcpy(&info, skb->cb, sizeof(info)); - if (!skb_is_gso(skb)) - return iwl_mvm_tx_mpdu(mvm, skb, &info, sta); - - payload_len = skb_tail_pointer(skb) - skb_transport_header(skb) - - tcp_hdrlen(skb) + skb->data_len; - - if (payload_len <= skb_shinfo(skb)->gso_size) - return iwl_mvm_tx_mpdu(mvm, skb, &info, sta); - - __skb_queue_head_init(&mpdus_skbs); - - ret = iwl_mvm_tx_tso(mvm, skb, &info, sta, &mpdus_skbs); - if (ret) - return ret; - - if (WARN_ON(skb_queue_empty(&mpdus_skbs))) - return ret; - - while (!skb_queue_empty(&mpdus_skbs)) { - skb = __skb_dequeue(&mpdus_skbs); - - ret = iwl_mvm_tx_mpdu(mvm, skb, &info, sta); - if (ret) { - __skb_queue_purge(&mpdus_skbs); - return ret; - } - } - - return 0; + return iwl_mvm_tx_mpdu(mvm, skb, &info, sta); } static void iwl_mvm_check_ratid_empty(struct iwl_mvm *mvm, From e7eed19a28f095d0b5d94992fd6b5d0c5bdbb637 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 17 Oct 2018 09:04:09 +0300 Subject: [PATCH 0190/1436] iwlwifi: mvm: fix values in the table example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We erroneously had some values for NGI in the table we give as an example in rs_fill_rates_for_column(), when they should be SGI. Change them so that they match what we say. Reported-by: Rémy Grünblatt Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 9b5682ce4b39..09866f50857a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -3346,12 +3346,12 @@ static void rs_fill_rates_for_column(struct iwl_mvm *mvm, /* Building the rate table is non trivial. When we're in MIMO2/VHT/80Mhz/SGI * column the rate table should look like this: * - * rate[0] 0x400D019 VHT | ANT: AB BW: 80Mhz MCS: 9 NSS: 2 SGI - * rate[1] 0x400D019 VHT | ANT: AB BW: 80Mhz MCS: 9 NSS: 2 SGI - * rate[2] 0x400D018 VHT | ANT: AB BW: 80Mhz MCS: 8 NSS: 2 SGI - * rate[3] 0x400D018 VHT | ANT: AB BW: 80Mhz MCS: 8 NSS: 2 SGI - * rate[4] 0x400D017 VHT | ANT: AB BW: 80Mhz MCS: 7 NSS: 2 SGI - * rate[5] 0x400D017 VHT | ANT: AB BW: 80Mhz MCS: 7 NSS: 2 SGI + * rate[0] 0x400F019 VHT | ANT: AB BW: 80Mhz MCS: 9 NSS: 2 SGI + * rate[1] 0x400F019 VHT | ANT: AB BW: 80Mhz MCS: 9 NSS: 2 SGI + * rate[2] 0x400F018 VHT | ANT: AB BW: 80Mhz MCS: 8 NSS: 2 SGI + * rate[3] 0x400F018 VHT | ANT: AB BW: 80Mhz MCS: 8 NSS: 2 SGI + * rate[4] 0x400F017 VHT | ANT: AB BW: 80Mhz MCS: 7 NSS: 2 SGI + * rate[5] 0x400F017 VHT | ANT: AB BW: 80Mhz MCS: 7 NSS: 2 SGI * rate[6] 0x4005007 VHT | ANT: A BW: 80Mhz MCS: 7 NSS: 1 NGI * rate[7] 0x4009006 VHT | ANT: B BW: 80Mhz MCS: 6 NSS: 1 NGI * rate[8] 0x4005005 VHT | ANT: A BW: 80Mhz MCS: 5 NSS: 1 NGI From af84282e2b48143894383d4d6d7812d72e292be9 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 5 Oct 2018 09:02:49 +0000 Subject: [PATCH 0191/1436] iwlwifi: use kmemdup in iwl_parse_nvm_mcc_info() Use kmemdup rather than duplicating its implementation in iwl_parse_nvm_mcc_info(). Signed-off-by: YueHaibing Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 173ade96f119..484ef4556953 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -1195,14 +1195,12 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, regd_to_copy = sizeof(struct ieee80211_regdomain) + valid_rules * sizeof(struct ieee80211_reg_rule); - copy_rd = kzalloc(regd_to_copy, GFP_KERNEL); + copy_rd = kmemdup(regd, regd_to_copy, GFP_KERNEL); if (!copy_rd) { copy_rd = ERR_PTR(-ENOMEM); goto out; } - memcpy(copy_rd, regd, regd_to_copy); - out: kfree(regdb_ptrs); kfree(regd); From d6f9c7721064d99112ce56590a9a3fafe8ffc209 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 11 Oct 2018 10:57:57 +0100 Subject: [PATCH 0192/1436] iwlwifi: fix spelling mistake "registrating" -> "registering" Trivial fix to spelling mistake in IWL_ERR error message Signed-off-by: Colin Ian King Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/dvm/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/main.c b/drivers/net/wireless/intel/iwlwifi/dvm/main.c index c219bca5cff4..bd3c3b921d4c 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/main.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/main.c @@ -1054,7 +1054,7 @@ static void iwl_bg_restart(struct work_struct *data) ieee80211_restart_hw(priv->hw); else IWL_ERR(priv, - "Cannot request restart before registrating with mac80211\n"); + "Cannot request restart before registering with mac80211\n"); } else { WARN_ON(1); } From 08f7d8b69aaf137db8ee0a2d7c9e6cd6383ae250 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 18 Oct 2018 11:03:06 +0300 Subject: [PATCH 0193/1436] iwlwifi: mvm: bring back mvm GSO code We have a slightly better TCP performance with GSO. Add it back, it can co-exist with the code that builds AMSDUs in mac80211. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 1 - drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 224 +++++++++++++++++- 2 files changed, 223 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 3b24cced3f6f..ebb3814dd922 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -754,7 +754,6 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) } hw->netdev_features |= mvm->cfg->features; - hw->netdev_features &= ~(NETIF_F_TSO | NETIF_F_TSO6); if (!iwl_mvm_is_csum_supported(mvm)) { hw->netdev_features &= ~(IWL_TX_CSUM_NETIF_FLAGS | NETIF_F_RXCSUM); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 2f2c355c13b0..da8b69c9468c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -808,6 +808,197 @@ unsigned int iwl_mvm_max_amsdu_size(struct iwl_mvm *mvm, mvm->fwrt.smem_cfg.lmac[lmac].txfifo_size[txf] - 256); } +#ifdef CONFIG_INET + +static int +iwl_mvm_tx_tso_segment(struct sk_buff *skb, unsigned int num_subframes, + netdev_features_t netdev_flags, + struct sk_buff_head *mpdus_skb) +{ + struct sk_buff *tmp, *next; + struct ieee80211_hdr *hdr = (void *)skb->data; + char cb[sizeof(skb->cb)]; + u16 i = 0; + unsigned int tcp_payload_len; + unsigned int mss = skb_shinfo(skb)->gso_size; + bool ipv4 = (skb->protocol == htons(ETH_P_IP)); + u16 ip_base_id = ipv4 ? ntohs(ip_hdr(skb)->id) : 0; + + skb_shinfo(skb)->gso_size = num_subframes * mss; + memcpy(cb, skb->cb, sizeof(cb)); + + next = skb_gso_segment(skb, netdev_flags); + skb_shinfo(skb)->gso_size = mss; + if (WARN_ON_ONCE(IS_ERR(next))) + return -EINVAL; + else if (next) + consume_skb(skb); + + while (next) { + tmp = next; + next = tmp->next; + + memcpy(tmp->cb, cb, sizeof(tmp->cb)); + /* + * Compute the length of all the data added for the A-MSDU. + * This will be used to compute the length to write in the TX + * command. We have: SNAP + IP + TCP for n -1 subframes and + * ETH header for n subframes. + */ + tcp_payload_len = skb_tail_pointer(tmp) - + skb_transport_header(tmp) - + tcp_hdrlen(tmp) + tmp->data_len; + + if (ipv4) + ip_hdr(tmp)->id = htons(ip_base_id + i * num_subframes); + + if (tcp_payload_len > mss) { + skb_shinfo(tmp)->gso_size = mss; + } else { + if (ieee80211_is_data_qos(hdr->frame_control)) { + u8 *qc; + + if (ipv4) + ip_send_check(ip_hdr(tmp)); + + qc = ieee80211_get_qos_ctl((void *)tmp->data); + *qc &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT; + } + skb_shinfo(tmp)->gso_size = 0; + } + + tmp->prev = NULL; + tmp->next = NULL; + + __skb_queue_tail(mpdus_skb, tmp); + i++; + } + + return 0; +} + +static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb, + struct ieee80211_tx_info *info, + struct ieee80211_sta *sta, + struct sk_buff_head *mpdus_skb) +{ + struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); + struct ieee80211_hdr *hdr = (void *)skb->data; + unsigned int mss = skb_shinfo(skb)->gso_size; + unsigned int num_subframes, tcp_payload_len, subf_len, max_amsdu_len; + u16 snap_ip_tcp, pad; + unsigned int dbg_max_amsdu_len; + netdev_features_t netdev_flags = NETIF_F_CSUM_MASK | NETIF_F_SG; + u8 tid; + + snap_ip_tcp = 8 + skb_transport_header(skb) - skb_network_header(skb) + + tcp_hdrlen(skb); + + dbg_max_amsdu_len = READ_ONCE(mvm->max_amsdu_len); + + if (!mvmsta->max_amsdu_len || + !ieee80211_is_data_qos(hdr->frame_control) || + (!mvmsta->amsdu_enabled && !dbg_max_amsdu_len)) + return iwl_mvm_tx_tso_segment(skb, 1, netdev_flags, mpdus_skb); + + /* + * Do not build AMSDU for IPv6 with extension headers. + * ask stack to segment and checkum the generated MPDUs for us. + */ + if (skb->protocol == htons(ETH_P_IPV6) && + ((struct ipv6hdr *)skb_network_header(skb))->nexthdr != + IPPROTO_TCP) { + netdev_flags &= ~NETIF_F_CSUM_MASK; + return iwl_mvm_tx_tso_segment(skb, 1, netdev_flags, mpdus_skb); + } + + tid = ieee80211_get_tid(hdr); + if (WARN_ON_ONCE(tid >= IWL_MAX_TID_COUNT)) + return -EINVAL; + + /* + * No need to lock amsdu_in_ampdu_allowed since it can't be modified + * during an BA session. + */ + if (info->flags & IEEE80211_TX_CTL_AMPDU && + !mvmsta->tid_data[tid].amsdu_in_ampdu_allowed) + return iwl_mvm_tx_tso_segment(skb, 1, netdev_flags, mpdus_skb); + + if (iwl_mvm_vif_low_latency(iwl_mvm_vif_from_mac80211(mvmsta->vif)) || + !(mvmsta->amsdu_enabled & BIT(tid))) + return iwl_mvm_tx_tso_segment(skb, 1, netdev_flags, mpdus_skb); + + max_amsdu_len = iwl_mvm_max_amsdu_size(mvm, sta, tid); + + if (unlikely(dbg_max_amsdu_len)) + max_amsdu_len = min_t(unsigned int, max_amsdu_len, + dbg_max_amsdu_len); + + /* + * Limit A-MSDU in A-MPDU to 4095 bytes when VHT is not + * supported. This is a spec requirement (IEEE 802.11-2015 + * section 8.7.3 NOTE 3). + */ + if (info->flags & IEEE80211_TX_CTL_AMPDU && + !sta->vht_cap.vht_supported) + max_amsdu_len = min_t(unsigned int, max_amsdu_len, 4095); + + /* Sub frame header + SNAP + IP header + TCP header + MSS */ + subf_len = sizeof(struct ethhdr) + snap_ip_tcp + mss; + pad = (4 - subf_len) & 0x3; + + /* + * If we have N subframes in the A-MSDU, then the A-MSDU's size is + * N * subf_len + (N - 1) * pad. + */ + num_subframes = (max_amsdu_len + pad) / (subf_len + pad); + + if (sta->max_amsdu_subframes && + num_subframes > sta->max_amsdu_subframes) + num_subframes = sta->max_amsdu_subframes; + + tcp_payload_len = skb_tail_pointer(skb) - skb_transport_header(skb) - + tcp_hdrlen(skb) + skb->data_len; + + /* + * Make sure we have enough TBs for the A-MSDU: + * 2 for each subframe + * 1 more for each fragment + * 1 more for the potential data in the header + */ + if ((num_subframes * 2 + skb_shinfo(skb)->nr_frags + 1) > + mvm->trans->max_skb_frags) + num_subframes = 1; + + if (num_subframes > 1) + *ieee80211_get_qos_ctl(hdr) |= IEEE80211_QOS_CTL_A_MSDU_PRESENT; + + /* This skb fits in one single A-MSDU */ + if (num_subframes * mss >= tcp_payload_len) { + __skb_queue_tail(mpdus_skb, skb); + return 0; + } + + /* + * Trick the segmentation function to make it + * create SKBs that can fit into one A-MSDU. + */ + return iwl_mvm_tx_tso_segment(skb, num_subframes, netdev_flags, + mpdus_skb); +} +#else /* CONFIG_INET */ +static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb, + struct ieee80211_tx_info *info, + struct ieee80211_sta *sta, + struct sk_buff_head *mpdus_skb) +{ + /* Impossible to get TSO with CONFIG_INET */ + WARN_ON(1); + + return -1; +} +#endif + /* Check if there are any timed-out TIDs on a given shared TXQ */ static bool iwl_mvm_txq_should_update(struct iwl_mvm *mvm, int txq_id) { @@ -986,6 +1177,9 @@ int iwl_mvm_tx_skb(struct iwl_mvm *mvm, struct sk_buff *skb, { struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); struct ieee80211_tx_info info; + struct sk_buff_head mpdus_skbs; + unsigned int payload_len; + int ret; if (WARN_ON_ONCE(!mvmsta)) return -1; @@ -995,7 +1189,35 @@ int iwl_mvm_tx_skb(struct iwl_mvm *mvm, struct sk_buff *skb, memcpy(&info, skb->cb, sizeof(info)); - return iwl_mvm_tx_mpdu(mvm, skb, &info, sta); + if (!skb_is_gso(skb)) + return iwl_mvm_tx_mpdu(mvm, skb, &info, sta); + + payload_len = skb_tail_pointer(skb) - skb_transport_header(skb) - + tcp_hdrlen(skb) + skb->data_len; + + if (payload_len <= skb_shinfo(skb)->gso_size) + return iwl_mvm_tx_mpdu(mvm, skb, &info, sta); + + __skb_queue_head_init(&mpdus_skbs); + + ret = iwl_mvm_tx_tso(mvm, skb, &info, sta, &mpdus_skbs); + if (ret) + return ret; + + if (WARN_ON(skb_queue_empty(&mpdus_skbs))) + return ret; + + while (!skb_queue_empty(&mpdus_skbs)) { + skb = __skb_dequeue(&mpdus_skbs); + + ret = iwl_mvm_tx_mpdu(mvm, skb, &info, sta); + if (ret) { + __skb_queue_purge(&mpdus_skbs); + return ret; + } + } + + return 0; } static void iwl_mvm_check_ratid_empty(struct iwl_mvm *mvm, From 4f1e85f0b8e270351c709de90d17a96b9f6e16a4 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Tue, 16 Oct 2018 15:29:45 +0300 Subject: [PATCH 0194/1436] iwlwifi: mvm: Flush transmit queues on P2P Device ROC done When a time event for a P2P Device interface is done, it is possible that there is still a frame pending for transmission that should be flushed. Set the IWL_MVM_STATUS_NEED_FLUSH_P2P to indicate to the ROC worker that P2P Device station queue need also to be flushed. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/time-event.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index e1a6f4e22253..9fa1a36dcda3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -334,6 +334,7 @@ static void iwl_mvm_te_handle_notif(struct iwl_mvm *mvm, switch (te_data->vif->type) { case NL80211_IFTYPE_P2P_DEVICE: ieee80211_remain_on_channel_expired(mvm->hw); + set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status); iwl_mvm_roc_finished(mvm); break; case NL80211_IFTYPE_STATION: From 055b22e770dd073d87bbb6416596456b2a4d708c Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 17 Oct 2018 14:06:19 +0300 Subject: [PATCH 0195/1436] iwlwifi: mvm: Set Tx rate and flags when there is not station When a frame is transmitted without a station, need to set the rate and flags in the Tx command, as the FW does not have any information as to what rate and flags should be used for this frame. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index da8b69c9468c..2adef6e3e0ac 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -533,10 +533,11 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb, /* * For data packets rate info comes from the fw. Only - * set rate/antenna during connection establishment. + * set rate/antenna during connection establishment or in case + * no station is given. */ - if (sta && (!ieee80211_is_data(hdr->frame_control) || - mvmsta->sta_state < IEEE80211_STA_AUTHORIZED)) { + if (!sta || !ieee80211_is_data(hdr->frame_control) || + mvmsta->sta_state < IEEE80211_STA_AUTHORIZED) { flags |= IWL_TX_FLAGS_CMD_RATE; rate_n_flags = iwl_mvm_get_tx_rate_n_flags(mvm, info, sta, From 8dd2cea8b65012003234b97c7f3dfaa61a3b4bd8 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 17 Oct 2018 16:51:59 +0300 Subject: [PATCH 0196/1436] iwlwifi: mvm: Do not set RTS/CTS protection for P2P Device MAC As this is not needed and might cause interoperability issues during pairing with devices that would not reply to RTS frames. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index a79c79701cc4..11714eb8da5e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -793,8 +793,6 @@ static int iwl_mvm_mac_ctxt_cmd_p2p_device(struct iwl_mvm *mvm, iwl_mvm_mac_ctxt_cmd_common(mvm, vif, &cmd, NULL, action); - cmd.protection_flags |= cpu_to_le32(MAC_PROT_FLG_TGG_PROTECT); - /* Override the filter flags to accept only probe requests */ cmd.filter_flags = cpu_to_le32(MAC_FILTER_IN_PROBE_REQUEST); From a74231ba4b57d54244c04cb59f61a1554bf015a7 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Mon, 8 Oct 2018 13:37:43 +0300 Subject: [PATCH 0197/1436] iwlwifi: update hcmds documentation A few commands refer to a struct that no longer exists in the mentioned name. Our trace-cmd parsing scripts rely on these mentioned names and can't find them, resulting in these commands not being parsed nicely. Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/api/commands.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h index 8b4922bbe139..89c0cfddcc3f 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h @@ -415,7 +415,11 @@ enum iwl_legacy_cmds { TX_ANT_CONFIGURATION_CMD = 0x98, /** - * @STATISTICS_CMD: &struct iwl_statistics_cmd + * @STATISTICS_CMD: + * one of &struct iwl_statistics_cmd, + * &struct iwl_notif_statistics_v11, + * &struct iwl_notif_statistics_v10, + * &struct iwl_notif_statistics */ STATISTICS_CMD = 0x9c, @@ -423,7 +427,7 @@ enum iwl_legacy_cmds { * @STATISTICS_NOTIFICATION: * one of &struct iwl_notif_statistics_v10, * &struct iwl_notif_statistics_v11, - * &struct iwl_notif_statistics_cdb + * &struct iwl_notif_statistics */ STATISTICS_NOTIFICATION = 0x9d, From 45dc7ba4b423cbec14fccbb17c884614aa2006f8 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Sun, 21 Oct 2018 16:04:58 +0300 Subject: [PATCH 0198/1436] iwlwifi: mvm: make num_active_macs unsigned There is no point in having num_active_macs signed since it should never be negative. Set it to be an unsigned variable to ensure this. Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sf.c b/drivers/net/wireless/intel/iwlwifi/mvm/sf.c index d1d76bb9a750..9da0dae78510 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sf.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sf.c @@ -7,6 +7,7 @@ * * Copyright(c) 2013 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright (C) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -28,6 +29,7 @@ * * Copyright(c) 2013 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright (C) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -64,7 +66,7 @@ struct iwl_mvm_active_iface_iterator_data { struct ieee80211_vif *ignore_vif; u8 sta_vif_ap_sta_id; enum iwl_sf_state sta_vif_state; - int num_active_macs; + u32 num_active_macs; }; /* From 162b22c93e66530405d793a830f75da3fc48ed31 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Sun, 21 Oct 2018 14:39:05 +0300 Subject: [PATCH 0199/1436] iwlwifi: tighten boundary checks The driver assumes certain sizes and lengths aren't crossed in some places. Make sure this indeed happens. Found by Klocwork. Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 2 ++ drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 24 +++++++++++++++---- .../net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 6 ++++- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 5f16879ab26a..56e99b5661f7 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -835,6 +835,8 @@ _iwl_fw_error_dump(struct iwl_fw_runtime *fwrt, if (!fwrt->trans->cfg->dccm_offset || !fwrt->trans->cfg->dccm_len) { const struct fw_img *img; + if (fwrt->cur_fw_img >= IWL_UCODE_TYPE_MAX) + return NULL; img = &fwrt->fw->img[fwrt->cur_fw_img]; sram_ofs = img->sec[IWL_UCODE_SECTION_DATA].offset; sram_len = img->sec[IWL_UCODE_SECTION_DATA].len; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 2adef6e3e0ac..ac62eb8c4b36 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1024,7 +1024,12 @@ static void iwl_mvm_tx_airtime(struct iwl_mvm *mvm, int airtime) { int mac = mvmsta->mac_id_n_color & FW_CTXT_ID_MSK; - struct iwl_mvm_tcm_mac *mdata = &mvm->tcm.data[mac]; + struct iwl_mvm_tcm_mac *mdata; + + if (mac >= NUM_MAC_INDEX_DRIVER) + return; + + mdata = &mvm->tcm.data[mac]; if (mvm->tcm.paused) return; @@ -1035,14 +1040,21 @@ static void iwl_mvm_tx_airtime(struct iwl_mvm *mvm, mdata->tx.airtime += airtime; } -static void iwl_mvm_tx_pkt_queued(struct iwl_mvm *mvm, - struct iwl_mvm_sta *mvmsta, int tid) +static int iwl_mvm_tx_pkt_queued(struct iwl_mvm *mvm, + struct iwl_mvm_sta *mvmsta, int tid) { u32 ac = tid_to_mac80211_ac[tid]; int mac = mvmsta->mac_id_n_color & FW_CTXT_ID_MSK; - struct iwl_mvm_tcm_mac *mdata = &mvm->tcm.data[mac]; + struct iwl_mvm_tcm_mac *mdata; + + if (mac >= NUM_MAC_INDEX_DRIVER) + return -EINVAL; + + mdata = &mvm->tcm.data[mac]; mdata->tx.pkts[ac]++; + + return 0; } /* @@ -1162,7 +1174,9 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, spin_unlock(&mvmsta->lock); - iwl_mvm_tx_pkt_queued(mvm, mvmsta, tid == IWL_MAX_TID_COUNT ? 0 : tid); + if (iwl_mvm_tx_pkt_queued(mvm, mvmsta, + tid == IWL_MAX_TID_COUNT ? 0 : tid)) + goto drop; return 0; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 156ca1b1f621..af2791502b7d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -214,7 +214,11 @@ static int iwl_pcie_gen2_set_tb(struct iwl_trans *trans, { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); int idx = iwl_pcie_gen2_get_num_tbs(trans, tfd); - struct iwl_tfh_tb *tb = &tfd->tbs[idx]; + struct iwl_tfh_tb *tb; + + if (WARN_ON(idx >= IWL_NUM_OF_TBS)) + return -EINVAL; + tb = &tfd->tbs[idx]; /* Each TFD can point to a maximum max_tbs Tx buffers */ if (le16_to_cpu(tfd->num_tbs) >= trans_pcie->max_tbs) { From c1f33442976b52676aa8b5ee9140b0b85bb4bd75 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Mon, 22 Oct 2018 10:12:28 +0300 Subject: [PATCH 0200/1436] iwlwifi: memcpy from dev_cmd and not dev_cmd->hdr Klocwork complains about copying from dev_cmd->hdr if copying more than 4 bytes since it means part of the copy is from the next field. This isn't a real bug, but for not failing Klocwork next time - fix this. Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 4 ++-- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index af2791502b7d..d2bd7f528a20 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -412,7 +412,7 @@ iwl_tfh_tfd *iwl_pcie_gen2_build_tx_amsdu(struct iwl_trans *trans, goto out_err; /* building the A-MSDU might have changed this data, memcpy it now */ - memcpy(&txq->first_tb_bufs[idx], &dev_cmd->hdr, IWL_FIRST_TB_SIZE); + memcpy(&txq->first_tb_bufs[idx], dev_cmd, IWL_FIRST_TB_SIZE); return tfd; out_err: @@ -473,7 +473,7 @@ iwl_tfh_tfd *iwl_pcie_gen2_build_tx(struct iwl_trans *trans, tb_phys = iwl_pcie_get_first_tb_dma(txq, idx); /* The first TB points to bi-directional DMA data */ - memcpy(&txq->first_tb_bufs[idx], &dev_cmd->hdr, IWL_FIRST_TB_SIZE); + memcpy(&txq->first_tb_bufs[idx], dev_cmd, IWL_FIRST_TB_SIZE); iwl_pcie_gen2_set_tb(trans, tfd, tb_phys, IWL_FIRST_TB_SIZE); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index ee990a7a5411..7073116c73b7 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -2438,8 +2438,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, } /* building the A-MSDU might have changed this data, so memcpy it now */ - memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr, - IWL_FIRST_TB_SIZE); + memcpy(&txq->first_tb_bufs[txq->write_ptr], dev_cmd, IWL_FIRST_TB_SIZE); tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr); /* Set up entry for this TFD in Tx byte-count array */ From b0d795a9ae558209656b18930c2b4def5f8fdfb8 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Sun, 21 Oct 2018 18:27:26 +0300 Subject: [PATCH 0201/1436] iwlwifi: mvm: avoid possible access out of array. The value in txq_id can be out of array scope, validate it before accessing the array. Signed-off-by: Mordechay Goodstein Fixes: cf961e16620f ("iwlwifi: mvm: support dqa-mode agg on non-shared queue") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 09f294c466da..5f42897dcd55 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2702,7 +2702,7 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); struct iwl_mvm_tid_data *tid_data; u16 normalized_ssn; - int txq_id; + u16 txq_id; int ret; if (WARN_ON_ONCE(tid >= IWL_MAX_TID_COUNT)) @@ -2744,17 +2744,24 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, */ txq_id = mvmsta->tid_data[tid].txq_id; if (txq_id == IWL_MVM_INVALID_QUEUE) { - txq_id = iwl_mvm_find_free_queue(mvm, mvmsta->sta_id, - IWL_MVM_DQA_MIN_DATA_QUEUE, - IWL_MVM_DQA_MAX_DATA_QUEUE); - if (txq_id < 0) { - ret = txq_id; + ret = iwl_mvm_find_free_queue(mvm, mvmsta->sta_id, + IWL_MVM_DQA_MIN_DATA_QUEUE, + IWL_MVM_DQA_MAX_DATA_QUEUE); + if (ret < 0) { IWL_ERR(mvm, "Failed to allocate agg queue\n"); goto out; } + txq_id = ret; + /* TXQ hasn't yet been enabled, so mark it only as reserved */ mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_RESERVED; + } else if (WARN_ON(txq_id >= IWL_MAX_HW_QUEUES)) { + ret = -ENXIO; + IWL_ERR(mvm, "tid_id %d out of range (0, %d)!\n", + tid, IWL_MAX_HW_QUEUES - 1); + goto out; + } else if (unlikely(mvm->queue_info[txq_id].status == IWL_MVM_QUEUE_SHARED)) { ret = -ENXIO; From acf42a957e341abcafd28d5e89a5e8bfffd67989 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Sun, 21 Oct 2018 18:48:13 +0300 Subject: [PATCH 0202/1436] iwlwifi: avoid access out of memory allocated The value in num_lmac can be bigger than mem_cfg->lmac array, warn in case it's bigger. Signed-off-by: Mordechay Goodstein Fixes: 68025d5f9bfe ("iwlwifi: dbg: refactor dump code to improve readability") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 56e99b5661f7..a97bf17da14d 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -748,6 +748,9 @@ static int iwl_fw_rxf_len(struct iwl_fw_runtime *fwrt, ADD_LEN(fifo_len, mem_cfg->rxfifo2_size, hdr_len); /* Count RXF1 sizes */ + if (WARN_ON(mem_cfg->num_lmacs > MAX_NUM_LMAC)) + mem_cfg->num_lmacs = MAX_NUM_LMAC; + for (i = 0; i < mem_cfg->num_lmacs; i++) ADD_LEN(fifo_len, mem_cfg->lmac[i].rxfifo1_size, hdr_len); @@ -766,6 +769,9 @@ static int iwl_fw_txf_len(struct iwl_fw_runtime *fwrt, goto dump_internal_txf; /* Count TXF sizes */ + if (WARN_ON(mem_cfg->num_lmacs > MAX_NUM_LMAC)) + mem_cfg->num_lmacs = MAX_NUM_LMAC; + for (i = 0; i < mem_cfg->num_lmacs; i++) { int j; From f9119304e1c9c6a634afe9a88f8b228f3997109d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 23 Oct 2018 20:51:56 +0200 Subject: [PATCH 0203/1436] iwlwifi: fw api: remove unused/deprecated filter status These are unused by both firmware and driver, remove them. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/api/rx.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h index 0791a854fc8f..b4f7ea30a7c1 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h @@ -209,8 +209,6 @@ enum iwl_rx_phy_flags { * @RX_MPDU_RES_STATUS_CSUM_OK: checksum found no errors * @RX_MPDU_RES_STATUS_STA_ID_MSK: station ID mask * @RX_MDPU_RES_STATUS_STA_ID_SHIFT: station ID bit shift - * @RX_MPDU_RES_STATUS_FILTERING_MSK: filter status - * @RX_MPDU_RES_STATUS2_FILTERING_MSK: filter status 2 */ enum iwl_mvm_rx_status { RX_MPDU_RES_STATUS_CRC_OK = BIT(0), @@ -238,8 +236,6 @@ enum iwl_mvm_rx_status { RX_MPDU_RES_STATUS_CSUM_OK = BIT(17), RX_MDPU_RES_STATUS_STA_ID_SHIFT = 24, RX_MPDU_RES_STATUS_STA_ID_MSK = 0x1f << RX_MDPU_RES_STATUS_STA_ID_SHIFT, - RX_MPDU_RES_STATUS_FILTERING_MSK = (0xc00000), - RX_MPDU_RES_STATUS2_FILTERING_MSK = (0xc0000000), }; /* 9000 series API */ From ec95b2701d244501068690ad2c43d184413ffc85 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 24 Oct 2018 09:50:08 +0200 Subject: [PATCH 0204/1436] iwlwifi: fw api: document WoWLAN patterns command Document the WoWLAN patterns command structure. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/api/d3.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h index 6fae02fa4cad..86ea0784e1a3 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h @@ -224,8 +224,18 @@ struct iwl_wowlan_pattern { #define IWL_WOWLAN_MAX_PATTERNS 20 +/** + * struct iwl_wowlan_patterns_cmd - WoWLAN wakeup patterns + */ struct iwl_wowlan_patterns_cmd { + /** + * @n_patterns: number of patterns + */ __le32 n_patterns; + + /** + * @patterns: the patterns, array length in @n_patterns + */ struct iwl_wowlan_pattern patterns[]; } __packed; /* WOWLAN_PATTERN_ARRAY_API_S_VER_1 */ From 425784aa5b029eeb80498c73a68f62c3ad1d3b3f Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 24 Jan 2019 14:33:12 +0200 Subject: [PATCH 0205/1436] IB/uverbs: Fix OOPs upon device disassociation The async_file might be freed before the disassociation has been ended, causing qp shutdown to use after free on it. Since uverbs_destroy_ufile_hw is not a fence, it returns if a disassociation is ongoing in another thread. It has to be written this way to avoid deadlock. However this means that the ufile FD close cannot destroy anything that may still be used by an active kref, such as the the async_file. To fix that move the kref_put() to be in ib_uverbs_release_file(). BUG: unable to handle kernel paging request at ffffffffba682787 PGD bc80e067 P4D bc80e067 PUD bc80f063 PMD 1313df163 PTE 80000000bc682061 Oops: 0003 [#1] SMP PTI CPU: 1 PID: 32410 Comm: bash Tainted: G OE 4.20.0-rc6+ #3 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:__pv_queued_spin_lock_slowpath+0x1b3/0x2a0 Code: 98 83 e2 60 49 89 df 48 8b 04 c5 80 18 72 ba 48 8d ba 80 32 02 00 ba 00 80 00 00 4c 8d 65 14 41 bd 01 00 00 00 48 01 c7 85 d2 <48> 89 2f 48 89 fb 74 14 8b 45 08 85 c0 75 42 84 d2 74 6b f3 90 83 RSP: 0018:ffffc1bbc064fb58 EFLAGS: 00010006 RAX: ffffffffba65f4e7 RBX: ffff9f209c656c00 RCX: 0000000000000001 RDX: 0000000000008000 RSI: 0000000000000000 RDI: ffffffffba682787 RBP: ffff9f217bb23280 R08: 0000000000000001 R09: 0000000000000000 R10: ffff9f209d2c7800 R11: ffffffffffffffe8 R12: ffff9f217bb23294 R13: 0000000000000001 R14: 0000000000000000 R15: ffff9f209c656c00 FS: 00007fac55aad740(0000) GS:ffff9f217bb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffba682787 CR3: 000000012f8e0000 CR4: 00000000000006e0 Call Trace: _raw_spin_lock_irq+0x27/0x30 ib_uverbs_release_uevent+0x1e/0xa0 [ib_uverbs] uverbs_free_qp+0x7e/0x90 [ib_uverbs] destroy_hw_idr_uobject+0x1c/0x50 [ib_uverbs] uverbs_destroy_uobject+0x2e/0x180 [ib_uverbs] __uverbs_cleanup_ufile+0x73/0x90 [ib_uverbs] uverbs_destroy_ufile_hw+0x5d/0x120 [ib_uverbs] ib_uverbs_remove_one+0xea/0x240 [ib_uverbs] ib_unregister_device+0xfb/0x200 [ib_core] mlx5_ib_remove+0x51/0xe0 [mlx5_ib] mlx5_remove_device+0xc1/0xd0 [mlx5_core] mlx5_unregister_device+0x3d/0xb0 [mlx5_core] remove_one+0x2a/0x90 [mlx5_core] pci_device_remove+0x3b/0xc0 device_release_driver_internal+0x16d/0x240 unbind_store+0xb2/0x100 kernfs_fop_write+0x102/0x180 __vfs_write+0x36/0x1a0 ? __alloc_fd+0xa9/0x170 ? set_close_on_exec+0x49/0x70 vfs_write+0xad/0x1a0 ksys_write+0x52/0xc0 do_syscall_64+0x5b/0x180 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fac551aac60 Cc: # 4.2 Fixes: 036b10635739 ("IB/uverbs: Enable device removal when there are active user space applications") Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_main.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 2890a77339e1..15add0688fbb 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -204,6 +204,9 @@ void ib_uverbs_release_file(struct kref *ref) if (atomic_dec_and_test(&file->device->refcount)) ib_uverbs_comp_dev(file->device); + if (file->async_file) + kref_put(&file->async_file->ref, + ib_uverbs_release_async_event_file); put_device(&file->device->dev); kfree(file); } @@ -1096,10 +1099,6 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) list_del_init(&file->list); mutex_unlock(&file->device->lists_mutex); - if (file->async_file) - kref_put(&file->async_file->ref, - ib_uverbs_release_async_event_file); - kref_put(&file->ref, ib_uverbs_release_file); return 0; From c1b03c25f5c1503e2db3ac1c0e12ef3e054fa065 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 24 Jan 2019 14:33:32 +0200 Subject: [PATCH 0206/1436] RDMA/mlx5: Fix flow creation on representors The intention of the flow_is_supported was to disable the entire tree and methods that allow raw flow creation, but the grammar syntax has this disable the entire UVERBS_FLOW object. Since the method requires a MLX5_IB_OBJECT_FLOW_MATCHER there is no need to do anything, as it is automatically disabled when matchers are disabled. This restores the ability to create flow steering rules on representors via regular verbs. Fixes: a1462351b590 ("RDMA/mlx5: Fail early if user tries to create flows on IB representors") Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/flow.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index e8a1e4498e3f..798591a18484 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -630,8 +630,7 @@ const struct uapi_definition mlx5_ib_flow_defs[] = { UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)), UAPI_DEF_CHAIN_OBJ_TREE( UVERBS_OBJECT_FLOW, - &mlx5_ib_fs, - UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)), + &mlx5_ib_fs), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, &mlx5_ib_flow_actions), {}, From f8ade8e2429758efa1eddf0a6e2cc936714afc8d Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 24 Jan 2019 15:00:07 +0200 Subject: [PATCH 0207/1436] IB/uverbs: Fix ioctl query port to consider device disassociation Methods cannot peak into the ufile, the only way to get a ucontext and hence a device is via the ib_uverbs_get_ucontext() call or inspecing a locked uobject. Otherwise during/after disassociation the pointers may be null or free'd. BUG: unable to handle kernel NULL pointer dereference at 0000000000000078 PGD 800000005ece6067 P4D 800000005ece6067 PUD 5ece7067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 0 PID: 10631 Comm: ibv_ud_pingpong Tainted: GW OE 4.20.0-rc6+ #3 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:ib_uverbs_handler_UVERBS_METHOD_QUERY_PORT+0x53/0x191 [ib_uverbs] Code: 80 00 00 00 31 c0 48 8b 47 40 48 8d 5c 24 38 48 8d 6c 24 08 48 89 df 48 8b 40 08 4c 8b a0 18 03 00 00 31 c0 f3 48 ab 48 89 ef <49> 83 7c 24 78 00 b1 06 f3 48 ab 0f 84 89 00 00 00 45 31 c9 31 d2 RSP: 0018:ffffb54802ccfb10 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffffb54802ccfb48 RCX:0000000000000000 RDX: fffffffffffffffa RSI: ffffb54802ccfcf8 RDI:ffffb54802ccfb18 RBP: ffffb54802ccfb18 R08: ffffb54802ccfd18 R09:0000000000000000 R10: 0000000000000000 R11: 00000000000000d0 R12:0000000000000000 R13: ffffb54802ccfcb0 R14: ffffb54802ccfc48 R15:ffff9f736e0059a0 FS: 00007f55a6bd7740(0000) GS:ffff9f737ba00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000078 CR3: 0000000064214000 CR4:00000000000006f0 Call Trace: ib_uverbs_cmd_verbs.isra.5+0x94d/0xa60 [ib_uverbs] ? copy_port_attr_to_resp+0x120/0x120 [ib_uverbs] ? arch_tlb_finish_mmu+0x16/0xc0 ? tlb_finish_mmu+0x1f/0x30 ? unmap_region+0xd9/0x120 ib_uverbs_ioctl+0xbc/0x120 [ib_uverbs] do_vfs_ioctl+0xa9/0x620 ? __do_munmap+0x29f/0x3a0 ksys_ioctl+0x60/0x90 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x5b/0x180 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f55a62cb567 Fixes: 641d1207d2ed ("IB/core: Move query port to ioctl") Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types_device.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c index 5030ec480370..2a3f2f01028d 100644 --- a/drivers/infiniband/core/uverbs_std_types_device.c +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -168,12 +168,18 @@ void copy_port_attr_to_resp(struct ib_port_attr *attr, static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)( struct uverbs_attr_bundle *attrs) { - struct ib_device *ib_dev = attrs->ufile->device->ib_dev; + struct ib_device *ib_dev; struct ib_port_attr attr = {}; struct ib_uverbs_query_port_resp_ex resp = {}; + struct ib_ucontext *ucontext; int ret; u8 port_num; + ucontext = ib_uverbs_get_ucontext(attrs); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); + ib_dev = ucontext->device; + /* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. */ if (!ib_dev->ops.query_port) return -EOPNOTSUPP; From 70999ec1c9d3f783a7232973cfc26971e5732758 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 25 Jan 2019 15:12:45 -0300 Subject: [PATCH 0208/1436] MIPS: DTS: jz4740: Correct interrupt number of DMA core The interrupt number set in the devicetree node of the DMA driver was wrong. Signed-off-by: Paul Cercueil Signed-off-by: Paul Burton Cc: Rob Herring Cc: Mark Rutland Cc: Ralf Baechle Cc: James Hogan Cc: devicetree@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/boot/dts/ingenic/jz4740.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi index 6fb16fd24035..2beb78a62b7d 100644 --- a/arch/mips/boot/dts/ingenic/jz4740.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi @@ -161,7 +161,7 @@ #dma-cells = <2>; interrupt-parent = <&intc>; - interrupts = <29>; + interrupts = <20>; clocks = <&cgu JZ4740_CLK_DMA>; From 6ab4aba00f811a5265acc4d3eb1863bb3ca60562 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Thu, 24 Jan 2019 14:33:19 +0200 Subject: [PATCH 0209/1436] IB/ipoib: Fix for use-after-free in ipoib_cm_tx_start The following BUG was reported by kasan: BUG: KASAN: use-after-free in ipoib_cm_tx_start+0x430/0x1390 [ib_ipoib] Read of size 80 at addr ffff88034c30bcd0 by task kworker/u16:1/24020 Workqueue: ipoib_wq ipoib_cm_tx_start [ib_ipoib] Call Trace: dump_stack+0x9a/0xeb print_address_description+0xe3/0x2e0 kasan_report+0x18a/0x2e0 ? ipoib_cm_tx_start+0x430/0x1390 [ib_ipoib] memcpy+0x1f/0x50 ipoib_cm_tx_start+0x430/0x1390 [ib_ipoib] ? kvm_clock_read+0x1f/0x30 ? ipoib_cm_skb_reap+0x610/0x610 [ib_ipoib] ? __lock_is_held+0xc2/0x170 ? process_one_work+0x880/0x1960 ? process_one_work+0x912/0x1960 process_one_work+0x912/0x1960 ? wq_pool_ids_show+0x310/0x310 ? lock_acquire+0x145/0x440 worker_thread+0x87/0xbb0 ? process_one_work+0x1960/0x1960 kthread+0x314/0x3d0 ? kthread_create_worker_on_cpu+0xc0/0xc0 ret_from_fork+0x3a/0x50 Allocated by task 0: kasan_kmalloc+0xa0/0xd0 kmem_cache_alloc_trace+0x168/0x3e0 path_rec_create+0xa2/0x1f0 [ib_ipoib] ipoib_start_xmit+0xa98/0x19e0 [ib_ipoib] dev_hard_start_xmit+0x159/0x8d0 sch_direct_xmit+0x226/0xb40 __dev_queue_xmit+0x1d63/0x2950 neigh_update+0x889/0x1770 arp_process+0xc47/0x21f0 arp_rcv+0x462/0x760 __netif_receive_skb_core+0x1546/0x2da0 netif_receive_skb_internal+0xf2/0x590 napi_gro_receive+0x28e/0x390 ipoib_ib_handle_rx_wc_rss+0x873/0x1b60 [ib_ipoib] ipoib_rx_poll_rss+0x17d/0x320 [ib_ipoib] net_rx_action+0x427/0xe30 __do_softirq+0x28e/0xc42 Freed by task 26680: __kasan_slab_free+0x11d/0x160 kfree+0xf5/0x360 ipoib_flush_paths+0x532/0x9d0 [ib_ipoib] ipoib_set_mode_rss+0x1ad/0x560 [ib_ipoib] set_mode+0xc8/0x150 [ib_ipoib] kernfs_fop_write+0x279/0x440 __vfs_write+0xd8/0x5c0 vfs_write+0x15e/0x470 ksys_write+0xb8/0x180 do_syscall_64+0x9b/0x420 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff88034c30bcc8 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 8 bytes inside of 512-byte region [ffff88034c30bcc8, ffff88034c30bec8) The buggy address belongs to the page: The following race between change mode and xmit flow is the reason for this use-after-free: Change mode Send packet 1 to GID XX Send packet 2 to GID XX | | | start | | | | | | | | | Create new path for GID XX | | and update neigh path | | | | | | | | | | flush_paths | | | | queue_work(cm.start_task) | | Path for GID XX not found | create new path | | start_task runs with old released path There is no locking to protect the lifetime of the path through the ipoib_cm_tx struct, so delete it entirely and always use the newly looked up path under the priv->lock. Fixes: 546481c2816e ("IB/ipoib: Fix memory corruption in ipoib cm mode connect flow") Signed-off-by: Feras Daoud Reviewed-by: Erez Shitrit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 - drivers/infiniband/ulp/ipoib/ipoib_cm.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 1da119d901a9..73e808c1e6ad 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -248,7 +248,6 @@ struct ipoib_cm_tx { struct list_head list; struct net_device *dev; struct ipoib_neigh *neigh; - struct ipoib_path *path; struct ipoib_tx_buf *tx_ring; unsigned int tx_head; unsigned int tx_tail; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 0428e01e8f69..aa9dcfc36cd3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1312,7 +1312,6 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path neigh->cm = tx; tx->neigh = neigh; - tx->path = path; tx->dev = dev; list_add(&tx->list, &priv->cm.start_list); set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags); @@ -1371,7 +1370,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) neigh->daddr + QPN_AND_OPTIONS_OFFSET); goto free_neigh; } - memcpy(&pathrec, &p->path->pathrec, sizeof(pathrec)); + memcpy(&pathrec, &path->pathrec, sizeof(pathrec)); spin_unlock_irqrestore(&priv->lock, flags); netif_tx_unlock_bh(dev); From 1ca1c87f91d9dc50d6a38e2177b2032996e7901c Mon Sep 17 00:00:00 2001 From: Zhou Yanjie Date: Fri, 25 Jan 2019 02:22:15 +0800 Subject: [PATCH 0210/1436] DTS: CI20: Fix bugs in ci20's device tree. According to the Schematic, the hardware of ci20 leads to uart3, but not to uart2. Uart2 is miswritten in the original code. Signed-off-by: Zhou Yanjie Signed-off-by: Paul Burton Cc: linux-mips Cc: linux-kernel Cc: devicetree@vger.kernel.org Cc: robh+dt@kernel.org Cc: ralf@linux-mips.org Cc: jhogan@kernel.org Cc: mark.rutland@arm.com Cc: malat@debian.org Cc: ezequiel@collabora.co.uk Cc: ulf.hansson@linaro.org Cc: syq Cc: jiaxun.yang --- arch/mips/boot/dts/ingenic/ci20.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts index 50cff3cbcc6d..4f7b1fa31cf5 100644 --- a/arch/mips/boot/dts/ingenic/ci20.dts +++ b/arch/mips/boot/dts/ingenic/ci20.dts @@ -76,7 +76,7 @@ status = "okay"; pinctrl-names = "default"; - pinctrl-0 = <&pins_uart2>; + pinctrl-0 = <&pins_uart3>; }; &uart4 { @@ -196,9 +196,9 @@ bias-disable; }; - pins_uart2: uart2 { - function = "uart2"; - groups = "uart2-data", "uart2-hwflow"; + pins_uart3: uart3 { + function = "uart3"; + groups = "uart3-data", "uart3-hwflow"; bias-disable; }; From 2b531b6137834a55857a337ac17510d6436b6fbb Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 10 Jan 2019 12:38:02 +0000 Subject: [PATCH 0211/1436] selftests: cpu-hotplug: fix case where CPUs offline > CPUs present The cpu-hotplug test assumes that we can offline the maximum CPU as described by /sys/devices/system/cpu/offline. However, in the case where the number of CPUs exceeds like kernel configuration then the offline count can be greater than the present count and we end up trying to test the offlining of a CPU that is not available to offline. Fix this by testing the maximum present CPU instead. Also, the test currently offlines the CPU and does not online it, so fix this by onlining the CPU after the test. Fixes: d89dffa976bc ("fault-injection: add selftests for cpu and memory hotplug") Signed-off-by: Colin Ian King Signed-off-by: Shuah Khan --- .../selftests/cpu-hotplug/cpu-on-off-test.sh | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh index bab13dd025a6..0d26b5e3f966 100755 --- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh +++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh @@ -37,6 +37,10 @@ prerequisite() exit $ksft_skip fi + present_cpus=`cat $SYSFS/devices/system/cpu/present` + present_max=${present_cpus##*-} + echo "present_cpus = $present_cpus present_max = $present_max" + echo -e "\t Cpus in online state: $online_cpus" offline_cpus=`cat $SYSFS/devices/system/cpu/offline` @@ -151,6 +155,8 @@ online_cpus=0 online_max=0 offline_cpus=0 offline_max=0 +present_cpus=0 +present_max=0 while getopts e:ahp: opt; do case $opt in @@ -190,9 +196,10 @@ if [ $allcpus -eq 0 ]; then online_cpu_expect_success $online_max if [[ $offline_cpus -gt 0 ]]; then - echo -e "\t offline to online to offline: cpu $offline_max" - online_cpu_expect_success $offline_max - offline_cpu_expect_success $offline_max + echo -e "\t offline to online to offline: cpu $present_max" + online_cpu_expect_success $present_max + offline_cpu_expect_success $present_max + online_cpu $present_max fi exit 0 else From 7e35a5940fab59eeccb9162eb2389342e0a87e53 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Thu, 17 Jan 2019 15:29:38 +0000 Subject: [PATCH 0212/1436] selftests: Use lirc.h from kernel tree, not from system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the system lirc.h is older than v4.16, you will get errors like: ir_loopback.c:32:16: error: field ‘proto’ has incomplete type enum rc_proto proto; Cc: Shuah Khan Signed-off-by: Sean Young Signed-off-by: Shuah Khan --- tools/testing/selftests/ir/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/ir/Makefile b/tools/testing/selftests/ir/Makefile index f4ba8eb84b95..ad06489c22a5 100644 --- a/tools/testing/selftests/ir/Makefile +++ b/tools/testing/selftests/ir/Makefile @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 TEST_PROGS := ir_loopback.sh TEST_GEN_PROGS_EXTENDED := ir_loopback +APIDIR := ../../../include/uapi +CFLAGS += -Wall -O2 -I$(APIDIR) include ../lib.mk From ed5f13261cb65b02c611ae9971677f33581d4286 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 25 Jan 2019 10:33:59 -0800 Subject: [PATCH 0213/1436] selftests/seccomp: Enhance per-arch ptrace syscall skip tests Passing EPERM during syscall skipping was confusing since the test wasn't actually exercising the errno evaluation -- it was just passing a literal "1" (EPERM). Instead, expand the tests to check both direct value returns (positive, 45000 in this case), and errno values (negative, -ESRCH in this case) to check both fake success and fake failure during syscall skipping. Reported-by: Colin Ian King Fixes: a33b2d0359a0 ("selftests/seccomp: Add tests for basic ptrace actions") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Shuah Khan --- tools/testing/selftests/seccomp/seccomp_bpf.c | 72 +++++++++++++++---- 1 file changed, 57 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 496a9a8c773a..7e632b465ab4 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1608,7 +1608,16 @@ TEST_F(TRACE_poke, getpid_runs_normally) #ifdef SYSCALL_NUM_RET_SHARE_REG # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) #else -# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(val, action) +# define EXPECT_SYSCALL_RETURN(val, action) \ + do { \ + errno = 0; \ + if (val < 0) { \ + EXPECT_EQ(-1, action); \ + EXPECT_EQ(-(val), errno); \ + } else { \ + EXPECT_EQ(val, action); \ + } \ + } while (0) #endif /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for @@ -1647,7 +1656,7 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) /* Architecture-specific syscall changing routine. */ void change_syscall(struct __test_metadata *_metadata, - pid_t tracee, int syscall) + pid_t tracee, int syscall, int result) { int ret; ARCH_REGS regs; @@ -1706,7 +1715,7 @@ void change_syscall(struct __test_metadata *_metadata, #ifdef SYSCALL_NUM_RET_SHARE_REG TH_LOG("Can't modify syscall return on this architecture"); #else - regs.SYSCALL_RET = EPERM; + regs.SYSCALL_RET = result; #endif #ifdef HAVE_GETREGS @@ -1734,14 +1743,19 @@ void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee, case 0x1002: /* change getpid to getppid. */ EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, __NR_getppid); + change_syscall(_metadata, tracee, __NR_getppid, 0); break; case 0x1003: - /* skip gettid. */ + /* skip gettid with valid return code. */ EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, -1); + change_syscall(_metadata, tracee, -1, 45000); break; case 0x1004: + /* skip openat with error. */ + EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee)); + change_syscall(_metadata, tracee, -1, -ESRCH); + break; + case 0x1005: /* do nothing (allow getppid) */ EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee)); break; @@ -1774,9 +1788,11 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, nr = get_syscall(_metadata, tracee); if (nr == __NR_getpid) - change_syscall(_metadata, tracee, __NR_getppid); + change_syscall(_metadata, tracee, __NR_getppid, 0); + if (nr == __NR_gettid) + change_syscall(_metadata, tracee, -1, 45000); if (nr == __NR_openat) - change_syscall(_metadata, tracee, -1); + change_syscall(_metadata, tracee, -1, -ESRCH); } FIXTURE_DATA(TRACE_syscall) { @@ -1793,8 +1809,10 @@ FIXTURE_SETUP(TRACE_syscall) BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002), BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; @@ -1842,15 +1860,26 @@ TEST_F(TRACE_syscall, ptrace_syscall_redirected) EXPECT_NE(self->mypid, syscall(__NR_getpid)); } -TEST_F(TRACE_syscall, ptrace_syscall_dropped) +TEST_F(TRACE_syscall, ptrace_syscall_errno) { /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ teardown_trace_fixture(_metadata, self->tracer); self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, true); - /* Tracer should skip the open syscall, resulting in EPERM. */ - EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_openat)); + /* Tracer should skip the open syscall, resulting in ESRCH. */ + EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); +} + +TEST_F(TRACE_syscall, ptrace_syscall_faked) +{ + /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ + teardown_trace_fixture(_metadata, self->tracer); + self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, + true); + + /* Tracer should skip the gettid syscall, resulting fake pid. */ + EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); } TEST_F(TRACE_syscall, syscall_allowed) @@ -1883,7 +1912,21 @@ TEST_F(TRACE_syscall, syscall_redirected) EXPECT_NE(self->mypid, syscall(__NR_getpid)); } -TEST_F(TRACE_syscall, syscall_dropped) +TEST_F(TRACE_syscall, syscall_errno) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + /* openat has been skipped and an errno return. */ + EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); +} + +TEST_F(TRACE_syscall, syscall_faked) { long ret; @@ -1894,8 +1937,7 @@ TEST_F(TRACE_syscall, syscall_dropped) ASSERT_EQ(0, ret); /* gettid has been skipped and an altered return value stored. */ - EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_gettid)); - EXPECT_NE(self->mytid, syscall(__NR_gettid)); + EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); } TEST_F(TRACE_syscall, skip_after_RET_TRACE) From 32a7726c4f4aadfabdb82440d84f88a5a2c8fe13 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 21 Dec 2018 08:26:20 -0800 Subject: [PATCH 0214/1436] xtensa: SMP: fix secondary CPU initialization - add missing memory barriers to the secondary CPU synchronization spin loops; add comment to the matching memory barrier in the boot_secondary and __cpu_die functions; - use READ_ONCE/WRITE_ONCE to access cpu_start_id/cpu_start_ccount instead of reading/writing them directly; - re-initialize cpu_running every time before starting secondary CPU to flush possible previous CPU startup results. Signed-off-by: Max Filippov --- arch/xtensa/kernel/head.S | 5 ++++- arch/xtensa/kernel/smp.c | 34 +++++++++++++++++++++------------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index da08e75100ab..7f009719304e 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -276,12 +276,13 @@ should_never_return: movi a2, cpu_start_ccount 1: + memw l32i a3, a2, 0 beqi a3, 0, 1b movi a3, 0 s32i a3, a2, 0 - memw 1: + memw l32i a3, a2, 0 beqi a3, 0, 1b wsr a3, ccount @@ -317,11 +318,13 @@ ENTRY(cpu_restart) rsr a0, prid neg a2, a0 movi a3, cpu_start_id + memw s32i a2, a3, 0 #if XCHAL_DCACHE_IS_WRITEBACK dhwbi a3, 0 #endif 1: + memw l32i a2, a3, 0 dhi a3, 0 bne a2, a0, 1b diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index 932d64689bac..c9fc2c4f71b3 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -195,9 +195,11 @@ static int boot_secondary(unsigned int cpu, struct task_struct *ts) int i; #ifdef CONFIG_HOTPLUG_CPU - cpu_start_id = cpu; - system_flush_invalidate_dcache_range( - (unsigned long)&cpu_start_id, sizeof(cpu_start_id)); + WRITE_ONCE(cpu_start_id, cpu); + /* Pairs with the third memw in the cpu_restart */ + mb(); + system_flush_invalidate_dcache_range((unsigned long)&cpu_start_id, + sizeof(cpu_start_id)); #endif smp_call_function_single(0, mx_cpu_start, (void *)cpu, 1); @@ -206,18 +208,21 @@ static int boot_secondary(unsigned int cpu, struct task_struct *ts) ccount = get_ccount(); while (!ccount); - cpu_start_ccount = ccount; + WRITE_ONCE(cpu_start_ccount, ccount); - while (time_before(jiffies, timeout)) { + do { + /* + * Pairs with the first two memws in the + * .Lboot_secondary. + */ mb(); - if (!cpu_start_ccount) - break; - } + ccount = READ_ONCE(cpu_start_ccount); + } while (ccount && time_before(jiffies, timeout)); - if (cpu_start_ccount) { + if (ccount) { smp_call_function_single(0, mx_cpu_stop, - (void *)cpu, 1); - cpu_start_ccount = 0; + (void *)cpu, 1); + WRITE_ONCE(cpu_start_ccount, 0); return -EIO; } } @@ -237,6 +242,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_debug("%s: Calling wakeup_secondary(cpu:%d, idle:%p, sp: %08lx)\n", __func__, cpu, idle, start_info.stack); + init_completion(&cpu_running); ret = boot_secondary(cpu, idle); if (ret == 0) { wait_for_completion_timeout(&cpu_running, @@ -298,8 +304,10 @@ void __cpu_die(unsigned int cpu) unsigned long timeout = jiffies + msecs_to_jiffies(1000); while (time_before(jiffies, timeout)) { system_invalidate_dcache_range((unsigned long)&cpu_start_id, - sizeof(cpu_start_id)); - if (cpu_start_id == -cpu) { + sizeof(cpu_start_id)); + /* Pairs with the second memw in the cpu_restart */ + mb(); + if (READ_ONCE(cpu_start_id) == -cpu) { platform_cpu_kill(cpu); return; } From 306b38305c0f86de7f17c5b091a95451dcc93d7d Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 24 Jan 2019 17:16:11 -0800 Subject: [PATCH 0215/1436] xtensa: smp_lx200_defconfig: fix vectors clash Secondary CPU reset vector overlaps part of the double exception handler code, resulting in weird crashes and hangups when running user code. Move exception vectors one page up so that they don't clash with the secondary CPU reset vector. Signed-off-by: Max Filippov --- arch/xtensa/configs/smp_lx200_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig index 11fed6c06a7c..b5938160fb3d 100644 --- a/arch/xtensa/configs/smp_lx200_defconfig +++ b/arch/xtensa/configs/smp_lx200_defconfig @@ -33,6 +33,7 @@ CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y # CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX is not set # CONFIG_PCI is not set +CONFIG_VECTORS_OFFSET=0x00002000 CONFIG_XTENSA_PLATFORM_XTFPGA=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=96M@0" From 8b1c42cdd7181200dc1fff39dcb6ac1a3fac2c25 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 19 Jan 2019 00:26:48 -0800 Subject: [PATCH 0216/1436] xtensa: SMP: mark each possible CPU as present Otherwise it is impossible to enable CPUs after booting with 'maxcpus' parameter. Signed-off-by: Max Filippov --- arch/xtensa/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index c9fc2c4f71b3..80be6449c497 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -83,7 +83,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) { unsigned i; - for (i = 0; i < max_cpus; ++i) + for_each_possible_cpu(i) set_cpu_present(i, true); } From bb6652363be0415578ec4a8794cbe9403db9c336 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 24 Jan 2019 16:51:28 -0800 Subject: [PATCH 0217/1436] drivers/irqchip: xtensa: add warning to irq_retrigger XEA2 and MX PIC can only retrigger software interrupts. Issue a warning if an interrupt of any other type is retriggered. Signed-off-by: Max Filippov --- drivers/irqchip/irq-xtensa-mx.c | 6 +++++- drivers/irqchip/irq-xtensa-pic.c | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-xtensa-mx.c b/drivers/irqchip/irq-xtensa-mx.c index 5385f5768345..0bd3fe3b969e 100644 --- a/drivers/irqchip/irq-xtensa-mx.c +++ b/drivers/irqchip/irq-xtensa-mx.c @@ -113,7 +113,11 @@ static void xtensa_mx_irq_ack(struct irq_data *d) static int xtensa_mx_irq_retrigger(struct irq_data *d) { - xtensa_set_sr(1 << d->hwirq, intset); + unsigned int mask = 1u << d->hwirq; + + if (WARN_ON(mask & ~XCHAL_INTTYPE_MASK_SOFTWARE)) + return 0; + xtensa_set_sr(mask, intset); return 1; } diff --git a/drivers/irqchip/irq-xtensa-pic.c b/drivers/irqchip/irq-xtensa-pic.c index c200234dd2c9..ab12328be5ee 100644 --- a/drivers/irqchip/irq-xtensa-pic.c +++ b/drivers/irqchip/irq-xtensa-pic.c @@ -70,7 +70,11 @@ static void xtensa_irq_ack(struct irq_data *d) static int xtensa_irq_retrigger(struct irq_data *d) { - xtensa_set_sr(1 << d->hwirq, intset); + unsigned int mask = 1u << d->hwirq; + + if (WARN_ON(mask & ~XCHAL_INTTYPE_MASK_SOFTWARE)) + return 0; + xtensa_set_sr(mask, intset); return 1; } From eb271710ec0241a99d1cfcd2a887172ac0858aa9 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 24 Jan 2019 14:06:49 -0800 Subject: [PATCH 0218/1436] drivers/irqchip: xtensa-mx: fix mask and unmask xtensa_irq_mask and xtensa_irq_unmask don't do the right thing when called for the first two external IRQs. Treat these IRQs as per-CPU IRQs. Signed-off-by: Max Filippov --- drivers/irqchip/irq-xtensa-mx.c | 34 +++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/irqchip/irq-xtensa-mx.c b/drivers/irqchip/irq-xtensa-mx.c index 0bd3fe3b969e..27933338f7b3 100644 --- a/drivers/irqchip/irq-xtensa-mx.c +++ b/drivers/irqchip/irq-xtensa-mx.c @@ -71,14 +71,17 @@ static void xtensa_mx_irq_mask(struct irq_data *d) unsigned int mask = 1u << d->hwirq; if (mask & (XCHAL_INTTYPE_MASK_EXTERN_EDGE | - XCHAL_INTTYPE_MASK_EXTERN_LEVEL)) { - set_er(1u << (xtensa_get_ext_irq_no(d->hwirq) - - HW_IRQ_MX_BASE), MIENG); - } else { - mask = __this_cpu_read(cached_irq_mask) & ~mask; - __this_cpu_write(cached_irq_mask, mask); - xtensa_set_sr(mask, intenable); + XCHAL_INTTYPE_MASK_EXTERN_LEVEL)) { + unsigned int ext_irq = xtensa_get_ext_irq_no(d->hwirq); + + if (ext_irq >= HW_IRQ_MX_BASE) { + set_er(1u << (ext_irq - HW_IRQ_MX_BASE), MIENG); + return; + } } + mask = __this_cpu_read(cached_irq_mask) & ~mask; + __this_cpu_write(cached_irq_mask, mask); + xtensa_set_sr(mask, intenable); } static void xtensa_mx_irq_unmask(struct irq_data *d) @@ -86,14 +89,17 @@ static void xtensa_mx_irq_unmask(struct irq_data *d) unsigned int mask = 1u << d->hwirq; if (mask & (XCHAL_INTTYPE_MASK_EXTERN_EDGE | - XCHAL_INTTYPE_MASK_EXTERN_LEVEL)) { - set_er(1u << (xtensa_get_ext_irq_no(d->hwirq) - - HW_IRQ_MX_BASE), MIENGSET); - } else { - mask |= __this_cpu_read(cached_irq_mask); - __this_cpu_write(cached_irq_mask, mask); - xtensa_set_sr(mask, intenable); + XCHAL_INTTYPE_MASK_EXTERN_LEVEL)) { + unsigned int ext_irq = xtensa_get_ext_irq_no(d->hwirq); + + if (ext_irq >= HW_IRQ_MX_BASE) { + set_er(1u << (ext_irq - HW_IRQ_MX_BASE), MIENGSET); + return; + } } + mask |= __this_cpu_read(cached_irq_mask); + __this_cpu_write(cached_irq_mask, mask); + xtensa_set_sr(mask, intenable); } static void xtensa_mx_irq_enable(struct irq_data *d) From ad33cc805223614166c8f4081288f0b3c4b0862d Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Fri, 18 Jan 2019 13:45:27 +0000 Subject: [PATCH 0219/1436] xtensa: Fix typo use space=>user space This patch fix a simple typo. Signed-off-by: Corentin Labbe Signed-off-by: Max Filippov --- arch/xtensa/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 20a0756f27ef..48906ac5fd25 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -164,7 +164,7 @@ config XTENSA_FAKE_NMI If unsure, say N. config XTENSA_UNALIGNED_USER - bool "Unaligned memory access in use space" + bool "Unaligned memory access in user space" help The Xtensa architecture currently does not handle unaligned memory accesses in hardware but through an exception handler. From 687cffd3450911b1b7449fd6c30f598b35666b41 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 23 Jan 2019 09:49:18 +0000 Subject: [PATCH 0220/1436] xtensa: rename BUILTIN_DTB to BUILTIN_DTB_SOURCE When building some xtensa config, I hit the following warning: drivers/staging/mt7621-dts/Kconfig:4:warning: 'BUILTIN_DTB' has wrong type. 'select' only accept arguments of bool and tristate type It is due to some arch use BUILTIN_DTB as a flag for the need to builtin dtb but xtensa use it as a string for which dtb to bulltin. But for this (which dtb to build), it is better to use BUILTIN_DTB_SOURCE like other arch do. Signed-off-by: Corentin Labbe Signed-off-by: Max Filippov --- arch/xtensa/Kconfig | 2 +- arch/xtensa/boot/dts/Makefile | 6 +++--- arch/xtensa/configs/audio_kc705_defconfig | 2 +- arch/xtensa/configs/cadence_csp_defconfig | 2 +- arch/xtensa/configs/generic_kc705_defconfig | 2 +- arch/xtensa/configs/nommu_kc705_defconfig | 2 +- arch/xtensa/configs/smp_lx200_defconfig | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 48906ac5fd25..ce91682770cb 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -451,7 +451,7 @@ config USE_OF help Include support for flattened device tree machine descriptions. -config BUILTIN_DTB +config BUILTIN_DTB_SOURCE string "DTB to build into the kernel image" depends on OF diff --git a/arch/xtensa/boot/dts/Makefile b/arch/xtensa/boot/dts/Makefile index f8052ba5aea8..0b8d00cdae7c 100644 --- a/arch/xtensa/boot/dts/Makefile +++ b/arch/xtensa/boot/dts/Makefile @@ -7,9 +7,9 @@ # # -BUILTIN_DTB := $(patsubst "%",%,$(CONFIG_BUILTIN_DTB)).dtb.o -ifneq ($(CONFIG_BUILTIN_DTB),"") -obj-$(CONFIG_OF) += $(BUILTIN_DTB) +BUILTIN_DTB_SOURCE := $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o +ifneq ($(CONFIG_BUILTIN_DTB_SOURCE),"") +obj-$(CONFIG_OF) += $(BUILTIN_DTB_SOURCE) endif # for CONFIG_OF_ALL_DTBS test diff --git a/arch/xtensa/configs/audio_kc705_defconfig b/arch/xtensa/configs/audio_kc705_defconfig index 2bf964df37ba..f378e56f9ce6 100644 --- a/arch/xtensa/configs/audio_kc705_defconfig +++ b/arch/xtensa/configs/audio_kc705_defconfig @@ -34,7 +34,7 @@ CONFIG_XTENSA_PLATFORM_XTFPGA=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=0x38000000@0" CONFIG_USE_OF=y -CONFIG_BUILTIN_DTB="kc705" +CONFIG_BUILTIN_DTB_SOURCE="kc705" # CONFIG_COMPACTION is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_PM=y diff --git a/arch/xtensa/configs/cadence_csp_defconfig b/arch/xtensa/configs/cadence_csp_defconfig index 3221b7053fa3..62f32a902568 100644 --- a/arch/xtensa/configs/cadence_csp_defconfig +++ b/arch/xtensa/configs/cadence_csp_defconfig @@ -38,7 +38,7 @@ CONFIG_HIGHMEM=y # CONFIG_PCI is not set CONFIG_XTENSA_PLATFORM_XTFPGA=y CONFIG_USE_OF=y -CONFIG_BUILTIN_DTB="csp" +CONFIG_BUILTIN_DTB_SOURCE="csp" # CONFIG_COMPACTION is not set CONFIG_XTFPGA_LCD=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set diff --git a/arch/xtensa/configs/generic_kc705_defconfig b/arch/xtensa/configs/generic_kc705_defconfig index 985fa8546e4e..8bebe07f1060 100644 --- a/arch/xtensa/configs/generic_kc705_defconfig +++ b/arch/xtensa/configs/generic_kc705_defconfig @@ -33,7 +33,7 @@ CONFIG_XTENSA_PLATFORM_XTFPGA=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=0x38000000@0" CONFIG_USE_OF=y -CONFIG_BUILTIN_DTB="kc705" +CONFIG_BUILTIN_DTB_SOURCE="kc705" # CONFIG_COMPACTION is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_NET=y diff --git a/arch/xtensa/configs/nommu_kc705_defconfig b/arch/xtensa/configs/nommu_kc705_defconfig index f3fc4f970ca8..933ab2adf434 100644 --- a/arch/xtensa/configs/nommu_kc705_defconfig +++ b/arch/xtensa/configs/nommu_kc705_defconfig @@ -39,7 +39,7 @@ CONFIG_XTENSA_PLATFORM_XTFPGA=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0x9d050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=256M@0x60000000" CONFIG_USE_OF=y -CONFIG_BUILTIN_DTB="kc705_nommu" +CONFIG_BUILTIN_DTB_SOURCE="kc705_nommu" CONFIG_BINFMT_FLAT=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig index b5938160fb3d..e29c5b179a5b 100644 --- a/arch/xtensa/configs/smp_lx200_defconfig +++ b/arch/xtensa/configs/smp_lx200_defconfig @@ -38,7 +38,7 @@ CONFIG_XTENSA_PLATFORM_XTFPGA=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=96M@0" CONFIG_USE_OF=y -CONFIG_BUILTIN_DTB="lx200mx" +CONFIG_BUILTIN_DTB_SOURCE="lx200mx" # CONFIG_COMPACTION is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_NET=y From f36c1f9a8dfd6a78e6c3fe7aff5e722b84307597 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 25 Jan 2019 07:29:20 +0000 Subject: [PATCH 0221/1436] i3c: master: dw: fix deadlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In dw_i3c_master_irq_handler(), we already have gotten &master->xferqueue.lock, if we try to get the same lock again in dw_i3c_master_dequeue_xfer(), deadlock happens. We fix this issue by introduing dw_i3c_master_dequeue_xfer_locked() which does all what dw_i3c_master_dequeue_xfer() does without trying to lock &master->xferqueue.lock. Signed-off-by: Jisheng Zhang Acked-by: Vitor Soares  Signed-off-by: Boris Brezillon --- drivers/i3c/master/dw-i3c-master.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index f8c00b94817f..bb03079fbade 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -419,12 +419,9 @@ static void dw_i3c_master_enqueue_xfer(struct dw_i3c_master *master, spin_unlock_irqrestore(&master->xferqueue.lock, flags); } -static void dw_i3c_master_dequeue_xfer(struct dw_i3c_master *master, - struct dw_i3c_xfer *xfer) +static void dw_i3c_master_dequeue_xfer_locked(struct dw_i3c_master *master, + struct dw_i3c_xfer *xfer) { - unsigned long flags; - - spin_lock_irqsave(&master->xferqueue.lock, flags); if (master->xferqueue.cur == xfer) { u32 status; @@ -439,6 +436,15 @@ static void dw_i3c_master_dequeue_xfer(struct dw_i3c_master *master, } else { list_del_init(&xfer->node); } +} + +static void dw_i3c_master_dequeue_xfer(struct dw_i3c_master *master, + struct dw_i3c_xfer *xfer) +{ + unsigned long flags; + + spin_lock_irqsave(&master->xferqueue.lock, flags); + dw_i3c_master_dequeue_xfer_locked(master, xfer); spin_unlock_irqrestore(&master->xferqueue.lock, flags); } @@ -494,7 +500,7 @@ static void dw_i3c_master_end_xfer_locked(struct dw_i3c_master *master, u32 isr) complete(&xfer->comp); if (ret < 0) { - dw_i3c_master_dequeue_xfer(master, xfer); + dw_i3c_master_dequeue_xfer_locked(master, xfer); writel(readl(master->regs + DEVICE_CTRL) | DEV_CTRL_RESUME, master->regs + DEVICE_CTRL); } From 6a730fcb9cb472ba2d42b26a50ac65dacdd68882 Mon Sep 17 00:00:00 2001 From: "Darren Hart (VMware)" Date: Sat, 12 Jan 2019 20:27:35 -0800 Subject: [PATCH 0222/1436] Documentation/ABI: Correct mlxreg-io KernelVersion for 5.0 The mlxreg-io for the merge window assumed 4.21 as the next kernel version. Replace 4.21 with 5.0. Signed-off-by: Darren Hart (VMware) Reviewed-by: Andy Shevchenko --- Documentation/ABI/stable/sysfs-driver-mlxreg-io | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-driver-mlxreg-io b/Documentation/ABI/stable/sysfs-driver-mlxreg-io index 9b642669cb16..169fe08a649b 100644 --- a/Documentation/ABI/stable/sysfs-driver-mlxreg-io +++ b/Documentation/ABI/stable/sysfs-driver-mlxreg-io @@ -24,7 +24,7 @@ What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/ cpld3_version Date: November 2018 -KernelVersion: 4.21 +KernelVersion: 5.0 Contact: Vadim Pasternak Description: These files show with which CPLD versions have been burned on LED board. @@ -35,7 +35,7 @@ What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/ jtag_enable Date: November 2018 -KernelVersion: 4.21 +KernelVersion: 5.0 Contact: Vadim Pasternak Description: These files enable and disable the access to the JTAG domain. By default access to the JTAG domain is disabled. @@ -105,7 +105,7 @@ What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/ reset_voltmon_upgrade_fail Date: November 2018 -KernelVersion: 4.21 +KernelVersion: 5.0 Contact: Vadim Pasternak Description: These files show the system reset cause, as following: ComEx power fail, reset from ComEx, system platform reset, reset From 8e47a457321ca1a74ad194ab5dcbca764bc70731 Mon Sep 17 00:00:00 2001 From: Piotr Jaroszynski Date: Sun, 27 Jan 2019 08:46:45 -0800 Subject: [PATCH 0223/1436] iomap: get/put the page in iomap_page_create/release() migrate_page_move_mapping() expects pages with private data set to have a page_count elevated by 1. This is what used to happen for xfs through the buffer_heads code before the switch to iomap in commit 82cb14175e7d ("xfs: add support for sub-pagesize writeback without buffer_heads"). Not having the count elevated causes move_pages() to fail on memory mapped files coming from xfs. Make iomap compatible with the migrate_page_move_mapping() assumption by elevating the page count as part of iomap_page_create() and lowering it in iomap_page_release(). It causes the move_pages() syscall to misbehave on memory mapped files from xfs. It does not not move any pages, which I suppose is "just" a perf issue, but it also ends up returning a positive number which is out of spec for the syscall. Talking to Michal Hocko, it sounds like returning positive numbers might be a necessary update to move_pages() anyway though. Fixes: 82cb14175e7d ("xfs: add support for sub-pagesize writeback without buffer_heads") Signed-off-by: Piotr Jaroszynski [hch: actually get/put the page iomap_migrate_page() to make it work properly] Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/iomap.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/iomap.c b/fs/iomap.c index a3088fae567b..cb184ff68680 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -116,6 +116,12 @@ iomap_page_create(struct inode *inode, struct page *page) atomic_set(&iop->read_count, 0); atomic_set(&iop->write_count, 0); bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE); + + /* + * migrate_page_move_mapping() assumes that pages with private data have + * their count elevated by 1. + */ + get_page(page); set_page_private(page, (unsigned long)iop); SetPagePrivate(page); return iop; @@ -132,6 +138,7 @@ iomap_page_release(struct page *page) WARN_ON_ONCE(atomic_read(&iop->write_count)); ClearPagePrivate(page); set_page_private(page, 0); + put_page(page); kfree(iop); } @@ -569,8 +576,10 @@ iomap_migrate_page(struct address_space *mapping, struct page *newpage, if (page_has_private(page)) { ClearPagePrivate(page); + get_page(newpage); set_page_private(newpage, page_private(page)); set_page_private(page, 0); + put_page(page); SetPagePrivate(newpage); } From 4ea899ead2786a30aaa8181fefa81a3df4ad28f6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Jan 2019 08:58:58 -0800 Subject: [PATCH 0224/1436] iomap: fix a use after free in iomap_dio_rw Introduce a local wait_for_completion variable to avoid an access to the potentially freed dio struture after dropping the last reference count. Also use the chance to document the completion behavior to make the refcounting clear to the reader of the code. Fixes: ff6a9292e6 ("iomap: implement direct I/O") Reported-by: Chandan Rajendra Reported-by: Darrick J. Wong Signed-off-by: Christoph Hellwig Tested-by: Chandan Rajendra Tested-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/iomap.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/fs/iomap.c b/fs/iomap.c index cb184ff68680..897c60215dd1 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1813,6 +1813,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, loff_t pos = iocb->ki_pos, start = pos; loff_t end = iocb->ki_pos + count - 1, ret = 0; unsigned int flags = IOMAP_DIRECT; + bool wait_for_completion = is_sync_kiocb(iocb); struct blk_plug plug; struct iomap_dio *dio; @@ -1832,7 +1833,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, dio->end_io = end_io; dio->error = 0; dio->flags = 0; - dio->wait_for_completion = is_sync_kiocb(iocb); dio->submit.iter = iter; dio->submit.waiter = current; @@ -1887,7 +1887,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, dio_warn_stale_pagecache(iocb->ki_filp); ret = 0; - if (iov_iter_rw(iter) == WRITE && !dio->wait_for_completion && + if (iov_iter_rw(iter) == WRITE && !wait_for_completion && !inode->i_sb->s_dio_done_wq) { ret = sb_init_dio_done_wq(inode->i_sb); if (ret < 0) @@ -1903,7 +1903,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (ret <= 0) { /* magic error code to fall back to buffered I/O */ if (ret == -ENOTBLK) { - dio->wait_for_completion = true; + wait_for_completion = true; ret = 0; } break; @@ -1925,8 +1925,24 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (dio->flags & IOMAP_DIO_WRITE_FUA) dio->flags &= ~IOMAP_DIO_NEED_SYNC; + /* + * We are about to drop our additional submission reference, which + * might be the last reference to the dio. There are three three + * different ways we can progress here: + * + * (a) If this is the last reference we will always complete and free + * the dio ourselves. + * (b) If this is not the last reference, and we serve an asynchronous + * iocb, we must never touch the dio after the decrement, the + * I/O completion handler will complete and free it. + * (c) If this is not the last reference, but we serve a synchronous + * iocb, the I/O completion handler will wake us up on the drop + * of the final reference, and we will complete and free it here + * after we got woken by the I/O completion handler. + */ + dio->wait_for_completion = wait_for_completion; if (!atomic_dec_and_test(&dio->ref)) { - if (!dio->wait_for_completion) + if (!wait_for_completion) return -EIOCBQUEUED; for (;;) { @@ -1943,9 +1959,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, __set_current_state(TASK_RUNNING); } - ret = iomap_dio_complete(dio); - - return ret; + return iomap_dio_complete(dio); out_free_dio: kfree(dio); From 25384ce5f9530def39421597b1457d9462df6455 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 26 Jan 2019 20:35:18 -0800 Subject: [PATCH 0225/1436] xtensa: SMP: limit number of possible CPUs by NR_CPUS This fixes the following warning at boot when the kernel is booted on a board with more CPU cores than was configured in NR_CPUS: smp_init_cpus: Core Count = 8 smp_init_cpus: Core Id = 0 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at include/linux/cpumask.h:121 smp_init_cpus+0x54/0x74 Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.0.0-rc3-00015-g1459333f88a0 #124 Call Trace: __warn$part$3+0x6a/0x7c warn_slowpath_null+0x35/0x3c smp_init_cpus+0x54/0x74 setup_arch+0x1c0/0x1d0 start_kernel+0x44/0x310 _startup+0x107/0x107 Signed-off-by: Max Filippov --- arch/xtensa/kernel/smp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index 80be6449c497..be1f280c322c 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -96,6 +96,11 @@ void __init smp_init_cpus(void) pr_info("%s: Core Count = %d\n", __func__, ncpus); pr_info("%s: Core Id = %d\n", __func__, core_id); + if (ncpus > NR_CPUS) { + ncpus = NR_CPUS; + pr_info("%s: limiting core count by %d\n", __func__, ncpus); + } + for (i = 0; i < ncpus; ++i) set_cpu_possible(i, true); } From 26cd8657c7e745686a4c54a5cccf721ede208a25 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Sat, 26 Jan 2019 11:37:28 +0100 Subject: [PATCH 0226/1436] arm64: dts: rockchip: fix graph_port warning on rk3399 bob kevin and excavator Ports are described by child 'port' nodes contained in the device node. 'ports' is optional and is used to group all 'port' nodes which is not the case here. This patch fixes the following warnings: arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts:25.9-29.5: Warning (graph_port): /edp-panel/ports: graph port node name should be 'port' arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts:46.9-50.5: Warningi (graph_port): /edp-panel/ports: graph port node name should be 'port' arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts:94.9-98.5: Warning (graph_port): /edp-panel/ports: graph port node name should be 'port' Signed-off-by: Enric Balletbo i Serra Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts | 2 +- arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts | 2 +- arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts index 1ee0dc0d9f10..d1cf404b8708 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts @@ -22,7 +22,7 @@ backlight = <&backlight>; power-supply = <&pp3300_disp>; - ports { + port { panel_in_edp: endpoint { remote-endpoint = <&edp_out_panel>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts index 81e73103fa78..15e254a77391 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts @@ -43,7 +43,7 @@ backlight = <&backlight>; power-supply = <&pp3300_disp>; - ports { + port { panel_in_edp: endpoint { remote-endpoint = <&edp_out_panel>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts index 0b8f1edbd746..b48a63c3efc3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts @@ -91,7 +91,7 @@ pinctrl-0 = <&lcd_panel_reset>; power-supply = <&vcc3v3_s0>; - ports { + port { panel_in_edp: endpoint { remote-endpoint = <&edp_out_panel>; }; From 26e2d7b03ea7ff254bf78305aa44dda62e70b78e Mon Sep 17 00:00:00 2001 From: Dmitry Voytik Date: Tue, 22 Jan 2019 23:38:48 +0100 Subject: [PATCH 0227/1436] arm64: dts: rockchip: enable usb-host regulators at boot on rk3328-rock64 After commit ef05bcb60c1a, boot from USB drives is broken. Fix this problem by enabling usb-host regulators during boot time. Fixes: ef05bcb60c1a ("arm64: dts: rockchip: fix vcc_host1_5v pin assign on rk3328-rock64") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Voytik Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328-rock64.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index bd937d68ca3b..040b36ef0dd2 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -40,6 +40,7 @@ pinctrl-0 = <&usb30_host_drv>; regulator-name = "vcc_host_5v"; regulator-always-on; + regulator-boot-on; vin-supply = <&vcc_sys>; }; @@ -51,6 +52,7 @@ pinctrl-0 = <&usb20_host_drv>; regulator-name = "vcc_host1_5v"; regulator-always-on; + regulator-boot-on; vin-supply = <&vcc_sys>; }; From 2a2ec4aa0577ec0b7df2d1bde5c84ed39a8637cb Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 27 Jan 2019 16:08:00 -0800 Subject: [PATCH 0228/1436] hwmon: (nct6775) Fix fan6 detection for NCT6793D Commit 2d99925a15b6 ("hwmon: (nct6775) Separate fan/pwm configuration detection for NCT6793D") accidentally removed part of the code detecting if fan6 is enabled or not. As result, fan6 is no longer detected on Asus PRIME Z370-A. Restore the missing detection code. Fixes: 2d99925a15b6 ("hwmon: (nct6775) Separate fan/pwm configuration detection for NCT6793D") Reported-by: Chris Siebenmann Cc: Chris Siebenmann Signed-off-by: Guenter Roeck --- drivers/hwmon/nct6775.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 4adec4ab7d06..59ee01f3d022 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -3594,7 +3594,8 @@ nct6775_check_fan_inputs(struct nct6775_data *data) fan5pin |= cr1b & BIT(5); fan5pin |= creb & BIT(5); - fan6pin = creb & BIT(3); + fan6pin = !dsw_en && (cr2d & BIT(1)); + fan6pin |= creb & BIT(3); pwm5pin |= cr2d & BIT(7); pwm5pin |= (creb & BIT(4)) && !(cr2a & BIT(0)); From 8c9620b1cc9b69e82fa8d4081d646d0016b602e7 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 19 Jan 2019 16:31:00 +0100 Subject: [PATCH 0229/1436] mmc: bcm2835: Fix DMA channel leak on probe error The BCM2835 MMC host driver requests a DMA channel on probe but neglects to release the channel in the probe error path. The channel may therefore be leaked, in particular if devm_clk_get() causes probe deferral. Fix it. Fixes: 660fc733bd74 ("mmc: bcm2835: Add new driver for the sdhost controller.") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v4.12+ Cc: Frank Pavlic Tested-by: Stefan Wahren Signed-off-by: Ulf Hansson --- drivers/mmc/host/bcm2835.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c index 50293529d6de..c9e7aa50bb0a 100644 --- a/drivers/mmc/host/bcm2835.c +++ b/drivers/mmc/host/bcm2835.c @@ -1431,6 +1431,8 @@ static int bcm2835_probe(struct platform_device *pdev) err: dev_dbg(dev, "%s -> err %d\n", __func__, ret); + if (host->dma_chan_rxtx) + dma_release_channel(host->dma_chan_rxtx); mmc_free_host(mmc); return ret; From 3751e008da0df4384031bd66a516c0292f915605 Mon Sep 17 00:00:00 2001 From: Chaotian Jing Date: Wed, 23 Jan 2019 20:05:25 +0800 Subject: [PATCH 0230/1436] mmc: mediatek: fix incorrect register setting of hs400_cmd_int_delay to set cmd internal delay, need set PAD_TUNE register but not PAD_CMD_TUNE register. Signed-off-by: Chaotian Jing Fixes: 1ede5cb88a29 ("mmc: mediatek: Use data tune for CMD line tune") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Ulf Hansson --- drivers/mmc/host/mtk-sd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 8afeaf81ae66..833ef0590af8 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -846,7 +846,7 @@ static void msdc_set_mclk(struct msdc_host *host, unsigned char timing, u32 hz) if (timing == MMC_TIMING_MMC_HS400 && host->dev_comp->hs400_tune) - sdr_set_field(host->base + PAD_CMD_TUNE, + sdr_set_field(host->base + tune_reg, MSDC_PAD_TUNE_CMDRRDLY, host->hs400_cmd_int_delay); dev_dbg(host->dev, "sclk: %d, timing: %d\n", host->mmc->actual_clock, From 1e19cdc8060227b0802bda6bc0bd22b23679ba32 Mon Sep 17 00:00:00 2001 From: Tejas Joglekar Date: Tue, 22 Jan 2019 13:26:51 +0530 Subject: [PATCH 0231/1436] usb: dwc3: gadget: Handle 0 xfer length for OUT EP For OUT endpoints, zero-length transfers require MaxPacketSize buffer as per the DWC_usb3 programming guide 3.30a section 4.2.3.3. This patch fixes this by explicitly checking zero length transfer to correctly pad up to MaxPacketSize. Fixes: c6267a51639b ("usb: dwc3: gadget: align transfers to wMaxPacketSize") Cc: stable@vger.kernel.org Signed-off-by: Tejas Joglekar Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index bed2ff42780b..6c9b76bcc2e1 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1119,7 +1119,7 @@ static void dwc3_prepare_one_trb_linear(struct dwc3_ep *dep, unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc); unsigned int rem = length % maxp; - if (rem && usb_endpoint_dir_out(dep->endpoint.desc)) { + if ((!length || rem) && usb_endpoint_dir_out(dep->endpoint.desc)) { struct dwc3 *dwc = dep->dwc; struct dwc3_trb *trb; From 07c69f1148da7de3978686d3af9263325d9d60bd Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 22 Jan 2019 15:28:08 -0600 Subject: [PATCH 0232/1436] usb: gadget: udc: net2272: Fix bitwise and boolean operations (!x & y) strikes again. Fix bitwise and boolean operations by enclosing the expression: intcsr & (1 << NET2272_PCI_IRQ) in parentheses, before applying the boolean operator '!'. Notice that this code has been there since 2011. So, it would be helpful if someone can double-check this. This issue was detected with the help of Coccinelle. Fixes: ceb80363b2ec ("USB: net2272: driver for PLX NET2272 USB device controller") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/net2272.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/net2272.c b/drivers/usb/gadget/udc/net2272.c index 660878a19505..b77f3126580e 100644 --- a/drivers/usb/gadget/udc/net2272.c +++ b/drivers/usb/gadget/udc/net2272.c @@ -2083,7 +2083,7 @@ static irqreturn_t net2272_irq(int irq, void *_dev) #if defined(PLX_PCI_RDK2) /* see if PCI int for us by checking irqstat */ intcsr = readl(dev->rdk2.fpga_base_addr + RDK2_IRQSTAT); - if (!intcsr & (1 << NET2272_PCI_IRQ)) { + if (!(intcsr & (1 << NET2272_PCI_IRQ))) { spin_unlock(&dev->lock); return IRQ_NONE; } From f2105d42597f4d10e431b195d69e96dccaf9b012 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 22 Jan 2019 11:36:02 +0100 Subject: [PATCH 0233/1436] usb: phy: fix link errors Fix link errors when CONFIG_FSL_USB2_OTG is enabled and USB_OTG_FSM is set to module then the following link error occurs. aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_ioctl': drivers/usb/phy/phy-fsl-usb.c:1083: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:1083:(.text+0x574): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_start_srp': drivers/usb/phy/phy-fsl-usb.c:674: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:674:(.text+0x61c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_set_host': drivers/usb/phy/phy-fsl-usb.c:593: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:593:(.text+0x7a4): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_start_hnp': drivers/usb/phy/phy-fsl-usb.c:695: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:695:(.text+0x858): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `a_wait_enum': drivers/usb/phy/phy-fsl-usb.c:274: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:274:(.text+0x16f0): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o:drivers/usb/phy/phy-fsl-usb.c:619: more undefined references to `otg_statemachine' follow aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_set_peripheral': drivers/usb/phy/phy-fsl-usb.c:619:(.text+0x1fa0): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' make[1]: *** [Makefile:1020: vmlinux] Error 1 make[1]: Target 'Image' not remade because of errors. make: *** [Makefile:152: sub-make] Error 2 make: Target 'Image' not remade because of errors. Rework so that FSL_USB2_OTG depends on that the USB_OTG_FSM is builtin. Signed-off-by: Anders Roxell Signed-off-by: Felipe Balbi --- drivers/usb/phy/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index d7312eed6088..91ea3083e7ad 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -21,7 +21,7 @@ config AB8500_USB config FSL_USB2_OTG bool "Freescale USB OTG Transceiver Driver" - depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM && PM + depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM=y && PM depends on USB_GADGET || !USB_GADGET # if USB_GADGET=m, this can't be 'y' select USB_PHY help From 512e6fb589bc18f9321457632e89b95017447db9 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Tue, 22 Jan 2019 00:23:50 +0300 Subject: [PATCH 0234/1436] usb: dwc3: exynos: Fix error handling of clk_prepare_enable If clk_prepare_enable() fails in dwc3_exynos_probe() or in dwc3_exynos_resume(), exynos->clks[0] is left undisabled because of usage preincrement in while condition. Found by Linux Driver Verification project (linuxtesting.org). Fixes: 9f2168367a0a ("usb: dwc3: exynos: Rework clock handling and prepare for new variants") Acked-by: Marek Szyprowski Signed-off-by: Alexey Khoroshilov Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-exynos.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c index cb7fcd7c0ad8..c1e9ea621f41 100644 --- a/drivers/usb/dwc3/dwc3-exynos.c +++ b/drivers/usb/dwc3/dwc3-exynos.c @@ -78,7 +78,7 @@ static int dwc3_exynos_probe(struct platform_device *pdev) for (i = 0; i < exynos->num_clks; i++) { ret = clk_prepare_enable(exynos->clks[i]); if (ret) { - while (--i > 0) + while (i-- > 0) clk_disable_unprepare(exynos->clks[i]); return ret; } @@ -223,7 +223,7 @@ static int dwc3_exynos_resume(struct device *dev) for (i = 0; i < exynos->num_clks; i++) { ret = clk_prepare_enable(exynos->clks[i]); if (ret) { - while (--i > 0) + while (i-- > 0) clk_disable_unprepare(exynos->clks[i]); return ret; } From a53469a68eb886e84dd8b69a1458a623d3591793 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Wed, 16 Jan 2019 11:54:07 -0600 Subject: [PATCH 0235/1436] usb: phy: am335x: fix race condition in _probe power off the phy should be done before populate the phy. Otherwise, am335x_init() could be called by the phy owner to power on the phy first, then am335x_phy_probe() turns off the phy again without the caller knowing it. Fixes: 2fc711d76352 ("usb: phy: am335x: Enable USB remote wakeup using PHY wakeup") Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Bin Liu Signed-off-by: Felipe Balbi --- drivers/usb/phy/phy-am335x.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/usb/phy/phy-am335x.c b/drivers/usb/phy/phy-am335x.c index 27bdb7222527..f5f0568d8533 100644 --- a/drivers/usb/phy/phy-am335x.c +++ b/drivers/usb/phy/phy-am335x.c @@ -61,9 +61,6 @@ static int am335x_phy_probe(struct platform_device *pdev) if (ret) return ret; - ret = usb_add_phy_dev(&am_phy->usb_phy_gen.phy); - if (ret) - return ret; am_phy->usb_phy_gen.phy.init = am335x_init; am_phy->usb_phy_gen.phy.shutdown = am335x_shutdown; @@ -82,7 +79,7 @@ static int am335x_phy_probe(struct platform_device *pdev) device_set_wakeup_enable(dev, false); phy_ctrl_power(am_phy->phy_ctrl, am_phy->id, am_phy->dr_mode, false); - return 0; + return usb_add_phy_dev(&am_phy->usb_phy_gen.phy); } static int am335x_phy_remove(struct platform_device *pdev) From a6279470762c19ba97e454f90798373dccdf6148 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 25 Jan 2019 11:48:51 +0000 Subject: [PATCH 0236/1436] Btrfs: fix deadlock when allocating tree block during leaf/node split When splitting a leaf or node from one of the trees that are modified when flushing pending block groups (extent, chunk, device and free space trees), we need to allocate a new tree block, which in turn can result in the need to allocate a new block group. After allocating the new block group we may need to flush new block groups that were previously allocated during the course of the current transaction, which is what may cause a deadlock due to attempts to write lock twice the same leaf or node, as when splitting a leaf or node we are holding a write lock on it and its parent node. The same type of deadlock can also happen when increasing the tree's height, since we are holding a lock on the existing root while allocating the tree block to use as the new root node. An example trace when the deadlock happens during the leaf split path is: [27175.293054] CPU: 0 PID: 3005 Comm: kworker/u17:6 Tainted: G W 4.19.16 #1 [27175.293942] Hardware name: Penguin Computing Relion 1900/MD90-FS0-ZB-XX, BIOS R15 06/25/2018 [27175.294846] Workqueue: btrfs-extent-refs btrfs_extent_refs_helper [btrfs] (...) [27175.298384] RSP: 0018:ffffab2087107758 EFLAGS: 00010246 [27175.299269] RAX: 0000000000000bbd RBX: ffff9fadc7141c48 RCX: 0000000000000001 [27175.300155] RDX: 0000000000000001 RSI: 0000000000000002 RDI: ffff9fadc7141c48 [27175.301023] RBP: 0000000000000001 R08: ffff9faeb6ac1040 R09: ffff9fa9c0000000 [27175.301887] R10: 0000000000000000 R11: 0000000000000040 R12: ffff9fb21aac8000 [27175.302743] R13: ffff9fb1a64d6a20 R14: 0000000000000001 R15: ffff9fb1a64d6a18 [27175.303601] FS: 0000000000000000(0000) GS:ffff9fb21fa00000(0000) knlGS:0000000000000000 [27175.304468] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [27175.305339] CR2: 00007fdc8743ead8 CR3: 0000000763e0a006 CR4: 00000000003606f0 [27175.306220] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [27175.307087] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [27175.307940] Call Trace: [27175.308802] btrfs_search_slot+0x779/0x9a0 [btrfs] [27175.309669] ? update_space_info+0xba/0xe0 [btrfs] [27175.310534] btrfs_insert_empty_items+0x67/0xc0 [btrfs] [27175.311397] btrfs_insert_item+0x60/0xd0 [btrfs] [27175.312253] btrfs_create_pending_block_groups+0xee/0x210 [btrfs] [27175.313116] do_chunk_alloc+0x25f/0x300 [btrfs] [27175.313984] find_free_extent+0x706/0x10d0 [btrfs] [27175.314855] btrfs_reserve_extent+0x9b/0x1d0 [btrfs] [27175.315707] btrfs_alloc_tree_block+0x100/0x5b0 [btrfs] [27175.316548] split_leaf+0x130/0x610 [btrfs] [27175.317390] btrfs_search_slot+0x94d/0x9a0 [btrfs] [27175.318235] btrfs_insert_empty_items+0x67/0xc0 [btrfs] [27175.319087] alloc_reserved_file_extent+0x84/0x2c0 [btrfs] [27175.319938] __btrfs_run_delayed_refs+0x596/0x1150 [btrfs] [27175.320792] btrfs_run_delayed_refs+0xed/0x1b0 [btrfs] [27175.321643] delayed_ref_async_start+0x81/0x90 [btrfs] [27175.322491] normal_work_helper+0xd0/0x320 [btrfs] [27175.323328] ? move_linked_works+0x6e/0xa0 [27175.324160] process_one_work+0x191/0x370 [27175.324976] worker_thread+0x4f/0x3b0 [27175.325763] kthread+0xf8/0x130 [27175.326531] ? rescuer_thread+0x320/0x320 [27175.327284] ? kthread_create_worker_on_cpu+0x50/0x50 [27175.328027] ret_from_fork+0x35/0x40 [27175.328741] ---[ end trace 300a1b9f0ac30e26 ]--- Fix this by preventing the flushing of new blocks groups when splitting a leaf/node and when inserting a new root node for one of the trees modified by the flushing operation, similar to what is done when COWing a node/leaf from on of these trees. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202383 Reported-by: Eli V CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 78 +++++++++++++++++++++++++++++++----------------- 1 file changed, 50 insertions(+), 28 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f64aad613727..5a6c39b44c84 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -968,6 +968,48 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, return 0; } +static struct extent_buffer *alloc_tree_block_no_bg_flush( + struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 parent_start, + const struct btrfs_disk_key *disk_key, + int level, + u64 hint, + u64 empty_size) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct extent_buffer *ret; + + /* + * If we are COWing a node/leaf from the extent, chunk, device or free + * space trees, make sure that we do not finish block group creation of + * pending block groups. We do this to avoid a deadlock. + * COWing can result in allocation of a new chunk, and flushing pending + * block groups (btrfs_create_pending_block_groups()) can be triggered + * when finishing allocation of a new chunk. Creation of a pending block + * group modifies the extent, chunk, device and free space trees, + * therefore we could deadlock with ourselves since we are holding a + * lock on an extent buffer that btrfs_create_pending_block_groups() may + * try to COW later. + * For similar reasons, we also need to delay flushing pending block + * groups when splitting a leaf or node, from one of those trees, since + * we are holding a write lock on it and its parent or when inserting a + * new root node for one of those trees. + */ + if (root == fs_info->extent_root || + root == fs_info->chunk_root || + root == fs_info->dev_root || + root == fs_info->free_space_root) + trans->can_flush_pending_bgs = false; + + ret = btrfs_alloc_tree_block(trans, root, parent_start, + root->root_key.objectid, disk_key, level, + hint, empty_size); + trans->can_flush_pending_bgs = true; + + return ret; +} + /* * does the dirty work in cow of a single block. The parent block (if * supplied) is updated to point to the new cow copy. The new buffer is marked @@ -1015,28 +1057,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent) parent_start = parent->start; - /* - * If we are COWing a node/leaf from the extent, chunk, device or free - * space trees, make sure that we do not finish block group creation of - * pending block groups. We do this to avoid a deadlock. - * COWing can result in allocation of a new chunk, and flushing pending - * block groups (btrfs_create_pending_block_groups()) can be triggered - * when finishing allocation of a new chunk. Creation of a pending block - * group modifies the extent, chunk, device and free space trees, - * therefore we could deadlock with ourselves since we are holding a - * lock on an extent buffer that btrfs_create_pending_block_groups() may - * try to COW later. - */ - if (root == fs_info->extent_root || - root == fs_info->chunk_root || - root == fs_info->dev_root || - root == fs_info->free_space_root) - trans->can_flush_pending_bgs = false; - - cow = btrfs_alloc_tree_block(trans, root, parent_start, - root->root_key.objectid, &disk_key, level, - search_start, empty_size); - trans->can_flush_pending_bgs = true; + cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key, + level, search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); @@ -3345,8 +3367,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, else btrfs_node_key(lower, &lower_key, 0); - c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, - &lower_key, level, root->node->start, 0); + c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level, + root->node->start, 0); if (IS_ERR(c)) return PTR_ERR(c); @@ -3475,8 +3497,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, mid = (c_nritems + 1) / 2; btrfs_node_key(c, &disk_key, mid); - split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, - &disk_key, level, c->start, 0); + split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level, + c->start, 0); if (IS_ERR(split)) return PTR_ERR(split); @@ -4260,8 +4282,8 @@ again: else btrfs_item_key(l, &disk_key, mid); - right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, - &disk_key, 0, l->start, 0); + right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0, + l->start, 0); if (IS_ERR(right)) return PTR_ERR(right); From 7ae710f9f8b2cf95297e7bbfe1c09789a7dc43d4 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 27 Jan 2019 22:58:00 +0100 Subject: [PATCH 0237/1436] gpio: vf610: Mask all GPIO interrupts On SoC reset all GPIO interrupts are disable. However, if kexec is used to boot into a new kernel, the SoC does not experience a reset. Hence GPIO interrupts can be left enabled from the previous kernel. It is then possible for the interrupt to fire before an interrupt handler is registered, resulting in the kernel complaining of an "unexpected IRQ trap", the interrupt is never cleared, and so fires again, resulting in an interrupt storm. Disable all GPIO interrupts before registering the GPIO IRQ chip. Fixes: 7f2691a19627 ("gpio: vf610: add gpiolib/IRQ chip driver for Vybrid") Signed-off-by: Andrew Lunn Acked-by: Stefan Agner Signed-off-by: Linus Walleij --- drivers/gpio/gpio-vf610.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index 1b79ebcfce3e..541fa6ac399d 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -253,6 +253,7 @@ static int vf610_gpio_probe(struct platform_device *pdev) struct vf610_gpio_port *port; struct resource *iores; struct gpio_chip *gc; + int i; int ret; port = devm_kzalloc(&pdev->dev, sizeof(*port), GFP_KERNEL); @@ -319,6 +320,10 @@ static int vf610_gpio_probe(struct platform_device *pdev) if (ret < 0) return ret; + /* Mask all GPIO interrupts */ + for (i = 0; i < gc->ngpio; i++) + vf610_gpio_writel(0, port->base + PORT_PCR(i)); + /* Clear the interrupt status register for all GPIO's */ vf610_gpio_writel(~0, port->base + PORT_ISFR); From 302167c50b32e7fccc98994a91d40ddbbab04e52 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 24 Jan 2019 09:31:43 -0500 Subject: [PATCH 0238/1436] btrfs: don't end the transaction for delayed refs in throttle Previously callers to btrfs_end_transaction_throttle() would commit the transaction if there wasn't enough delayed refs space. This happens in relocation, and if the fs is relatively empty we'll run out of delayed refs space basically immediately, so we'll just be stuck in this loop of committing the transaction over and over again. This code existed because we didn't have a good feedback mechanism for running delayed refs, but with the delayed refs rsv we do now. Delete this throttling code and let the btrfs_start_transaction() in relocation deal with putting pressure on the delayed refs infrastructure. With this patch we no longer take 5 minutes to balance a metadata only fs. Qu has submitted a fstest to catch slow balance or excessive transaction commits. Steps to reproduce: * create subvolume * create many (eg. 16000) inlined files, of size 2KiB * iteratively snapshot and touch several files to trigger metadata updates * start balance -m Reported-by: Qu Wenruo Fixes: 64403612b73a ("btrfs: rework btrfs_check_space_for_delayed_refs") Signed-off-by: Josef Bacik [ add tags and steps to reproduce ] Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 127fa1535f58..f15cf46f1b9d 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -850,14 +850,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, btrfs_trans_release_chunk_metadata(trans); - if (lock && should_end_transaction(trans) && - READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) { - spin_lock(&info->trans_lock); - if (cur_trans->state == TRANS_STATE_RUNNING) - cur_trans->state = TRANS_STATE_BLOCKED; - spin_unlock(&info->trans_lock); - } - if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { if (throttle) return btrfs_commit_transaction(trans); From 634692ab7007e8e3fec758ab0b26e65abf7c79e0 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 17 Jan 2019 11:20:28 +0100 Subject: [PATCH 0239/1436] s390/suspend: fix stack setup in swsusp_arch_suspend The patch that added support for the virtually mapped kernel stacks changed swsusp_arch_suspend to switch to the nodat-stack as the vmap stack is not available while going in and out of suspend. Unfortunately the switch to the nodat-stack is incorrect which breaks suspend to disk. Cc: stable@vger.kernel.org # v4.20 Fixes: ce3dc447493f ("s390: add support for virtually mapped kernel stacks") Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/swsusp.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S index 537f97fde37f..b6796e616812 100644 --- a/arch/s390/kernel/swsusp.S +++ b/arch/s390/kernel/swsusp.S @@ -30,10 +30,10 @@ .section .text ENTRY(swsusp_arch_suspend) lg %r1,__LC_NODAT_STACK - aghi %r1,-STACK_FRAME_OVERHEAD stmg %r6,%r15,__SF_GPRS(%r1) + aghi %r1,-STACK_FRAME_OVERHEAD stg %r15,__SF_BACKCHAIN(%r1) - lgr %r1,%r15 + lgr %r15,%r1 /* Store FPU registers */ brasl %r14,save_fpu_regs From 4a8ef6999bce998fa5813023a9a6b56eea329dba Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 21 Nov 2018 12:39:47 +0100 Subject: [PATCH 0240/1436] s390/dasd: fix using offset into zero size array error Dan Carpenter reported the following: The patch 52898025cf7d: "[S390] dasd: security and PSF update patch for EMC CKD ioctl" from Mar 8, 2010, leads to the following static checker warning: drivers/s390/block/dasd_eckd.c:4486 dasd_symm_io() error: using offset into zero size array 'psf_data[]' drivers/s390/block/dasd_eckd.c 4458 /* Copy parms from caller */ 4459 rc = -EFAULT; 4460 if (copy_from_user(&usrparm, argp, sizeof(usrparm))) ^^^^^^^ The user can specify any "usrparm.psf_data_len". They choose zero by mistake. 4461 goto out; 4462 if (is_compat_task()) { 4463 /* Make sure pointers are sane even on 31 bit. */ 4464 rc = -EINVAL; 4465 if ((usrparm.psf_data >> 32) != 0) 4466 goto out; 4467 if ((usrparm.rssd_result >> 32) != 0) 4468 goto out; 4469 usrparm.psf_data &= 0x7fffffffULL; 4470 usrparm.rssd_result &= 0x7fffffffULL; 4471 } 4472 /* alloc I/O data area */ 4473 psf_data = kzalloc(usrparm.psf_data_len, GFP_KERNEL | GFP_DMA); 4474 rssd_result = kzalloc(usrparm.rssd_result_len, GFP_KERNEL | GFP_DMA); 4475 if (!psf_data || !rssd_result) { kzalloc() returns a ZERO_SIZE_PTR (0x16). 4476 rc = -ENOMEM; 4477 goto out_free; 4478 } 4479 4480 /* get syscall header from user space */ 4481 rc = -EFAULT; 4482 if (copy_from_user(psf_data, 4483 (void __user *)(unsigned long) usrparm.psf_data, 4484 usrparm.psf_data_len)) That all works great. 4485 goto out_free; 4486 psf0 = psf_data[0]; 4487 psf1 = psf_data[1]; But now we're assuming that "->psf_data_len" was at least 2 bytes. Fix this by checking the user specified length psf_data_len. Fixes: 52898025cf7d ("[S390] dasd: security and PSF update patch for EMC CKD ioctl") Reported-by: Dan Carpenter Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_eckd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 4e7b55a14b1a..6e294b4d3635 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -4469,6 +4469,14 @@ static int dasd_symm_io(struct dasd_device *device, void __user *argp) usrparm.psf_data &= 0x7fffffffULL; usrparm.rssd_result &= 0x7fffffffULL; } + /* at least 2 bytes are accessed and should be allocated */ + if (usrparm.psf_data_len < 2) { + DBF_DEV_EVENT(DBF_WARNING, device, + "Symmetrix ioctl invalid data length %d", + usrparm.psf_data_len); + rc = -EINVAL; + goto out; + } /* alloc I/O data area */ psf_data = kzalloc(usrparm.psf_data_len, GFP_KERNEL | GFP_DMA); rssd_result = kzalloc(usrparm.rssd_result_len, GFP_KERNEL | GFP_DMA); From 8f9aca0c45322a807a343fc32f95f2500f83b9ae Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 23 Jan 2019 13:41:35 +0100 Subject: [PATCH 0241/1436] s390/zcrypt: fix specification exception on z196 during ap probe The older machines don't have the QCI instruction available. With support for up to 256 crypto cards the probing of each card has been extended to check card ids from 0 up to 255. For machines with QCI support there is a filter limiting the range of probed cards. The older machines (z196 and older) don't have this filter and so since support for 256 cards is in the driver all cards are probed. However, these machines also require to have the card id fit into 6 bits. Exceeding this limit results in a specification exception which happens on every kernel startup even when there is no crypto configured and used at all. This fix limits the range of probed crypto cards to 64 if there is no QCI instruction available to obey to the older ap architecture and so fixes the specification exceptions on z196 machines. Cc: stable@vger.kernel.org # v4.17+ Fixes: af4a72276d49 ("s390/zcrypt: Support up to 256 crypto adapters.") Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 48ea0004a56d..5a699746c357 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -248,7 +248,8 @@ static inline int ap_test_config(unsigned int *field, unsigned int nr) static inline int ap_test_config_card_id(unsigned int id) { if (!ap_configuration) /* QCI not supported */ - return 1; + /* only ids 0...3F may be probed */ + return id < 0x40 ? 1 : 0; return ap_test_config(ap_configuration->apm, id); } From 870f193d48c25a97d61a8e6c04e3c29a2c606850 Mon Sep 17 00:00:00 2001 From: Fathi Boudra Date: Wed, 16 Jan 2019 11:43:18 -0600 Subject: [PATCH 0242/1436] selftests: net: use LDLIBS instead of LDFLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit reuseport_bpf_numa fails to build due to undefined reference errors: aarch64-linaro-linux-gcc --sysroot=/build/tmp-rpb-glibc/sysroots/hikey -Wall -Wl,--no-as-needed -O2 -g -I../../../../usr/include/ -Wl,-O1 -Wl,--hash-style=gnu -Wl,--as-needed -lnuma reuseport_bpf_numa.c -o /build/tmp-rpb-glibc/work/hikey-linaro-linux/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/net/reuseport_bpf_numa /tmp/ccfUuExT.o: In function `send_from_node': /build/tmp-rpb-glibc/work/hikey-linaro-linux/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/net/reuseport_bpf_numa.c:138: undefined reference to `numa_run_on_node' /tmp/ccfUuExT.o: In function `main': /build/tmp-rpb-glibc/work/hikey-linaro-linux/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/net/reuseport_bpf_numa.c:230: undefined reference to `numa_available' /build/tmp-rpb-glibc/work/hikey-linaro-linux/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/net/reuseport_bpf_numa.c:233: undefined reference to `numa_max_node' It's GNU Make and linker specific. The default Makefile rule looks like: $(CC) $(CFLAGS) $(LDFLAGS) $@ $^ $(LDLIBS) When linking is done by gcc itself, no issue, but when it needs to be passed to proper ld, only LDLIBS follows and then ld cannot know what libs to link with. More detail: https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html LDFLAGS Extra flags to give to compilers when they are supposed to invoke the linker, ‘ld’, such as -L. Libraries (-lfoo) should be added to the LDLIBS variable instead. LDLIBS Library flags or names given to compilers when they are supposed to invoke the linker, ‘ld’. LOADLIBES is a deprecated (but still supported) alternative to LDLIBS. Non-library linker flags, such as -L, should go in the LDFLAGS variable. https://lkml.org/lkml/2010/2/10/362 tools/perf: libraries must come after objects Link order matters, use LDLIBS instead of LDFLAGS to properly link against libnuma. Signed-off-by: Fathi Boudra Signed-off-by: Shuah Khan --- tools/testing/selftests/net/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index f8f3e90700c0..1e6d14d2825c 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -21,6 +21,6 @@ TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls KSFT_KHDR_INSTALL := 1 include ../lib.mk -$(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma +$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread $(OUTPUT)/tcp_inq: LDFLAGS += -lpthread From 7d4e591bc051d3382c45caaa2530969fb42ed23d Mon Sep 17 00:00:00 2001 From: Fathi Boudra Date: Wed, 16 Jan 2019 11:43:20 -0600 Subject: [PATCH 0243/1436] selftests: timers: use LDLIBS instead of LDFLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit posix_timers fails to build due to undefined reference errors: aarch64-linaro-linux-gcc --sysroot=/build/tmp-rpb-glibc/sysroots/hikey -O2 -pipe -g -feliminate-unused-debug-types -O3 -Wl,-no-as-needed -Wall -DKTEST -Wl,-O1 -Wl,--hash-style=gnu -Wl,--as-needed -lrt -lpthread posix_timers.c -o /build/tmp-rpb-glibc/work/hikey-linaro-linux/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/timers/posix_timers /tmp/cc1FTZzT.o: In function `check_timer_create': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/timers/posix_timers.c:157: undefined reference to `timer_create' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/timers/posix_timers.c:170: undefined reference to `timer_settime' collect2: error: ld returned 1 exit status It's GNU Make and linker specific. The default Makefile rule looks like: $(CC) $(CFLAGS) $(LDFLAGS) $@ $^ $(LDLIBS) When linking is done by gcc itself, no issue, but when it needs to be passed to proper ld, only LDLIBS follows and then ld cannot know what libs to link with. More detail: https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html LDFLAGS Extra flags to give to compilers when they are supposed to invoke the linker, ‘ld’, such as -L. Libraries (-lfoo) should be added to the LDLIBS variable instead. LDLIBS Library flags or names given to compilers when they are supposed to invoke the linker, ‘ld’. LOADLIBES is a deprecated (but still supported) alternative to LDLIBS. Non-library linker flags, such as -L, should go in the LDFLAGS variable. https://lkml.org/lkml/2010/2/10/362 tools/perf: libraries must come after objects Link order matters, use LDLIBS instead of LDFLAGS to properly link against libpthread. Signed-off-by: Denys Dmytriyenko Signed-off-by: Fathi Boudra Signed-off-by: Shuah Khan --- tools/testing/selftests/timers/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index c02683cfb6c9..7656c7ce79d9 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -O3 -Wl,-no-as-needed -Wall -LDFLAGS += -lrt -lpthread -lm +LDLIBS += -lrt -lpthread -lm # these are all "safe" tests that don't modify # system time or require escalated privileges From 80ff00172407e0aad4b10b94ef0816fc3e7813cb Mon Sep 17 00:00:00 2001 From: Yao Liu Date: Mon, 28 Jan 2019 19:44:14 +0800 Subject: [PATCH 0244/1436] nfs: Fix NULL pointer dereference of dev_name There is a NULL pointer dereference of dev_name in nfs_parse_devname() The oops looks something like: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 ... RIP: 0010:nfs_fs_mount+0x3b6/0xc20 [nfs] ... Call Trace: ? ida_alloc_range+0x34b/0x3d0 ? nfs_clone_super+0x80/0x80 [nfs] ? nfs_free_parsed_mount_data+0x60/0x60 [nfs] mount_fs+0x52/0x170 ? __init_waitqueue_head+0x3b/0x50 vfs_kern_mount+0x6b/0x170 do_mount+0x216/0xdc0 ksys_mount+0x83/0xd0 __x64_sys_mount+0x25/0x30 do_syscall_64+0x65/0x220 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fix this by adding a NULL check on dev_name Signed-off-by: Yao Liu Signed-off-by: Anna Schumaker --- fs/nfs/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 22ce3c8a2f46..0570391eaa16 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1895,6 +1895,11 @@ static int nfs_parse_devname(const char *dev_name, size_t len; char *end; + if (unlikely(!dev_name || !*dev_name)) { + dfprintk(MOUNT, "NFS: device name not specified\n"); + return -EINVAL; + } + /* Is the host name protected with square brakcets? */ if (*dev_name == '[') { end = strchr(++dev_name, ']'); From 6479450f72c1391c03f08affe0d0110f41ae7ca0 Mon Sep 17 00:00:00 2001 From: Heyi Guo Date: Thu, 24 Jan 2019 21:37:08 +0800 Subject: [PATCH 0245/1436] irqchip/gic-v4: Fix occasional VLPI drop 1. In current implementation, every VLPI will temporarily be mapped to the first CPU in system (normally CPU0) and then moved to the real scheduled CPU later. 2. So there is a time window and a VLPI may be sent to CPU0 instead of the real scheduled vCPU, in a multi-CPU virtual machine. 3. However, CPU0 may have not been scheduled as a virtual CPU after system boots up, so the value of its GICR_VPROPBASER is unknown at that moment. 4. If the INTID of VLPI is larger than 2^(GICR_VPROPBASER.IDbits+1), while IDbits is also in unknown state, GIC will behave as if the VLPI is out of range and simply drop it, which results in interrupt missing in Guest. As no code will clear GICR_VPROPBASER at runtime, we can safely initialize the IDbits field at boot time for each CPU to get rid of this issue. We also clear Valid bit of GICR_VPENDBASER in case any ancient programming gets left in and causes memory corrupting. A new function its_clear_vpend_valid() is added to reuse the code in its_vpe_deschedule(). Fixes: e643d8034036 ("irqchip/gic-v3-its: Add VPE scheduling") Signed-off-by: Heyi Guo Signed-off-by: Heyi Guo Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 66 ++++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 7f2a45445b00..36181197d5e0 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2059,6 +2059,29 @@ static int __init allocate_lpi_tables(void) return 0; } +static u64 its_clear_vpend_valid(void __iomem *vlpi_base) +{ + u32 count = 1000000; /* 1s! */ + bool clean; + u64 val; + + val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); + val &= ~GICR_VPENDBASER_Valid; + gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); + + do { + val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); + clean = !(val & GICR_VPENDBASER_Dirty); + if (!clean) { + count--; + cpu_relax(); + udelay(1); + } + } while (!clean && count); + + return val; +} + static void its_cpu_init_lpis(void) { void __iomem *rbase = gic_data_rdist_rd_base(); @@ -2144,6 +2167,30 @@ static void its_cpu_init_lpis(void) val |= GICR_CTLR_ENABLE_LPIS; writel_relaxed(val, rbase + GICR_CTLR); + if (gic_rdists->has_vlpis) { + void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); + + /* + * It's possible for CPU to receive VLPIs before it is + * sheduled as a vPE, especially for the first CPU, and the + * VLPI with INTID larger than 2^(IDbits+1) will be considered + * as out of range and dropped by GIC. + * So we initialize IDbits to known value to avoid VLPI drop. + */ + val = (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK; + pr_debug("GICv4: CPU%d: Init IDbits to 0x%llx for GICR_VPROPBASER\n", + smp_processor_id(), val); + gits_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); + + /* + * Also clear Valid bit of GICR_VPENDBASER, in case some + * ancient programming gets left in and has possibility of + * corrupting memory. + */ + val = its_clear_vpend_valid(vlpi_base); + WARN_ON(val & GICR_VPENDBASER_Dirty); + } + /* Make sure the GIC has seen the above */ dsb(sy); out: @@ -2755,26 +2802,11 @@ static void its_vpe_schedule(struct its_vpe *vpe) static void its_vpe_deschedule(struct its_vpe *vpe) { void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); - u32 count = 1000000; /* 1s! */ - bool clean; u64 val; - /* We're being scheduled out */ - val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); - val &= ~GICR_VPENDBASER_Valid; - gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); + val = its_clear_vpend_valid(vlpi_base); - do { - val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); - clean = !(val & GICR_VPENDBASER_Dirty); - if (!clean) { - count--; - cpu_relax(); - udelay(1); - } - } while (!clean && count); - - if (unlikely(!clean && !count)) { + if (unlikely(val & GICR_VPENDBASER_Dirty)) { pr_err_ratelimited("ITS virtual pending table not cleaning\n"); vpe->idai = false; vpe->pending_last = true; From 4d741f3dd11c2a8a11536d568f14ab0321169f3d Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 28 Jan 2019 10:24:29 -0800 Subject: [PATCH 0246/1436] Input: snvs_pwrkey - allow selecting driver for i.MX 7D The i.MX SNVS Power Key driver supports the i.MX 7D SoC family too. Allow to enable the i.MX SNVS Power Key driver even if only i.MX 7D SoC is selected. Signed-off-by: Stefan Agner Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 4713957b0cbb..a878351f1643 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -420,7 +420,7 @@ config KEYBOARD_MPR121 config KEYBOARD_SNVS_PWRKEY tristate "IMX SNVS Power Key Driver" - depends on SOC_IMX6SX + depends on SOC_IMX6SX || SOC_IMX7D depends on OF help This is the snvs powerkey driver for the Freescale i.MX application From 3ca232df9921f083c3b37ba5fbc76f4d9046268b Mon Sep 17 00:00:00 2001 From: Jonathan Bakker Date: Mon, 28 Jan 2019 11:13:01 -0800 Subject: [PATCH 0247/1436] Input: pwm-vibra - prevent unbalanced regulator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pwm_vibrator_stop disables the regulator, but it can be called from multiple places, even when the regulator is already disabled. Fix this by using regulator_is_enabled check when starting and stopping device. Signed-off-by: Jonathan Bakker Signed-off-by: Paweł Chmiel Signed-off-by: Dmitry Torokhov --- drivers/input/misc/pwm-vibra.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/input/misc/pwm-vibra.c b/drivers/input/misc/pwm-vibra.c index 55da191ae550..9df87431d7d4 100644 --- a/drivers/input/misc/pwm-vibra.c +++ b/drivers/input/misc/pwm-vibra.c @@ -34,6 +34,7 @@ struct pwm_vibrator { struct work_struct play_work; u16 level; u32 direction_duty_cycle; + bool vcc_on; }; static int pwm_vibrator_start(struct pwm_vibrator *vibrator) @@ -42,10 +43,13 @@ static int pwm_vibrator_start(struct pwm_vibrator *vibrator) struct pwm_state state; int err; - err = regulator_enable(vibrator->vcc); - if (err) { - dev_err(pdev, "failed to enable regulator: %d", err); - return err; + if (!vibrator->vcc_on) { + err = regulator_enable(vibrator->vcc); + if (err) { + dev_err(pdev, "failed to enable regulator: %d", err); + return err; + } + vibrator->vcc_on = true; } pwm_get_state(vibrator->pwm, &state); @@ -76,7 +80,10 @@ static int pwm_vibrator_start(struct pwm_vibrator *vibrator) static void pwm_vibrator_stop(struct pwm_vibrator *vibrator) { - regulator_disable(vibrator->vcc); + if (vibrator->vcc_on) { + regulator_disable(vibrator->vcc); + vibrator->vcc_on = false; + } if (vibrator->pwm_dir) pwm_disable(vibrator->pwm_dir); From 94803aef3533676194c772383472636c453e3147 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Chmiel?= Date: Mon, 28 Jan 2019 11:13:34 -0800 Subject: [PATCH 0248/1436] Input: pwm-vibra - stop regulator after disabling pwm, not before MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes order of disable calls in pwm_vibrator_stop. Currently when starting device, we first enable vcc regulator and then setup and enable pwm. When stopping, we should do this in oposite order, so first disable pwm and then disable regulator. Previously order was the same as in start. Signed-off-by: Paweł Chmiel Signed-off-by: Dmitry Torokhov --- drivers/input/misc/pwm-vibra.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/input/misc/pwm-vibra.c b/drivers/input/misc/pwm-vibra.c index 9df87431d7d4..dbb6d9e1b947 100644 --- a/drivers/input/misc/pwm-vibra.c +++ b/drivers/input/misc/pwm-vibra.c @@ -80,14 +80,14 @@ static int pwm_vibrator_start(struct pwm_vibrator *vibrator) static void pwm_vibrator_stop(struct pwm_vibrator *vibrator) { + if (vibrator->pwm_dir) + pwm_disable(vibrator->pwm_dir); + pwm_disable(vibrator->pwm); + if (vibrator->vcc_on) { regulator_disable(vibrator->vcc); vibrator->vcc_on = false; } - - if (vibrator->pwm_dir) - pwm_disable(vibrator->pwm_dir); - pwm_disable(vibrator->pwm); } static void pwm_vibrator_play_work(struct work_struct *work) From 483cbbeddd5fe2c80fd4141ff0748fa06c4ff146 Mon Sep 17 00:00:00 2001 From: Alexei Naberezhnov Date: Tue, 27 Mar 2018 16:54:16 -0700 Subject: [PATCH 0249/1436] md/raid5: fix 'out of memory' during raid cache recovery This fixes the case when md array assembly fails because of raid cache recovery unable to allocate a stripe, despite attempts to replay stripes and increase cache size. This happens because stripes released by r5c_recovery_replay_stripes and raid5_set_cache_size don't become available for allocation immediately. Released stripes first are placed on conf->released_stripes list and require md thread to merge them on conf->inactive_list before they can be allocated. Patch allows final allocation attempt during cache recovery to wait for new stripes to become availabe for allocation. Cc: linux-raid@vger.kernel.org Cc: Shaohua Li Cc: linux-stable # 4.10+ Fixes: b4c625c67362 ("md/r5cache: r5cache recovery: part 1") Signed-off-by: Alexei Naberezhnov Signed-off-by: Song Liu --- drivers/md/raid5-cache.c | 33 ++++++++++++++++++++++----------- drivers/md/raid5.c | 8 ++++++-- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index ec3a5ef7fee0..cbbe6b6535be 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1935,12 +1935,14 @@ out: } static struct stripe_head * -r5c_recovery_alloc_stripe(struct r5conf *conf, - sector_t stripe_sect) +r5c_recovery_alloc_stripe( + struct r5conf *conf, + sector_t stripe_sect, + int noblock) { struct stripe_head *sh; - sh = raid5_get_active_stripe(conf, stripe_sect, 0, 1, 0); + sh = raid5_get_active_stripe(conf, stripe_sect, 0, noblock, 0); if (!sh) return NULL; /* no more stripe available */ @@ -2150,7 +2152,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, stripe_sect); if (!sh) { - sh = r5c_recovery_alloc_stripe(conf, stripe_sect); + sh = r5c_recovery_alloc_stripe(conf, stripe_sect, 1); /* * cannot get stripe from raid5_get_active_stripe * try replay some stripes @@ -2159,20 +2161,29 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, r5c_recovery_replay_stripes( cached_stripe_list, ctx); sh = r5c_recovery_alloc_stripe( - conf, stripe_sect); + conf, stripe_sect, 1); } if (!sh) { + int new_size = conf->min_nr_stripes * 2; pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n", mdname(mddev), - conf->min_nr_stripes * 2); - raid5_set_cache_size(mddev, - conf->min_nr_stripes * 2); - sh = r5c_recovery_alloc_stripe(conf, - stripe_sect); + new_size); + ret = raid5_set_cache_size(mddev, new_size); + if (conf->min_nr_stripes <= new_size / 2) { + pr_err("md/raid:%s: Cannot increase cache size, ret=%d, new_size=%d, min_nr_stripes=%d, max_nr_stripes=%d\n", + mdname(mddev), + ret, + new_size, + conf->min_nr_stripes, + conf->max_nr_stripes); + return -ENOMEM; + } + sh = r5c_recovery_alloc_stripe( + conf, stripe_sect, 0); } if (!sh) { pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n", - mdname(mddev)); + mdname(mddev)); return -ENOMEM; } list_add_tail(&sh->lru, cached_stripe_list); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4990f0319f6c..cecea901ab8c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6369,6 +6369,7 @@ raid5_show_stripe_cache_size(struct mddev *mddev, char *page) int raid5_set_cache_size(struct mddev *mddev, int size) { + int result = 0; struct r5conf *conf = mddev->private; if (size <= 16 || size > 32768) @@ -6385,11 +6386,14 @@ raid5_set_cache_size(struct mddev *mddev, int size) mutex_lock(&conf->cache_size_mutex); while (size > conf->max_nr_stripes) - if (!grow_one_stripe(conf, GFP_KERNEL)) + if (!grow_one_stripe(conf, GFP_KERNEL)) { + conf->min_nr_stripes = conf->max_nr_stripes; + result = -ENOMEM; break; + } mutex_unlock(&conf->cache_size_mutex); - return 0; + return result; } EXPORT_SYMBOL(raid5_set_cache_size); From 8d2df8a2d0fa59d1214841202d8e9176057c42ae Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Mon, 7 Jan 2019 23:07:19 +0530 Subject: [PATCH 0250/1436] arch/arm/xen: Remove duplicate header Remove duplicate header which is included twice. Signed-off-by: Souptick Joarder Reviewed-by: Oleksandr Andrushchenko Acked-by: Stefano Stabellini Signed-off-by: Boris Ostrovsky --- arch/arm/xen/mm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index cb44aa290e73..e1d44b903dfc 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include From 8c79b35693380d856dcbbc21629682a90f26ca62 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 28 Jan 2019 10:01:21 -0800 Subject: [PATCH 0251/1436] tools: bpftool: fix crash with un-owned prog arrays Prog arrays don't have 'owner_prog_type' and 'owner_jited' fields in their fdinfo when they are created. Those fields are set and reported when first program is checked for compatibility by bpf_prog_array_compatible(). This means that bpftool cannot expect the fields to always be there. Currently trying to show maps on a system with an un-owned prog array leads to a crash: $ bpftool map show 389: prog_array name tail_call_map flags 0x0 Error: key 'owner_prog_type' not found in fdinfo Error: key 'owner_jited' not found in fdinfo key 4B value 4B max_entries 4 memlock 4096B Segmentation fault (core dumped) We pass a NULL pointer to atoi(). Remove the assumption that fdinfo keys are always present. Add missing validations and remove the p_err() calls which may lead to broken JSON output as caller will not propagate the failure. Fixes: 99a44bef5870 ("tools: bpftool: add owner_prog_type and owner_jited to bpftool output") Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/common.c | 6 +----- tools/bpf/bpftool/map.c | 17 ++++++++--------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 897483457bf0..f7261fad45c1 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -297,10 +297,8 @@ char *get_fdinfo(int fd, const char *key) snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd); fdi = fopen(path, "r"); - if (!fdi) { - p_err("can't open fdinfo: %s", strerror(errno)); + if (!fdi) return NULL; - } while ((n = getline(&line, &line_n, fdi)) > 0) { char *value; @@ -313,7 +311,6 @@ char *get_fdinfo(int fd, const char *key) value = strchr(line, '\t'); if (!value || !value[1]) { - p_err("malformed fdinfo!?"); free(line); return NULL; } @@ -326,7 +323,6 @@ char *get_fdinfo(int fd, const char *key) return line; } - p_err("key '%s' not found in fdinfo", key); free(line); fclose(fdi); return NULL; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 29a3468c6cf6..1ef1ee2280a2 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -513,10 +513,9 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_uint_field(json_wtr, "owner_prog_type", prog_type); } - if (atoi(owner_jited)) - jsonw_bool_field(json_wtr, "owner_jited", true); - else - jsonw_bool_field(json_wtr, "owner_jited", false); + if (owner_jited) + jsonw_bool_field(json_wtr, "owner_jited", + !!atoi(owner_jited)); free(owner_prog_type); free(owner_jited); @@ -569,7 +568,8 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) char *owner_prog_type = get_fdinfo(fd, "owner_prog_type"); char *owner_jited = get_fdinfo(fd, "owner_jited"); - printf("\n\t"); + if (owner_prog_type || owner_jited) + printf("\n\t"); if (owner_prog_type) { unsigned int prog_type = atoi(owner_prog_type); @@ -579,10 +579,9 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) else printf("owner_prog_type %d ", prog_type); } - if (atoi(owner_jited)) - printf("owner jited"); - else - printf("owner not jited"); + if (owner_jited) + printf("owner%s jited", + atoi(owner_jited) ? "" : " not"); free(owner_prog_type); free(owner_jited); From 1d79895aef18fa05789995d86d523c9b2ee58a02 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 28 Jan 2019 10:13:35 +0100 Subject: [PATCH 0252/1436] sk_msg: Always cancel strp work before freeing the psock Despite having stopped the parser, we still need to deinitialize it by calling strp_done so that it cancels its work. Otherwise the worker thread can run after we have freed the parser, and attempt to access its workqueue resulting in a use-after-free: ================================================================== BUG: KASAN: use-after-free in pwq_activate_delayed_work+0x1b/0x1d0 Read of size 8 at addr ffff888069975240 by task kworker/u2:2/93 CPU: 0 PID: 93 Comm: kworker/u2:2 Not tainted 5.0.0-rc2-00335-g28f9d1a3d4fe-dirty #14 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014 Workqueue: (null) (kstrp) Call Trace: print_address_description+0x6e/0x2b0 ? pwq_activate_delayed_work+0x1b/0x1d0 kasan_report+0xfd/0x177 ? pwq_activate_delayed_work+0x1b/0x1d0 ? pwq_activate_delayed_work+0x1b/0x1d0 pwq_activate_delayed_work+0x1b/0x1d0 ? process_one_work+0x4aa/0x660 pwq_dec_nr_in_flight+0x9b/0x100 worker_thread+0x82/0x680 ? process_one_work+0x660/0x660 kthread+0x1b9/0x1e0 ? __kthread_create_on_node+0x250/0x250 ret_from_fork+0x1f/0x30 Allocated by task 111: sk_psock_init+0x3c/0x1b0 sock_map_link.isra.2+0x103/0x4b0 sock_map_update_common+0x94/0x270 sock_map_update_elem+0x145/0x160 __se_sys_bpf+0x152e/0x1e10 do_syscall_64+0xb2/0x3e0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 112: kfree+0x7f/0x140 process_one_work+0x40b/0x660 worker_thread+0x82/0x680 kthread+0x1b9/0x1e0 ret_from_fork+0x1f/0x30 The buggy address belongs to the object at ffff888069975180 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 192 bytes inside of 512-byte region [ffff888069975180, ffff888069975380) The buggy address belongs to the page: page:ffffea0001a65d00 count:1 mapcount:0 mapping:ffff88806d401280 index:0x0 compound_mapcount: 0 flags: 0x4000000000010200(slab|head) raw: 4000000000010200 dead000000000100 dead000000000200 ffff88806d401280 raw: 0000000000000000 00000000800c000c 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888069975100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff888069975180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff888069975200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888069975280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888069975300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Reported-by: Marek Majkowski Signed-off-by: Jakub Sitnicki Link: https://lore.kernel.org/netdev/CAJPywTLwgXNEZ2dZVoa=udiZmtrWJ0q5SuBW64aYs0Y1khXX3A@mail.gmail.com Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- net/core/skmsg.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index d6d5c20d7044..8c826603bf36 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -545,8 +545,7 @@ static void sk_psock_destroy_deferred(struct work_struct *gc) struct sk_psock *psock = container_of(gc, struct sk_psock, gc); /* No sk_callback_lock since already detached. */ - if (psock->parser.enabled) - strp_done(&psock->parser.strp); + strp_done(&psock->parser.strp); cancel_work_sync(&psock->work); From 2fa53f8924223e81cd85e19f0c2caf110f086752 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 28 Jan 2019 23:55:26 +0100 Subject: [PATCH 0253/1436] bpf, doc: add reviewers to maintainers entry In order to better scale BPF development on netdev, we've adopted a reviewer rotation for all BPF patches among the five of us for some time now. Lets give credit where credit is due, and add Martin, Song and Yonghong as official BPF reviewers to MAINTAINERS file. Also while at it, add regex matching for BPF such that we get properly Cc'ed for files not listed here. Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Acked-by: Yonghong Song Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- MAINTAINERS | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 51029a425dbe..6e13378f9746 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2848,6 +2848,9 @@ F: include/uapi/linux/if_bonding.h BPF (Safe dynamic programs and tools) M: Alexei Starovoitov M: Daniel Borkmann +R: Martin KaFai Lau +R: Song Liu +R: Yonghong Song L: netdev@vger.kernel.org L: linux-kernel@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git @@ -2873,6 +2876,8 @@ F: samples/bpf/ F: tools/bpf/ F: tools/lib/bpf/ F: tools/testing/selftests/bpf/ +K: bpf +N: bpf BPF JIT for ARM M: Shubham Bansal From b63195698dea6ea83eeede20e38dbc6ad67076b6 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Thu, 24 Jan 2019 17:40:34 +0100 Subject: [PATCH 0254/1436] scsi: zfcp: fix sysfs block queue limit output for max_segment_size Since v2.6.35 commit 683229845f17 ("[SCSI] zfcp: Report scatter-gather limits to SCSI and block layer"), zfcp set dma_parms.max_segment_size == PAGE_SIZE (but without using the setter dma_set_max_seg_size()) and scsi_host_template.dma_boundary == PAGE_SIZE - 1. v5.0-rc1 commit 50c2e9107f17 ("scsi: introduce a max_segment_size host_template parameters") introduced a new field scsi_host_template.max_segment_size. If an LLDD such as zfcp does not set it, scsi_host_alloc() uses BLK_MAX_SEGMENT_SIZE = 65536 for Scsi_Host.max_segment_size. __scsi_init_queue() announced the minimum of Scsi_Host.max_segment_size and dma_parms.max_segment_size to the block layer. For zfcp: min(65536, 4096) == 4096 which was still good. v5.0 commit a8cf59a6692c ("scsi: communicate max segment size to the DMA mapping code") announces Scsi_Host.max_segment_size to the block layer and overwrites dma_parms.max_segment_size with Scsi_Host.max_segment_size. For zfcp dma_parms.max_segment_size == Scsi_Host.max_segment_size == 65536 which is also reflected in block queue limits. $ cd /sys/bus/ccw/drivers/zfcp $ cd 0.0.3c40/host5/rport-5:0-4/target5:0:4/5:0:4:10/block/sdi/queue $ cat max_segment_size 65536 Zfcp I/O still works because dma_boundary implicitly still keeps the effective max segment size <= PAGE_SIZE. However, dma_boundary does not seem visible to user space, but max_segment_size is visible and shows a misleading wrong value. Fix it and inherit the stable tag of a8cf59a6692c. Devices on our bus ccw support DMA but no DMA mapping. Of multiple device types on the ccw bus, only zfcp needs dma_parms for SCSI limits. So, leave dma_parms setup in zfcp and do not move it to the bus. Signed-off-by: Steffen Maier Fixes: 50c2e9107f ("scsi: introduce a max_segment_size host_template parameters") Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_aux.c | 1 - drivers/s390/scsi/zfcp_scsi.c | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index 9cf30d124b9e..e390f8c6d5f3 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -403,7 +403,6 @@ struct zfcp_adapter *zfcp_adapter_enqueue(struct ccw_device *ccw_device) goto failed; /* report size limit per scatter-gather segment */ - adapter->dma_parms.max_segment_size = ZFCP_QDIO_SBALE_LEN; adapter->ccw_device->dev.dma_parms = &adapter->dma_parms; adapter->stat_read_buf_num = FSF_STATUS_READS_RECOM; diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 00acc7144bbc..f4f6a07c5222 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -428,6 +428,8 @@ static struct scsi_host_template zfcp_scsi_host_template = { .max_sectors = (((QDIO_MAX_ELEMENTS_PER_BUFFER - 1) * ZFCP_QDIO_MAX_SBALS_PER_REQ) - 2) * 8, /* GCD, adjusted later */ + /* report size limit per scatter-gather segment */ + .max_segment_size = ZFCP_QDIO_SBALE_LEN, .dma_boundary = ZFCP_QDIO_SBALE_LEN - 1, .shost_attrs = zfcp_sysfs_shost_attrs, .sdev_attrs = zfcp_sysfs_sdev_attrs, From 5d8fc4a9f0eec20b6c07895022a6bea3fb6dfb38 Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Thu, 24 Jan 2019 13:25:42 +0800 Subject: [PATCH 0255/1436] scsi: libfc: free skb when receiving invalid flogi resp The issue to be fixed in this commit is when libfc found it received a invalid FLOGI response from FC switch, it would return without freeing the fc frame, which is just the skb data. This would cause memory leak if FC switch keeps sending invalid FLOGI responses. This fix is just to make it execute `fc_frame_free(fp)` before returning from function `fc_lport_flogi_resp`. Signed-off-by: Ming Lu Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/libfc/fc_lport.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index be83590ed955..ff943f477d6f 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -1726,14 +1726,14 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, fc_frame_payload_op(fp) != ELS_LS_ACC) { FC_LPORT_DBG(lport, "FLOGI not accepted or bad response\n"); fc_lport_error(lport, fp); - goto err; + goto out; } flp = fc_frame_payload_get(fp, sizeof(*flp)); if (!flp) { FC_LPORT_DBG(lport, "FLOGI bad response\n"); fc_lport_error(lport, fp); - goto err; + goto out; } mfs = ntohs(flp->fl_csp.sp_bb_data) & @@ -1743,7 +1743,7 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, FC_LPORT_DBG(lport, "FLOGI bad mfs:%hu response, " "lport->mfs:%hu\n", mfs, lport->mfs); fc_lport_error(lport, fp); - goto err; + goto out; } if (mfs <= lport->mfs) { From 40d07b523cf434f252b134c86b1f8f2d907ffb0b Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Fri, 25 Jan 2019 12:46:09 -0500 Subject: [PATCH 0256/1436] scsi: scsi_debug: fix write_same with virtual_gb problem The WRITE SAME(10) and (16) implementations didn't take account of the buffer wrap required when the virtual_gb parameter is greater than 0. Fix that and rename the fake_store() function to lba2fake_store() to lessen confusion with the global fake_storep pointer. Bump version date. Signed-off-by: Douglas Gilbert Reported-by: Bart Van Assche Tested by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 41 ++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 661512bec3ac..e27f4df24021 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -62,7 +62,7 @@ /* make sure inq_product_rev string corresponds to this version */ #define SDEBUG_VERSION "0188" /* format to fit INQUIRY revision field */ -static const char *sdebug_version_date = "20180128"; +static const char *sdebug_version_date = "20190125"; #define MY_NAME "scsi_debug" @@ -735,7 +735,7 @@ static inline bool scsi_debug_lbp(void) (sdebug_lbpu || sdebug_lbpws || sdebug_lbpws10); } -static void *fake_store(unsigned long long lba) +static void *lba2fake_store(unsigned long long lba) { lba = do_div(lba, sdebug_store_sectors); @@ -2514,8 +2514,8 @@ static int do_device_access(struct scsi_cmnd *scmd, u32 sg_skip, u64 lba, return ret; } -/* If fake_store(lba,num) compares equal to arr(num), then copy top half of - * arr into fake_store(lba,num) and return true. If comparison fails then +/* If lba2fake_store(lba,num) compares equal to arr(num), then copy top half of + * arr into lba2fake_store(lba,num) and return true. If comparison fails then * return false. */ static bool comp_write_worker(u64 lba, u32 num, const u8 *arr) { @@ -2643,7 +2643,7 @@ static int prot_verify_read(struct scsi_cmnd *SCpnt, sector_t start_sec, if (sdt->app_tag == cpu_to_be16(0xffff)) continue; - ret = dif_verify(sdt, fake_store(sector), sector, ei_lba); + ret = dif_verify(sdt, lba2fake_store(sector), sector, ei_lba); if (ret) { dif_errors++; return ret; @@ -3261,10 +3261,12 @@ err_out: static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, u32 ei_lba, bool unmap, bool ndob) { + int ret; unsigned long iflags; unsigned long long i; - int ret; - u64 lba_off; + u32 lb_size = sdebug_sector_size; + u64 block, lbaa; + u8 *fs1p; ret = check_device_access_params(scp, lba, num); if (ret) @@ -3276,31 +3278,30 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, unmap_region(lba, num); goto out; } - - lba_off = lba * sdebug_sector_size; + lbaa = lba; + block = do_div(lbaa, sdebug_store_sectors); /* if ndob then zero 1 logical block, else fetch 1 logical block */ + fs1p = fake_storep + (block * lb_size); if (ndob) { - memset(fake_storep + lba_off, 0, sdebug_sector_size); + memset(fs1p, 0, lb_size); ret = 0; } else - ret = fetch_to_dev_buffer(scp, fake_storep + lba_off, - sdebug_sector_size); + ret = fetch_to_dev_buffer(scp, fs1p, lb_size); if (-1 == ret) { write_unlock_irqrestore(&atomic_rw, iflags); return DID_ERROR << 16; - } else if (sdebug_verbose && !ndob && (ret < sdebug_sector_size)) + } else if (sdebug_verbose && !ndob && (ret < lb_size)) sdev_printk(KERN_INFO, scp->device, "%s: %s: lb size=%u, IO sent=%d bytes\n", - my_name, "write same", - sdebug_sector_size, ret); + my_name, "write same", lb_size, ret); /* Copy first sector to remaining blocks */ - for (i = 1 ; i < num ; i++) - memcpy(fake_storep + ((lba + i) * sdebug_sector_size), - fake_storep + lba_off, - sdebug_sector_size); - + for (i = 1 ; i < num ; i++) { + lbaa = lba + i; + block = do_div(lbaa, sdebug_store_sectors); + memmove(fake_storep + (block * lb_size), fs1p, lb_size); + } if (scsi_debug_lbp()) map_region(lba, num); out: From b2d3492fc591b1fb46b81d79ca1fc44cac6ae0ae Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 24 Jan 2019 13:29:40 +0300 Subject: [PATCH 0257/1436] scsi: bnx2fc: Fix error handling in probe() There are two issues here. First if cmgr->hba is not set early enough then it leads to a NULL dereference. Second if we don't completely initialize cmgr->io_bdt_pool[] then we end up dereferencing uninitialized pointers. Fixes: 853e2bd2103a ("[SCSI] bnx2fc: Broadcom FCoE offload driver") Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2fc/bnx2fc_io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index 350257c13a5b..bc9f2a2365f4 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -240,6 +240,7 @@ struct bnx2fc_cmd_mgr *bnx2fc_cmd_mgr_alloc(struct bnx2fc_hba *hba) return NULL; } + cmgr->hba = hba; cmgr->free_list = kcalloc(arr_sz, sizeof(*cmgr->free_list), GFP_KERNEL); if (!cmgr->free_list) { @@ -256,7 +257,6 @@ struct bnx2fc_cmd_mgr *bnx2fc_cmd_mgr_alloc(struct bnx2fc_hba *hba) goto mem_err; } - cmgr->hba = hba; cmgr->cmds = (struct bnx2fc_cmd **)(cmgr + 1); for (i = 0; i < arr_sz; i++) { @@ -295,7 +295,7 @@ struct bnx2fc_cmd_mgr *bnx2fc_cmd_mgr_alloc(struct bnx2fc_hba *hba) /* Allocate pool of io_bdts - one for each bnx2fc_cmd */ mem_size = num_ios * sizeof(struct io_bdt *); - cmgr->io_bdt_pool = kmalloc(mem_size, GFP_KERNEL); + cmgr->io_bdt_pool = kzalloc(mem_size, GFP_KERNEL); if (!cmgr->io_bdt_pool) { printk(KERN_ERR PFX "failed to alloc io_bdt_pool\n"); goto mem_err; From 8437fcf14deed67e5ad90b5e8abf62fb20f30881 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 24 Jan 2019 13:33:27 +0300 Subject: [PATCH 0258/1436] scsi: 53c700: pass correct "dev" to dma_alloc_attrs() The "hostdata->dev" pointer is NULL here. We set "hostdata->dev = dev;" later in the function and we also use "hostdata->dev" when we call dma_free_attrs() in NCR_700_release(). This bug predates git version control. Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/scsi/53c700.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index 128d658d472a..16957d7ac414 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -295,7 +295,7 @@ NCR_700_detect(struct scsi_host_template *tpnt, if(tpnt->sdev_attrs == NULL) tpnt->sdev_attrs = NCR_700_dev_attrs; - memory = dma_alloc_attrs(hostdata->dev, TOTAL_MEM_SIZE, &pScript, + memory = dma_alloc_attrs(dev, TOTAL_MEM_SIZE, &pScript, GFP_KERNEL, DMA_ATTR_NON_CONSISTENT); if(memory == NULL) { printk(KERN_ERR "53c700: Failed to allocate memory for driver, detaching\n"); From 9baddb61dfec0215da5d10a1d173790443e276c2 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Mon, 21 Jan 2019 23:19:57 +0000 Subject: [PATCH 0259/1436] mfd: Fix unmet dependency warning for MFD_TPS68470 After commit 5d32a66541c4 ("PCI/ACPI: Allow ACPI to be built without CONFIG_PCI set") dependencies on CONFIG_PCI that previously were satisfied implicitly through dependencies on CONFIG_ACPI have to be specified directly. WARNING: unmet direct dependencies detected for I2C_DESIGNWARE_PLATFORM Depends on [n]: I2C [=y] && HAS_IOMEM [=y] && (ACPI [=y] && COMMON_CLK [=n] || !ACPI [=y]) Selected by [y]: - MFD_TPS68470 [=y] && HAS_IOMEM [=y] && ACPI [=y] && I2C [=y]=y MFD_TPS68470 is an ACPI only device and selects I2C_DESIGNWARE_PLATFORM. I2C_DESIGNWARE_PLATFORM does not have any configuration today for ACPI support without CONFIG_PCI set. For sake of a quick fix this introduces a new mandatory dependency to the driver which may survive without it. Otherwise we need to revisit the driver architecture to address this properly. Fixes: 5d32a66541c46 ("PCI/ACPI: Allow ACPI to be built without CONFIG_PCI set") Signed-off-by: Sinan Kaya Acked-by: Lee Jones Signed-off-by: Rafael J. Wysocki --- drivers/mfd/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index f461460a2aeb..76f9909cf396 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1419,7 +1419,7 @@ config MFD_TPS65217 config MFD_TPS68470 bool "TI TPS68470 Power Management / LED chips" - depends on ACPI && I2C=y + depends on ACPI && PCI && I2C=y select MFD_CORE select REGMAP_I2C select I2C_DESIGNWARE_PLATFORM From d58bf90a32a33becec78c3034e781735049fcd25 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Thu, 24 Jan 2019 19:31:00 +0000 Subject: [PATCH 0260/1436] platform/x86: Fix unmet dependency warning for ACPI_CMPC Add BACKLIGHT_LCD_SUPPORT for ACPI_CMPC to fix the warning: unmet direct dependencies detected for BACKLIGHT_CLASS_DEVICE. ACPI_CMPC selects BACKLIGHT_CLASS_DEVICE but BACKLIGHT_CLASS_DEVICE depends on BACKLIGHT_LCD_SUPPORT. Copy BACKLIGHT_LCD_SUPPORT dependency into ACPI_CMPC to fix WARNING: unmet direct dependencies detected for BACKLIGHT_CLASS_DEVICE Depends on [n]: HAS_IOMEM [=y] && BACKLIGHT_LCD_SUPPORT [=n] Selected by [y]: - ACPI_CMPC [=y] && X86 [=y] && X86_PLATFORM_DEVICES [=y] && ACPI [=y] && INPUT [=y] && (RFKILL [=n] || RFKILL [=n]=n) Signed-off-by: Sinan Kaya Acked-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- drivers/platform/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 5e2109c54c7c..11810591840d 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -905,6 +905,7 @@ config TOSHIBA_WMI config ACPI_CMPC tristate "CMPC Laptop Extras" depends on ACPI && INPUT + depends on BACKLIGHT_LCD_SUPPORT depends on RFKILL || RFKILL=n select BACKLIGHT_CLASS_DEVICE help From 0ee4b5f801b73b83a9fb3921d725f2162fd4a2e5 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Thu, 24 Jan 2019 19:31:01 +0000 Subject: [PATCH 0261/1436] platform/x86: Fix unmet dependency warning for SAMSUNG_Q10 Add BACKLIGHT_LCD_SUPPORT for SAMSUNG_Q10 to fix the warning: unmet direct dependencies detected for BACKLIGHT_CLASS_DEVICE. SAMSUNG_Q10 selects BACKLIGHT_CLASS_DEVICE but BACKLIGHT_CLASS_DEVICE depends on BACKLIGHT_LCD_SUPPORT. Copy BACKLIGHT_LCD_SUPPORT dependency into SAMSUNG_Q10 to fix: WARNING: unmet direct dependencies detected for BACKLIGHT_CLASS_DEVICE Depends on [n]: HAS_IOMEM [=y] && BACKLIGHT_LCD_SUPPORT [=n] Selected by [y]: - SAMSUNG_Q10 [=y] && X86 [=y] && X86_PLATFORM_DEVICES [=y] && ACPI [=y] Signed-off-by: Sinan Kaya Acked-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- drivers/platform/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 11810591840d..b5e9db85e881 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -1129,6 +1129,7 @@ config INTEL_OAKTRAIL config SAMSUNG_Q10 tristate "Samsung Q10 Extras" depends on ACPI + depends on BACKLIGHT_LCD_SUPPORT select BACKLIGHT_CLASS_DEVICE ---help--- This driver provides support for backlight control on Samsung Q10 From 13054abbaa4f1fd4e6f3b4b63439ec033b4c8035 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Tue, 29 Jan 2019 11:58:35 +0100 Subject: [PATCH 0262/1436] HID: debug: fix the ring buffer implementation Ring buffer implementation in hid_debug_event() and hid_debug_events_read() is strange allowing lost or corrupted data. After commit 717adfdaf147 ("HID: debug: check length before copy_to_user()") it is possible to enter an infinite loop in hid_debug_events_read() by providing 0 as count, this locks up a system. Fix this by rewriting the ring buffer implementation with kfifo and simplify the code. This fixes CVE-2019-3819. v2: fix an execution logic and add a comment v3: use __set_current_state() instead of set_current_state() Link: https://bugzilla.redhat.com/show_bug.cgi?id=1669187 Cc: stable@vger.kernel.org # v4.18+ Fixes: cd667ce24796 ("HID: use debugfs for events/reports dumping") Fixes: 717adfdaf147 ("HID: debug: check length before copy_to_user()") Signed-off-by: Vladis Dronov Reviewed-by: Oleg Nesterov Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-debug.c | 122 +++++++++++++++----------------------- include/linux/hid-debug.h | 9 ++- 2 files changed, 52 insertions(+), 79 deletions(-) diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index c530476edba6..ac9fda1b5a72 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -661,17 +662,12 @@ EXPORT_SYMBOL_GPL(hid_dump_device); /* enqueue string to 'events' ring buffer */ void hid_debug_event(struct hid_device *hdev, char *buf) { - unsigned i; struct hid_debug_list *list; unsigned long flags; spin_lock_irqsave(&hdev->debug_list_lock, flags); - list_for_each_entry(list, &hdev->debug_list, node) { - for (i = 0; buf[i]; i++) - list->hid_debug_buf[(list->tail + i) % HID_DEBUG_BUFSIZE] = - buf[i]; - list->tail = (list->tail + i) % HID_DEBUG_BUFSIZE; - } + list_for_each_entry(list, &hdev->debug_list, node) + kfifo_in(&list->hid_debug_fifo, buf, strlen(buf)); spin_unlock_irqrestore(&hdev->debug_list_lock, flags); wake_up_interruptible(&hdev->debug_wait); @@ -722,8 +718,7 @@ void hid_dump_input(struct hid_device *hdev, struct hid_usage *usage, __s32 valu hid_debug_event(hdev, buf); kfree(buf); - wake_up_interruptible(&hdev->debug_wait); - + wake_up_interruptible(&hdev->debug_wait); } EXPORT_SYMBOL_GPL(hid_dump_input); @@ -1083,8 +1078,8 @@ static int hid_debug_events_open(struct inode *inode, struct file *file) goto out; } - if (!(list->hid_debug_buf = kzalloc(HID_DEBUG_BUFSIZE, GFP_KERNEL))) { - err = -ENOMEM; + err = kfifo_alloc(&list->hid_debug_fifo, HID_DEBUG_FIFOSIZE, GFP_KERNEL); + if (err) { kfree(list); goto out; } @@ -1104,77 +1099,57 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct hid_debug_list *list = file->private_data; - int ret = 0, len; + int ret = 0, copied; DECLARE_WAITQUEUE(wait, current); mutex_lock(&list->read_mutex); - while (ret == 0) { - if (list->head == list->tail) { - add_wait_queue(&list->hdev->debug_wait, &wait); - set_current_state(TASK_INTERRUPTIBLE); + if (kfifo_is_empty(&list->hid_debug_fifo)) { + add_wait_queue(&list->hdev->debug_wait, &wait); + set_current_state(TASK_INTERRUPTIBLE); - while (list->head == list->tail) { - if (file->f_flags & O_NONBLOCK) { - ret = -EAGAIN; - break; - } - if (signal_pending(current)) { - ret = -ERESTARTSYS; - break; - } - - if (!list->hdev || !list->hdev->debug) { - ret = -EIO; - set_current_state(TASK_RUNNING); - goto out; - } - - /* allow O_NONBLOCK from other threads */ - mutex_unlock(&list->read_mutex); - schedule(); - mutex_lock(&list->read_mutex); - set_current_state(TASK_INTERRUPTIBLE); + while (kfifo_is_empty(&list->hid_debug_fifo)) { + if (file->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + break; } - set_current_state(TASK_RUNNING); - remove_wait_queue(&list->hdev->debug_wait, &wait); + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + /* if list->hdev is NULL we cannot remove_wait_queue(). + * if list->hdev->debug is 0 then hid_debug_unregister() + * was already called and list->hdev is being destroyed. + * if we add remove_wait_queue() here we can hit a race. + */ + if (!list->hdev || !list->hdev->debug) { + ret = -EIO; + set_current_state(TASK_RUNNING); + goto out; + } + + /* allow O_NONBLOCK from other threads */ + mutex_unlock(&list->read_mutex); + schedule(); + mutex_lock(&list->read_mutex); + set_current_state(TASK_INTERRUPTIBLE); } + __set_current_state(TASK_RUNNING); + remove_wait_queue(&list->hdev->debug_wait, &wait); + if (ret) goto out; - - /* pass the ringbuffer contents to userspace */ -copy_rest: - if (list->tail == list->head) - goto out; - if (list->tail > list->head) { - len = list->tail - list->head; - if (len > count) - len = count; - - if (copy_to_user(buffer + ret, &list->hid_debug_buf[list->head], len)) { - ret = -EFAULT; - goto out; - } - ret += len; - list->head += len; - } else { - len = HID_DEBUG_BUFSIZE - list->head; - if (len > count) - len = count; - - if (copy_to_user(buffer, &list->hid_debug_buf[list->head], len)) { - ret = -EFAULT; - goto out; - } - list->head = 0; - ret += len; - count -= len; - if (count > 0) - goto copy_rest; - } - } + + /* pass the fifo content to userspace, locking is not needed with only + * one concurrent reader and one concurrent writer + */ + ret = kfifo_to_user(&list->hid_debug_fifo, buffer, count, &copied); + if (ret) + goto out; + ret = copied; out: mutex_unlock(&list->read_mutex); return ret; @@ -1185,7 +1160,7 @@ static __poll_t hid_debug_events_poll(struct file *file, poll_table *wait) struct hid_debug_list *list = file->private_data; poll_wait(file, &list->hdev->debug_wait, wait); - if (list->head != list->tail) + if (!kfifo_is_empty(&list->hid_debug_fifo)) return EPOLLIN | EPOLLRDNORM; if (!list->hdev->debug) return EPOLLERR | EPOLLHUP; @@ -1200,7 +1175,7 @@ static int hid_debug_events_release(struct inode *inode, struct file *file) spin_lock_irqsave(&list->hdev->debug_list_lock, flags); list_del(&list->node); spin_unlock_irqrestore(&list->hdev->debug_list_lock, flags); - kfree(list->hid_debug_buf); + kfifo_free(&list->hid_debug_fifo); kfree(list); return 0; @@ -1246,4 +1221,3 @@ void hid_debug_exit(void) { debugfs_remove_recursive(hid_debug_root); } - diff --git a/include/linux/hid-debug.h b/include/linux/hid-debug.h index 8663f216c563..2d6100edf204 100644 --- a/include/linux/hid-debug.h +++ b/include/linux/hid-debug.h @@ -24,7 +24,10 @@ #ifdef CONFIG_DEBUG_FS +#include + #define HID_DEBUG_BUFSIZE 512 +#define HID_DEBUG_FIFOSIZE 512 void hid_dump_input(struct hid_device *, struct hid_usage *, __s32); void hid_dump_report(struct hid_device *, int , u8 *, int); @@ -37,11 +40,8 @@ void hid_debug_init(void); void hid_debug_exit(void); void hid_debug_event(struct hid_device *, char *); - struct hid_debug_list { - char *hid_debug_buf; - int head; - int tail; + DECLARE_KFIFO_PTR(hid_debug_fifo, char); struct fasync_struct *fasync; struct hid_device *hdev; struct list_head node; @@ -64,4 +64,3 @@ struct hid_debug_list { #endif #endif - From 3f47d00bc65ba11b3e5753d733a74ca4f7af3240 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 5 Jan 2019 09:01:05 +0100 Subject: [PATCH 0263/1436] drm/vmwgfx: remove CONFIG_X86 ifdefs The driver depends on CONFIG_X86 so these are dead code. Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Hellstrom Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 25afb1d594e3..69e325b2d954 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -565,7 +565,6 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) [vmw_dma_alloc_coherent] = "Using coherent TTM pages.", [vmw_dma_map_populate] = "Keeping DMA mappings.", [vmw_dma_map_bind] = "Giving up DMA mappings early."}; -#ifdef CONFIG_X86 const struct dma_map_ops *dma_ops = get_dma_ops(dev_priv->dev->dev); #ifdef CONFIG_INTEL_IOMMU @@ -607,11 +606,6 @@ out_fixup: if (dev_priv->map_mode == vmw_dma_alloc_coherent) return -EINVAL; #endif - -#else /* CONFIG_X86 */ - dev_priv->map_mode = vmw_dma_map_populate; -#endif /* CONFIG_X86 */ - DRM_INFO("DMA map mode: %s\n", names[dev_priv->map_mode]); return 0; From 9b5bf2421b43ef85568f9b875d6387a114e92efe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 5 Jan 2019 09:01:06 +0100 Subject: [PATCH 0264/1436] drm/vmwgfx: remove CONFIG_INTEL_IOMMU ifdefs v2 intel_iommu_enabled is defined as always false for !CONFIG_INTEL_IOMMU, so remove the ifdefs around it. Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Hellstrom Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 69e325b2d954..b7777b5b4a81 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -567,12 +567,10 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) [vmw_dma_map_bind] = "Giving up DMA mappings early."}; const struct dma_map_ops *dma_ops = get_dma_ops(dev_priv->dev->dev); -#ifdef CONFIG_INTEL_IOMMU if (intel_iommu_enabled) { dev_priv->map_mode = vmw_dma_map_populate; goto out_fixup; } -#endif if (!(vmw_force_iommu || vmw_force_coherent)) { dev_priv->map_mode = vmw_dma_phys; @@ -589,9 +587,7 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) dev_priv->map_mode = vmw_dma_map_populate; #endif -#ifdef CONFIG_INTEL_IOMMU out_fixup: -#endif if (dev_priv->map_mode == vmw_dma_map_populate && vmw_restrict_iommu) dev_priv->map_mode = vmw_dma_map_bind; @@ -599,13 +595,11 @@ out_fixup: if (vmw_force_coherent) dev_priv->map_mode = vmw_dma_alloc_coherent; -#if !defined(CONFIG_SWIOTLB) && !defined(CONFIG_INTEL_IOMMU) - /* - * No coherent page pool - */ - if (dev_priv->map_mode == vmw_dma_alloc_coherent) + /* No TTM coherent page pool? FIXME: Ask TTM instead! */ + if (!(IS_ENABLED(CONFIG_SWIOTLB) || IS_ENABLED(CONFIG_INTEL_IOMMU)) && + (dev_priv->map_mode == vmw_dma_alloc_coherent)) return -EINVAL; -#endif + DRM_INFO("DMA map mode: %s\n", names[dev_priv->map_mode]); return 0; @@ -619,7 +613,6 @@ out_fixup: * With 32-bit we can only handle 32 bit PFNs. Optionally set that * restriction also for 64-bit systems. */ -#ifdef CONFIG_INTEL_IOMMU static int vmw_dma_masks(struct vmw_private *dev_priv) { struct drm_device *dev = dev_priv->dev; @@ -631,12 +624,6 @@ static int vmw_dma_masks(struct vmw_private *dev_priv) } return 0; } -#else -static int vmw_dma_masks(struct vmw_private *dev_priv) -{ - return 0; -} -#endif static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) { From 2b3cd6249b14e25da14f13cd520eb336230a4422 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 5 Jan 2019 09:01:07 +0100 Subject: [PATCH 0265/1436] drm/vmwgfx: fix the check when to use dma_alloc_coherent Since Linux 4.21 we merged the swiotlb ops into the DMA direct ops, so they would always have a the sync_single methods. But late in the cicle we also removed the direct ops entirely, so we'd see NULL DMA ops. Switch vmw_dma_select_mode to only detect swiotlb presence using swiotlb_nr_tbl() instead. Fixes: 55897af630 ("dma-direct: merge swiotlb_dma_ops into the dma_direct code") Fixes: 356da6d0cd ("dma-mapping: bypass indirect calls for dma-direct") Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Hellstrom Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index b7777b5b4a81..1456101e67a9 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -565,7 +565,6 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) [vmw_dma_alloc_coherent] = "Using coherent TTM pages.", [vmw_dma_map_populate] = "Keeping DMA mappings.", [vmw_dma_map_bind] = "Giving up DMA mappings early."}; - const struct dma_map_ops *dma_ops = get_dma_ops(dev_priv->dev->dev); if (intel_iommu_enabled) { dev_priv->map_mode = vmw_dma_map_populate; @@ -578,14 +577,12 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) return 0; } - dev_priv->map_mode = vmw_dma_map_populate; - - if (dma_ops && dma_ops->sync_single_for_cpu) - dev_priv->map_mode = vmw_dma_alloc_coherent; #ifdef CONFIG_SWIOTLB - if (swiotlb_nr_tbl() == 0) - dev_priv->map_mode = vmw_dma_map_populate; + if (swiotlb_nr_tbl()) + dev_priv->map_mode = vmw_dma_alloc_coherent; + else #endif + dev_priv->map_mode = vmw_dma_map_populate; out_fixup: if (dev_priv->map_mode == vmw_dma_map_populate && From 05f9467e70ed7c9cd19fd3d42346941cdc03eef3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 5 Jan 2019 09:01:08 +0100 Subject: [PATCH 0266/1436] drm/vmwgfx: unwind spaghetti code in vmw_dma_select_mode Just use a simple if/else chain to select the DMA mode. Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Hellstrom Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 35 +++++++++-------------------- 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 1456101e67a9..3e2bcff34032 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -566,31 +566,19 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) [vmw_dma_map_populate] = "Keeping DMA mappings.", [vmw_dma_map_bind] = "Giving up DMA mappings early."}; - if (intel_iommu_enabled) { - dev_priv->map_mode = vmw_dma_map_populate; - goto out_fixup; - } - - if (!(vmw_force_iommu || vmw_force_coherent)) { - dev_priv->map_mode = vmw_dma_phys; - DRM_INFO("DMA map mode: %s\n", names[dev_priv->map_mode]); - return 0; - } - -#ifdef CONFIG_SWIOTLB - if (swiotlb_nr_tbl()) - dev_priv->map_mode = vmw_dma_alloc_coherent; - else -#endif - dev_priv->map_mode = vmw_dma_map_populate; - -out_fixup: - if (dev_priv->map_mode == vmw_dma_map_populate && - vmw_restrict_iommu) - dev_priv->map_mode = vmw_dma_map_bind; - if (vmw_force_coherent) dev_priv->map_mode = vmw_dma_alloc_coherent; + else if (intel_iommu_enabled) + dev_priv->map_mode = vmw_dma_map_populate; + else if (!vmw_force_iommu) + dev_priv->map_mode = vmw_dma_phys; + else if (IS_ENABLED(CONFIG_SWIOTLB) && swiotlb_nr_tbl()) + dev_priv->map_mode = vmw_dma_alloc_coherent; + else + dev_priv->map_mode = vmw_dma_map_populate; + + if (dev_priv->map_mode == vmw_dma_map_populate && vmw_restrict_iommu) + dev_priv->map_mode = vmw_dma_map_bind; /* No TTM coherent page pool? FIXME: Ask TTM instead! */ if (!(IS_ENABLED(CONFIG_SWIOTLB) || IS_ENABLED(CONFIG_INTEL_IOMMU)) && @@ -598,7 +586,6 @@ out_fixup: return -EINVAL; DRM_INFO("DMA map mode: %s\n", names[dev_priv->map_mode]); - return 0; } From 0899dd34d095a352de244055aa611bbb3208e8cb Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Fri, 26 Oct 2018 10:01:39 +0300 Subject: [PATCH 0267/1436] iwlwifi: pcie: fix the use of a wrong define The code checks that we haven't exceeded the maximum number of TBs by comparing to a define of gen1 instead of gen2, fix it. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index d2bd7f528a20..8807ea9d693c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -216,7 +216,7 @@ static int iwl_pcie_gen2_set_tb(struct iwl_trans *trans, int idx = iwl_pcie_gen2_get_num_tbs(trans, tfd); struct iwl_tfh_tb *tb; - if (WARN_ON(idx >= IWL_NUM_OF_TBS)) + if (WARN_ON(idx >= IWL_TFH_NUM_TBS)) return -EINVAL; tb = &tfd->tbs[idx]; From 937c2652768f931ac2f8ce0aa35476ccb0a603d0 Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Wed, 24 Oct 2018 12:36:18 +0300 Subject: [PATCH 0268/1436] iwlwifi: iwlmvm: ignore HE PPDU type regarding EOF When setting the EOF bit in Rx flags (propagated to radiotap) do not depend it on the PPDU type (SU/MU/TB) since it doesn't. Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 7bd8676508f5..786e93f6b502 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -1158,14 +1158,12 @@ static void iwl_mvm_rx_he(struct iwl_mvm *mvm, struct sk_buff *skb, /* temporarily hide the radiotap data */ __skb_pull(skb, radiotap_len); - if (phy_data->info_type == IWL_RX_PHY_INFO_TYPE_HE_SU) { - /* report the AMPDU-EOF bit on single frames */ - if (!queue && !(phy_info & IWL_RX_MPDU_PHY_AMPDU)) { - rx_status->flag |= RX_FLAG_AMPDU_DETAILS; - rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT_KNOWN; - if (phy_data->d0 & cpu_to_le32(IWL_RX_PHY_DATA0_HE_DELIM_EOF)) - rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT; - } + /* report the AMPDU-EOF bit on single frames */ + if (!queue && !(phy_info & IWL_RX_MPDU_PHY_AMPDU)) { + rx_status->flag |= RX_FLAG_AMPDU_DETAILS; + rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT_KNOWN; + if (phy_data->d0 & cpu_to_le32(IWL_RX_PHY_DATA0_HE_DELIM_EOF)) + rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT; } if (phy_info & IWL_RX_MPDU_PHY_TSF_OVERLOAD) @@ -1178,9 +1176,7 @@ static void iwl_mvm_rx_he(struct iwl_mvm *mvm, struct sk_buff *skb, bool toggle_bit = phy_info & IWL_RX_MPDU_PHY_AMPDU_TOGGLE; /* toggle is switched whenever new aggregation starts */ - if (toggle_bit != mvm->ampdu_toggle && - (he_type == RATE_MCS_HE_TYPE_MU || - he_type == RATE_MCS_HE_TYPE_SU)) { + if (toggle_bit != mvm->ampdu_toggle) { rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT_KNOWN; if (phy_data->d0 & cpu_to_le32(IWL_RX_PHY_DATA0_HE_DELIM_EOF)) rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT; From c97781d1d9563dc594074177dfedd848da648223 Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Wed, 24 Oct 2018 18:37:46 +0300 Subject: [PATCH 0269/1436] iwlwifi: iwlmvm: in monitor NDP notif take the NSS from rx_vec Take the NSS value from 'rx_vec' rather than from 'rate_n_flags'. The rate_n_flags has only 2 bits for the NSS giving a max of 4SS (0 = 1SS etc.). Since there may be up to 8SS use the rx_vec which has 3 bits for the NSS. While at it, fix the rx_vec array to length of 2. Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/api/rx.h | 5 ++++- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 17 +++++++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h index b4f7ea30a7c1..11c25f32a286 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h @@ -719,6 +719,9 @@ struct iwl_rx_mpdu_desc { #define RX_NO_DATA_FRAME_TIME_POS 0 #define RX_NO_DATA_FRAME_TIME_MSK (0xfffff << RX_NO_DATA_FRAME_TIME_POS) +#define RX_NO_DATA_RX_VEC0_HE_NSTS_MSK 0x03800000 +#define RX_NO_DATA_RX_VEC0_VHT_NSTS_MSK 0x38000000 + /** * struct iwl_rx_no_data - RX no data descriptor * @info: 7:0 frame type, 15:8 RX error type @@ -739,7 +742,7 @@ struct iwl_rx_no_data { __le32 fr_time; __le32 rate; __le32 phy_info[2]; - __le32 rx_vec[3]; + __le32 rx_vec[2]; } __packed; /* RX_NO_DATA_NTFY_API_S_VER_1 */ /** diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 786e93f6b502..07ddd7bc07ce 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -1701,15 +1701,24 @@ void iwl_mvm_rx_monitor_ndp(struct iwl_mvm *mvm, struct napi_struct *napi, } else if (rate_n_flags & RATE_MCS_VHT_MSK) { u8 stbc = (rate_n_flags & RATE_MCS_STBC_MSK) >> RATE_MCS_STBC_POS; - rx_status->nss = - ((rate_n_flags & RATE_VHT_MCS_NSS_MSK) >> - RATE_VHT_MCS_NSS_POS) + 1; rx_status->rate_idx = rate_n_flags & RATE_VHT_MCS_RATE_CODE_MSK; rx_status->encoding = RX_ENC_VHT; rx_status->enc_flags |= stbc << RX_ENC_FLAG_STBC_SHIFT; if (rate_n_flags & RATE_MCS_BF_MSK) rx_status->enc_flags |= RX_ENC_FLAG_BF; - } else if (!(rate_n_flags & RATE_MCS_HE_MSK)) { + /* + * take the nss from the rx_vec since the rate_n_flags has + * only 2 bits for the nss which gives a max of 4 ss but + * there may be up to 8 spatial streams + */ + rx_status->nss = + le32_get_bits(desc->rx_vec[0], + RX_NO_DATA_RX_VEC0_VHT_NSTS_MSK) + 1; + } else if (rate_n_flags & RATE_MCS_HE_MSK) { + rx_status->nss = + le32_get_bits(desc->rx_vec[0], + RX_NO_DATA_RX_VEC0_HE_NSTS_MSK) + 1; + } else { int rate = iwl_mvm_legacy_rate_to_mac80211_idx(rate_n_flags, rx_status->band); From 85d78bb173db2b12b30ae786e54038046d6d9d47 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 24 Oct 2018 13:43:26 +0300 Subject: [PATCH 0270/1436] iwlwifi: pcie: add prints to track virtual ID In case there are bugs in this area, this data can help with debugging. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 541da1d44762..4ecf87c1cda3 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -256,6 +256,9 @@ static void iwl_pcie_restock_bd(struct iwl_trans *trans, bd[rxq->write] = cpu_to_le64(rxb->page_dma | rxb->vid); } + + IWL_DEBUG_RX(trans, "Assigned virtual RB ID %u to queue %d index %d\n", + (u32)rxb->vid, rxq->id, rxq->write); } /* @@ -1342,6 +1345,8 @@ static struct iwl_rx_mem_buffer *iwl_pcie_get_rxb(struct iwl_trans *trans, if (rxb->invalid) goto out_err; + IWL_DEBUG_RX(trans, "Got virtual RB ID %u\n", (u32)rxb->vid); + if (trans->cfg->device_family >= IWL_DEVICE_FAMILY_22560) rxb->size = le32_to_cpu(rxq->cd[i].size) & IWL_RX_CD_SIZE; @@ -1393,11 +1398,12 @@ restart: emergency = true; } + IWL_DEBUG_RX(trans, "Q %d: HW = %d, SW = %d\n", rxq->id, r, i); + rxb = iwl_pcie_get_rxb(trans, rxq, i); if (!rxb) goto out; - IWL_DEBUG_RX(trans, "Q %d: HW = %d, SW = %d\n", rxq->id, r, i); iwl_pcie_rx_handle_rb(trans, rxq, rxb, emergency, i); i = (i + 1) & (rxq->queue_size - 1); From 677837b8b3ea10bcffa0ec8aec503a362b52c8a2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Oct 2018 11:04:30 +0200 Subject: [PATCH 0271/1436] iwlwifi: mvm: fix %16 to %016 print format With just %16, it means 16 characters padding, but we really don't want to print "0x 1F4547B", but instead want to have this filled with zeroes, so we need the 0. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 11714eb8da5e..b1d6dea7672e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -1345,7 +1345,7 @@ void iwl_mvm_rx_beacon_notif(struct iwl_mvm *mvm, agg_status = iwl_mvm_get_agg_status(mvm, beacon_notify_hdr); status = le16_to_cpu(agg_status->status) & TX_STATUS_MSK; IWL_DEBUG_RX(mvm, - "beacon status %#x retries:%d tsf:0x%16llX gp2:0x%X rate:%d\n", + "beacon status %#x retries:%d tsf:0x%016llX gp2:0x%X rate:%d\n", status, beacon_notify_hdr->failure_frame, le64_to_cpu(beacon->tsf), mvm->ap_last_beacon_gp2, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index e8659bf50890..5dbbd35ee630 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -248,7 +248,7 @@ void iwl_mvm_rx_fw_error(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) IWL_ERR(mvm, "FW Error notification: seq 0x%04X service 0x%08X\n", le16_to_cpu(err_resp->bad_cmd_seq_num), le32_to_cpu(err_resp->error_service)); - IWL_ERR(mvm, "FW Error notification: timestamp 0x%16llX\n", + IWL_ERR(mvm, "FW Error notification: timestamp 0x%016llX\n", le64_to_cpu(err_resp->timestamp)); } From 486af86332c7f062e0fc63c604023f2bbe2c50d3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Oct 2018 09:44:03 +0100 Subject: [PATCH 0272/1436] iwlwifi: mvm: read IWL_RX_MPDU_PHY_SHORT_PREAMBLE only for CCK Due to a general shortage of RX API bits, the firmware is going to reuse this bit on non-CCK frames to mean something else. Use it only on CCK frames to prepare for that change. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 07ddd7bc07ce..9342f61b702e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -1408,7 +1408,8 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, rx_status->flag |= RX_FLAG_FAILED_FCS_CRC; } /* set the preamble flag if appropriate */ - if (phy_info & IWL_RX_MPDU_PHY_SHORT_PREAMBLE) + if (rate_n_flags & RATE_MCS_CCK_MSK && + phy_info & IWL_RX_MPDU_PHY_SHORT_PREAMBLE) rx_status->enc_flags |= RX_ENC_FLAG_SHORTPRE; if (likely(!(phy_info & IWL_RX_MPDU_PHY_TSF_OVERLOAD))) { From cefec29ebdde94b364e5b31a802034df259344ab Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 24 Oct 2018 09:04:36 +0200 Subject: [PATCH 0273/1436] iwlwifi: pcie: align licensing to dual GPL/BSD These files have a long history of code changes, but analysing the remaining code leads to having only a few changes that are not already owned by Intel, notably from - Andy Lutomirski - Joonwoo Park - Kirtika Ruchandani - Rajat Jain - Stanislaw Gruszka remaining in the code today. Note that - I myself was working for Intel and for any possibly code that might be before my employment there give permission - Wizery employees were working for Intel More specifically, we identified the following commits that (partially may) remain today: 25c03d8e8c13 Joonwoo Park ("iwlwifi: do not schedule tasklet when rcv unused irq") f36d04abe684 Stanislaw Gruszka ("iwlwifi: use dma_alloc_coherent") 387f3381f732 Stanislaw Gruszka ("iwlwifi: fix dma mappings and skbs leak") 2624e96ce16b Stanislaw Gruszka ("iwlwifi: fix possible data overwrite in hcmd callback") bfe4b80e9f73 Stanislaw Gruszka ("iwlwifi: always check if got h/w access before write") d536c32b45d2 Andy Lutomirski ("iwlwifi: pcie: log when waking the NIC for hcmd submission fails") a6d24fad00d9 Rajat Jain ("iwlwifi: pcie: dump registers when HW becomes inaccessible") fb12777ab59b Kirtika Ruchandani ("iwlwifi: Add more call-sites for pcie reg dumper") 3a73a30049f2 Stanislaw Gruszka ("iwlwifi: cleanup/fix memory barriers") aa5affbacb24 Stanislaw Gruszka ("iwlwifi: dump stack when fail to gain access to the device") Align the licenses with their permission to clean up and to make it all identical. CC: Joonwoo Park CC: Stanislaw Gruszka CC: Andy Lutomirski CC: Rajat Jain CC: Kirtika Ruchandani Acked-by: Johannes Berg Acked-by: Kirtika Ruchandani Acked-by: Stanislaw Gruszka Acked-by: Joonwoo Park Acked-by: Rajat Jain Acked-by: Andy Lutomirski Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-io.c | 41 +++++++++++++++-- drivers/net/wireless/intel/iwlwifi/iwl-io.h | 38 ++++++++++++++-- .../wireless/intel/iwlwifi/pcie/internal.h | 44 +++++++++++++++++-- drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 44 +++++++++++++++++-- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 44 +++++++++++++++++-- 5 files changed, 192 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-io.c b/drivers/net/wireless/intel/iwlwifi/iwl-io.c index 4f10914f6048..ffd1e649bfa0 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-io.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-io.c @@ -1,10 +1,13 @@ /****************************************************************************** + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY * * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2015 - 2016 Intel Deutschland GmbH * - * Portions of this file are derived from the ipw3945 project. - * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -15,12 +18,44 @@ * more details. * * The full GNU General Public License is included in this distribution in the - * file called LICENSE. + * file called COPYING. * * Contact Information: * Intel Linux Wireless * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 * + * BSD LICENSE + * + * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved. + * Copyright(c) 2015 - 2016 Intel Deutschland GmbH + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * *****************************************************************************/ #include #include diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-io.h b/drivers/net/wireless/intel/iwlwifi/iwl-io.h index 38085850a2d3..61477e58352d 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-io.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-io.h @@ -1,8 +1,9 @@ /****************************************************************************** * - * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved. + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. * - * Portions of this file are derived from the ipw3945 project. + * GPL LICENSE SUMMARY * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -14,14 +15,43 @@ * more details. * * The full GNU General Public License is included in this distribution in the - * file called LICENSE. + * file called COPYING. * * Contact Information: * Intel Linux Wireless * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 * + * BSD LICENSE + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * *****************************************************************************/ - #ifndef __iwl_io_h__ #define __iwl_io_h__ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 971785b17e09..0d16bcc3141f 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -1,13 +1,15 @@ /****************************************************************************** + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY * * Copyright(c) 2003 - 2015 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH * Copyright(c) 2018 Intel Corporation * - * Portions of this file are derived from the ipw3945 project, as well - * as portions of the ieee80211 subsystem header files. - * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,12 +20,46 @@ * more details. * * The full GNU General Public License is included in this distribution in the - * file called LICENSE. + * file called COPYING. * * Contact Information: * Intel Linux Wireless * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 * + * BSD LICENSE + * + * Copyright(c) 2003 - 2015 Intel Corporation. All rights reserved. + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * *****************************************************************************/ #ifndef __iwl_trans_int_pcie_h__ #define __iwl_trans_int_pcie_h__ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 4ecf87c1cda3..77c0b343456d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -1,13 +1,15 @@ /****************************************************************************** + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY * * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH * Copyright(c) 2018 Intel Corporation * - * Portions of this file are derived from the ipw3945 project, as well - * as portions of the ieee80211 subsystem header files. - * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,12 +20,46 @@ * more details. * * The full GNU General Public License is included in this distribution in the - * file called LICENSE. + * file called COPYING. * * Contact Information: * Intel Linux Wireless * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 * + * BSD LICENSE + * + * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved. + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * *****************************************************************************/ #include #include diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 7073116c73b7..07395502f419 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1,13 +1,15 @@ /****************************************************************************** + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY * * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH * Copyright(c) 2018 Intel Corporation * - * Portions of this file are derived from the ipw3945 project, as well - * as portions of the ieee80211 subsystem header files. - * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. @@ -18,12 +20,46 @@ * more details. * * The full GNU General Public License is included in this distribution in the - * file called LICENSE. + * file called COPYING. * * Contact Information: * Intel Linux Wireless * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 * + * BSD LICENSE + * + * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved. + * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * *****************************************************************************/ #include #include From e6aeeb4f45178197c956a5795f49648db67607bd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Oct 2018 13:50:05 +0100 Subject: [PATCH 0274/1436] iwlwifi: mvm: clean up LDBG config command usage Clean up the LDBG config command to not be called "continuous recording", and while at it actually remove the continuous recording implementation completely since it was only used for store & forward architectures. This also fixes a bug at least in iwl_fw_dbg_buffer_allocation() because what's now "__le32 type" (matching the firmware) used to be "__le16 enable_recording", so the buffer allocation config sub-struct would erroneously have started at the wrong offset. In the other cases this didn't actually lead to a bug as other bytes in pad[] were all zeroes, so accessing the 16-bit value as a 32-bit value wouldn't make a difference (in little endian.) Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/fw/api/debug.h | 33 ++++++-------- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 12 +++--- drivers/net/wireless/intel/iwlwifi/fw/dbg.h | 13 +++--- .../net/wireless/intel/iwlwifi/mvm/debugfs.c | 43 ------------------- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 3 ++ 5 files changed, 29 insertions(+), 75 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h b/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h index dc1fa377087a..988584973aba 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h @@ -335,29 +335,11 @@ struct iwl_dbg_mem_access_rsp { __le32 data[]; } __packed; /* DEBUG_(U|L)MAC_RD_WR_RSP_API_S_VER_1 */ -#define CONT_REC_COMMAND_SIZE 80 -#define ENABLE_CONT_RECORDING 0x15 -#define DISABLE_CONT_RECORDING 0x16 +#define LDBG_CFG_COMMAND_SIZE 80 #define BUFFER_ALLOCATION 0x27 #define START_DEBUG_RECORDING 0x29 #define STOP_DEBUG_RECORDING 0x2A -/* - * struct iwl_continuous_record_mode - recording mode - */ -struct iwl_continuous_record_mode { - __le16 enable_recording; -} __packed; - -/* - * struct iwl_continuous_record_cmd - enable/disable continuous recording - */ -struct iwl_continuous_record_cmd { - struct iwl_continuous_record_mode record_mode; - u8 pad[CONT_REC_COMMAND_SIZE - - sizeof(struct iwl_continuous_record_mode)]; -} __packed; - /* maximum fragments to be allocated per target of allocationId */ #define IWL_BUFFER_LOCATION_MAX_FRAGS 2 @@ -385,4 +367,17 @@ struct iwl_buffer_allocation_cmd { struct iwl_fragment_data fragments[IWL_BUFFER_LOCATION_MAX_FRAGS]; } __packed; /* BUFFER_ALLOCATION_CMD_API_S_VER_1 */ +/** + * struct iwl_ldbg_config_cmd - LDBG config command + * @type: configuration type + * @pad: reserved space for type-dependent data + */ +struct iwl_ldbg_config_cmd { + __le32 type; + union { + u8 pad[LDBG_CFG_COMMAND_SIZE - sizeof(__le32)]; + struct iwl_buffer_allocation_cmd buffer_allocation; + }; /* LDBG_CFG_BODY_API_U_VER_2 (partially) */ +} __packed; /* LDBG_CFG_CMD_API_S_VER_2 */ + #endif /* __iwl_fw_api_debug_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index a97bf17da14d..917aafe1a9db 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -1678,20 +1678,20 @@ iwl_fw_dbg_buffer_allocation(struct iwl_fw_runtime *fwrt, struct iwl_fw_ini_allocation_tlv *alloc) { struct iwl_trans *trans = fwrt->trans; - struct iwl_continuous_record_cmd cont_rec = {}; - struct iwl_buffer_allocation_cmd *cmd = (void *)&cont_rec.pad[0]; + struct iwl_ldbg_config_cmd ldbg_cmd = { + .type = cpu_to_le32(BUFFER_ALLOCATION), + }; + struct iwl_buffer_allocation_cmd *cmd = &ldbg_cmd.buffer_allocation; struct iwl_host_cmd hcmd = { .id = LDBG_CONFIG_CMD, .flags = CMD_ASYNC, - .data[0] = &cont_rec, - .len[0] = sizeof(cont_rec), + .data[0] = &ldbg_cmd, + .len[0] = sizeof(ldbg_cmd), }; void *virtual_addr = NULL; u32 size = le32_to_cpu(alloc->size); dma_addr_t phys_addr; - cont_rec.record_mode.enable_recording = cpu_to_le16(BUFFER_ALLOCATION); - if (!trans->num_blocks && le32_to_cpu(alloc->buffer_location) != IWL_FW_INI_LOCATION_DRAM_PATH) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h index 40dcc3deb257..7cd206e6fb9e 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h @@ -271,18 +271,17 @@ _iwl_fw_dbg_trigger_simple_stop(struct iwl_fw_runtime *fwrt, static int iwl_fw_dbg_start_stop_hcmd(struct iwl_fw_runtime *fwrt, bool start) { - struct iwl_continuous_record_cmd cont_rec = {}; + struct iwl_ldbg_config_cmd cmd = { + .type = start ? cpu_to_le32(START_DEBUG_RECORDING) : + cpu_to_le32(STOP_DEBUG_RECORDING), + }; struct iwl_host_cmd hcmd = { .id = LDBG_CONFIG_CMD, .flags = CMD_ASYNC, - .data[0] = &cont_rec, - .len[0] = sizeof(cont_rec), + .data[0] = &cmd, + .len[0] = sizeof(cmd), }; - cont_rec.record_mode.enable_recording = start ? - cpu_to_le16(START_DEBUG_RECORDING) : - cpu_to_le16(STOP_DEBUG_RECORDING); - return iwl_trans_send_cmd(fwrt->trans, &hcmd); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index 52c361a6124c..514aaec6d59e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -1206,47 +1206,6 @@ static ssize_t iwl_dbgfs_fw_dbg_conf_read(struct file *file, return simple_read_from_buffer(user_buf, count, ppos, buf, pos); } -/* - * Enable / Disable continuous recording. - * Cause the FW to start continuous recording, by sending the relevant hcmd. - * Enable: input of every integer larger than 0, ENABLE_CONT_RECORDING. - * Disable: for 0 as input, DISABLE_CONT_RECORDING. - */ -static ssize_t iwl_dbgfs_cont_recording_write(struct iwl_mvm *mvm, - char *buf, size_t count, - loff_t *ppos) -{ - struct iwl_trans *trans = mvm->trans; - const struct iwl_fw_dbg_dest_tlv_v1 *dest = trans->dbg_dest_tlv; - struct iwl_continuous_record_cmd cont_rec = {}; - int ret, rec_mode; - - if (!iwl_mvm_firmware_running(mvm)) - return -EIO; - - if (!dest) - return -EOPNOTSUPP; - - if (dest->monitor_mode != SMEM_MODE || - trans->cfg->device_family < IWL_DEVICE_FAMILY_8000) - return -EOPNOTSUPP; - - ret = kstrtoint(buf, 0, &rec_mode); - if (ret) - return ret; - - cont_rec.record_mode.enable_recording = rec_mode ? - cpu_to_le16(ENABLE_CONT_RECORDING) : - cpu_to_le16(DISABLE_CONT_RECORDING); - - mutex_lock(&mvm->mutex); - ret = iwl_mvm_send_cmd_pdu(mvm, LDBG_CONFIG_CMD, 0, - sizeof(cont_rec), &cont_rec); - mutex_unlock(&mvm->mutex); - - return ret ?: count; -} - static ssize_t iwl_dbgfs_fw_dbg_conf_write(struct iwl_mvm *mvm, char *buf, size_t count, loff_t *ppos) @@ -1800,7 +1759,6 @@ MVM_DEBUGFS_READ_WRITE_FILE_OPS(scan_ant_rxchain, 8); MVM_DEBUGFS_READ_WRITE_FILE_OPS(d0i3_refs, 8); MVM_DEBUGFS_READ_WRITE_FILE_OPS(fw_dbg_conf, 8); MVM_DEBUGFS_WRITE_FILE_OPS(fw_dbg_collect, 64); -MVM_DEBUGFS_WRITE_FILE_OPS(cont_recording, 8); MVM_DEBUGFS_WRITE_FILE_OPS(max_amsdu_len, 8); MVM_DEBUGFS_WRITE_FILE_OPS(indirection_tbl, (IWL_RSS_INDIRECTION_TABLE_SIZE * 2)); @@ -2004,7 +1962,6 @@ int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir) MVM_DEBUGFS_ADD_FILE(fw_dbg_collect, mvm->debugfs_dir, 0200); MVM_DEBUGFS_ADD_FILE(max_amsdu_len, mvm->debugfs_dir, 0200); MVM_DEBUGFS_ADD_FILE(send_echo_cmd, mvm->debugfs_dir, 0200); - MVM_DEBUGFS_ADD_FILE(cont_recording, mvm->debugfs_dir, 0200); MVM_DEBUGFS_ADD_FILE(indirection_tbl, mvm->debugfs_dir, 0200); MVM_DEBUGFS_ADD_FILE(inject_packet, mvm->debugfs_dir, 0200); #ifdef CONFIG_ACPI diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index d257c143e624..ee2ef3f0042c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -737,6 +737,9 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, trans_cfg.rx_buf_size = rb_size_default; } + BUILD_BUG_ON(sizeof(struct iwl_ldbg_config_cmd) != + LDBG_CFG_COMMAND_SIZE); + trans->wide_cmd_header = true; trans_cfg.bc_table_dword = mvm->trans->cfg->device_family < IWL_DEVICE_FAMILY_22560; From 0f8bf03c86602cb0cb1236f49a0c9340f786f5b4 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 13 Aug 2018 15:30:35 +0300 Subject: [PATCH 0275/1436] iwlwifi: mvm: save and export regdb blob from the NVM Sometimes we want to debug issues related to the regulatory blob in the NVM. To make that easier, add a debugfs entry to export it together with the other nvm blobs we export. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c | 3 +++ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 1 + drivers/net/wireless/intel/iwlwifi/mvm/nvm.c | 5 +++++ 3 files changed, 9 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index 514aaec6d59e..cbbd4b076e17 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -2028,6 +2028,9 @@ int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir) if (!debugfs_create_blob("nvm_phy_sku", 0400, mvm->debugfs_dir, &mvm->nvm_phy_sku_blob)) goto err; + if (!debugfs_create_blob("nvm_reg", S_IRUSR, + mvm->debugfs_dir, &mvm->nvm_reg_blob)) + goto err; debugfs_create_file("mem", 0600, dbgfs_dir, mvm, &iwl_dbgfs_mem_ops); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 503e51d32e7f..c314f77f657f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -965,6 +965,7 @@ struct iwl_mvm { struct debugfs_blob_wrapper nvm_calib_blob; struct debugfs_blob_wrapper nvm_prod_blob; struct debugfs_blob_wrapper nvm_phy_sku_blob; + struct debugfs_blob_wrapper nvm_reg_blob; struct iwl_mvm_frame_stats drv_rx_stats; spinlock_t drv_stats_lock; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index f08934b03cf3..7bdbd010ae6b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -416,6 +416,11 @@ int iwl_nvm_init(struct iwl_mvm *mvm) mvm->nvm_phy_sku_blob.data = temp; mvm->nvm_phy_sku_blob.size = ret; break; + case NVM_SECTION_TYPE_REGULATORY_SDP: + case NVM_SECTION_TYPE_REGULATORY: + mvm->nvm_reg_blob.data = temp; + mvm->nvm_reg_blob.size = ret; + break; default: if (section == mvm->cfg->nvm_hw_section_num) { mvm->nvm_hw_blob.data = temp; From da0df827cd15813c064d5859926b5f1e779d0061 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 31 Oct 2018 10:21:10 +0200 Subject: [PATCH 0276/1436] iwlwifi: make iwl_fw_dbg_start_stop_hcmd() inline This function is supposed to be used as an inline function and is in a header file, so make it inline. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h index 7cd206e6fb9e..d8090d4b5c43 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h @@ -269,7 +269,8 @@ _iwl_fw_dbg_trigger_simple_stop(struct iwl_fw_runtime *fwrt, iwl_fw_dbg_get_trigger((fwrt)->fw,\ (trig))) -static int iwl_fw_dbg_start_stop_hcmd(struct iwl_fw_runtime *fwrt, bool start) +static inline int +iwl_fw_dbg_start_stop_hcmd(struct iwl_fw_runtime *fwrt, bool start) { struct iwl_ldbg_config_cmd cmd = { .type = start ? cpu_to_le32(START_DEBUG_RECORDING) : From e78da25e19c2dd31018c358d0717e4f6cfe317c8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Oct 2018 08:48:49 +0100 Subject: [PATCH 0277/1436] iwlwifi: move iwl_enable_{rx,tx}_ampdu to iwl-modparams.h These inlines just check the module parameters, so they don't need a configuration parameter and can move to a better place. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/dvm/mac80211.c | 23 +++---------------- .../wireless/intel/iwlwifi/iwl-modparams.h | 18 +++++++++++++++ .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 22 ++---------------- 3 files changed, 23 insertions(+), 40 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c index 49b71dbf8490..54b759cec8b3 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c @@ -1,6 +1,7 @@ /****************************************************************************** * * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved. + * Copyright (C) 2018 Intel Corporation * * Portions of this file are derived from the ipw3945 project, as well * as portions of the ieee80211 subsystem header files. @@ -710,24 +711,6 @@ static int iwlagn_mac_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, return ret; } -static inline bool iwl_enable_rx_ampdu(const struct iwl_cfg *cfg) -{ - if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_RXAGG) - return false; - return true; -} - -static inline bool iwl_enable_tx_ampdu(const struct iwl_cfg *cfg) -{ - if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_TXAGG) - return false; - if (iwlwifi_mod_params.disable_11n & IWL_ENABLE_HT_TXAGG) - return true; - - /* disabled by default */ - return false; -} - static int iwlagn_mac_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_ampdu_params *params) @@ -752,7 +735,7 @@ static int iwlagn_mac_ampdu_action(struct ieee80211_hw *hw, switch (action) { case IEEE80211_AMPDU_RX_START: - if (!iwl_enable_rx_ampdu(priv->cfg)) + if (!iwl_enable_rx_ampdu()) break; IWL_DEBUG_HT(priv, "start Rx\n"); ret = iwl_sta_rx_agg_start(priv, sta, tid, *ssn); @@ -764,7 +747,7 @@ static int iwlagn_mac_ampdu_action(struct ieee80211_hw *hw, case IEEE80211_AMPDU_TX_START: if (!priv->trans->ops->txq_enable) break; - if (!iwl_enable_tx_ampdu(priv->cfg)) + if (!iwl_enable_tx_ampdu()) break; IWL_DEBUG_HT(priv, "start Tx\n"); ret = iwlagn_tx_agg_start(priv, vif, sta, tid, ssn); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h b/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h index 73b1c46f1158..0cae2ef9b9df 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h @@ -152,4 +152,22 @@ struct iwl_mod_params { bool enable_ini; }; +static inline bool iwl_enable_rx_ampdu(void) +{ + if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_RXAGG) + return false; + return true; +} + +static inline bool iwl_enable_tx_ampdu(void) +{ + if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_TXAGG) + return false; + if (iwlwifi_mod_params.disable_11n & IWL_ENABLE_HT_TXAGG) + return true; + + /* enabled by default */ + return true; +} + #endif /* #__iwl_modparams_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index ebb3814dd922..78b35c9595a3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -943,24 +943,6 @@ static void iwl_mvm_mac_wake_tx_queue(struct ieee80211_hw *hw, schedule_work(&mvm->add_stream_wk); } -static inline bool iwl_enable_rx_ampdu(const struct iwl_cfg *cfg) -{ - if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_RXAGG) - return false; - return true; -} - -static inline bool iwl_enable_tx_ampdu(const struct iwl_cfg *cfg) -{ - if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_TXAGG) - return false; - if (iwlwifi_mod_params.disable_11n & IWL_ENABLE_HT_TXAGG) - return true; - - /* enabled by default */ - return true; -} - #define CHECK_BA_TRIGGER(_mvm, _trig, _tid_bm, _tid, _fmt...) \ do { \ if (!(le16_to_cpu(_tid_bm) & BIT(_tid))) \ @@ -1073,7 +1055,7 @@ static int iwl_mvm_mac_ampdu_action(struct ieee80211_hw *hw, mvmvif = iwl_mvm_vif_from_mac80211(vif); cancel_delayed_work(&mvmvif->uapsd_nonagg_detected_wk); } - if (!iwl_enable_rx_ampdu(mvm->cfg)) { + if (!iwl_enable_rx_ampdu()) { ret = -EINVAL; break; } @@ -1085,7 +1067,7 @@ static int iwl_mvm_mac_ampdu_action(struct ieee80211_hw *hw, timeout); break; case IEEE80211_AMPDU_TX_START: - if (!iwl_enable_tx_ampdu(mvm->cfg)) { + if (!iwl_enable_tx_ampdu()) { ret = -EINVAL; break; } From 94a8d87c47642bc0b99f1c9204b111b5826f1838 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 5 Nov 2018 11:07:58 +0200 Subject: [PATCH 0278/1436] iwlwifi: mvm: pre-initialize alive_data in wait_alive() The function we pass to the wait alive notification procedure may may not even get called if the timeout occurs before the function is called. To prevent accessing unitialized data in alive_data, pre-set it to zero in the declaration. Found by static analyzers. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 27582d70d45a..d3dc9d276e0f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -293,7 +293,7 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm, enum iwl_ucode_type ucode_type) { struct iwl_notification_wait alive_wait; - struct iwl_mvm_alive_data alive_data; + struct iwl_mvm_alive_data alive_data = {}; const struct fw_img *fw; int ret; enum iwl_ucode_type old_type = mvm->fwrt.cur_fw_img; From 29cf396f08f6476935c445faddcaf90f2fe2fa87 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 5 Nov 2018 15:45:54 +0200 Subject: [PATCH 0279/1436] iwlwifi: calculate pointers from out_cmd instead of out_cmd->hdr The out_cmd structure starts with a header, so there's no need to use &out_cmd->hdr, out_cmd alone is enough. We use this when calculating other addresses and klocwork gets confused with that because it thinks we are trying to access hdr (as an array) beyond its size. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 8807ea9d693c..f3d2e8fe920b 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -838,14 +838,14 @@ static int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans, /* start the TFD with the minimum copy bytes */ tb0_size = min_t(int, copy_size, IWL_FIRST_TB_SIZE); - memcpy(&txq->first_tb_bufs[idx], &out_cmd->hdr, tb0_size); + memcpy(&txq->first_tb_bufs[idx], out_cmd, tb0_size); iwl_pcie_gen2_set_tb(trans, tfd, iwl_pcie_get_first_tb_dma(txq, idx), tb0_size); /* map first command fragment, if any remains */ if (copy_size > tb0_size) { phys_addr = dma_map_single(trans->dev, - ((u8 *)&out_cmd->hdr) + tb0_size, + (u8 *)out_cmd + tb0_size, copy_size - tb0_size, DMA_TO_DEVICE); if (dma_mapping_error(trans->dev, phys_addr)) { From 9adcc117bc7d16e5ef226fec0a1d1f25765bf9ce Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 6 Nov 2018 08:24:12 +0200 Subject: [PATCH 0280/1436] iwlwifi: make sure cur_fw_img is valid before accessing img Harden the fwrt->fw->img array access by making sure the cur_fw_img value doesn't go out of bounds. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h index d8090d4b5c43..330229d2a61d 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h @@ -381,6 +381,7 @@ static inline bool iwl_fw_dbg_is_paging_enabled(struct iwl_fw_runtime *fwrt) { return iwl_fw_dbg_type_on(fwrt, IWL_FW_ERROR_DUMP_PAGING) && !fwrt->trans->cfg->gen2 && + fwrt->cur_fw_img < IWL_UCODE_TYPE_MAX && fwrt->fw->img[fwrt->cur_fw_img].paging_mem_size && fwrt->fw_paging_db[0].fw_paging_block; } From 34a880d8bd4c29f7aaf02a46e2569234bd178528 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Thu, 1 Nov 2018 16:30:15 +0200 Subject: [PATCH 0281/1436] iwlwifi: mvm: remove sta key on wep ap If WEP is used, no one removes the STA key upon STA removal, leading to a situation where after 16 connect/disconnects - the AP could no longer decrypt incoming data frames since iwl_mvm_set_fw_key_idx() called from iwl_mvm_set_sta_key() during association returns STA_KEY_IDX_INVALID, thus not setting the key for that connecting STA. Fix this by removing the key in the driver when the STA is removed. Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 78b35c9595a3..b4a55349336f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -3091,6 +3091,16 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw, iwl_mvm_tdls_check_trigger(mvm, vif, sta->addr, NL80211_TDLS_DISABLE_LINK); } + + /* Remove STA key if this is an AP using WEP */ + if (vif->type == NL80211_IFTYPE_AP && mvmvif->ap_wep_key) { + int rm_ret = iwl_mvm_remove_sta_key(mvm, vif, sta, + mvmvif->ap_wep_key); + + if (!ret) + ret = rm_ret; + } + } else { ret = -EIO; } From fefbf85305ecd28c383c2d83694b147217605059 Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Wed, 31 Oct 2018 13:29:21 +0200 Subject: [PATCH 0282/1436] iwlwifi: monitor dumping flow cleanup Since we use a dumping mask, checking if only monitor was requested is redundant. Remove the unneeded code. Signed-off-by: Shahar S Matityahu Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/pcie/trans.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index e72bd9796b5b..472a85d9a358 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3169,8 +3169,7 @@ static struct iwl_trans_dump_data struct iwl_txq *cmdq = trans_pcie->txq[trans_pcie->cmd_queue]; struct iwl_fw_error_dump_txcmd *txcmd; struct iwl_trans_dump_data *dump_data; - u32 len, num_rbs = 0; - u32 monitor_len; + u32 len, num_rbs = 0, monitor_len = 0; int i, ptr; bool dump_rbs = test_bit(STATUS_FW_ERROR, &trans->status) && !trans->cfg->mq_rx_supported && @@ -3187,19 +3186,8 @@ static struct iwl_trans_dump_data cmdq->n_window * (sizeof(*txcmd) + TFD_MAX_PAYLOAD_SIZE); /* FW monitor */ - monitor_len = iwl_trans_get_fw_monitor_len(trans, &len); - - if (dump_mask == BIT(IWL_FW_ERROR_DUMP_FW_MONITOR)) { - dump_data = vzalloc(len); - if (!dump_data) - return NULL; - - data = (void *)dump_data->data; - len = iwl_trans_pcie_dump_monitor(trans, &data, monitor_len); - dump_data->len = len; - - return dump_data; - } + if (dump_mask & BIT(IWL_FW_ERROR_DUMP_FW_MONITOR)) + monitor_len = iwl_trans_get_fw_monitor_len(trans, &len); /* CSR registers */ if (dump_mask & BIT(IWL_FW_ERROR_DUMP_CSR)) From 698478c49294eb3d4d64f7a3b61d460495826078 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 29 Oct 2018 11:23:45 +0200 Subject: [PATCH 0283/1436] iwlwifi: mvm: add an option to dereference vif by id Currently whenever we get firmware notification with mac id, we iterate over all the interfaces to find the ID. This is a bit cumbersome. Instead, adding an array of RCU pointers, like we have for station IDs. This is not expensive space wise since we have only up to 4 active MACs, and not complicated code wise, since we have a clear point to init and de-init it. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 99 +++++++++---------- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 4 + drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 14 +++ .../net/wireless/intel/iwlwifi/mvm/utils.c | 22 ++--- 4 files changed, 72 insertions(+), 67 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index b1d6dea7672e..c868ebfa10ce 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -1382,35 +1382,47 @@ void iwl_mvm_rx_beacon_notif(struct iwl_mvm *mvm, } } -static void iwl_mvm_beacon_loss_iterator(void *_data, u8 *mac, - struct ieee80211_vif *vif) +void iwl_mvm_rx_missed_beacons_notif(struct iwl_mvm *mvm, + struct iwl_rx_cmd_buffer *rxb) { - struct iwl_missed_beacons_notif *missed_beacons = _data; - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_mvm *mvm = mvmvif->mvm; + struct iwl_rx_packet *pkt = rxb_addr(rxb); + struct iwl_missed_beacons_notif *mb = (void *)pkt->data; struct iwl_fw_dbg_trigger_missed_bcon *bcon_trig; struct iwl_fw_dbg_trigger_tlv *trigger; u32 stop_trig_missed_bcon, stop_trig_missed_bcon_since_rx; u32 rx_missed_bcon, rx_missed_bcon_since_rx; + struct ieee80211_vif *vif; + u32 id = le32_to_cpu(mb->mac_id); - if (mvmvif->id != (u16)le32_to_cpu(missed_beacons->mac_id)) - return; + IWL_DEBUG_INFO(mvm, + "missed bcn mac_id=%u, consecutive=%u (%u, %u, %u)\n", + le32_to_cpu(mb->mac_id), + le32_to_cpu(mb->consec_missed_beacons), + le32_to_cpu(mb->consec_missed_beacons_since_last_rx), + le32_to_cpu(mb->num_recvd_beacons), + le32_to_cpu(mb->num_expected_beacons)); - rx_missed_bcon = le32_to_cpu(missed_beacons->consec_missed_beacons); + rcu_read_lock(); + + vif = iwl_mvm_rcu_dereference_vif_id(mvm, id, true); + if (!vif) + goto out; + + rx_missed_bcon = le32_to_cpu(mb->consec_missed_beacons); rx_missed_bcon_since_rx = - le32_to_cpu(missed_beacons->consec_missed_beacons_since_last_rx); + le32_to_cpu(mb->consec_missed_beacons_since_last_rx); /* * TODO: the threshold should be adjusted based on latency conditions, * and/or in case of a CS flow on one of the other AP vifs. */ - if (le32_to_cpu(missed_beacons->consec_missed_beacons_since_last_rx) > + if (le32_to_cpu(mb->consec_missed_beacons_since_last_rx) > IWL_MVM_MISSED_BEACONS_THRESHOLD) ieee80211_beacon_loss(vif); trigger = iwl_fw_dbg_trigger_on(&mvm->fwrt, ieee80211_vif_to_wdev(vif), FW_DBG_TRIGGER_MISSED_BEACONS); if (!trigger) - return; + goto out; bcon_trig = (void *)trigger->data; stop_trig_missed_bcon = le32_to_cpu(bcon_trig->stop_consec_missed_bcon); @@ -1422,28 +1434,11 @@ static void iwl_mvm_beacon_loss_iterator(void *_data, u8 *mac, if (rx_missed_bcon_since_rx >= stop_trig_missed_bcon_since_rx || rx_missed_bcon >= stop_trig_missed_bcon) iwl_fw_dbg_collect_trig(&mvm->fwrt, trigger, NULL); -} - -void iwl_mvm_rx_missed_beacons_notif(struct iwl_mvm *mvm, - struct iwl_rx_cmd_buffer *rxb) -{ - struct iwl_rx_packet *pkt = rxb_addr(rxb); - struct iwl_missed_beacons_notif *mb = (void *)pkt->data; - - IWL_DEBUG_INFO(mvm, - "missed bcn mac_id=%u, consecutive=%u (%u, %u, %u)\n", - le32_to_cpu(mb->mac_id), - le32_to_cpu(mb->consec_missed_beacons), - le32_to_cpu(mb->consec_missed_beacons_since_last_rx), - le32_to_cpu(mb->num_recvd_beacons), - le32_to_cpu(mb->num_expected_beacons)); - - ieee80211_iterate_active_interfaces_atomic(mvm->hw, - IEEE80211_IFACE_ITER_NORMAL, - iwl_mvm_beacon_loss_iterator, - mb); iwl_fw_dbg_apply_point(&mvm->fwrt, IWL_FW_INI_APPLY_MISSED_BEACONS); + +out: + rcu_read_unlock(); } void iwl_mvm_rx_stored_beacon_notif(struct iwl_mvm *mvm, @@ -1485,16 +1480,29 @@ void iwl_mvm_rx_stored_beacon_notif(struct iwl_mvm *mvm, ieee80211_rx_napi(mvm->hw, NULL, skb, NULL); } -static void iwl_mvm_probe_resp_data_iter(void *_data, u8 *mac, - struct ieee80211_vif *vif) +void iwl_mvm_probe_resp_data_notif(struct iwl_mvm *mvm, + struct iwl_rx_cmd_buffer *rxb) { - struct iwl_probe_resp_data_notif *notif = _data; - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct iwl_rx_packet *pkt = rxb_addr(rxb); + struct iwl_probe_resp_data_notif *notif = (void *)pkt->data; struct iwl_probe_resp_data *old_data, *new_data; + int len = iwl_rx_packet_payload_len(pkt); + u32 id = le32_to_cpu(notif->mac_id); + struct ieee80211_vif *vif; + struct iwl_mvm_vif *mvmvif; - if (mvmvif->id != (u16)le32_to_cpu(notif->mac_id)) + if (WARN_ON_ONCE(len < sizeof(*notif))) return; + IWL_DEBUG_INFO(mvm, "Probe response data notif: noa %d, csa %d\n", + notif->noa_active, notif->csa_counter); + + vif = iwl_mvm_rcu_dereference_vif_id(mvm, id, false); + if (!vif) + return; + + mvmvif = iwl_mvm_vif_from_mac80211(vif); + new_data = kzalloc(sizeof(*new_data), GFP_KERNEL); if (!new_data) return; @@ -1525,25 +1533,6 @@ static void iwl_mvm_probe_resp_data_iter(void *_data, u8 *mac, ieee80211_csa_set_counter(vif, notif->csa_counter); } -void iwl_mvm_probe_resp_data_notif(struct iwl_mvm *mvm, - struct iwl_rx_cmd_buffer *rxb) -{ - struct iwl_rx_packet *pkt = rxb_addr(rxb); - struct iwl_probe_resp_data_notif *notif = (void *)pkt->data; - int len = iwl_rx_packet_payload_len(pkt); - - if (WARN_ON_ONCE(len < sizeof(*notif))) - return; - - IWL_DEBUG_INFO(mvm, "Probe response data notif: noa %d, csa %d\n", - notif->noa_active, notif->csa_counter); - - ieee80211_iterate_active_interfaces(mvm->hw, - IEEE80211_IFACE_ITER_ACTIVE, - iwl_mvm_probe_resp_data_iter, - notif); -} - void iwl_mvm_channel_switch_noa_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index b4a55349336f..e3da6992f244 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1471,6 +1471,8 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, if (ret) goto out_unlock; + rcu_assign_pointer(mvm->vif_id_to_mac[mvmvif->id], vif); + /* Counting number of interfaces is needed for legacy PM */ if (vif->type != NL80211_IFTYPE_P2P_DEVICE) mvm->vif_count++; @@ -1662,6 +1664,8 @@ static void iwl_mvm_mac_remove_interface(struct ieee80211_hw *hw, iwl_mvm_power_update_mac(mvm); iwl_mvm_mac_ctxt_remove(mvm, vif); + RCU_INIT_POINTER(mvm->vif_id_to_mac[mvmvif->id], NULL); + if (vif->type == NL80211_IFTYPE_MONITOR) mvm->monitor_on = false; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index c314f77f657f..d326843636cb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -989,6 +989,7 @@ struct iwl_mvm { u8 refs[IWL_MVM_REF_COUNT]; u8 vif_count; + struct ieee80211_vif __rcu *vif_id_to_mac[NUM_MAC_INDEX_DRIVER]; /* -1 for always, 0 for never, >0 for that many times */ s8 fw_restart; @@ -1241,6 +1242,19 @@ iwl_mvm_sta_from_staid_protected(struct iwl_mvm *mvm, u8 sta_id) return iwl_mvm_sta_from_mac80211(sta); } +static inline struct ieee80211_vif * +iwl_mvm_rcu_dereference_vif_id(struct iwl_mvm *mvm, u8 vif_id, bool rcu) +{ + if (WARN_ON(vif_id >= ARRAY_SIZE(mvm->vif_id_to_mac))) + return NULL; + + if (rcu) + return rcu_dereference(mvm->vif_id_to_mac[vif_id]); + + return rcu_dereference_protected(mvm->vif_id_to_mac[vif_id], + lockdep_is_held(&mvm->mutex)); +} + static inline bool iwl_mvm_is_d0i3_supported(struct iwl_mvm *mvm) { return !iwlwifi_mod_params.d0i3_disable && diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 5dbbd35ee630..211c4638d690 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -1136,19 +1136,14 @@ static void iwl_mvm_tcm_uapsd_nonagg_detected_wk(struct work_struct *wk) "AP isn't using AMPDU with uAPSD enabled"); } -static void iwl_mvm_uapsd_agg_disconnect_iter(void *data, u8 *mac, - struct ieee80211_vif *vif) +static void iwl_mvm_uapsd_agg_disconnect(struct iwl_mvm *mvm, + struct ieee80211_vif *vif) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_mvm *mvm = mvmvif->mvm; - int *mac_id = data; if (vif->type != NL80211_IFTYPE_STATION) return; - if (mvmvif->id != *mac_id) - return; - if (!vif->bss_conf.assoc) return; @@ -1158,10 +1153,10 @@ static void iwl_mvm_uapsd_agg_disconnect_iter(void *data, u8 *mac, !mvmvif->queue_params[IEEE80211_AC_BK].uapsd) return; - if (mvm->tcm.data[*mac_id].uapsd_nonagg_detect.detected) + if (mvm->tcm.data[mvmvif->id].uapsd_nonagg_detect.detected) return; - mvm->tcm.data[*mac_id].uapsd_nonagg_detect.detected = true; + mvm->tcm.data[mvmvif->id].uapsd_nonagg_detect.detected = true; IWL_INFO(mvm, "detected AP should do aggregation but isn't, likely due to U-APSD\n"); schedule_delayed_work(&mvmvif->uapsd_nonagg_detected_wk, 15 * HZ); @@ -1174,6 +1169,7 @@ static void iwl_mvm_check_uapsd_agg_expected_tpt(struct iwl_mvm *mvm, u64 bytes = mvm->tcm.data[mac].uapsd_nonagg_detect.rx_bytes; u64 tpt; unsigned long rate; + struct ieee80211_vif *vif; rate = ewma_rate_read(&mvm->tcm.data[mac].uapsd_nonagg_detect.rate); @@ -1202,9 +1198,11 @@ static void iwl_mvm_check_uapsd_agg_expected_tpt(struct iwl_mvm *mvm, return; } - ieee80211_iterate_active_interfaces_atomic( - mvm->hw, IEEE80211_IFACE_ITER_NORMAL, - iwl_mvm_uapsd_agg_disconnect_iter, &mac); + rcu_read_lock(); + vif = rcu_dereference(mvm->vif_id_to_mac[mac]); + if (vif) + iwl_mvm_uapsd_agg_disconnect(mvm, vif); + rcu_read_unlock(); } static void iwl_mvm_tcm_iterator(void *_data, u8 *mac, From 1f7698abedeeb3fef3cbcf78e16f925df675a179 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 8 Nov 2018 09:51:56 +0100 Subject: [PATCH 0284/1436] iwlwifi: mvm: fix A-MPDU reference assignment The current code assigns the reference, and then goes to increment it if the toggle bit has changed. That way, we get Toggle 0 0 0 0 1 1 1 1 ID 1 1 1 1 1 2 2 2 Fix that by assigning the post-toggle ID to get Toggle 0 0 0 0 1 1 1 1 ID 1 1 1 1 2 2 2 2 Reported-by: Danny Alexander Signed-off-by: Johannes Berg Fixes: fbe4112791b8 ("iwlwifi: mvm: update mpdu metadata API") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 9342f61b702e..120e3f2d21aa 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -1438,12 +1438,12 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, bool toggle_bit = phy_info & IWL_RX_MPDU_PHY_AMPDU_TOGGLE; rx_status->flag |= RX_FLAG_AMPDU_DETAILS; - rx_status->ampdu_reference = mvm->ampdu_ref; /* toggle is switched whenever new aggregation starts */ if (toggle_bit != mvm->ampdu_toggle) { mvm->ampdu_ref++; mvm->ampdu_toggle = toggle_bit; } + rx_status->ampdu_reference = mvm->ampdu_ref; } rcu_read_lock(); From fba8248e7e67b7e1098e69284aeccbcb2110fa86 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 25 Oct 2018 20:11:51 +0300 Subject: [PATCH 0285/1436] iwlwifi: mvm: get rid of tx_path_lock TX path lock was introduced in order to prevent out of order invocations of TX. This can happen in the following flow: TX path invoked from net dev Packet dequeued TX path invoked from RX path Packet dequeued Packet TXed Packet TXed However, we don't really need a lock. If TX path is already invoked from some location, other paths can simply abort their execution, instead of waiting to the first path to finish, and then discover queue is (likely) empty or stopped. Replace the lock with an atomic variable to track TX ownership. This simplifies the locking dependencies between RX and TX paths, and should improve performance. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 48 +++++++++++++------ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 3 +- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 4 +- 3 files changed, 36 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index e3da6992f244..dd0761e5cf6a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -878,25 +878,45 @@ void iwl_mvm_mac_itxq_xmit(struct ieee80211_hw *hw, struct ieee80211_txq *txq) struct iwl_mvm_txq *mvmtxq = iwl_mvm_txq_from_mac80211(txq); struct sk_buff *skb = NULL; - spin_lock(&mvmtxq->tx_path_lock); + /* + * No need for threads to be pending here, they can leave the first + * taker all the work. + * + * mvmtxq->tx_request logic: + * + * If 0, no one is currently TXing, set to 1 to indicate current thread + * will now start TX and other threads should quit. + * + * If 1, another thread is currently TXing, set to 2 to indicate to + * that thread that there was another request. Since that request may + * have raced with the check whether the queue is empty, the TXing + * thread should check the queue's status one more time before leaving. + * This check is done in order to not leave any TX hanging in the queue + * until the next TX invocation (which may not even happen). + * + * If 2, another thread is currently TXing, and it will already double + * check the queue, so do nothing. + */ + if (atomic_fetch_add_unless(&mvmtxq->tx_request, 1, 2)) + return; rcu_read_lock(); - while (likely(!mvmtxq->stopped && - (mvm->trans->system_pm_mode == - IWL_PLAT_PM_MODE_DISABLED))) { - skb = ieee80211_tx_dequeue(hw, txq); + do { + while (likely(!mvmtxq->stopped && + (mvm->trans->system_pm_mode == + IWL_PLAT_PM_MODE_DISABLED))) { + skb = ieee80211_tx_dequeue(hw, txq); - if (!skb) - break; + if (!skb) + break; - if (!txq->sta) - iwl_mvm_tx_skb_non_sta(mvm, skb); - else - iwl_mvm_tx_skb(mvm, skb, txq->sta); - } + if (!txq->sta) + iwl_mvm_tx_skb_non_sta(mvm, skb); + else + iwl_mvm_tx_skb(mvm, skb, txq->sta); + } + } while (atomic_dec_return(&mvmtxq->tx_request)); rcu_read_unlock(); - - spin_unlock(&mvmtxq->tx_path_lock); } static void iwl_mvm_mac_wake_tx_queue(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index d326843636cb..a289b9488c03 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -781,8 +781,7 @@ struct iwl_mvm_geo_profile { struct iwl_mvm_txq { struct list_head list; u16 txq_id; - /* Protects TX path invocation from two places */ - spinlock_t tx_path_lock; + atomic_t tx_request; bool stopped; }; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 5f42897dcd55..c5a01470a3bc 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1403,9 +1403,7 @@ void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk) iwl_mvm_sta_alloc_queue(mvm, txq->sta, txq->ac, tid); list_del_init(&mvmtxq->list); - local_bh_disable(); iwl_mvm_mac_itxq_xmit(mvm->hw, txq); - local_bh_enable(); } mutex_unlock(&mvm->mutex); @@ -1646,7 +1644,7 @@ int iwl_mvm_add_sta(struct iwl_mvm *mvm, mvmtxq->txq_id = IWL_MVM_INVALID_QUEUE; INIT_LIST_HEAD(&mvmtxq->list); - spin_lock_init(&mvmtxq->tx_path_lock); + atomic_set(&mvmtxq->tx_request, 0); } mvm_sta->agg_tids = 0; From 3a894a9f319f0b4a36857683c4caacc371a40d25 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 30 Oct 2018 14:47:52 +0100 Subject: [PATCH 0286/1436] iwlwifi: remove TOF implementation This is an ancient (~2015) implementation that no longer matches the firmware in any way, and most likely never worked. Remove all of it so it can be reintroduced properly. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/fw/api/commands.h | 12 - .../net/wireless/intel/iwlwifi/fw/api/tof.h | 393 --------- drivers/net/wireless/intel/iwlwifi/fw/file.h | 2 - .../net/wireless/intel/iwlwifi/mvm/Makefile | 1 - .../wireless/intel/iwlwifi/mvm/debugfs-vif.c | 770 ------------------ .../net/wireless/intel/iwlwifi/mvm/fw-api.h | 3 +- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 3 - drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 8 - drivers/net/wireless/intel/iwlwifi/mvm/tof.c | 305 ------- drivers/net/wireless/intel/iwlwifi/mvm/tof.h | 89 -- 10 files changed, 2 insertions(+), 1584 deletions(-) delete mode 100644 drivers/net/wireless/intel/iwlwifi/fw/api/tof.h delete mode 100644 drivers/net/wireless/intel/iwlwifi/mvm/tof.c delete mode 100644 drivers/net/wireless/intel/iwlwifi/mvm/tof.h diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h index 89c0cfddcc3f..baba7bcee5fc 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h @@ -77,7 +77,6 @@ * @DATA_PATH_GROUP: data path group, uses command IDs from * &enum iwl_data_path_subcmd_ids * @NAN_GROUP: NAN group, uses command IDs from &enum iwl_nan_subcmd_ids - * @TOF_GROUP: TOF group, uses command IDs from &enum iwl_tof_subcmd_ids * @PROT_OFFLOAD_GROUP: protocol offload group, uses command IDs from * &enum iwl_prot_offload_subcmd_ids * @REGULATORY_AND_NVM_GROUP: regulatory/NVM group, uses command IDs from @@ -92,7 +91,6 @@ enum iwl_mvm_command_groups { PHY_OPS_GROUP = 0x4, DATA_PATH_GROUP = 0x5, NAN_GROUP = 0x7, - TOF_GROUP = 0x8, PROT_OFFLOAD_GROUP = 0xb, REGULATORY_AND_NVM_GROUP = 0xc, DEBUG_GROUP = 0xf, @@ -352,16 +350,6 @@ enum iwl_legacy_cmds { */ PHY_DB_CMD = 0x6c, - /** - * @TOF_CMD: &struct iwl_tof_config_cmd - */ - TOF_CMD = 0x10, - - /** - * @TOF_NOTIFICATION: &struct iwl_tof_gen_resp_cmd - */ - TOF_NOTIFICATION = 0x11, - /** * @POWER_TABLE_CMD: &struct iwl_device_power_cmd */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/tof.h b/drivers/net/wireless/intel/iwlwifi/fw/api/tof.h deleted file mode 100644 index 7328a1606146..000000000000 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/tof.h +++ /dev/null @@ -1,393 +0,0 @@ -/****************************************************************************** - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2015 - 2017 Intel Deutschland GmbH - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * The full GNU General Public License is included in this distribution - * in the file called COPYING. - * - * Contact Information: - * Intel Linux Wireless - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - * BSD LICENSE - * - * Copyright(c) 2015 - 2017 Intel Deutschland GmbH - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - *****************************************************************************/ -#ifndef __iwl_fw_api_tof_h__ -#define __iwl_fw_api_tof_h__ - -/* ToF sub-group command IDs */ -enum iwl_mvm_tof_sub_grp_ids { - TOF_RANGE_REQ_CMD = 0x1, - TOF_CONFIG_CMD = 0x2, - TOF_RANGE_ABORT_CMD = 0x3, - TOF_RANGE_REQ_EXT_CMD = 0x4, - TOF_RESPONDER_CONFIG_CMD = 0x5, - TOF_NW_INITIATED_RES_SEND_CMD = 0x6, - TOF_NEIGHBOR_REPORT_REQ_CMD = 0x7, - TOF_NEIGHBOR_REPORT_RSP_NOTIF = 0xFC, - TOF_NW_INITIATED_REQ_RCVD_NOTIF = 0xFD, - TOF_RANGE_RESPONSE_NOTIF = 0xFE, - TOF_MCSI_DEBUG_NOTIF = 0xFB, -}; - -/** - * struct iwl_tof_config_cmd - ToF configuration - * @tof_disabled: 0 enabled, 1 - disabled - * @one_sided_disabled: 0 enabled, 1 - disabled - * @is_debug_mode: 1 debug mode, 0 - otherwise - * @is_buf_required: 1 channel estimation buffer required, 0 - otherwise - */ -struct iwl_tof_config_cmd { - __le32 sub_grp_cmd_id; - u8 tof_disabled; - u8 one_sided_disabled; - u8 is_debug_mode; - u8 is_buf_required; -} __packed; - -/** - * struct iwl_tof_responder_config_cmd - ToF AP mode (for debug) - * @burst_period: future use: (currently hard coded in the LMAC) - * The interval between two sequential bursts. - * @min_delta_ftm: future use: (currently hard coded in the LMAC) - * The minimum delay between two sequential FTM Responses - * in the same burst. - * @burst_duration: future use: (currently hard coded in the LMAC) - * The total time for all FTMs handshake in the same burst. - * Affect the time events duration in the LMAC. - * @num_of_burst_exp: future use: (currently hard coded in the LMAC) - * The number of bursts for the current ToF request. Affect - * the number of events allocations in the current iteration. - * @get_ch_est: for xVT only, NA for driver - * @abort_responder: when set to '1' - Responder will terminate its activity - * (all other fields in the command are ignored) - * @recv_sta_req_params: 1 - Responder will ignore the other Responder's - * params and use the recomended Initiator params. - * 0 - otherwise - * @channel_num: current AP Channel - * @bandwidth: current AP Bandwidth: 0 20MHz, 1 40MHz, 2 80MHz - * @rate: current AP rate - * @ctrl_ch_position: coding of the control channel position relative to - * the center frequency: - * - * 40 MHz - * 0 below center, 1 above center - * - * 80 MHz - * bits [0..1] - * * 0 the near 20MHz to the center, - * * 1 the far 20MHz to the center - * bit[2] - * as above 40MHz - * @ftm_per_burst: FTMs per Burst - * @ftm_resp_ts_avail: '0' - we don't measure over the Initial FTM Response, - * '1' - we measure over the Initial FTM Response - * @asap_mode: ASAP / Non ASAP mode for the current WLS station - * @sta_id: index of the AP STA when in AP mode - * @tsf_timer_offset_msecs: The dictated time offset (mSec) from the AP's TSF - * @toa_offset: Artificial addition [0.1nsec] for the ToA - to be used for debug - * purposes, simulating station movement by adding various values - * to this field - * @bssid: Current AP BSSID - */ -struct iwl_tof_responder_config_cmd { - __le32 sub_grp_cmd_id; - __le16 burst_period; - u8 min_delta_ftm; - u8 burst_duration; - u8 num_of_burst_exp; - u8 get_ch_est; - u8 abort_responder; - u8 recv_sta_req_params; - u8 channel_num; - u8 bandwidth; - u8 rate; - u8 ctrl_ch_position; - u8 ftm_per_burst; - u8 ftm_resp_ts_avail; - u8 asap_mode; - u8 sta_id; - __le16 tsf_timer_offset_msecs; - __le16 toa_offset; - u8 bssid[ETH_ALEN]; -} __packed; - -/** - * struct iwl_tof_range_request_ext_cmd - extended range req for WLS - * @tsf_timer_offset_msec: the recommended time offset (mSec) from the AP's TSF - * @reserved: reserved - * @min_delta_ftm: Minimal time between two consecutive measurements, - * in units of 100us. 0 means no preference by station - * @ftm_format_and_bw20M: FTM Channel Spacing/Format for 20MHz: recommended - * value be sent to the AP - * @ftm_format_and_bw40M: FTM Channel Spacing/Format for 40MHz: recommended - * value to be sent to the AP - * @ftm_format_and_bw80M: FTM Channel Spacing/Format for 80MHz: recommended - * value to be sent to the AP - */ -struct iwl_tof_range_req_ext_cmd { - __le32 sub_grp_cmd_id; - __le16 tsf_timer_offset_msec; - __le16 reserved; - u8 min_delta_ftm; - u8 ftm_format_and_bw20M; - u8 ftm_format_and_bw40M; - u8 ftm_format_and_bw80M; -} __packed; - -#define IWL_MVM_TOF_MAX_APS 21 - -/** - * struct iwl_tof_range_req_ap_entry - AP configuration parameters - * @channel_num: Current AP Channel - * @bandwidth: Current AP Bandwidth: 0 20MHz, 1 40MHz, 2 80MHz - * @tsf_delta_direction: TSF relatively to the subject AP - * @ctrl_ch_position: Coding of the control channel position relative to the - * center frequency. - * 40MHz 0 below center, 1 above center - * 80MHz bits [0..1]: 0 the near 20MHz to the center, - * 1 the far 20MHz to the center - * bit[2] as above 40MHz - * @bssid: AP's bss id - * @measure_type: Measurement type: 0 - two sided, 1 - One sided - * @num_of_bursts: Recommended value to be sent to the AP. 2s Exponent of the - * number of measurement iterations (min 2^0 = 1, max 2^14) - * @burst_period: Recommended value to be sent to the AP. Measurement - * periodicity In units of 100ms. ignored if num_of_bursts = 0 - * @samples_per_burst: 2-sided: the number of FTMs pairs in single Burst (1-31) - * 1-sided: how many rts/cts pairs should be used per burst. - * @retries_per_sample: Max number of retries that the LMAC should send - * in case of no replies by the AP. - * @tsf_delta: TSF Delta in units of microseconds. - * The difference between the AP TSF and the device local clock. - * @location_req: Location Request Bit[0] LCI should be sent in the FTMR - * Bit[1] Civic should be sent in the FTMR - * @asap_mode: 0 - non asap mode, 1 - asap mode (not relevant for one sided) - * @enable_dyn_ack: Enable Dynamic ACK BW. - * 0 Initiator interact with regular AP - * 1 Initiator interact with Responder machine: need to send the - * Initiator Acks with HT 40MHz / 80MHz, since the Responder should - * use it for its ch est measurement (this flag will be set when we - * configure the opposite machine to be Responder). - * @rssi: Last received value - * leagal values: -128-0 (0x7f). above 0x0 indicating an invalid value. - */ -struct iwl_tof_range_req_ap_entry { - u8 channel_num; - u8 bandwidth; - u8 tsf_delta_direction; - u8 ctrl_ch_position; - u8 bssid[ETH_ALEN]; - u8 measure_type; - u8 num_of_bursts; - __le16 burst_period; - u8 samples_per_burst; - u8 retries_per_sample; - __le32 tsf_delta; - u8 location_req; - u8 asap_mode; - u8 enable_dyn_ack; - s8 rssi; -} __packed; - -/** - * enum iwl_tof_response_mode - * @IWL_MVM_TOF_RESPOSE_ASAP: report each AP measurement separately as soon as - * possible (not supported for this release) - * @IWL_MVM_TOF_RESPOSE_TIMEOUT: report all AP measurements as a batch upon - * timeout expiration - * @IWL_MVM_TOF_RESPOSE_COMPLETE: report all AP measurements as a batch at the - * earlier of: measurements completion / timeout - * expiration. - */ -enum iwl_tof_response_mode { - IWL_MVM_TOF_RESPOSE_ASAP = 1, - IWL_MVM_TOF_RESPOSE_TIMEOUT, - IWL_MVM_TOF_RESPOSE_COMPLETE, -}; - -/** - * struct iwl_tof_range_req_cmd - start measurement cmd - * @request_id: A Token incremented per request. The same Token will be - * sent back in the range response - * @initiator: 0- NW initiated, 1 - Client Initiated - * @one_sided_los_disable: '0'- run ML-Algo for both ToF/OneSided, - * '1' - run ML-Algo for ToF only - * @req_timeout: Requested timeout of the response in units of 100ms. - * This is equivalent to the session time configured to the - * LMAC in Initiator Request - * @report_policy: Supported partially for this release: For current release - - * the range report will be uploaded as a batch when ready or - * when the session is done (successfully / partially). - * one of iwl_tof_response_mode. - * @num_of_ap: Number of APs to measure (error if > IWL_MVM_TOF_MAX_APS) - * @macaddr_random: '0' Use default source MAC address (i.e. p2_p), - * '1' Use MAC Address randomization according to the below - * @macaddr_mask: Bits set to 0 shall be copied from the MAC address template. - * Bits set to 1 shall be randomized by the UMAC - * @ap: per-AP request data - */ -struct iwl_tof_range_req_cmd { - __le32 sub_grp_cmd_id; - u8 request_id; - u8 initiator; - u8 one_sided_los_disable; - u8 req_timeout; - u8 report_policy; - u8 los_det_disable; - u8 num_of_ap; - u8 macaddr_random; - u8 macaddr_template[ETH_ALEN]; - u8 macaddr_mask[ETH_ALEN]; - struct iwl_tof_range_req_ap_entry ap[IWL_MVM_TOF_MAX_APS]; -} __packed; - -/** - * struct iwl_tof_gen_resp_cmd - generic ToF response - */ -struct iwl_tof_gen_resp_cmd { - __le32 sub_grp_cmd_id; - u8 data[]; -} __packed; - -/** - * struct iwl_tof_range_rsp_ap_entry_ntfy - AP parameters (response) - * @bssid: BSSID of the AP - * @measure_status: current APs measurement status, one of - * &enum iwl_tof_entry_status. - * @measure_bw: Current AP Bandwidth: 0 20MHz, 1 40MHz, 2 80MHz - * @rtt: The Round Trip Time that took for the last measurement for - * current AP [nSec] - * @rtt_variance: The Variance of the RTT values measured for current AP - * @rtt_spread: The Difference between the maximum and the minimum RTT - * values measured for current AP in the current session [nsec] - * @rssi: RSSI as uploaded in the Channel Estimation notification - * @rssi_spread: The Difference between the maximum and the minimum RSSI values - * measured for current AP in the current session - * @reserved: reserved - * @range: Measured range [cm] - * @range_variance: Measured range variance [cm] - * @timestamp: The GP2 Clock [usec] where Channel Estimation notification was - * uploaded by the LMAC - */ -struct iwl_tof_range_rsp_ap_entry_ntfy { - u8 bssid[ETH_ALEN]; - u8 measure_status; - u8 measure_bw; - __le32 rtt; - __le32 rtt_variance; - __le32 rtt_spread; - s8 rssi; - u8 rssi_spread; - __le16 reserved; - __le32 range; - __le32 range_variance; - __le32 timestamp; -} __packed; - -/** - * struct iwl_tof_range_rsp_ntfy - - * @request_id: A Token ID of the corresponding Range request - * @request_status: status of current measurement session - * @last_in_batch: reprot policy (when not all responses are uploaded at once) - * @num_of_aps: Number of APs to measure (error if > IWL_MVM_TOF_MAX_APS) - * @ap: per-AP data - */ -struct iwl_tof_range_rsp_ntfy { - u8 request_id; - u8 request_status; - u8 last_in_batch; - u8 num_of_aps; - struct iwl_tof_range_rsp_ap_entry_ntfy ap[IWL_MVM_TOF_MAX_APS]; -} __packed; - -#define IWL_MVM_TOF_MCSI_BUF_SIZE (245) -/** - * struct iwl_tof_mcsi_notif - used for debug - * @token: token ID for the current session - * @role: '0' - initiator, '1' - responder - * @reserved: reserved - * @initiator_bssid: initiator machine - * @responder_bssid: responder machine - * @mcsi_buffer: debug data - */ -struct iwl_tof_mcsi_notif { - u8 token; - u8 role; - __le16 reserved; - u8 initiator_bssid[ETH_ALEN]; - u8 responder_bssid[ETH_ALEN]; - u8 mcsi_buffer[IWL_MVM_TOF_MCSI_BUF_SIZE * 4]; -} __packed; - -/** - * struct iwl_tof_neighbor_report_notif - * @bssid: BSSID of the AP which sent the report - * @request_token: same token as the corresponding request - * @status: - * @report_ie_len: the length of the response frame starting from the Element ID - * @data: the IEs - */ -struct iwl_tof_neighbor_report { - u8 bssid[ETH_ALEN]; - u8 request_token; - u8 status; - __le16 report_ie_len; - u8 data[]; -} __packed; - -/** - * struct iwl_tof_range_abort_cmd - * @request_id: corresponds to a range request - * @reserved: reserved - */ -struct iwl_tof_range_abort_cmd { - __le32 sub_grp_cmd_id; - u8 request_id; - u8 reserved[3]; -} __packed; - -#endif diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h index 81f557c0b58d..dfe02bdb7481 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/file.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h @@ -303,7 +303,6 @@ typedef unsigned int __bitwise iwl_ucode_tlv_capa_t; * @IWL_UCODE_TLV_CAPA_LAR_SUPPORT: supports Location Aware Regulatory * @IWL_UCODE_TLV_CAPA_UMAC_SCAN: supports UMAC scan. * @IWL_UCODE_TLV_CAPA_BEAMFORMER: supports Beamformer - * @IWL_UCODE_TLV_CAPA_TOF_SUPPORT: supports Time of Flight (802.11mc FTM) * @IWL_UCODE_TLV_CAPA_TDLS_SUPPORT: support basic TDLS functionality * @IWL_UCODE_TLV_CAPA_TXPOWER_INSERTION_SUPPORT: supports insertion of current * tx power value into TPC Report action frame and Link Measurement Report @@ -369,7 +368,6 @@ enum iwl_ucode_tlv_capa { IWL_UCODE_TLV_CAPA_LAR_SUPPORT = (__force iwl_ucode_tlv_capa_t)1, IWL_UCODE_TLV_CAPA_UMAC_SCAN = (__force iwl_ucode_tlv_capa_t)2, IWL_UCODE_TLV_CAPA_BEAMFORMER = (__force iwl_ucode_tlv_capa_t)3, - IWL_UCODE_TLV_CAPA_TOF_SUPPORT = (__force iwl_ucode_tlv_capa_t)5, IWL_UCODE_TLV_CAPA_TDLS_SUPPORT = (__force iwl_ucode_tlv_capa_t)6, IWL_UCODE_TLV_CAPA_TXPOWER_INSERTION_SUPPORT = (__force iwl_ucode_tlv_capa_t)8, IWL_UCODE_TLV_CAPA_DS_PARAM_SET_IE_SUPPORT = (__force iwl_ucode_tlv_capa_t)9, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/Makefile b/drivers/net/wireless/intel/iwlwifi/mvm/Makefile index 9ffd21918b5a..87373b099fd9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/Makefile +++ b/drivers/net/wireless/intel/iwlwifi/mvm/Makefile @@ -7,7 +7,6 @@ iwlmvm-y += power.o coex.o iwlmvm-y += tt.o offloading.o tdls.o iwlmvm-$(CONFIG_IWLWIFI_DEBUGFS) += debugfs.o debugfs-vif.o iwlmvm-$(CONFIG_IWLWIFI_LEDS) += led.o -iwlmvm-y += tof.o iwlmvm-$(CONFIG_PM) += d3.o ccflags-y += -I$(src)/../ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index 33b0af24a537..1c0f84049573 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -60,7 +60,6 @@ * *****************************************************************************/ #include "mvm.h" -#include "fw/api/tof.h" #include "debugfs.h" static void iwl_dbgfs_update_pm(struct iwl_mvm *mvm, @@ -523,751 +522,6 @@ static ssize_t iwl_dbgfs_os_device_timediff_read(struct file *file, return simple_read_from_buffer(user_buf, count, ppos, buf, pos); } -static ssize_t iwl_dbgfs_tof_enable_write(struct ieee80211_vif *vif, - char *buf, - size_t count, loff_t *ppos) -{ - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_mvm *mvm = mvmvif->mvm; - u32 value; - int ret = -EINVAL; - char *data; - - mutex_lock(&mvm->mutex); - - data = iwl_dbgfs_is_match("tof_disabled=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.tof_cfg.tof_disabled = value; - goto out; - } - - data = iwl_dbgfs_is_match("one_sided_disabled=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.tof_cfg.one_sided_disabled = value; - goto out; - } - - data = iwl_dbgfs_is_match("is_debug_mode=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.tof_cfg.is_debug_mode = value; - goto out; - } - - data = iwl_dbgfs_is_match("is_buf=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.tof_cfg.is_buf_required = value; - goto out; - } - - data = iwl_dbgfs_is_match("send_tof_cfg=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0 && value) { - ret = iwl_mvm_tof_config_cmd(mvm); - goto out; - } - } - -out: - mutex_unlock(&mvm->mutex); - - return ret ?: count; -} - -static ssize_t iwl_dbgfs_tof_enable_read(struct file *file, - char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct ieee80211_vif *vif = file->private_data; - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_mvm *mvm = mvmvif->mvm; - char buf[256]; - int pos = 0; - const size_t bufsz = sizeof(buf); - struct iwl_tof_config_cmd *cmd; - - cmd = &mvm->tof_data.tof_cfg; - - mutex_lock(&mvm->mutex); - - pos += scnprintf(buf + pos, bufsz - pos, "tof_disabled = %d\n", - cmd->tof_disabled); - pos += scnprintf(buf + pos, bufsz - pos, "one_sided_disabled = %d\n", - cmd->one_sided_disabled); - pos += scnprintf(buf + pos, bufsz - pos, "is_debug_mode = %d\n", - cmd->is_debug_mode); - pos += scnprintf(buf + pos, bufsz - pos, "is_buf_required = %d\n", - cmd->is_buf_required); - - mutex_unlock(&mvm->mutex); - - return simple_read_from_buffer(user_buf, count, ppos, buf, pos); -} - -static ssize_t iwl_dbgfs_tof_responder_params_write(struct ieee80211_vif *vif, - char *buf, - size_t count, loff_t *ppos) -{ - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_mvm *mvm = mvmvif->mvm; - u32 value; - int ret = 0; - char *data; - - mutex_lock(&mvm->mutex); - - data = iwl_dbgfs_is_match("burst_period=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (!ret) - mvm->tof_data.responder_cfg.burst_period = - cpu_to_le16(value); - goto out; - } - - data = iwl_dbgfs_is_match("min_delta_ftm=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.responder_cfg.min_delta_ftm = value; - goto out; - } - - data = iwl_dbgfs_is_match("burst_duration=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.responder_cfg.burst_duration = value; - goto out; - } - - data = iwl_dbgfs_is_match("num_of_burst_exp=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.responder_cfg.num_of_burst_exp = value; - goto out; - } - - data = iwl_dbgfs_is_match("abort_responder=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.responder_cfg.abort_responder = value; - goto out; - } - - data = iwl_dbgfs_is_match("get_ch_est=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.responder_cfg.get_ch_est = value; - goto out; - } - - data = iwl_dbgfs_is_match("recv_sta_req_params=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.responder_cfg.recv_sta_req_params = value; - goto out; - } - - data = iwl_dbgfs_is_match("channel_num=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.responder_cfg.channel_num = value; - goto out; - } - - data = iwl_dbgfs_is_match("bandwidth=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.responder_cfg.bandwidth = value; - goto out; - } - - data = iwl_dbgfs_is_match("rate=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.responder_cfg.rate = value; - goto out; - } - - data = iwl_dbgfs_is_match("bssid=", buf); - if (data) { - u8 *mac = mvm->tof_data.responder_cfg.bssid; - - if (!mac_pton(data, mac)) { - ret = -EINVAL; - goto out; - } - } - - data = iwl_dbgfs_is_match("tsf_timer_offset_msecs=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.responder_cfg.tsf_timer_offset_msecs = - cpu_to_le16(value); - goto out; - } - - data = iwl_dbgfs_is_match("toa_offset=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.responder_cfg.toa_offset = - cpu_to_le16(value); - goto out; - } - - data = iwl_dbgfs_is_match("center_freq=", buf); - if (data) { - struct iwl_tof_responder_config_cmd *cmd = - &mvm->tof_data.responder_cfg; - - ret = kstrtou32(data, 10, &value); - if (ret == 0 && value) { - enum nl80211_band band = (cmd->channel_num <= 14) ? - NL80211_BAND_2GHZ : - NL80211_BAND_5GHZ; - struct ieee80211_channel chn = { - .band = band, - .center_freq = ieee80211_channel_to_frequency( - cmd->channel_num, band), - }; - struct cfg80211_chan_def chandef = { - .chan = &chn, - .center_freq1 = - ieee80211_channel_to_frequency(value, - band), - }; - - cmd->ctrl_ch_position = iwl_mvm_get_ctrl_pos(&chandef); - } - goto out; - } - - data = iwl_dbgfs_is_match("ftm_per_burst=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.responder_cfg.ftm_per_burst = value; - goto out; - } - - data = iwl_dbgfs_is_match("ftm_resp_ts_avail=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.responder_cfg.ftm_resp_ts_avail = value; - goto out; - } - - data = iwl_dbgfs_is_match("asap_mode=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.responder_cfg.asap_mode = value; - goto out; - } - - data = iwl_dbgfs_is_match("send_responder_cfg=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0 && value) { - ret = iwl_mvm_tof_responder_cmd(mvm, vif); - goto out; - } - } - -out: - mutex_unlock(&mvm->mutex); - - return ret ?: count; -} - -static ssize_t iwl_dbgfs_tof_responder_params_read(struct file *file, - char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct ieee80211_vif *vif = file->private_data; - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_mvm *mvm = mvmvif->mvm; - char buf[256]; - int pos = 0; - const size_t bufsz = sizeof(buf); - struct iwl_tof_responder_config_cmd *cmd; - - cmd = &mvm->tof_data.responder_cfg; - - mutex_lock(&mvm->mutex); - - pos += scnprintf(buf + pos, bufsz - pos, "burst_period = %d\n", - le16_to_cpu(cmd->burst_period)); - pos += scnprintf(buf + pos, bufsz - pos, "burst_duration = %d\n", - cmd->burst_duration); - pos += scnprintf(buf + pos, bufsz - pos, "bandwidth = %d\n", - cmd->bandwidth); - pos += scnprintf(buf + pos, bufsz - pos, "channel_num = %d\n", - cmd->channel_num); - pos += scnprintf(buf + pos, bufsz - pos, "ctrl_ch_position = 0x%x\n", - cmd->ctrl_ch_position); - pos += scnprintf(buf + pos, bufsz - pos, "bssid = %pM\n", - cmd->bssid); - pos += scnprintf(buf + pos, bufsz - pos, "min_delta_ftm = %d\n", - cmd->min_delta_ftm); - pos += scnprintf(buf + pos, bufsz - pos, "num_of_burst_exp = %d\n", - cmd->num_of_burst_exp); - pos += scnprintf(buf + pos, bufsz - pos, "rate = %d\n", cmd->rate); - pos += scnprintf(buf + pos, bufsz - pos, "abort_responder = %d\n", - cmd->abort_responder); - pos += scnprintf(buf + pos, bufsz - pos, "get_ch_est = %d\n", - cmd->get_ch_est); - pos += scnprintf(buf + pos, bufsz - pos, "recv_sta_req_params = %d\n", - cmd->recv_sta_req_params); - pos += scnprintf(buf + pos, bufsz - pos, "ftm_per_burst = %d\n", - cmd->ftm_per_burst); - pos += scnprintf(buf + pos, bufsz - pos, "ftm_resp_ts_avail = %d\n", - cmd->ftm_resp_ts_avail); - pos += scnprintf(buf + pos, bufsz - pos, "asap_mode = %d\n", - cmd->asap_mode); - pos += scnprintf(buf + pos, bufsz - pos, - "tsf_timer_offset_msecs = %d\n", - le16_to_cpu(cmd->tsf_timer_offset_msecs)); - pos += scnprintf(buf + pos, bufsz - pos, "toa_offset = %d\n", - le16_to_cpu(cmd->toa_offset)); - - mutex_unlock(&mvm->mutex); - - return simple_read_from_buffer(user_buf, count, ppos, buf, pos); -} - -static ssize_t iwl_dbgfs_tof_range_request_write(struct ieee80211_vif *vif, - char *buf, size_t count, - loff_t *ppos) -{ - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_mvm *mvm = mvmvif->mvm; - u32 value; - int ret = 0; - char *data; - - mutex_lock(&mvm->mutex); - - data = iwl_dbgfs_is_match("request_id=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.range_req.request_id = value; - goto out; - } - - data = iwl_dbgfs_is_match("initiator=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.range_req.initiator = value; - goto out; - } - - data = iwl_dbgfs_is_match("one_sided_los_disable=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.range_req.one_sided_los_disable = value; - goto out; - } - - data = iwl_dbgfs_is_match("req_timeout=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.range_req.req_timeout = value; - goto out; - } - - data = iwl_dbgfs_is_match("report_policy=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.range_req.report_policy = value; - goto out; - } - - data = iwl_dbgfs_is_match("macaddr_random=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.range_req.macaddr_random = value; - goto out; - } - - data = iwl_dbgfs_is_match("num_of_ap=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.range_req.num_of_ap = value; - goto out; - } - - data = iwl_dbgfs_is_match("macaddr_template=", buf); - if (data) { - u8 mac[ETH_ALEN]; - - if (!mac_pton(data, mac)) { - ret = -EINVAL; - goto out; - } - memcpy(mvm->tof_data.range_req.macaddr_template, mac, ETH_ALEN); - goto out; - } - - data = iwl_dbgfs_is_match("macaddr_mask=", buf); - if (data) { - u8 mac[ETH_ALEN]; - - if (!mac_pton(data, mac)) { - ret = -EINVAL; - goto out; - } - memcpy(mvm->tof_data.range_req.macaddr_mask, mac, ETH_ALEN); - goto out; - } - - data = iwl_dbgfs_is_match("ap=", buf); - if (data) { - struct iwl_tof_range_req_ap_entry ap = {}; - int size = sizeof(struct iwl_tof_range_req_ap_entry); - u16 burst_period; - u8 *mac = ap.bssid; - unsigned int i; - - if (sscanf(data, "%u %hhd %hhd %hhd" - "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx" - "%hhd %hhd %hd" - "%hhd %hhd %d" - "%hhx %hhd %hhd %hhd", - &i, &ap.channel_num, &ap.bandwidth, - &ap.ctrl_ch_position, - mac, mac + 1, mac + 2, mac + 3, mac + 4, mac + 5, - &ap.measure_type, &ap.num_of_bursts, - &burst_period, - &ap.samples_per_burst, &ap.retries_per_sample, - &ap.tsf_delta, &ap.location_req, &ap.asap_mode, - &ap.enable_dyn_ack, &ap.rssi) != 20) { - ret = -EINVAL; - goto out; - } - if (i >= IWL_MVM_TOF_MAX_APS) { - IWL_ERR(mvm, "Invalid AP index %d\n", i); - ret = -EINVAL; - goto out; - } - - ap.burst_period = cpu_to_le16(burst_period); - - memcpy(&mvm->tof_data.range_req.ap[i], &ap, size); - goto out; - } - - data = iwl_dbgfs_is_match("send_range_request=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0 && value) - ret = iwl_mvm_tof_range_request_cmd(mvm, vif); - goto out; - } - - ret = -EINVAL; -out: - mutex_unlock(&mvm->mutex); - return ret ?: count; -} - -static ssize_t iwl_dbgfs_tof_range_request_read(struct file *file, - char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct ieee80211_vif *vif = file->private_data; - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_mvm *mvm = mvmvif->mvm; - char buf[512]; - int pos = 0; - const size_t bufsz = sizeof(buf); - struct iwl_tof_range_req_cmd *cmd; - int i; - - cmd = &mvm->tof_data.range_req; - - mutex_lock(&mvm->mutex); - - pos += scnprintf(buf + pos, bufsz - pos, "request_id= %d\n", - cmd->request_id); - pos += scnprintf(buf + pos, bufsz - pos, "initiator= %d\n", - cmd->initiator); - pos += scnprintf(buf + pos, bufsz - pos, "one_sided_los_disable = %d\n", - cmd->one_sided_los_disable); - pos += scnprintf(buf + pos, bufsz - pos, "req_timeout= %d\n", - cmd->req_timeout); - pos += scnprintf(buf + pos, bufsz - pos, "report_policy= %d\n", - cmd->report_policy); - pos += scnprintf(buf + pos, bufsz - pos, "macaddr_random= %d\n", - cmd->macaddr_random); - pos += scnprintf(buf + pos, bufsz - pos, "macaddr_template= %pM\n", - cmd->macaddr_template); - pos += scnprintf(buf + pos, bufsz - pos, "macaddr_mask= %pM\n", - cmd->macaddr_mask); - pos += scnprintf(buf + pos, bufsz - pos, "num_of_ap= %d\n", - cmd->num_of_ap); - for (i = 0; i < cmd->num_of_ap; i++) { - struct iwl_tof_range_req_ap_entry *ap = &cmd->ap[i]; - - pos += scnprintf(buf + pos, bufsz - pos, - "ap %.2d: channel_num=%hhd bw=%hhd" - " control=%hhd bssid=%pM type=%hhd" - " num_of_bursts=%hhd burst_period=%hd ftm=%hhd" - " retries=%hhd tsf_delta=%d" - " tsf_delta_direction=%hhd location_req=0x%hhx " - " asap=%hhd enable=%hhd rssi=%hhd\n", - i, ap->channel_num, ap->bandwidth, - ap->ctrl_ch_position, ap->bssid, - ap->measure_type, ap->num_of_bursts, - ap->burst_period, ap->samples_per_burst, - ap->retries_per_sample, ap->tsf_delta, - ap->tsf_delta_direction, - ap->location_req, ap->asap_mode, - ap->enable_dyn_ack, ap->rssi); - } - - mutex_unlock(&mvm->mutex); - - return simple_read_from_buffer(user_buf, count, ppos, buf, pos); -} - -static ssize_t iwl_dbgfs_tof_range_req_ext_write(struct ieee80211_vif *vif, - char *buf, - size_t count, loff_t *ppos) -{ - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_mvm *mvm = mvmvif->mvm; - u32 value; - int ret = 0; - char *data; - - mutex_lock(&mvm->mutex); - - data = iwl_dbgfs_is_match("tsf_timer_offset_msec=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.range_req_ext.tsf_timer_offset_msec = - cpu_to_le16(value); - goto out; - } - - data = iwl_dbgfs_is_match("min_delta_ftm=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.range_req_ext.min_delta_ftm = value; - goto out; - } - - data = iwl_dbgfs_is_match("ftm_format_and_bw20M=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.range_req_ext.ftm_format_and_bw20M = - value; - goto out; - } - - data = iwl_dbgfs_is_match("ftm_format_and_bw40M=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.range_req_ext.ftm_format_and_bw40M = - value; - goto out; - } - - data = iwl_dbgfs_is_match("ftm_format_and_bw80M=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.range_req_ext.ftm_format_and_bw80M = - value; - goto out; - } - - data = iwl_dbgfs_is_match("send_range_req_ext=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0 && value) - ret = iwl_mvm_tof_range_request_ext_cmd(mvm, vif); - goto out; - } - - ret = -EINVAL; -out: - mutex_unlock(&mvm->mutex); - return ret ?: count; -} - -static ssize_t iwl_dbgfs_tof_range_req_ext_read(struct file *file, - char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct ieee80211_vif *vif = file->private_data; - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_mvm *mvm = mvmvif->mvm; - char buf[256]; - int pos = 0; - const size_t bufsz = sizeof(buf); - struct iwl_tof_range_req_ext_cmd *cmd; - - cmd = &mvm->tof_data.range_req_ext; - - mutex_lock(&mvm->mutex); - - pos += scnprintf(buf + pos, bufsz - pos, - "tsf_timer_offset_msec = %hd\n", - cmd->tsf_timer_offset_msec); - pos += scnprintf(buf + pos, bufsz - pos, "min_delta_ftm = %hhd\n", - cmd->min_delta_ftm); - pos += scnprintf(buf + pos, bufsz - pos, - "ftm_format_and_bw20M = %hhd\n", - cmd->ftm_format_and_bw20M); - pos += scnprintf(buf + pos, bufsz - pos, - "ftm_format_and_bw40M = %hhd\n", - cmd->ftm_format_and_bw40M); - pos += scnprintf(buf + pos, bufsz - pos, - "ftm_format_and_bw80M = %hhd\n", - cmd->ftm_format_and_bw80M); - - mutex_unlock(&mvm->mutex); - return simple_read_from_buffer(user_buf, count, ppos, buf, pos); -} - -static ssize_t iwl_dbgfs_tof_range_abort_write(struct ieee80211_vif *vif, - char *buf, - size_t count, loff_t *ppos) -{ - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_mvm *mvm = mvmvif->mvm; - u32 value; - int abort_id, ret = 0; - char *data; - - mutex_lock(&mvm->mutex); - - data = iwl_dbgfs_is_match("abort_id=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0) - mvm->tof_data.last_abort_id = value; - goto out; - } - - data = iwl_dbgfs_is_match("send_range_abort=", buf); - if (data) { - ret = kstrtou32(data, 10, &value); - if (ret == 0 && value) { - abort_id = mvm->tof_data.last_abort_id; - ret = iwl_mvm_tof_range_abort_cmd(mvm, abort_id); - goto out; - } - } - -out: - mutex_unlock(&mvm->mutex); - return ret ?: count; -} - -static ssize_t iwl_dbgfs_tof_range_abort_read(struct file *file, - char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct ieee80211_vif *vif = file->private_data; - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_mvm *mvm = mvmvif->mvm; - char buf[32]; - int pos = 0; - const size_t bufsz = sizeof(buf); - int last_abort_id; - - mutex_lock(&mvm->mutex); - last_abort_id = mvm->tof_data.last_abort_id; - mutex_unlock(&mvm->mutex); - - pos += scnprintf(buf + pos, bufsz - pos, "last_abort_id = %d\n", - last_abort_id); - return simple_read_from_buffer(user_buf, count, ppos, buf, pos); -} - -static ssize_t iwl_dbgfs_tof_range_response_read(struct file *file, - char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct ieee80211_vif *vif = file->private_data; - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_mvm *mvm = mvmvif->mvm; - char *buf; - int pos = 0; - const size_t bufsz = sizeof(struct iwl_tof_range_rsp_ntfy) + 256; - struct iwl_tof_range_rsp_ntfy *cmd; - int i, ret; - - buf = kzalloc(bufsz, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - mutex_lock(&mvm->mutex); - cmd = &mvm->tof_data.range_resp; - - pos += scnprintf(buf + pos, bufsz - pos, "request_id = %d\n", - cmd->request_id); - pos += scnprintf(buf + pos, bufsz - pos, "status = %d\n", - cmd->request_status); - pos += scnprintf(buf + pos, bufsz - pos, "last_in_batch = %d\n", - cmd->last_in_batch); - pos += scnprintf(buf + pos, bufsz - pos, "num_of_aps = %d\n", - cmd->num_of_aps); - for (i = 0; i < cmd->num_of_aps; i++) { - struct iwl_tof_range_rsp_ap_entry_ntfy *ap = &cmd->ap[i]; - - pos += scnprintf(buf + pos, bufsz - pos, - "ap %.2d: bssid=%pM status=%hhd bw=%hhd" - " rtt=%d rtt_var=%d rtt_spread=%d" - " rssi=%hhd rssi_spread=%hhd" - " range=%d range_var=%d" - " time_stamp=%d\n", - i, ap->bssid, ap->measure_status, - ap->measure_bw, - ap->rtt, ap->rtt_variance, ap->rtt_spread, - ap->rssi, ap->rssi_spread, ap->range, - ap->range_variance, ap->timestamp); - } - mutex_unlock(&mvm->mutex); - - ret = simple_read_from_buffer(user_buf, count, ppos, buf, pos); - kfree(buf); - return ret; -} - static ssize_t iwl_dbgfs_low_latency_write(struct ieee80211_vif *vif, char *buf, size_t count, loff_t *ppos) { @@ -1458,12 +712,6 @@ MVM_DEBUGFS_READ_WRITE_FILE_OPS(bf_params, 256); MVM_DEBUGFS_READ_WRITE_FILE_OPS(low_latency, 10); MVM_DEBUGFS_READ_WRITE_FILE_OPS(uapsd_misbehaving, 20); MVM_DEBUGFS_READ_WRITE_FILE_OPS(rx_phyinfo, 10); -MVM_DEBUGFS_READ_WRITE_FILE_OPS(tof_enable, 32); -MVM_DEBUGFS_READ_WRITE_FILE_OPS(tof_range_request, 512); -MVM_DEBUGFS_READ_WRITE_FILE_OPS(tof_range_req_ext, 32); -MVM_DEBUGFS_READ_WRITE_FILE_OPS(tof_range_abort, 32); -MVM_DEBUGFS_READ_FILE_OPS(tof_range_response); -MVM_DEBUGFS_READ_WRITE_FILE_OPS(tof_responder_params, 32); MVM_DEBUGFS_READ_WRITE_FILE_OPS(quota_min, 32); MVM_DEBUGFS_READ_FILE_OPS(os_device_timediff); @@ -1506,24 +754,6 @@ void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif) mvmvif == mvm->bf_allowed_vif) MVM_DEBUGFS_ADD_FILE_VIF(bf_params, mvmvif->dbgfs_dir, 0600); - if (fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TOF_SUPPORT) && - !vif->p2p && (vif->type != NL80211_IFTYPE_P2P_DEVICE)) { - if (IWL_MVM_TOF_IS_RESPONDER && vif->type == NL80211_IFTYPE_AP) - MVM_DEBUGFS_ADD_FILE_VIF(tof_responder_params, - mvmvif->dbgfs_dir, 0600); - - MVM_DEBUGFS_ADD_FILE_VIF(tof_range_request, mvmvif->dbgfs_dir, - 0600); - MVM_DEBUGFS_ADD_FILE_VIF(tof_range_req_ext, mvmvif->dbgfs_dir, - 0600); - MVM_DEBUGFS_ADD_FILE_VIF(tof_enable, mvmvif->dbgfs_dir, - 0600); - MVM_DEBUGFS_ADD_FILE_VIF(tof_range_abort, mvmvif->dbgfs_dir, - 0600); - MVM_DEBUGFS_ADD_FILE_VIF(tof_range_response, mvmvif->dbgfs_dir, - 0400); - } - /* * Create symlink for convenience pointing to interface specific * debugfs entries for the driver. For example, under diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h index 143c7fcaea41..851bfb259f10 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h @@ -8,6 +8,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright (C) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -30,6 +31,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright (C) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -89,7 +91,6 @@ #include "fw/api/sf.h" #include "fw/api/sta.h" #include "fw/api/stats.h" -#include "fw/api/tof.h" #include "fw/api/tx.h" #endif /* __fw_api_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index a289b9488c03..5d28080e257e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -83,7 +83,6 @@ #include "sta.h" #include "fw-api.h" #include "constants.h" -#include "tof.h" #include "fw/runtime.h" #include "fw/dbg.h" #include "fw/acpi.h" @@ -1124,7 +1123,6 @@ struct iwl_mvm { u32 ciphers[IWL_MVM_NUM_CIPHERS]; struct ieee80211_cipher_scheme cs[IWL_UCODE_MAX_CS]; - struct iwl_mvm_tof_data tof_data; struct ieee80211_vif *nan_vif; #define IWL_MAX_BAID 32 @@ -1184,7 +1182,6 @@ enum iwl_mvm_init_status { IWL_MVM_INIT_STATUS_THERMAL_INIT_COMPLETE = BIT(0), IWL_MVM_INIT_STATUS_LEDS_INIT_COMPLETE = BIT(1), IWL_MVM_INIT_STATUS_REG_HW_INIT_COMPLETE = BIT(2), - IWL_MVM_INIT_STATUS_TOF_INIT_COMPLETE = BIT(3), }; static inline bool iwl_mvm_is_radio_killed(struct iwl_mvm *mvm) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index ee2ef3f0042c..93b7cc5910fe 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -301,8 +301,6 @@ static const struct iwl_rx_handlers iwl_mvm_rx_handlers[] = { RX_HANDLER_ASYNC_LOCKED), RX_HANDLER(MFUART_LOAD_NOTIFICATION, iwl_mvm_rx_mfuart_notif, RX_HANDLER_SYNC), - RX_HANDLER(TOF_NOTIFICATION, iwl_mvm_tof_resp_handler, - RX_HANDLER_ASYNC_LOCKED), RX_HANDLER_GRP(DEBUG_GROUP, MFU_ASSERT_DUMP_NTF, iwl_mvm_mfu_assert_dump_notif, RX_HANDLER_SYNC), RX_HANDLER_GRP(PROT_OFFLOAD_GROUP, STORED_BEACON_NTF, @@ -329,8 +327,6 @@ static const struct iwl_hcmd_names iwl_mvm_legacy_names[] = { HCMD_NAME(SCAN_REQ_UMAC), HCMD_NAME(SCAN_ABORT_UMAC), HCMD_NAME(SCAN_COMPLETE_UMAC), - HCMD_NAME(TOF_CMD), - HCMD_NAME(TOF_NOTIFICATION), HCMD_NAME(BA_WINDOW_STATUS_NOTIFICATION_ID), HCMD_NAME(ADD_STA_KEY), HCMD_NAME(ADD_STA), @@ -846,8 +842,6 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, if (iwl_mvm_is_d0i3_supported(mvm)) iwl_trans_unref(mvm->trans); - iwl_mvm_tof_init(mvm); - iwl_mvm_toggle_tx_ant(mvm, &mvm->mgmt_last_antenna_idx); return op_mode; @@ -913,8 +907,6 @@ static void iwl_op_mode_mvm_stop(struct iwl_op_mode *op_mode) cancel_delayed_work_sync(&mvm->tcm.work); - iwl_mvm_tof_clean(mvm); - iwl_fw_runtime_free(&mvm->fwrt); mutex_destroy(&mvm->mutex); mutex_destroy(&mvm->d0i3_suspend_mutex); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tof.c b/drivers/net/wireless/intel/iwlwifi/mvm/tof.c deleted file mode 100644 index 01e0a999063b..000000000000 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tof.c +++ /dev/null @@ -1,305 +0,0 @@ -/****************************************************************************** - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2015 Intel Deutschland GmbH - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * The full GNU General Public License is included in this distribution - * in the file called COPYING. - * - * Contact Information: - * Intel Linux Wireless - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - * BSD LICENSE - * - * Copyright(c) 2015 Intel Deutschland GmbH - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - *****************************************************************************/ -#include "mvm.h" -#include "fw/api/tof.h" - -#define IWL_MVM_TOF_RANGE_REQ_MAX_ID 256 - -void iwl_mvm_tof_init(struct iwl_mvm *mvm) -{ - struct iwl_mvm_tof_data *tof_data = &mvm->tof_data; - - if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TOF_SUPPORT)) - return; - - memset(tof_data, 0, sizeof(*tof_data)); - - tof_data->tof_cfg.sub_grp_cmd_id = cpu_to_le32(TOF_CONFIG_CMD); - -#ifdef CONFIG_IWLWIFI_DEBUGFS - if (IWL_MVM_TOF_IS_RESPONDER) { - tof_data->responder_cfg.sub_grp_cmd_id = - cpu_to_le32(TOF_RESPONDER_CONFIG_CMD); - tof_data->responder_cfg.sta_id = IWL_MVM_INVALID_STA; - } -#endif - - tof_data->range_req.sub_grp_cmd_id = cpu_to_le32(TOF_RANGE_REQ_CMD); - tof_data->range_req.req_timeout = 1; - tof_data->range_req.initiator = 1; - tof_data->range_req.report_policy = 3; - - tof_data->range_req_ext.sub_grp_cmd_id = - cpu_to_le32(TOF_RANGE_REQ_EXT_CMD); - - mvm->tof_data.active_range_request = IWL_MVM_TOF_RANGE_REQ_MAX_ID; - mvm->init_status |= IWL_MVM_INIT_STATUS_TOF_INIT_COMPLETE; -} - -void iwl_mvm_tof_clean(struct iwl_mvm *mvm) -{ - struct iwl_mvm_tof_data *tof_data = &mvm->tof_data; - - if (!fw_has_capa(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_CAPA_TOF_SUPPORT) || - !(mvm->init_status & IWL_MVM_INIT_STATUS_TOF_INIT_COMPLETE)) - return; - - memset(tof_data, 0, sizeof(*tof_data)); - mvm->tof_data.active_range_request = IWL_MVM_TOF_RANGE_REQ_MAX_ID; - mvm->init_status &= ~IWL_MVM_INIT_STATUS_TOF_INIT_COMPLETE; -} - -static void iwl_tof_iterator(void *_data, u8 *mac, - struct ieee80211_vif *vif) -{ - bool *enabled = _data; - - /* non bss vif exists */ - if (ieee80211_vif_type_p2p(vif) != NL80211_IFTYPE_STATION) - *enabled = false; -} - -int iwl_mvm_tof_config_cmd(struct iwl_mvm *mvm) -{ - struct iwl_tof_config_cmd *cmd = &mvm->tof_data.tof_cfg; - bool enabled; - - lockdep_assert_held(&mvm->mutex); - - if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TOF_SUPPORT)) - return -EINVAL; - - ieee80211_iterate_active_interfaces_atomic(mvm->hw, - IEEE80211_IFACE_ITER_NORMAL, - iwl_tof_iterator, &enabled); - if (!enabled) { - IWL_DEBUG_INFO(mvm, "ToF is not supported (non bss vif)\n"); - return -EINVAL; - } - - mvm->tof_data.active_range_request = IWL_MVM_TOF_RANGE_REQ_MAX_ID; - return iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(TOF_CMD, - IWL_ALWAYS_LONG_GROUP, 0), - 0, sizeof(*cmd), cmd); -} - -int iwl_mvm_tof_range_abort_cmd(struct iwl_mvm *mvm, u8 id) -{ - struct iwl_tof_range_abort_cmd cmd = { - .sub_grp_cmd_id = cpu_to_le32(TOF_RANGE_ABORT_CMD), - .request_id = id, - }; - - lockdep_assert_held(&mvm->mutex); - - if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TOF_SUPPORT)) - return -EINVAL; - - if (id != mvm->tof_data.active_range_request) { - IWL_ERR(mvm, "Invalid range request id %d (active %d)\n", - id, mvm->tof_data.active_range_request); - return -EINVAL; - } - - /* after abort is sent there's no active request anymore */ - mvm->tof_data.active_range_request = IWL_MVM_TOF_RANGE_REQ_MAX_ID; - - return iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(TOF_CMD, - IWL_ALWAYS_LONG_GROUP, 0), - 0, sizeof(cmd), &cmd); -} - -#ifdef CONFIG_IWLWIFI_DEBUGFS -int iwl_mvm_tof_responder_cmd(struct iwl_mvm *mvm, - struct ieee80211_vif *vif) -{ - struct iwl_tof_responder_config_cmd *cmd = &mvm->tof_data.responder_cfg; - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - - lockdep_assert_held(&mvm->mutex); - - if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TOF_SUPPORT)) - return -EINVAL; - - if (vif->p2p || vif->type != NL80211_IFTYPE_AP || - !mvmvif->ap_ibss_active) { - IWL_ERR(mvm, "Cannot start responder, not in AP mode\n"); - return -EIO; - } - - cmd->sta_id = mvmvif->bcast_sta.sta_id; - memcpy(cmd->bssid, vif->addr, ETH_ALEN); - return iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(TOF_CMD, - IWL_ALWAYS_LONG_GROUP, 0), - 0, sizeof(*cmd), cmd); -} -#endif - -int iwl_mvm_tof_range_request_cmd(struct iwl_mvm *mvm, - struct ieee80211_vif *vif) -{ - struct iwl_host_cmd cmd = { - .id = iwl_cmd_id(TOF_CMD, IWL_ALWAYS_LONG_GROUP, 0), - .len = { sizeof(mvm->tof_data.range_req), }, - /* no copy because of the command size */ - .dataflags = { IWL_HCMD_DFL_NOCOPY, }, - }; - - lockdep_assert_held(&mvm->mutex); - - if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TOF_SUPPORT)) - return -EINVAL; - - if (ieee80211_vif_type_p2p(vif) != NL80211_IFTYPE_STATION) { - IWL_ERR(mvm, "Cannot send range request, not STA mode\n"); - return -EIO; - } - - /* nesting of range requests is not supported in FW */ - if (mvm->tof_data.active_range_request != - IWL_MVM_TOF_RANGE_REQ_MAX_ID) { - IWL_ERR(mvm, "Cannot send range req, already active req %d\n", - mvm->tof_data.active_range_request); - return -EIO; - } - - mvm->tof_data.active_range_request = mvm->tof_data.range_req.request_id; - - cmd.data[0] = &mvm->tof_data.range_req; - return iwl_mvm_send_cmd(mvm, &cmd); -} - -int iwl_mvm_tof_range_request_ext_cmd(struct iwl_mvm *mvm, - struct ieee80211_vif *vif) -{ - lockdep_assert_held(&mvm->mutex); - - if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TOF_SUPPORT)) - return -EINVAL; - - if (ieee80211_vif_type_p2p(vif) != NL80211_IFTYPE_STATION) { - IWL_ERR(mvm, "Cannot send ext range req, not in STA mode\n"); - return -EIO; - } - - return iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(TOF_CMD, - IWL_ALWAYS_LONG_GROUP, 0), - 0, sizeof(mvm->tof_data.range_req_ext), - &mvm->tof_data.range_req_ext); -} - -static int iwl_mvm_tof_range_resp(struct iwl_mvm *mvm, void *data) -{ - struct iwl_tof_range_rsp_ntfy *resp = (void *)data; - - if (resp->request_id != mvm->tof_data.active_range_request) { - IWL_ERR(mvm, "Request id mismatch, got %d, active %d\n", - resp->request_id, mvm->tof_data.active_range_request); - return -EIO; - } - - memcpy(&mvm->tof_data.range_resp, resp, - sizeof(struct iwl_tof_range_rsp_ntfy)); - mvm->tof_data.active_range_request = IWL_MVM_TOF_RANGE_REQ_MAX_ID; - - return 0; -} - -static int iwl_mvm_tof_mcsi_notif(struct iwl_mvm *mvm, void *data) -{ - struct iwl_tof_mcsi_notif *resp = (struct iwl_tof_mcsi_notif *)data; - - IWL_DEBUG_INFO(mvm, "MCSI notification, token %d\n", resp->token); - return 0; -} - -static int iwl_mvm_tof_nb_report_notif(struct iwl_mvm *mvm, void *data) -{ - struct iwl_tof_neighbor_report *report = - (struct iwl_tof_neighbor_report *)data; - - IWL_DEBUG_INFO(mvm, "NB report, bssid %pM, token %d, status 0x%x\n", - report->bssid, report->request_token, report->status); - return 0; -} - -void iwl_mvm_tof_resp_handler(struct iwl_mvm *mvm, - struct iwl_rx_cmd_buffer *rxb) -{ - struct iwl_rx_packet *pkt = rxb_addr(rxb); - struct iwl_tof_gen_resp_cmd *resp = (void *)pkt->data; - - lockdep_assert_held(&mvm->mutex); - - switch (le32_to_cpu(resp->sub_grp_cmd_id)) { - case TOF_RANGE_RESPONSE_NOTIF: - iwl_mvm_tof_range_resp(mvm, resp->data); - break; - case TOF_MCSI_DEBUG_NOTIF: - iwl_mvm_tof_mcsi_notif(mvm, resp->data); - break; - case TOF_NEIGHBOR_REPORT_RSP_NOTIF: - iwl_mvm_tof_nb_report_notif(mvm, resp->data); - break; - default: - IWL_ERR(mvm, "Unknown sub-group command 0x%x\n", - resp->sub_grp_cmd_id); - break; - } -} diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tof.h b/drivers/net/wireless/intel/iwlwifi/mvm/tof.h deleted file mode 100644 index 8138d0606c52..000000000000 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tof.h +++ /dev/null @@ -1,89 +0,0 @@ -/****************************************************************************** - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2015 Intel Deutschland GmbH - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * The full GNU General Public License is included in this distribution - * in the file called COPYING. - * - * Contact Information: - * Intel Linux Wireless - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - * BSD LICENSE - * - * Copyright(c) 2015 Intel Deutschland GmbH - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - *****************************************************************************/ -#ifndef __tof_h__ -#define __tof_h__ - -#include "fw/api/tof.h" - -struct iwl_mvm_tof_data { - struct iwl_tof_config_cmd tof_cfg; - struct iwl_tof_range_req_cmd range_req; - struct iwl_tof_range_req_ext_cmd range_req_ext; -#ifdef CONFIG_IWLWIFI_DEBUGFS - struct iwl_tof_responder_config_cmd responder_cfg; -#endif - struct iwl_tof_range_rsp_ntfy range_resp; - u8 last_abort_id; - u16 active_range_request; -}; - -void iwl_mvm_tof_init(struct iwl_mvm *mvm); -void iwl_mvm_tof_clean(struct iwl_mvm *mvm); -int iwl_mvm_tof_config_cmd(struct iwl_mvm *mvm); -int iwl_mvm_tof_range_abort_cmd(struct iwl_mvm *mvm, u8 id); -int iwl_mvm_tof_range_request_cmd(struct iwl_mvm *mvm, - struct ieee80211_vif *vif); -void iwl_mvm_tof_resp_handler(struct iwl_mvm *mvm, - struct iwl_rx_cmd_buffer *rxb); -int iwl_mvm_tof_range_request_ext_cmd(struct iwl_mvm *mvm, - struct ieee80211_vif *vif); -#ifdef CONFIG_IWLWIFI_DEBUGFS -int iwl_mvm_tof_responder_cmd(struct iwl_mvm *mvm, - struct ieee80211_vif *vif); -#endif -#endif /* __tof_h__ */ From 4841914ef44066f2dce153770dd828ef307f28bb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 9 Nov 2018 11:03:29 +0100 Subject: [PATCH 0287/1436] iwlwifi: dvm: remove useless condition If we're in the else branch of checking "tt->state == IWL_TI_CT_KILL" so there's no point in checking "tt->state != IWL_TI_CT_KILL" again. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/dvm/tt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/tt.c b/drivers/net/wireless/intel/iwlwifi/dvm/tt.c index 4de2727ac63e..a156dcf5b7d9 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/tt.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/tt.c @@ -1,6 +1,7 @@ /****************************************************************************** * * Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved. + * Copyright (C) 2018 Intel Corporation * * Portions of this file are derived from the ipw3945 project, as well * as portions of the ieee80211 subsystem header files. @@ -325,9 +326,9 @@ static void iwl_legacy_tt_handler(struct iwl_priv *priv, s32 temp, bool force) iwl_prepare_ct_kill_task(priv); tt->state = old_state; } - } else if (old_state == IWL_TI_CT_KILL && - tt->state != IWL_TI_CT_KILL) + } else if (old_state == IWL_TI_CT_KILL) { iwl_perform_ct_kill_task(priv, false); + } IWL_DEBUG_TEMP(priv, "Temperature state changed %u\n", tt->state); IWL_DEBUG_TEMP(priv, "Power Index change to %u\n", From 93079fd5c0597a89f6fa3a1a3334739de10d6b07 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 9 Nov 2018 10:58:53 +0100 Subject: [PATCH 0288/1436] iwlwifi: pcie: use u32* argument to iwl_trans_get_fw_monitor_len() That's what we pass, and we don't want/need any negative values. Found by sparse/smatch. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 472a85d9a358..f74281508197 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3114,7 +3114,7 @@ iwl_trans_pcie_dump_monitor(struct iwl_trans *trans, return len; } -static int iwl_trans_get_fw_monitor_len(struct iwl_trans *trans, int *len) +static int iwl_trans_get_fw_monitor_len(struct iwl_trans *trans, u32 *len) { if (trans->num_blocks) { *len += sizeof(struct iwl_fw_error_dump_data) + From babea2d4fe76c6515da6231e8d940044bad686b1 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Sun, 4 Nov 2018 21:56:31 +0200 Subject: [PATCH 0289/1436] iwlwifi: mvm: Disconnect on large beacon loss Some buggy APs stop sending beacons, but continue to ack our null data packets or even run some traffic. It's better not to stick connected to such an AP forever, so disconnect after some larger beacon loss threshold is crossed. Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 5 +++-- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index c868ebfa10ce..7cfdd07d8736 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -1415,8 +1415,9 @@ void iwl_mvm_rx_missed_beacons_notif(struct iwl_mvm *mvm, * TODO: the threshold should be adjusted based on latency conditions, * and/or in case of a CS flow on one of the other AP vifs. */ - if (le32_to_cpu(mb->consec_missed_beacons_since_last_rx) > - IWL_MVM_MISSED_BEACONS_THRESHOLD) + if (rx_missed_bcon > IWL_MVM_MISSED_BEACONS_THRESHOLD_LONG) + iwl_mvm_connection_loss(mvm, vif, "missed beacons"); + else if (rx_missed_bcon_since_rx > IWL_MVM_MISSED_BEACONS_THRESHOLD) ieee80211_beacon_loss(vif); trigger = iwl_fw_dbg_trigger_on(&mvm->fwrt, ieee80211_vif_to_wdev(vif), diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 5d28080e257e..55f08e433331 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -94,6 +94,8 @@ /* RSSI offset for WkP */ #define IWL_RSSI_OFFSET 50 #define IWL_MVM_MISSED_BEACONS_THRESHOLD 8 +#define IWL_MVM_MISSED_BEACONS_THRESHOLD_LONG 16 + /* A TimeUnit is 1024 microsecond */ #define MSEC_TO_TU(_msec) (_msec*1000/1024) From 6c161980ba482d063c6453a32924a706858043ea Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Oct 2018 10:29:31 +0100 Subject: [PATCH 0290/1436] iwlwifi: mvm: add location APIs Add the location/time-of-flight/FTM APIs that we'll use in follow-up patches to implement FTM responder and initiator. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/fw/api/commands.h | 3 + .../wireless/intel/iwlwifi/fw/api/location.h | 691 ++++++++++++++++++ .../net/wireless/intel/iwlwifi/mvm/fw-api.h | 1 + drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 17 + 4 files changed, 712 insertions(+) create mode 100644 drivers/net/wireless/intel/iwlwifi/fw/api/location.h diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h index baba7bcee5fc..0290b333d860 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h @@ -77,6 +77,8 @@ * @DATA_PATH_GROUP: data path group, uses command IDs from * &enum iwl_data_path_subcmd_ids * @NAN_GROUP: NAN group, uses command IDs from &enum iwl_nan_subcmd_ids + * @LOCATION_GROUP: location group, uses command IDs from + * &enum iwl_location_subcmd_ids * @PROT_OFFLOAD_GROUP: protocol offload group, uses command IDs from * &enum iwl_prot_offload_subcmd_ids * @REGULATORY_AND_NVM_GROUP: regulatory/NVM group, uses command IDs from @@ -91,6 +93,7 @@ enum iwl_mvm_command_groups { PHY_OPS_GROUP = 0x4, DATA_PATH_GROUP = 0x5, NAN_GROUP = 0x7, + LOCATION_GROUP = 0x8, PROT_OFFLOAD_GROUP = 0xb, REGULATORY_AND_NVM_GROUP = 0xc, DEBUG_GROUP = 0xf, diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/location.h b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h new file mode 100644 index 000000000000..0df0851b9513 --- /dev/null +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h @@ -0,0 +1,691 @@ +/****************************************************************************** + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 - 2017 Intel Deutschland GmbH + * Copyright (C) 2018 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution + * in the file called COPYING. + * + * Contact Information: + * Intel Linux Wireless + * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + * + * BSD LICENSE + * + * Copyright(c) 2015 - 2017 Intel Deutschland GmbH + * Copyright (C) 2018 Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *****************************************************************************/ +#ifndef __iwl_fw_api_location_h__ +#define __iwl_fw_api_location_h__ + +/** + * enum iwl_location_subcmd_ids - location group command IDs + */ +enum iwl_location_subcmd_ids { + /** + * @TOF_RANGE_REQ_CMD: TOF ranging request, + * uses &struct iwl_tof_range_req_cmd + */ + TOF_RANGE_REQ_CMD = 0x0, + /** + * @TOF_CONFIG_CMD: TOF configuration, uses &struct iwl_tof_config_cmd + */ + TOF_CONFIG_CMD = 0x1, + /** + * @TOF_RANGE_ABORT_CMD: abort ongoing ranging, uses + * &struct iwl_tof_range_abort_cmd + */ + TOF_RANGE_ABORT_CMD = 0x2, + /** + * @TOF_RANGE_REQ_EXT_CMD: TOF extended ranging config, + * uses &struct iwl_tof_range_request_ext_cmd + */ + TOF_RANGE_REQ_EXT_CMD = 0x3, + /** + * @TOF_RESPONDER_CONFIG_CMD: FTM responder configuration, + * uses &struct iwl_tof_responder_config_cmd + */ + TOF_RESPONDER_CONFIG_CMD = 0x4, + /** + * @TOF_RESPONDER_DYN_CONFIG_CMD: FTM dynamic configuration, + * uses &struct iwl_tof_responder_dyn_config_cmd + */ + TOF_RESPONDER_DYN_CONFIG_CMD = 0x5, + /** + * @TOF_LC_NOTIF: used for LCI/civic location, contains just + * the action frame + */ + TOF_LC_NOTIF = 0xFC, + /** + * @TOF_RESPONDER_STATS: FTM responder statistics notification, + * uses &struct iwl_ftm_responder_stats + */ + TOF_RESPONDER_STATS = 0xFD, + /** + * @TOF_MCSI_DEBUG_NOTIF: MCSI debug notification, uses + * &struct iwl_tof_mcsi_notif + */ + TOF_MCSI_DEBUG_NOTIF = 0xFE, + /** + * @TOF_RANGE_RESPONSE_NOTIF: ranging response, using + * &struct iwl_tof_range_rsp_ntfy + */ + TOF_RANGE_RESPONSE_NOTIF = 0xFF, +}; + +/** + * struct iwl_tof_config_cmd - ToF configuration + * @tof_disabled: indicates if ToF is disabled (or not) + * @one_sided_disabled: indicates if one-sided is disabled (or not) + * @is_debug_mode: indiciates if debug mode is active + * @is_buf_required: indicates if channel estimation buffer is required + */ +struct iwl_tof_config_cmd { + u8 tof_disabled; + u8 one_sided_disabled; + u8 is_debug_mode; + u8 is_buf_required; +} __packed; + +/** + * enum iwl_tof_bandwidth - values for iwl_tof_range_req_ap_entry.bandwidth + * @IWL_TOF_BW_20_LEGACY: 20 MHz non-HT + * @IWL_TOF_BW_20_HT: 20 MHz HT + * @IWL_TOF_BW_40: 40 MHz + * @IWL_TOF_BW_80: 80 MHz + * @IWL_TOF_BW_160: 160 MHz + */ +enum iwl_tof_bandwidth { + IWL_TOF_BW_20_LEGACY, + IWL_TOF_BW_20_HT, + IWL_TOF_BW_40, + IWL_TOF_BW_80, + IWL_TOF_BW_160, +}; /* LOCAT_BW_TYPE_E */ + +/* + * enum iwl_tof_algo_type - Algorithym type for range measurement request + */ +enum iwl_tof_algo_type { + IWL_TOF_ALGO_TYPE_MAX_LIKE = 0, + IWL_TOF_ALGO_TYPE_LINEAR_REG = 1, + IWL_TOF_ALGO_TYPE_FFT = 2, + + /* Keep last */ + IWL_TOF_ALGO_TYPE_INVALID, +}; /* ALGO_TYPE_E */ + +/* + * enum iwl_tof_mcsi_ntfy - Enable/Disable MCSI notifications + */ +enum iwl_tof_mcsi_enable { + IWL_TOF_MCSI_DISABLED = 0, + IWL_TOF_MCSI_ENABLED = 1, +}; /* MCSI_ENABLE_E */ + +/** + * enum iwl_tof_responder_cmd_valid_field - valid fields in the responder cfg + * @IWL_TOF_RESPONDER_CMD_VALID_CHAN_INFO: channel info is valid + * @IWL_TOF_RESPONDER_CMD_VALID_TOA_OFFSET: ToA offset is valid + * @IWL_TOF_RESPONDER_CMD_VALID_COMMON_CALIB: common calibration mode is valid + * @IWL_TOF_RESPONDER_CMD_VALID_SPECIFIC_CALIB: spefici calibration mode is + * valid + * @IWL_TOF_RESPONDER_CMD_VALID_BSSID: BSSID is valid + * @IWL_TOF_RESPONDER_CMD_VALID_TX_ANT: TX antenna is valid + * @IWL_TOF_RESPONDER_CMD_VALID_ALGO_TYPE: algorithm type is valid + * @IWL_TOF_RESPONDER_CMD_VALID_NON_ASAP_SUPPORT: non-ASAP support is valid + * @IWL_TOF_RESPONDER_CMD_VALID_STATISTICS_REPORT_SUPPORT: statistics report + * support is valid + * @IWL_TOF_RESPONDER_CMD_VALID_MCSI_NOTIF_SUPPORT: MCSI notification support + * is valid + * @IWL_TOF_RESPONDER_CMD_VALID_FAST_ALGO_SUPPORT: fast algorithm support + * is valid + * @IWL_TOF_RESPONDER_CMD_VALID_RETRY_ON_ALGO_FAIL: retry on algorithm failure + * is valid + * @IWL_TOF_RESPONDER_CMD_VALID_STA_ID: station ID is valid + */ +enum iwl_tof_responder_cmd_valid_field { + IWL_TOF_RESPONDER_CMD_VALID_CHAN_INFO = BIT(0), + IWL_TOF_RESPONDER_CMD_VALID_TOA_OFFSET = BIT(1), + IWL_TOF_RESPONDER_CMD_VALID_COMMON_CALIB = BIT(2), + IWL_TOF_RESPONDER_CMD_VALID_SPECIFIC_CALIB = BIT(3), + IWL_TOF_RESPONDER_CMD_VALID_BSSID = BIT(4), + IWL_TOF_RESPONDER_CMD_VALID_TX_ANT = BIT(5), + IWL_TOF_RESPONDER_CMD_VALID_ALGO_TYPE = BIT(6), + IWL_TOF_RESPONDER_CMD_VALID_NON_ASAP_SUPPORT = BIT(7), + IWL_TOF_RESPONDER_CMD_VALID_STATISTICS_REPORT_SUPPORT = BIT(8), + IWL_TOF_RESPONDER_CMD_VALID_MCSI_NOTIF_SUPPORT = BIT(9), + IWL_TOF_RESPONDER_CMD_VALID_FAST_ALGO_SUPPORT = BIT(10), + IWL_TOF_RESPONDER_CMD_VALID_RETRY_ON_ALGO_FAIL = BIT(11), + IWL_TOF_RESPONDER_CMD_VALID_STA_ID = BIT(12), +}; + +/** + * enum iwl_tof_responder_cfg_flags - responder configuration flags + * @IWL_TOF_RESPONDER_FLAGS_NON_ASAP_SUPPORT: non-ASAP support + * @IWL_TOF_RESPONDER_FLAGS_REPORT_STATISTICS: report statistics + * @IWL_TOF_RESPONDER_FLAGS_REPORT_MCSI: report MCSI + * @IWL_TOF_RESPONDER_FLAGS_ALGO_TYPE: algorithm type + * @IWL_TOF_RESPONDER_FLAGS_TOA_OFFSET_MODE: ToA offset mode + * @IWL_TOF_RESPONDER_FLAGS_COMMON_CALIB_MODE: common calibration mode + * @IWL_TOF_RESPONDER_FLAGS_SPECIFIC_CALIB_MODE: specific calibration mode + * @IWL_TOF_RESPONDER_FLAGS_FAST_ALGO_SUPPORT: fast algorithm support + * @IWL_TOF_RESPONDER_FLAGS_RETRY_ON_ALGO_FAIL: retry on algorithm fail + * @IWL_TOF_RESPONDER_FLAGS_FTM_TX_ANT: TX antenna mask + */ +enum iwl_tof_responder_cfg_flags { + IWL_TOF_RESPONDER_FLAGS_NON_ASAP_SUPPORT = BIT(0), + IWL_TOF_RESPONDER_FLAGS_REPORT_STATISTICS = BIT(1), + IWL_TOF_RESPONDER_FLAGS_REPORT_MCSI = BIT(2), + IWL_TOF_RESPONDER_FLAGS_ALGO_TYPE = BIT(3) | BIT(4) | BIT(5), + IWL_TOF_RESPONDER_FLAGS_TOA_OFFSET_MODE = BIT(6), + IWL_TOF_RESPONDER_FLAGS_COMMON_CALIB_MODE = BIT(7), + IWL_TOF_RESPONDER_FLAGS_SPECIFIC_CALIB_MODE = BIT(8), + IWL_TOF_RESPONDER_FLAGS_FAST_ALGO_SUPPORT = BIT(9), + IWL_TOF_RESPONDER_FLAGS_RETRY_ON_ALGO_FAIL = BIT(10), + IWL_TOF_RESPONDER_FLAGS_FTM_TX_ANT = RATE_MCS_ANT_ABC_MSK, +}; + +/** + * struct iwl_tof_responder_config_cmd - ToF AP mode (for debug) + * @cmd_valid_fields: &iwl_tof_responder_cmd_valid_field + * @responder_cfg_flags: &iwl_tof_responder_cfg_flags + * @bandwidth: current AP Bandwidth: &enum iwl_tof_bandwidth + * @rate: current AP rate + * @channel_num: current AP Channel + * @ctrl_ch_position: coding of the control channel position relative to + * the center frequency, see iwl_mvm_get_ctrl_pos() + * @sta_id: index of the AP STA when in AP mode + * @reserved1: reserved + * @toa_offset: Artificial addition [pSec] for the ToA - to be used for debug + * purposes, simulating station movement by adding various values + * to this field + * @common_calib: XVT: common calibration value + * @specific_calib: XVT: specific calibration value + * @bssid: Current AP BSSID + * @reserved2: reserved + */ +struct iwl_tof_responder_config_cmd { + __le32 cmd_valid_fields; + __le32 responder_cfg_flags; + u8 bandwidth; + u8 rate; + u8 channel_num; + u8 ctrl_ch_position; + u8 sta_id; + u8 reserved1; + __le16 toa_offset; + __le16 common_calib; + __le16 specific_calib; + u8 bssid[ETH_ALEN]; + __le16 reserved2; +} __packed; /* TOF_RESPONDER_CONFIG_CMD_API_S_VER_6 */ + +#define IWL_LCI_CIVIC_IE_MAX_SIZE 400 + +/** + * struct iwl_tof_responder_dyn_config_cmd - Dynamic responder settings + * @lci_len: The length of the 1st (LCI) part in the @lci_civic buffer + * @civic_len: The length of the 2nd (CIVIC) part in the @lci_civic buffer + * @lci_civic: The LCI/CIVIC buffer. LCI data (if exists) comes first, then, if + * needed, 0-padding such that the next part is dword-aligned, then CIVIC + * data (if exists) follows, and then 0-padding again to complete a + * 4-multiple long buffer. + */ +struct iwl_tof_responder_dyn_config_cmd { + __le32 lci_len; + __le32 civic_len; + u8 lci_civic[]; +} __packed; /* TOF_RESPONDER_DYN_CONFIG_CMD_API_S_VER_2 */ + +/** + * struct iwl_tof_range_request_ext_cmd - extended range req for WLS + * @tsf_timer_offset_msec: the recommended time offset (mSec) from the AP's TSF + * @reserved: reserved + * @min_delta_ftm: Minimal time between two consecutive measurements, + * in units of 100us. 0 means no preference by station + * @ftm_format_and_bw20M: FTM Channel Spacing/Format for 20MHz: recommended + * value be sent to the AP + * @ftm_format_and_bw40M: FTM Channel Spacing/Format for 40MHz: recommended + * value to be sent to the AP + * @ftm_format_and_bw80M: FTM Channel Spacing/Format for 80MHz: recommended + * value to be sent to the AP + */ +struct iwl_tof_range_req_ext_cmd { + __le16 tsf_timer_offset_msec; + __le16 reserved; + u8 min_delta_ftm; + u8 ftm_format_and_bw20M; + u8 ftm_format_and_bw40M; + u8 ftm_format_and_bw80M; +} __packed; + +/** + * enum iwl_tof_location_query - values for query bitmap + * @IWL_TOF_LOC_LCI: query LCI + * @IWL_TOF_LOC_CIVIC: query civic + */ +enum iwl_tof_location_query { + IWL_TOF_LOC_LCI = 0x01, + IWL_TOF_LOC_CIVIC = 0x02, +}; + + /** + * struct iwl_tof_range_req_ap_entry - AP configuration parameters + * @channel_num: Current AP Channel + * @bandwidth: Current AP Bandwidth. One of iwl_tof_bandwidth. + * @tsf_delta_direction: TSF relatively to the subject AP + * @ctrl_ch_position: Coding of the control channel position relative to the + * center frequency, see iwl_mvm_get_ctrl_pos(). + * @bssid: AP's BSSID + * @measure_type: Measurement type: 0 - two sided, 1 - One sided + * @num_of_bursts: Recommended value to be sent to the AP. 2s Exponent of the + * number of measurement iterations (min 2^0 = 1, max 2^14) + * @burst_period: Recommended value to be sent to the AP. Measurement + * periodicity In units of 100ms. ignored if num_of_bursts = 0 + * @samples_per_burst: 2-sided: the number of FTMs pairs in single Burst (1-31); + * 1-sided: how many rts/cts pairs should be used per burst. + * @retries_per_sample: Max number of retries that the LMAC should send + * in case of no replies by the AP. + * @tsf_delta: TSF Delta in units of microseconds. + * The difference between the AP TSF and the device local clock. + * @location_req: Location Request Bit[0] LCI should be sent in the FTMR; + * Bit[1] Civic should be sent in the FTMR + * @asap_mode: 0 - non asap mode, 1 - asap mode (not relevant for one sided) + * @enable_dyn_ack: Enable Dynamic ACK BW. + * 0: Initiator interact with regular AP; + * 1: Initiator interact with Responder machine: need to send the + * Initiator Acks with HT 40MHz / 80MHz, since the Responder should + * use it for its ch est measurement (this flag will be set when we + * configure the opposite machine to be Responder). + * @rssi: Last received value + * legal values: -128-0 (0x7f). above 0x0 indicating an invalid value. + * @algo_type: &enum iwl_tof_algo_type + * @notify_mcsi: &enum iwl_tof_mcsi_ntfy. + * @reserved: For alignment and future use + */ +struct iwl_tof_range_req_ap_entry { + u8 channel_num; + u8 bandwidth; + u8 tsf_delta_direction; + u8 ctrl_ch_position; + u8 bssid[ETH_ALEN]; + u8 measure_type; + u8 num_of_bursts; + __le16 burst_period; + u8 samples_per_burst; + u8 retries_per_sample; + __le32 tsf_delta; + u8 location_req; + u8 asap_mode; + u8 enable_dyn_ack; + s8 rssi; + u8 algo_type; + u8 notify_mcsi; + __le16 reserved; +} __packed; /* LOCATION_RANGE_REQ_AP_ENTRY_CMD_API_S_VER_3 */ + +/** + * enum iwl_tof_response_mode + * @IWL_MVM_TOF_RESPONSE_ASAP: report each AP measurement separately as soon as + * possible (not supported for this release) + * @IWL_MVM_TOF_RESPONSE_TIMEOUT: report all AP measurements as a batch upon + * timeout expiration + * @IWL_MVM_TOF_RESPONSE_COMPLETE: report all AP measurements as a batch at the + * earlier of: measurements completion / timeout + * expiration. + */ +enum iwl_tof_response_mode { + IWL_MVM_TOF_RESPONSE_ASAP, + IWL_MVM_TOF_RESPONSE_TIMEOUT, + IWL_MVM_TOF_RESPONSE_COMPLETE, +}; + +/** + * enum iwl_tof_initiator_flags + * + * @IWL_TOF_INITIATOR_FLAGS_FAST_ALGO_DISABLED: disable fast algo, meaning run + * the algo on ant A+B, instead of only one of them. + * @IWL_TOF_INITIATOR_FLAGS_RX_CHAIN_SEL_A: open RX antenna A for FTMs RX + * @IWL_TOF_INITIATOR_FLAGS_RX_CHAIN_SEL_B: open RX antenna B for FTMs RX + * @IWL_TOF_INITIATOR_FLAGS_RX_CHAIN_SEL_C: open RX antenna C for FTMs RX + * @IWL_TOF_INITIATOR_FLAGS_TX_CHAIN_SEL_A: use antenna A fo TX ACKs during FTM + * @IWL_TOF_INITIATOR_FLAGS_TX_CHAIN_SEL_B: use antenna B fo TX ACKs during FTM + * @IWL_TOF_INITIATOR_FLAGS_TX_CHAIN_SEL_C: use antenna C fo TX ACKs during FTM + * @IWL_TOF_INITIATOR_FLAGS_MINDELTA_NO_PREF: no preference for minDeltaFTM + */ +enum iwl_tof_initiator_flags { + IWL_TOF_INITIATOR_FLAGS_FAST_ALGO_DISABLED = BIT(0), + IWL_TOF_INITIATOR_FLAGS_RX_CHAIN_SEL_A = BIT(1), + IWL_TOF_INITIATOR_FLAGS_RX_CHAIN_SEL_B = BIT(2), + IWL_TOF_INITIATOR_FLAGS_RX_CHAIN_SEL_C = BIT(3), + IWL_TOF_INITIATOR_FLAGS_TX_CHAIN_SEL_A = BIT(4), + IWL_TOF_INITIATOR_FLAGS_TX_CHAIN_SEL_B = BIT(5), + IWL_TOF_INITIATOR_FLAGS_TX_CHAIN_SEL_C = BIT(6), + IWL_TOF_INITIATOR_FLAGS_MINDELTA_NO_PREF = BIT(7), +}; /* LOCATION_RANGE_REQ_CMD_API_S_VER_5 */ + +#define IWL_MVM_TOF_MAX_APS 5 +#define IWL_MVM_TOF_MAX_TWO_SIDED_APS 5 + +/** + * struct iwl_tof_range_req_cmd - start measurement cmd + * @initiator_flags: see flags @ iwl_tof_initiator_flags + * @request_id: A Token incremented per request. The same Token will be + * sent back in the range response + * @initiator: 0- NW initiated, 1 - Client Initiated + * @one_sided_los_disable: '0'- run ML-Algo for both ToF/OneSided, + * '1' - run ML-Algo for ToF only + * @req_timeout: Requested timeout of the response in units of 100ms. + * This is equivalent to the session time configured to the + * LMAC in Initiator Request + * @report_policy: Supported partially for this release: For current release - + * the range report will be uploaded as a batch when ready or + * when the session is done (successfully / partially). + * one of iwl_tof_response_mode. + * @reserved0: reserved + * @num_of_ap: Number of APs to measure (error if > IWL_MVM_TOF_MAX_APS) + * @macaddr_random: '0' Use default source MAC address (i.e. p2_p), + * '1' Use MAC Address randomization according to the below + * @range_req_bssid: ranging request BSSID + * @macaddr_template: MAC address template to use for non-randomized bits + * @macaddr_mask: Bits set to 0 shall be copied from the MAC address template. + * Bits set to 1 shall be randomized by the UMAC + * @ftm_rx_chains: Rx chain to open to receive Responder's FTMs (XVT) + * @ftm_tx_chains: Tx chain to send the ack to the Responder FTM (XVT) + * @common_calib: The common calib value to inject to this measurement calc + * @specific_calib: The specific calib value to inject to this measurement calc + * @ap: per-AP request data + */ +struct iwl_tof_range_req_cmd { + __le32 initiator_flags; + u8 request_id; + u8 initiator; + u8 one_sided_los_disable; + u8 req_timeout; + u8 report_policy; + u8 reserved0; + u8 num_of_ap; + u8 macaddr_random; + u8 range_req_bssid[ETH_ALEN]; + u8 macaddr_template[ETH_ALEN]; + u8 macaddr_mask[ETH_ALEN]; + u8 ftm_rx_chains; + u8 ftm_tx_chains; + __le16 common_calib; + __le16 specific_calib; + struct iwl_tof_range_req_ap_entry ap[IWL_MVM_TOF_MAX_APS]; +} __packed; +/* LOCATION_RANGE_REQ_CMD_API_S_VER_5 */ + +/* + * enum iwl_tof_range_request_status - status of the sent request + * @IWL_TOF_RANGE_REQUEST_STATUS_SUCCESSFUL - FW successfully received the + * request + * @IWL_TOF_RANGE_REQUEST_STATUS_BUSY - FW is busy with a previous request, the + * sent request will not be handled + */ +enum iwl_tof_range_request_status { + IWL_TOF_RANGE_REQUEST_STATUS_SUCCESS, + IWL_TOF_RANGE_REQUEST_STATUS_BUSY, +}; + +/** + * enum iwl_tof_entry_status + * + * @IWL_TOF_ENTRY_SUCCESS: successful measurement. + * @IWL_TOF_ENTRY_GENERAL_FAILURE: General failure. + * @IWL_TOF_ENTRY_NO_RESPONSE: Responder didn't reply to the request. + * @IWL_TOF_ENTRY_REQUEST_REJECTED: Responder rejected the request. + * @IWL_TOF_ENTRY_NOT_SCHEDULED: Time event was scheduled but not called yet. + * @IWL_TOF_ENTRY_TIMING_MEASURE_TIMEOUT: Time event triggered but no + * measurement was completed. + * @IWL_TOF_ENTRY_TARGET_DIFF_CH_CANNOT_CHANGE: No range due inability to switch + * from the primary channel. + * @IWL_TOF_ENTRY_RANGE_NOT_SUPPORTED: Device doesn't support FTM. + * @IWL_TOF_ENTRY_REQUEST_ABORT_UNKNOWN_REASON: Request aborted due to unknown + * reason. + * @IWL_TOF_ENTRY_LOCATION_INVALID_T1_T4_TIME_STAMP: Failure due to invalid + * T1/T4. + * @IWL_TOF_ENTRY_11MC_PROTOCOL_FAILURE: Failure due to invalid FTM frame + * structure. + * @IWL_TOF_ENTRY_REQUEST_CANNOT_SCHED: Request cannot be scheduled. + * @IWL_TOF_ENTRY_RESPONDER_CANNOT_COLABORATE: Responder cannot serve the + * initiator for some period, period supplied in @refusal_period. + * @IWL_TOF_ENTRY_BAD_REQUEST_ARGS: Bad request arguments. + * @IWL_TOF_ENTRY_WIFI_NOT_ENABLED: Wifi not enabled. + * @IWL_TOF_ENTRY_RESPONDER_OVERRIDE_PARAMS: Responder override the original + * parameters within the current session. + */ +enum iwl_tof_entry_status { + IWL_TOF_ENTRY_SUCCESS = 0, + IWL_TOF_ENTRY_GENERAL_FAILURE = 1, + IWL_TOF_ENTRY_NO_RESPONSE = 2, + IWL_TOF_ENTRY_REQUEST_REJECTED = 3, + IWL_TOF_ENTRY_NOT_SCHEDULED = 4, + IWL_TOF_ENTRY_TIMING_MEASURE_TIMEOUT = 5, + IWL_TOF_ENTRY_TARGET_DIFF_CH_CANNOT_CHANGE = 6, + IWL_TOF_ENTRY_RANGE_NOT_SUPPORTED = 7, + IWL_TOF_ENTRY_REQUEST_ABORT_UNKNOWN_REASON = 8, + IWL_TOF_ENTRY_LOCATION_INVALID_T1_T4_TIME_STAMP = 9, + IWL_TOF_ENTRY_11MC_PROTOCOL_FAILURE = 10, + IWL_TOF_ENTRY_REQUEST_CANNOT_SCHED = 11, + IWL_TOF_ENTRY_RESPONDER_CANNOT_COLABORATE = 12, + IWL_TOF_ENTRY_BAD_REQUEST_ARGS = 13, + IWL_TOF_ENTRY_WIFI_NOT_ENABLED = 14, + IWL_TOF_ENTRY_RESPONDER_OVERRIDE_PARAMS = 15, +}; /* LOCATION_RANGE_RSP_AP_ENTRY_NTFY_API_S_VER_2 */ + +/** + * struct iwl_tof_range_rsp_ap_entry_ntfy - AP parameters (response) + * @bssid: BSSID of the AP + * @measure_status: current APs measurement status, one of + * &enum iwl_tof_entry_status. + * @measure_bw: Current AP Bandwidth: 0 20MHz, 1 40MHz, 2 80MHz + * @rtt: The Round Trip Time that took for the last measurement for + * current AP [pSec] + * @rtt_variance: The Variance of the RTT values measured for current AP + * @rtt_spread: The Difference between the maximum and the minimum RTT + * values measured for current AP in the current session [pSec] + * @rssi: RSSI as uploaded in the Channel Estimation notification + * @rssi_spread: The Difference between the maximum and the minimum RSSI values + * measured for current AP in the current session + * @reserved: reserved + * @refusal_period: refusal period in case of + * @IWL_TOF_ENTRY_RESPONDER_CANNOT_COLABORATE [sec] + * @range: Measured range [cm] + * @range_variance: Measured range variance [cm] + * @timestamp: The GP2 Clock [usec] where Channel Estimation notification was + * uploaded by the LMAC + * @t2t3_initiator: as calculated from the algo in the initiator + * @t1t4_responder: as calculated from the algo in the responder + * @common_calib: Calib val that was used in for this AP measurement + * @specific_calib: val that was used in for this AP measurement + * @papd_calib_output: The result of the tof papd calibration that was injected + * into the algorithm. + */ +struct iwl_tof_range_rsp_ap_entry_ntfy { + u8 bssid[ETH_ALEN]; + u8 measure_status; + u8 measure_bw; + __le32 rtt; + __le32 rtt_variance; + __le32 rtt_spread; + s8 rssi; + u8 rssi_spread; + u8 reserved; + u8 refusal_period; + __le32 range; + __le32 range_variance; + __le32 timestamp; + __le32 t2t3_initiator; + __le32 t1t4_responder; + __le16 common_calib; + __le16 specific_calib; + __le32 papd_calib_output; +} __packed; /* LOCATION_RANGE_RSP_AP_ETRY_NTFY_API_S_VER_3 */ + +/** + * enum iwl_tof_response_status - tof response status + * + * @IWL_TOF_RESPONSE_SUCCESS: successful range. + * @IWL_TOF_RESPONSE_TIMEOUT: request aborted due to timeout expiration. + * partial result of ranges done so far is included in the response. + * @IWL_TOF_RESPONSE_ABORTED: Measurement aborted by command. + * @IWL_TOF_RESPONSE_FAILED: Measurement request command failed. + */ +enum iwl_tof_response_status { + IWL_TOF_RESPONSE_SUCCESS = 0, + IWL_TOF_RESPONSE_TIMEOUT = 1, + IWL_TOF_RESPONSE_ABORTED = 4, + IWL_TOF_RESPONSE_FAILED = 5, +}; /* LOCATION_RNG_RSP_STATUS */ + +/** + * struct iwl_tof_range_rsp_ntfy - ranging response notification + * @request_id: A Token ID of the corresponding Range request + * @request_status: status of current measurement session, one of + * &enum iwl_tof_response_status. + * @last_in_batch: reprot policy (when not all responses are uploaded at once) + * @num_of_aps: Number of APs to measure (error if > IWL_MVM_TOF_MAX_APS) + * @ap: per-AP data + */ +struct iwl_tof_range_rsp_ntfy { + u8 request_id; + u8 request_status; + u8 last_in_batch; + u8 num_of_aps; + struct iwl_tof_range_rsp_ap_entry_ntfy ap[IWL_MVM_TOF_MAX_APS]; +} __packed; + +#define IWL_MVM_TOF_MCSI_BUF_SIZE (245) +/** + * struct iwl_tof_mcsi_notif - used for debug + * @token: token ID for the current session + * @role: '0' - initiator, '1' - responder + * @reserved: reserved + * @initiator_bssid: initiator machine + * @responder_bssid: responder machine + * @mcsi_buffer: debug data + */ +struct iwl_tof_mcsi_notif { + u8 token; + u8 role; + __le16 reserved; + u8 initiator_bssid[ETH_ALEN]; + u8 responder_bssid[ETH_ALEN]; + u8 mcsi_buffer[IWL_MVM_TOF_MCSI_BUF_SIZE * 4]; +} __packed; + +/** + * struct iwl_tof_range_abort_cmd + * @request_id: corresponds to a range request + * @reserved: reserved + */ +struct iwl_tof_range_abort_cmd { + u8 request_id; + u8 reserved[3]; +} __packed; + +enum ftm_responder_stats_flags { + FTM_RESP_STAT_NON_ASAP_STARTED = BIT(0), + FTM_RESP_STAT_NON_ASAP_IN_WIN = BIT(1), + FTM_RESP_STAT_NON_ASAP_OUT_WIN = BIT(2), + FTM_RESP_STAT_TRIGGER_DUP = BIT(3), + FTM_RESP_STAT_DUP = BIT(4), + FTM_RESP_STAT_DUP_IN_WIN = BIT(5), + FTM_RESP_STAT_DUP_OUT_WIN = BIT(6), + FTM_RESP_STAT_SCHED_SUCCESS = BIT(7), + FTM_RESP_STAT_ASAP_REQ = BIT(8), + FTM_RESP_STAT_NON_ASAP_REQ = BIT(9), + FTM_RESP_STAT_ASAP_RESP = BIT(10), + FTM_RESP_STAT_NON_ASAP_RESP = BIT(11), + FTM_RESP_STAT_FAIL_INITIATOR_INACTIVE = BIT(12), + FTM_RESP_STAT_FAIL_INITIATOR_OUT_WIN = BIT(13), + FTM_RESP_STAT_FAIL_INITIATOR_RETRY_LIM = BIT(14), + FTM_RESP_STAT_FAIL_NEXT_SERVED = BIT(15), + FTM_RESP_STAT_FAIL_TRIGGER_ERR = BIT(16), + FTM_RESP_STAT_FAIL_GC = BIT(17), + FTM_RESP_STAT_SUCCESS = BIT(18), + FTM_RESP_STAT_INTEL_IE = BIT(19), + FTM_RESP_STAT_INITIATOR_ACTIVE = BIT(20), + FTM_RESP_STAT_MEASUREMENTS_AVAILABLE = BIT(21), + FTM_RESP_STAT_TRIGGER_UNKNOWN = BIT(22), + FTM_RESP_STAT_PROCESS_FAIL = BIT(23), + FTM_RESP_STAT_ACK = BIT(24), + FTM_RESP_STAT_NACK = BIT(25), + FTM_RESP_STAT_INVALID_INITIATOR_ID = BIT(26), + FTM_RESP_STAT_TIMER_MIN_DELTA = BIT(27), + FTM_RESP_STAT_INITIATOR_REMOVED = BIT(28), + FTM_RESP_STAT_INITIATOR_ADDED = BIT(29), + FTM_RESP_STAT_ERR_LIST_FULL = BIT(30), + FTM_RESP_STAT_INITIATOR_SCHED_NOW = BIT(31), +}; /* RESP_IND_E */ + +/** + * struct iwl_ftm_responder_stats - FTM responder statistics + * @addr: initiator address + * @success_ftm: number of successful ftm frames + * @ftm_per_burst: num of FTM frames that were received + * @flags: &enum ftm_responder_stats_flags + * @duration: actual duration of FTM + * @allocated_duration: time that was allocated for this FTM session + * @bw: FTM request bandwidth + * @rate: FTM request rate + * @reserved: for alingment and future use + */ +struct iwl_ftm_responder_stats { + u8 addr[ETH_ALEN]; + u8 success_ftm; + u8 ftm_per_burst; + __le32 flags; + __le32 duration; + __le32 allocated_duration; + u8 bw; + u8 rate; + __le16 reserved; +} __packed; /* TOF_RESPONDER_STATISTICS_NTFY_S_VER_2 */ + +#endif /* __iwl_fw_api_location_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h index 851bfb259f10..e3eb812e0248 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h @@ -91,6 +91,7 @@ #include "fw/api/sf.h" #include "fw/api/sta.h" #include "fw/api/stats.h" +#include "fw/api/location.h" #include "fw/api/tx.h" #endif /* __fw_api_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 93b7cc5910fe..c7ec36ceb661 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -457,6 +457,22 @@ static const struct iwl_hcmd_names iwl_mvm_debug_names[] = { HCMD_NAME(MFU_ASSERT_DUMP_NTF), }; +/* Please keep this array *SORTED* by hex value. + * Access is done through binary search + */ +static const struct iwl_hcmd_names iwl_mvm_location_names[] = { + HCMD_NAME(TOF_RANGE_REQ_CMD), + HCMD_NAME(TOF_CONFIG_CMD), + HCMD_NAME(TOF_RANGE_ABORT_CMD), + HCMD_NAME(TOF_RANGE_REQ_EXT_CMD), + HCMD_NAME(TOF_RESPONDER_CONFIG_CMD), + HCMD_NAME(TOF_RESPONDER_DYN_CONFIG_CMD), + HCMD_NAME(TOF_LC_NOTIF), + HCMD_NAME(TOF_RESPONDER_STATS), + HCMD_NAME(TOF_MCSI_DEBUG_NOTIF), + HCMD_NAME(TOF_RANGE_RESPONSE_NOTIF), +}; + /* Please keep this array *SORTED* by hex value. * Access is done through binary search */ @@ -479,6 +495,7 @@ static const struct iwl_hcmd_arr iwl_mvm_groups[] = { [MAC_CONF_GROUP] = HCMD_ARR(iwl_mvm_mac_conf_names), [PHY_OPS_GROUP] = HCMD_ARR(iwl_mvm_phy_names), [DATA_PATH_GROUP] = HCMD_ARR(iwl_mvm_data_path_names), + [LOCATION_GROUP] = HCMD_ARR(iwl_mvm_location_names), [PROT_OFFLOAD_GROUP] = HCMD_ARR(iwl_mvm_prot_offload_names), [REGULATORY_AND_NVM_GROUP] = HCMD_ARR(iwl_mvm_regulatory_and_nvm_names), From 5213e8a8a28d2c4c143fec94e57c866a958ed52d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 25 Oct 2018 09:15:21 +0200 Subject: [PATCH 0291/1436] iwlwifi: mvm: implement CSI reporting Implement CSI (channel estimation matrix) reporting in the mvm driver, if the firmware has the capability. Currently only a debugfs API is provided as the API is still under discussion. For now, RX aggregation must be disabled to use this feature on data frames as we haven't found a good way to attach the data to A-MPDUs, given complexities with multi-queue. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/fw/api/datapath.h | 55 +++++++++++++++++++ .../wireless/intel/iwlwifi/fw/api/location.h | 20 +++++++ .../net/wireless/intel/iwlwifi/fw/api/rx.h | 2 + drivers/net/wireless/intel/iwlwifi/fw/file.h | 7 ++- .../net/wireless/intel/iwlwifi/mvm/debugfs.c | 1 + drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 1 + drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 20 +++---- 7 files changed, 94 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h b/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h index fdc54a5dc9de..93c06e6c1ced 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h @@ -104,6 +104,12 @@ enum iwl_data_path_subcmd_ids { */ HE_AIR_SNIFFER_CONFIG_CMD = 0x13, + /** + * @CHEST_COLLECTOR_FILTER_CONFIG_CMD: Configure the CSI + * matrix collection, uses &struct iwl_channel_estimation_cfg + */ + CHEST_COLLECTOR_FILTER_CONFIG_CMD = 0x14, + /** * @RX_NO_DATA_NOTIF: &struct iwl_rx_no_data */ @@ -156,4 +162,53 @@ struct iwl_mu_group_mgmt_notif { __le32 user_position[4]; } __packed; /* MU_GROUP_MNG_NTFY_API_S_VER_1 */ +enum iwl_channel_estimation_flags { + IWL_CHANNEL_ESTIMATION_ENABLE = BIT(0), + IWL_CHANNEL_ESTIMATION_TIMER = BIT(1), + IWL_CHANNEL_ESTIMATION_COUNTER = BIT(2), +}; + +/** + * struct iwl_channel_estimation_cfg - channel estimation reporting config + */ +struct iwl_channel_estimation_cfg { + /** + * @flags: flags, see &enum iwl_channel_estimation_flags + */ + __le32 flags; + /** + * @timer: if enabled via flags, automatically disable after this many + * microseconds + */ + __le32 timer; + /** + * @count: if enabled via flags, automatically disable after this many + * frames with channel estimation matrix were captured + */ + __le32 count; + /** + * @rate_n_flags_mask: only try to record the channel estimation matrix + * if the rate_n_flags value for the received frame (let's call + * that rx_rnf) matches the mask/value given here like this: + * (rx_rnf & rate_n_flags_mask) == rate_n_flags_val. + */ + __le32 rate_n_flags_mask; + /** + * @rate_n_flags_val: see @rate_n_flags_mask + */ + __le32 rate_n_flags_val; + /** + * @reserved: reserved (for alignment) + */ + __le32 reserved; + /** + * @frame_types: bitmap of frame types to capture, the received frame's + * subtype|type takes 6 bits in the frame and the corresponding bit + * in this field must be set to 1 to capture channel estimation for + * that frame type. Set to all-ones to enable capturing for all + * frame types. + */ + __le64 frame_types; +} __packed; /* CHEST_COLLECTOR_FILTER_CMD_API_S_VER_1 */ + #endif /* __iwl_fw_api_datapath_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/location.h b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h index 0df0851b9513..6da91ec0df55 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/location.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h @@ -93,6 +93,15 @@ enum iwl_location_subcmd_ids { * uses &struct iwl_tof_responder_dyn_config_cmd */ TOF_RESPONDER_DYN_CONFIG_CMD = 0x5, + /** + * @CSI_HEADER_NOTIFICATION: CSI header + */ + CSI_HEADER_NOTIFICATION = 0xFA, + /** + * @CSI_CHUNKS_NOTIFICATION: CSI chunk, + * uses &struct iwl_csi_chunk_notification + */ + CSI_CHUNKS_NOTIFICATION = 0xFB, /** * @TOF_LC_NOTIF: used for LCI/civic location, contains just * the action frame @@ -688,4 +697,15 @@ struct iwl_ftm_responder_stats { __le16 reserved; } __packed; /* TOF_RESPONDER_STATISTICS_NTFY_S_VER_2 */ +#define IWL_CSI_CHUNK_CTL_NUM_MASK 0x3 +#define IWL_CSI_CHUNK_CTL_IDX_MASK 0xc + +struct iwl_csi_chunk_notification { + __le32 token; + __le16 seq; + __le16 ctl; + __le32 size; + u8 data[]; +} __packed; /* CSI_CHUNKS_HDR_NTFY_API_S_VER_1 */ + #endif /* __iwl_fw_api_location_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h index 11c25f32a286..6e8224ce8906 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h @@ -333,6 +333,8 @@ enum iwl_rx_mpdu_phy_info { IWL_RX_MPDU_PHY_AMPDU = BIT(5), IWL_RX_MPDU_PHY_AMPDU_TOGGLE = BIT(6), IWL_RX_MPDU_PHY_SHORT_PREAMBLE = BIT(7), + /* short preamble is only for CCK, for non-CCK overridden by this */ + IWL_RX_MPDU_PHY_NCCK_ADDTL_NTFY = BIT(7), IWL_RX_MPDU_PHY_TSF_OVERLOAD = BIT(8), }; diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h index dfe02bdb7481..f73c5c697690 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/file.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h @@ -356,10 +356,13 @@ typedef unsigned int __bitwise iwl_ucode_tlv_capa_t; * @IWL_UCODE_TLV_CAPA_TX_POWER_ACK: reduced TX power API has larger * command size (command version 4) that supports toggling ACK TX * power reduction. - * @IWL_UCODE_TLV_CAPA_MLME_OFFLOAD: supports MLME offload * @IWL_UCODE_TLV_CAPA_D3_DEBUG: supports debug recording during D3 * @IWL_UCODE_TLV_CAPA_MCC_UPDATE_11AX_SUPPORT: MCC response support 11ax * capability. + * @IWL_UCODE_TLV_CAPA_CSI_REPORTING: firmware is capable of being configured + * to report the CSI information with (certain) RX frames + * + * @IWL_UCODE_TLV_CAPA_MLME_OFFLOAD: supports MLME offload * * @NUM_IWL_UCODE_TLV_CAPA: number of bits used */ @@ -410,6 +413,8 @@ enum iwl_ucode_tlv_capa { IWL_UCODE_TLV_CAPA_D3_DEBUG = (__force iwl_ucode_tlv_capa_t)87, IWL_UCODE_TLV_CAPA_LED_CMD_SUPPORT = (__force iwl_ucode_tlv_capa_t)88, IWL_UCODE_TLV_CAPA_MCC_UPDATE_11AX_SUPPORT = (__force iwl_ucode_tlv_capa_t)89, + IWL_UCODE_TLV_CAPA_CSI_REPORTING = (__force iwl_ucode_tlv_capa_t)90, + IWL_UCODE_TLV_CAPA_MLME_OFFLOAD = (__force iwl_ucode_tlv_capa_t)96, NUM_IWL_UCODE_TLV_CAPA diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index cbbd4b076e17..91b46361ac9f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -69,6 +69,7 @@ #include "sta.h" #include "iwl-io.h" #include "debugfs.h" +#include "iwl-modparams.h" #include "fw/error-dump.h" static ssize_t iwl_dbgfs_ctdp_budget_read(struct file *file, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index c7ec36ceb661..5e4f8b767d10 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -445,6 +445,7 @@ static const struct iwl_hcmd_names iwl_mvm_data_path_names[] = { HCMD_NAME(TRIGGER_RX_QUEUES_NOTIF_CMD), HCMD_NAME(STA_HE_CTXT_CMD), HCMD_NAME(RFH_QUEUE_CONFIG_CMD), + HCMD_NAME(CHEST_COLLECTOR_FILTER_CONFIG_CMD), HCMD_NAME(STA_PM_NOTIF), HCMD_NAME(MU_GROUP_MGMT_NOTIF), HCMD_NAME(RX_QUEUES_NOTIFICATION), diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 120e3f2d21aa..7b6f4585fec9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -201,18 +201,10 @@ static void iwl_mvm_pass_packet_to_mac80211(struct iwl_mvm *mvm, struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); if (!(rx_status->flag & RX_FLAG_NO_PSDU) && - iwl_mvm_check_pn(mvm, skb, queue, sta)) { + iwl_mvm_check_pn(mvm, skb, queue, sta)) kfree_skb(skb); - } else { - unsigned int radiotap_len = 0; - - if (rx_status->flag & RX_FLAG_RADIOTAP_HE) - radiotap_len += sizeof(struct ieee80211_radiotap_he); - if (rx_status->flag & RX_FLAG_RADIOTAP_HE_MU) - radiotap_len += sizeof(struct ieee80211_radiotap_he_mu); - __skb_push(skb, radiotap_len); + else ieee80211_rx_napi(mvm->hw, sta, skb, napi); - } } static void iwl_mvm_get_signal_strength(struct iwl_mvm *mvm, @@ -1438,9 +1430,15 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, bool toggle_bit = phy_info & IWL_RX_MPDU_PHY_AMPDU_TOGGLE; rx_status->flag |= RX_FLAG_AMPDU_DETAILS; - /* toggle is switched whenever new aggregation starts */ + /* + * Toggle is switched whenever new aggregation starts. Make + * sure ampdu_reference is never 0 so we can later use it to + * see if the frame was really part of an A-MPDU or not. + */ if (toggle_bit != mvm->ampdu_toggle) { mvm->ampdu_ref++; + if (mvm->ampdu_ref == 0) + mvm->ampdu_ref++; mvm->ampdu_toggle = toggle_bit; } rx_status->ampdu_reference = mvm->ampdu_ref; From 57e861d9362ed303ac5c0df1bc11760979de9f55 Mon Sep 17 00:00:00 2001 From: David Spinadel Date: Sun, 18 Nov 2018 18:01:43 +0200 Subject: [PATCH 0292/1436] iwlwifi: mvm: Change FW channel info API Change iwl_fw_channel_info structure so it can have channel number greater than 255. This is needed for 6 GHz channel numbers. Change all relevant structs and member accesses accordingly. The new API is indicated by a TLV capability bit. Signed-off-by: David Spinadel Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/fw/api/phy-ctxt.h | 51 +++++++++++++---- .../net/wireless/intel/iwlwifi/fw/api/tdls.h | 19 +++++-- .../intel/iwlwifi/fw/api/time-event.h | 34 ++++++++---- drivers/net/wireless/intel/iwlwifi/fw/file.h | 3 + .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 30 ++++++---- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 55 +++++++++++++++++++ .../net/wireless/intel/iwlwifi/mvm/phy-ctxt.c | 24 ++++---- drivers/net/wireless/intel/iwlwifi/mvm/tdls.c | 33 +++++------ .../wireless/intel/iwlwifi/mvm/time-event.c | 4 +- 9 files changed, 182 insertions(+), 71 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/phy-ctxt.h b/drivers/net/wireless/intel/iwlwifi/fw/api/phy-ctxt.h index 45f61c6af14e..b833b80ea3d6 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/phy-ctxt.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/phy-ctxt.h @@ -8,6 +8,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -30,6 +31,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -95,17 +97,36 @@ #define PHY_VHT_CTRL_POS_4_ABOVE (0x7) /* + * struct iwl_fw_channel_info_v1 - channel information + * * @band: PHY_BAND_* * @channel: channel number * @width: PHY_[VHT|LEGACY]_CHANNEL_* * @ctrl channel: PHY_[VHT|LEGACY]_CTRL_* */ -struct iwl_fw_channel_info { +struct iwl_fw_channel_info_v1 { u8 band; u8 channel; u8 width; u8 ctrl_pos; -} __packed; +} __packed; /* CHANNEL_CONFIG_API_S_VER_1 */ + +/* + * struct iwl_fw_channel_info - channel information + * + * @channel: channel number + * @band: PHY_BAND_* + * @width: PHY_[VHT|LEGACY]_CHANNEL_* + * @ctrl channel: PHY_[VHT|LEGACY]_CTRL_* + * @reserved: for future use and alignment + */ +struct iwl_fw_channel_info { + __le32 channel; + u8 band; + u8 width; + u8 ctrl_pos; + u8 reserved; +} __packed; /*CHANNEL_CONFIG_API_S_VER_2 */ #define PHY_RX_CHAIN_DRIVER_FORCE_POS (0) #define PHY_RX_CHAIN_DRIVER_FORCE_MSK \ @@ -133,6 +154,22 @@ struct iwl_fw_channel_info { #define NUM_PHY_CTX 3 /* TODO: complete missing documentation */ +/** + * struct iwl_phy_context_cmd_tail - tail of iwl_phy_ctx_cmd for alignment with + * various channel structures. + * + * @txchain_info: ??? + * @rxchain_info: ??? + * @acquisition_data: ??? + * @dsp_cfg_flags: set to 0 + */ +struct iwl_phy_context_cmd_tail { + __le32 txchain_info; + __le32 rxchain_info; + __le32 acquisition_data; + __le32 dsp_cfg_flags; +} __packed; + /** * struct iwl_phy_context_cmd - config of the PHY context * ( PHY_CONTEXT_CMD = 0x8 ) @@ -142,10 +179,7 @@ struct iwl_fw_channel_info { * other value means apply new params after X usecs * @tx_param_color: ??? * @ci: channel info - * @txchain_info: ??? - * @rxchain_info: ??? - * @acquisition_data: ??? - * @dsp_cfg_flags: set to 0 + * @tail: command tail */ struct iwl_phy_context_cmd { /* COMMON_INDEX_HDR_API_S_VER_1 */ @@ -155,10 +189,7 @@ struct iwl_phy_context_cmd { __le32 apply_time; __le32 tx_param_color; struct iwl_fw_channel_info ci; - __le32 txchain_info; - __le32 rxchain_info; - __le32 acquisition_data; - __le32 dsp_cfg_flags; + struct iwl_phy_context_cmd_tail tail; } __packed; /* PHY_CONTEXT_CMD_API_VER_1 */ #endif /* __iwl_fw_api_phy_ctxt_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/tdls.h b/drivers/net/wireless/intel/iwlwifi/fw/api/tdls.h index 7c6c2462d0e8..b089285ac466 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/tdls.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/tdls.h @@ -8,6 +8,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -30,6 +31,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -110,6 +112,17 @@ struct iwl_tdls_channel_switch_frame { u8 data[IWL_TDLS_CH_SW_FRAME_MAX_SIZE]; } __packed; /* TDLS_STA_CHANNEL_SWITCH_FRAME_API_S_VER_1 */ +/** + * struct iwl_tdls_channel_switch_cmd_tail - tail of iwl_tdls_channel_switch_cmd + * + * @timing: timing related data for command + * @frame: channel-switch request/response template, depending to switch_type + */ +struct iwl_tdls_channel_switch_cmd_tail { + struct iwl_tdls_channel_switch_timing timing; + struct iwl_tdls_channel_switch_frame frame; +} __packed; + /** * struct iwl_tdls_channel_switch_cmd - TDLS channel switch command * @@ -119,15 +132,13 @@ struct iwl_tdls_channel_switch_frame { * @switch_type: see &enum iwl_tdls_channel_switch_type * @peer_sta_id: station id of TDLS peer * @ci: channel we switch to - * @timing: timing related data for command - * @frame: channel-switch request/response template, depending to switch_type + * @tail: command tail */ struct iwl_tdls_channel_switch_cmd { u8 switch_type; __le32 peer_sta_id; struct iwl_fw_channel_info ci; - struct iwl_tdls_channel_switch_timing timing; - struct iwl_tdls_channel_switch_frame frame; + struct iwl_tdls_channel_switch_cmd_tail tail; } __packed; /* TDLS_STA_CHANNEL_SWITCH_CMD_API_S_VER_1 */ /** diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h b/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h index f824bebceb06..4621ef93a2cf 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h @@ -8,6 +8,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -30,6 +31,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -317,6 +319,25 @@ struct iwl_time_event_notif { __le32 status; } __packed; /* MAC_TIME_EVENT_NTFY_API_S_VER_1 */ +/* + * struct iwl_hs20_roc_req_tail - tail of iwl_hs20_roc_req + * + * @node_addr: Our MAC Address + * @reserved: reserved for alignment + * @apply_time: GP2 value to start (should always be the current GP2 value) + * @apply_time_max_delay: Maximum apply time delay value in TU. Defines max + * time by which start of the event is allowed to be postponed. + * @duration: event duration in TU To calculate event duration: + * timeEventDuration = min(duration, remainingQuota) + */ +struct iwl_hs20_roc_req_tail { + u8 node_addr[ETH_ALEN]; + __le16 reserved; + __le32 apply_time; + __le32 apply_time_max_delay; + __le32 duration; +} __packed; + /* * Aux ROC command * @@ -336,13 +357,6 @@ struct iwl_time_event_notif { * @sta_id_and_color: station id and color, resumed during "Remain On Channel" * activity. * @channel_info: channel info - * @node_addr: Our MAC Address - * @reserved: reserved for alignment - * @apply_time: GP2 value to start (should always be the current GP2 value) - * @apply_time_max_delay: Maximum apply time delay value in TU. Defines max - * time by which start of the event is allowed to be postponed. - * @duration: event duration in TU To calculate event duration: - * timeEventDuration = min(duration, remainingQuota) */ struct iwl_hs20_roc_req { /* COMMON_INDEX_HDR_API_S_VER_1 hdr */ @@ -351,11 +365,7 @@ struct iwl_hs20_roc_req { __le32 event_unique_id; __le32 sta_id_and_color; struct iwl_fw_channel_info channel_info; - u8 node_addr[ETH_ALEN]; - __le16 reserved; - __le32 apply_time; - __le32 apply_time_max_delay; - __le32 duration; + struct iwl_hs20_roc_req_tail tail; } __packed; /* HOT_SPOT_CMD_API_S_VER_1 */ /* diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h index f73c5c697690..e8b00b795cbb 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/file.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h @@ -333,6 +333,8 @@ typedef unsigned int __bitwise iwl_ucode_tlv_capa_t; * @IWL_UCODE_TLV_CAPA_TLC_OFFLOAD: firmware implements rate scaling algorithm * @IWL_UCODE_TLV_CAPA_DYNAMIC_QUOTA: firmware implements quota related * @IWL_UCODE_TLV_CAPA_COEX_SCHEMA_2: firmware implements Coex Schema 2 + * @IWL_UCODE_TLV_CAPA_ULTRA_HB_CHANNELS: firmware supports ultra high band + * (6 GHz). * @IWL_UCODE_TLV_CAPA_EXTENDED_DTS_MEASURE: extended DTS measurement * @IWL_UCODE_TLV_CAPA_SHORT_PM_TIMEOUTS: supports short PM timeouts * @IWL_UCODE_TLV_CAPA_BT_MPLUT_SUPPORT: supports bt-coex Multi-priority LUT @@ -395,6 +397,7 @@ enum iwl_ucode_tlv_capa { IWL_UCODE_TLV_CAPA_TLC_OFFLOAD = (__force iwl_ucode_tlv_capa_t)43, IWL_UCODE_TLV_CAPA_DYNAMIC_QUOTA = (__force iwl_ucode_tlv_capa_t)44, IWL_UCODE_TLV_CAPA_COEX_SCHEMA_2 = (__force iwl_ucode_tlv_capa_t)45, + IWL_UCODE_TLV_CAPA_ULTRA_HB_CHANNELS = (__force iwl_ucode_tlv_capa_t)48, IWL_UCODE_TLV_CAPA_EXTENDED_DTS_MEASURE = (__force iwl_ucode_tlv_capa_t)64, IWL_UCODE_TLV_CAPA_SHORT_PM_TIMEOUTS = (__force iwl_ucode_tlv_capa_t)65, IWL_UCODE_TLV_CAPA_BT_MPLUT_SUPPORT = (__force iwl_ucode_tlv_capa_t)67, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index dd0761e5cf6a..1762b1fcb3db 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -3520,14 +3520,20 @@ static int iwl_mvm_send_aux_roc_cmd(struct iwl_mvm *mvm, .id_and_color = cpu_to_le32(FW_CMD_ID_AND_COLOR(MAC_INDEX_AUX, 0)), .sta_id_and_color = cpu_to_le32(mvm->aux_sta.sta_id), - /* Set the channel info data */ - .channel_info.band = (channel->band == NL80211_BAND_2GHZ) ? - PHY_BAND_24 : PHY_BAND_5, - .channel_info.channel = channel->hw_value, - .channel_info.width = PHY_VHT_CHANNEL_MODE20, - /* Set the time and duration */ - .apply_time = cpu_to_le32(iwl_read_prph(mvm->trans, time_reg)), - }; + }; + struct iwl_hs20_roc_req_tail *tail = iwl_mvm_chan_info_cmd_tail(mvm, + &aux_roc_req.channel_info); + u16 len = sizeof(aux_roc_req) - iwl_mvm_chan_info_padding(mvm); + + /* Set the channel info data */ + iwl_mvm_set_chan_info(mvm, &aux_roc_req.channel_info, channel->hw_value, + (channel->band == NL80211_BAND_2GHZ) ? + PHY_BAND_24 : PHY_BAND_5, + PHY_VHT_CHANNEL_MODE20, + 0); + + /* Set the time and duration */ + tail->apply_time = cpu_to_le32(iwl_read_prph(mvm->trans, time_reg)); delay = AUX_ROC_MIN_DELAY; req_dur = MSEC_TO_TU(duration); @@ -3552,15 +3558,15 @@ static int iwl_mvm_send_aux_roc_cmd(struct iwl_mvm *mvm, } } - aux_roc_req.duration = cpu_to_le32(req_dur); - aux_roc_req.apply_time_max_delay = cpu_to_le32(delay); + tail->duration = cpu_to_le32(req_dur); + tail->apply_time_max_delay = cpu_to_le32(delay); IWL_DEBUG_TE(mvm, "ROC: Requesting to remain on channel %u for %ums (requested = %ums, max_delay = %ums, dtim_interval = %ums)\n", channel->hw_value, req_dur, duration, delay, dtim_interval); /* Set the node address */ - memcpy(aux_roc_req.node_addr, vif->addr, ETH_ALEN); + memcpy(tail->node_addr, vif->addr, ETH_ALEN); lockdep_assert_held(&mvm->mutex); @@ -3591,7 +3597,7 @@ static int iwl_mvm_send_aux_roc_cmd(struct iwl_mvm *mvm, ARRAY_SIZE(time_event_response), iwl_mvm_rx_aux_roc, te_data); - res = iwl_mvm_send_cmd_pdu(mvm, HOT_SPOT_CMD, 0, sizeof(aux_roc_req), + res = iwl_mvm_send_cmd_pdu(mvm, HOT_SPOT_CMD, 0, len, &aux_roc_req); if (res) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 55f08e433331..59d655436a8f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -2059,4 +2059,59 @@ void iwl_mvm_sta_add_debugfs(struct ieee80211_hw *hw, struct dentry *dir); #endif +/* Channel info utils */ +static inline bool iwl_mvm_has_ultra_hb_channel(struct iwl_mvm *mvm) +{ + return fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_ULTRA_HB_CHANNELS); +} + +static inline void *iwl_mvm_chan_info_cmd_tail(struct iwl_mvm *mvm, + struct iwl_fw_channel_info *ci) +{ + return (u8 *)ci + (iwl_mvm_has_ultra_hb_channel(mvm) ? + sizeof(struct iwl_fw_channel_info) : + sizeof(struct iwl_fw_channel_info_v1)); +} + +static inline size_t iwl_mvm_chan_info_padding(struct iwl_mvm *mvm) +{ + return iwl_mvm_has_ultra_hb_channel(mvm) ? 0 : + sizeof(struct iwl_fw_channel_info) - + sizeof(struct iwl_fw_channel_info_v1); +} + +static inline void iwl_mvm_set_chan_info(struct iwl_mvm *mvm, + struct iwl_fw_channel_info *ci, + u32 chan, u8 band, u8 width, + u8 ctrl_pos) +{ + if (iwl_mvm_has_ultra_hb_channel(mvm)) { + ci->channel = cpu_to_le32(chan); + ci->band = band; + ci->width = width; + ci->ctrl_pos = ctrl_pos; + } else { + struct iwl_fw_channel_info_v1 *ci_v1 = + (struct iwl_fw_channel_info_v1 *)ci; + + ci_v1->channel = chan; + ci_v1->band = band; + ci_v1->width = width; + ci_v1->ctrl_pos = ctrl_pos; + } +} + +static inline void +iwl_mvm_set_chan_info_chandef(struct iwl_mvm *mvm, + struct iwl_fw_channel_info *ci, + struct cfg80211_chan_def *chandef) +{ + iwl_mvm_set_chan_info(mvm, ci, chandef->chan->hw_value, + (chandef->chan->band == NL80211_BAND_2GHZ ? + PHY_BAND_24 : PHY_BAND_5), + iwl_mvm_get_channel_width(chandef), + iwl_mvm_get_ctrl_pos(chandef)); +} + #endif /* __IWL_MVM_H__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c index 7f5434b34d0d..f369173db11c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c @@ -143,14 +143,11 @@ static void iwl_mvm_phy_ctxt_cmd_data(struct iwl_mvm *mvm, u8 chains_static, u8 chains_dynamic) { u8 active_cnt, idle_cnt; + struct iwl_phy_context_cmd_tail *tail = + iwl_mvm_chan_info_cmd_tail(mvm, &cmd->ci); /* Set the channel info data */ - cmd->ci.band = (chandef->chan->band == NL80211_BAND_2GHZ ? - PHY_BAND_24 : PHY_BAND_5); - - cmd->ci.channel = chandef->chan->hw_value; - cmd->ci.width = iwl_mvm_get_channel_width(chandef); - cmd->ci.ctrl_pos = iwl_mvm_get_ctrl_pos(chandef); + iwl_mvm_set_chan_info_chandef(mvm, &cmd->ci, chandef); /* Set rx the chains */ idle_cnt = chains_static; @@ -168,17 +165,17 @@ static void iwl_mvm_phy_ctxt_cmd_data(struct iwl_mvm *mvm, active_cnt = 2; } - cmd->rxchain_info = cpu_to_le32(iwl_mvm_get_valid_rx_ant(mvm) << + tail->rxchain_info = cpu_to_le32(iwl_mvm_get_valid_rx_ant(mvm) << PHY_RX_CHAIN_VALID_POS); - cmd->rxchain_info |= cpu_to_le32(idle_cnt << PHY_RX_CHAIN_CNT_POS); - cmd->rxchain_info |= cpu_to_le32(active_cnt << + tail->rxchain_info |= cpu_to_le32(idle_cnt << PHY_RX_CHAIN_CNT_POS); + tail->rxchain_info |= cpu_to_le32(active_cnt << PHY_RX_CHAIN_MIMO_CNT_POS); #ifdef CONFIG_IWLWIFI_DEBUGFS if (unlikely(mvm->dbgfs_rx_phyinfo)) - cmd->rxchain_info = cpu_to_le32(mvm->dbgfs_rx_phyinfo); + tail->rxchain_info = cpu_to_le32(mvm->dbgfs_rx_phyinfo); #endif - cmd->txchain_info = cpu_to_le32(iwl_mvm_get_valid_tx_ant(mvm)); + tail->txchain_info = cpu_to_le32(iwl_mvm_get_valid_tx_ant(mvm)); } /* @@ -195,6 +192,7 @@ static int iwl_mvm_phy_ctxt_apply(struct iwl_mvm *mvm, { struct iwl_phy_context_cmd cmd; int ret; + u16 len = sizeof(cmd) - iwl_mvm_chan_info_padding(mvm); /* Set the command header fields */ iwl_mvm_phy_ctxt_cmd_hdr(ctxt, &cmd, action, apply_time); @@ -203,9 +201,7 @@ static int iwl_mvm_phy_ctxt_apply(struct iwl_mvm *mvm, iwl_mvm_phy_ctxt_cmd_data(mvm, &cmd, chandef, chains_static, chains_dynamic); - ret = iwl_mvm_send_cmd_pdu(mvm, PHY_CONTEXT_CMD, 0, - sizeof(struct iwl_phy_context_cmd), - &cmd); + ret = iwl_mvm_send_cmd_pdu(mvm, PHY_CONTEXT_CMD, 0, len, &cmd); if (ret) IWL_ERR(mvm, "PHY ctxt cmd error. ret=%d\n", ret); return ret; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c b/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c index e02f4eb20359..859aa5a4e6b5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c @@ -399,6 +399,9 @@ iwl_mvm_tdls_config_channel_switch(struct iwl_mvm *mvm, struct ieee80211_tx_info *info; struct ieee80211_hdr *hdr; struct iwl_tdls_channel_switch_cmd cmd = {0}; + struct iwl_tdls_channel_switch_cmd_tail *tail = + iwl_mvm_chan_info_cmd_tail(mvm, &cmd.ci); + u16 len = sizeof(cmd) - iwl_mvm_chan_info_padding(mvm); int ret; lockdep_assert_held(&mvm->mutex); @@ -414,9 +417,9 @@ iwl_mvm_tdls_config_channel_switch(struct iwl_mvm *mvm, } cmd.switch_type = type; - cmd.timing.frame_timestamp = cpu_to_le32(timestamp); - cmd.timing.switch_time = cpu_to_le32(switch_time); - cmd.timing.switch_timeout = cpu_to_le32(switch_timeout); + tail->timing.frame_timestamp = cpu_to_le32(timestamp); + tail->timing.switch_time = cpu_to_le32(switch_time); + tail->timing.switch_timeout = cpu_to_le32(switch_timeout); rcu_read_lock(); sta = ieee80211_find_sta(vif, peer); @@ -448,21 +451,16 @@ iwl_mvm_tdls_config_channel_switch(struct iwl_mvm *mvm, } } - if (chandef) { - cmd.ci.band = (chandef->chan->band == NL80211_BAND_2GHZ ? - PHY_BAND_24 : PHY_BAND_5); - cmd.ci.channel = chandef->chan->hw_value; - cmd.ci.width = iwl_mvm_get_channel_width(chandef); - cmd.ci.ctrl_pos = iwl_mvm_get_ctrl_pos(chandef); - } + if (chandef) + iwl_mvm_set_chan_info_chandef(mvm, &cmd.ci, chandef); /* keep quota calculation simple for now - 50% of DTIM for TDLS */ - cmd.timing.max_offchan_duration = + tail->timing.max_offchan_duration = cpu_to_le32(TU_TO_US(vif->bss_conf.dtim_period * vif->bss_conf.beacon_int) / 2); /* Switch time is the first element in the switch-timing IE. */ - cmd.frame.switch_time_offset = cpu_to_le32(ch_sw_tm_ie + 2); + tail->frame.switch_time_offset = cpu_to_le32(ch_sw_tm_ie + 2); info = IEEE80211_SKB_CB(skb); hdr = (void *)skb->data; @@ -472,20 +470,19 @@ iwl_mvm_tdls_config_channel_switch(struct iwl_mvm *mvm, ret = -EINVAL; goto out; } - iwl_mvm_set_tx_cmd_ccmp(info, &cmd.frame.tx_cmd); + iwl_mvm_set_tx_cmd_ccmp(info, &tail->frame.tx_cmd); } - iwl_mvm_set_tx_cmd(mvm, skb, &cmd.frame.tx_cmd, info, + iwl_mvm_set_tx_cmd(mvm, skb, &tail->frame.tx_cmd, info, mvmsta->sta_id); - iwl_mvm_set_tx_cmd_rate(mvm, &cmd.frame.tx_cmd, info, sta, + iwl_mvm_set_tx_cmd_rate(mvm, &tail->frame.tx_cmd, info, sta, hdr->frame_control); rcu_read_unlock(); - memcpy(cmd.frame.data, skb->data, skb->len); + memcpy(tail->frame.data, skb->data, skb->len); - ret = iwl_mvm_send_cmd_pdu(mvm, TDLS_CHANNEL_SWITCH_CMD, 0, - sizeof(cmd), &cmd); + ret = iwl_mvm_send_cmd_pdu(mvm, TDLS_CHANNEL_SWITCH_CMD, 0, len, &cmd); if (ret) { IWL_ERR(mvm, "Failed to send TDLS_CHANNEL_SWITCH cmd: %d\n", ret); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 9fa1a36dcda3..5b34100e9099 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -687,6 +687,8 @@ static void iwl_mvm_remove_aux_roc_te(struct iwl_mvm *mvm, struct iwl_mvm_time_event_data *te_data) { struct iwl_hs20_roc_req aux_cmd = {}; + u16 len = sizeof(aux_cmd) - iwl_mvm_chan_info_padding(mvm); + u32 uid; int ret; @@ -700,7 +702,7 @@ static void iwl_mvm_remove_aux_roc_te(struct iwl_mvm *mvm, IWL_DEBUG_TE(mvm, "Removing BSS AUX ROC TE 0x%x\n", le32_to_cpu(aux_cmd.event_unique_id)); ret = iwl_mvm_send_cmd_pdu(mvm, HOT_SPOT_CMD, 0, - sizeof(aux_cmd), &aux_cmd); + len, &aux_cmd); if (WARN_ON(ret)) return; From 311590a3a21eda3f787a3dce1b8bf675f0d63bf7 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 19 Nov 2018 09:32:39 +0200 Subject: [PATCH 0293/1436] iwlwifi: mvm: freeze management TXQ timer when station goes to sleep We maintain a timer for each Tx queue to detect stalls and be able to recover / debug. When we work in AP mode, we can freeze the Tx queue timer if a station goes to sleep, because we don't want to warn about stalls that are caused by faulty clients that don't wake up on time. This mechanism was applied to the queues of the clients, but the management queue was omitted. Fix that. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 1762b1fcb3db..d4b05caabc74 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2776,7 +2776,7 @@ static void __iwl_mvm_mac_sta_notify(struct ieee80211_hw *hw, return; spin_lock_bh(&mvmsta->lock); - for (tid = 0; tid < IWL_MAX_TID_COUNT; tid++) { + for (tid = 0; tid < ARRAY_SIZE(mvmsta->tid_data); tid++) { struct iwl_mvm_tid_data *tid_data = &mvmsta->tid_data[tid]; if (tid_data->txq_id == IWL_MVM_INVALID_QUEUE) From fdd6c9419b6ff2498b0cffd0a89d74e823700ffa Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Tue, 13 Nov 2018 18:36:42 +0200 Subject: [PATCH 0294/1436] iwlwifi: mvm: disable completely low latency mode with debugfs We introduce a new state for latency, force mode, in force mode you can enable always to be in low latency or always to be in non low latency. This is required for test mode in max TpT test. Signed-off-by: Mordechay Goodstein Signed-off-by: Luca Coelho --- .../wireless/intel/iwlwifi/mvm/debugfs-vif.c | 53 +++++++++++++++-- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 58 +++++++++++++++++-- 2 files changed, 103 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index 1c0f84049573..2453ceabf00d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -543,21 +543,64 @@ static ssize_t iwl_dbgfs_low_latency_write(struct ieee80211_vif *vif, char *buf, return count; } +static ssize_t +iwl_dbgfs_low_latency_force_write(struct ieee80211_vif *vif, char *buf, + size_t count, loff_t *ppos) +{ + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct iwl_mvm *mvm = mvmvif->mvm; + u8 value; + int ret; + + ret = kstrtou8(buf, 0, &value); + if (ret) + return ret; + + if (value > NUM_LOW_LATENCY_FORCE) + return -EINVAL; + + mutex_lock(&mvm->mutex); + if (value == LOW_LATENCY_FORCE_UNSET) { + iwl_mvm_update_low_latency(mvm, vif, false, + LOW_LATENCY_DEBUGFS_FORCE); + iwl_mvm_update_low_latency(mvm, vif, false, + LOW_LATENCY_DEBUGFS_FORCE_ENABLE); + } else { + iwl_mvm_update_low_latency(mvm, vif, + value == LOW_LATENCY_FORCE_ON, + LOW_LATENCY_DEBUGFS_FORCE); + iwl_mvm_update_low_latency(mvm, vif, true, + LOW_LATENCY_DEBUGFS_FORCE_ENABLE); + } + mutex_unlock(&mvm->mutex); + return count; +} + static ssize_t iwl_dbgfs_low_latency_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_vif *vif = file->private_data; struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - char buf[30] = {}; + char format[] = "traffic=%d\ndbgfs=%d\nvcmd=%d\nvif_type=%d\n" + "dbgfs_force_enable=%d\ndbgfs_force=%d\nactual=%d\n"; + + /* + * all values in format are boolean so the size of format is enough + * for holding the result string + */ + char buf[sizeof(format) + 1] = {}; int len; - len = scnprintf(buf, sizeof(buf) - 1, - "traffic=%d\ndbgfs=%d\nvcmd=%d\nvif_type=%d\n", + len = scnprintf(buf, sizeof(buf) - 1, format, !!(mvmvif->low_latency & LOW_LATENCY_TRAFFIC), !!(mvmvif->low_latency & LOW_LATENCY_DEBUGFS), !!(mvmvif->low_latency & LOW_LATENCY_VCMD), - !!(mvmvif->low_latency & LOW_LATENCY_VIF_TYPE)); + !!(mvmvif->low_latency & LOW_LATENCY_VIF_TYPE), + !!(mvmvif->low_latency & + LOW_LATENCY_DEBUGFS_FORCE_ENABLE), + !!(mvmvif->low_latency & LOW_LATENCY_DEBUGFS_FORCE), + !!(mvmvif->low_latency_actual)); return simple_read_from_buffer(user_buf, count, ppos, buf, len); } @@ -710,6 +753,7 @@ MVM_DEBUGFS_READ_FILE_OPS(tx_pwr_lmt); MVM_DEBUGFS_READ_WRITE_FILE_OPS(pm_params, 32); MVM_DEBUGFS_READ_WRITE_FILE_OPS(bf_params, 256); MVM_DEBUGFS_READ_WRITE_FILE_OPS(low_latency, 10); +MVM_DEBUGFS_WRITE_FILE_OPS(low_latency_force, 10); MVM_DEBUGFS_READ_WRITE_FILE_OPS(uapsd_misbehaving, 20); MVM_DEBUGFS_READ_WRITE_FILE_OPS(rx_phyinfo, 10); MVM_DEBUGFS_READ_WRITE_FILE_OPS(quota_min, 32); @@ -745,6 +789,7 @@ void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif) MVM_DEBUGFS_ADD_FILE_VIF(tx_pwr_lmt, mvmvif->dbgfs_dir, 0400); MVM_DEBUGFS_ADD_FILE_VIF(mac_params, mvmvif->dbgfs_dir, 0400); MVM_DEBUGFS_ADD_FILE_VIF(low_latency, mvmvif->dbgfs_dir, 0600); + MVM_DEBUGFS_ADD_FILE_VIF(low_latency_force, mvmvif->dbgfs_dir, 0600); MVM_DEBUGFS_ADD_FILE_VIF(uapsd_misbehaving, mvmvif->dbgfs_dir, 0600); MVM_DEBUGFS_ADD_FILE_VIF(rx_phyinfo, mvmvif->dbgfs_dir, 0600); MVM_DEBUGFS_ADD_FILE_VIF(quota_min, mvmvif->dbgfs_dir, 0600); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 59d655436a8f..5c9adf72691f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -299,18 +299,39 @@ enum iwl_bt_force_ant_mode { BT_FORCE_ANT_MAX, }; +/** + * struct iwl_mvm_low_latency_force - low latency force mode set by debugfs + * @LOW_LATENCY_FORCE_UNSET: unset force mode + * @LOW_LATENCY_FORCE_ON: for low latency on + * @LOW_LATENCY_FORCE_OFF: for low latency off + * @NUM_LOW_LATENCY_FORCE: max num of modes + */ +enum iwl_mvm_low_latency_force { + LOW_LATENCY_FORCE_UNSET, + LOW_LATENCY_FORCE_ON, + LOW_LATENCY_FORCE_OFF, + NUM_LOW_LATENCY_FORCE +}; + /** * struct iwl_mvm_low_latency_cause - low latency set causes * @LOW_LATENCY_TRAFFIC: indicates low latency traffic was detected * @LOW_LATENCY_DEBUGFS: low latency mode set from debugfs * @LOW_LATENCY_VCMD: low latency mode set from vendor command * @LOW_LATENCY_VIF_TYPE: low latency mode set because of vif type (ap) +* @LOW_LATENCY_DEBUGFS_FORCE_ENABLE: indicate that force mode is enabled +* the actual set/unset is done with LOW_LATENCY_DEBUGFS_FORCE +* @LOW_LATENCY_DEBUGFS_FORCE: low latency force mode from debugfs +* set this with LOW_LATENCY_DEBUGFS_FORCE_ENABLE flag +* in low_latency. */ enum iwl_mvm_low_latency_cause { LOW_LATENCY_TRAFFIC = BIT(0), LOW_LATENCY_DEBUGFS = BIT(1), LOW_LATENCY_VCMD = BIT(2), LOW_LATENCY_VIF_TYPE = BIT(3), + LOW_LATENCY_DEBUGFS_FORCE_ENABLE = BIT(4), + LOW_LATENCY_DEBUGFS_FORCE = BIT(5), }; /** @@ -361,8 +382,10 @@ struct iwl_probe_resp_data { * @pm_enabled - Indicate if MAC power management is allowed * @monitor_active: indicates that monitor context is configured, and that the * interface should get quota etc. - * @low_latency: indicates low latency is set, see - * enum &iwl_mvm_low_latency_cause for causes. + * @low_latency: bit flags for low latency + * see enum &iwl_mvm_low_latency_cause for causes. + * @low_latency_actual: boolean, indicates low latency is set, + * as a result from low_latency bit flags and takes force into account. * @ps_disabled: indicates that this interface requires PS to be disabled * @queue_params: QoS params for this MAC * @bcast_sta: station used for broadcast packets. Used by the following @@ -394,7 +417,8 @@ struct iwl_mvm_vif { bool ap_ibss_active; bool pm_enabled; bool monitor_active; - u8 low_latency; + u8 low_latency: 6; + u8 low_latency_actual: 1; bool ps_disabled; struct iwl_mvm_vif_bf_data bf_data; @@ -1918,17 +1942,43 @@ static inline bool iwl_mvm_vif_low_latency(struct iwl_mvm_vif *mvmvif) * binding, so this has no real impact. For now, just return * the current desired low-latency state. */ - return mvmvif->low_latency; + return mvmvif->low_latency_actual; } static inline void iwl_mvm_vif_set_low_latency(struct iwl_mvm_vif *mvmvif, bool set, enum iwl_mvm_low_latency_cause cause) { + u8 new_state; + if (set) mvmvif->low_latency |= cause; else mvmvif->low_latency &= ~cause; + + /* + * if LOW_LATENCY_DEBUGFS_FORCE_ENABLE is enabled no changes are + * allowed to actual mode. + */ + if (mvmvif->low_latency & LOW_LATENCY_DEBUGFS_FORCE_ENABLE && + cause != LOW_LATENCY_DEBUGFS_FORCE_ENABLE) + return; + + if (cause == LOW_LATENCY_DEBUGFS_FORCE_ENABLE && set) + /* + * We enter force state + */ + new_state = !!(mvmvif->low_latency & + LOW_LATENCY_DEBUGFS_FORCE); + else + /* + * Check if any other one set low latency + */ + new_state = !!(mvmvif->low_latency & + ~(LOW_LATENCY_DEBUGFS_FORCE_ENABLE | + LOW_LATENCY_DEBUGFS_FORCE)); + + mvmvif->low_latency_actual = new_state; } /* Return a bitmask with all the hw supported queues, except for the From 98f0d01abcb98b69e3b838f1c46ab9bb9ad0fa84 Mon Sep 17 00:00:00 2001 From: Ihab Zhaika Date: Tue, 20 Nov 2018 09:33:57 +0200 Subject: [PATCH 0295/1436] iwlwifi: update product name for 9260 and 9560 update the product name for the some of the cards from the series of 9260 and 9560 Signed-off-by: Ihab Zhaika Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/cfg/22000.c | 6 + drivers/net/wireless/intel/iwlwifi/cfg/9000.c | 29 +++ .../net/wireless/intel/iwlwifi/iwl-config.h | 5 + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 188 +++++++++--------- 4 files changed, 134 insertions(+), 94 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index 285e02e1e7b7..eb93711d474b 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -269,6 +269,12 @@ const struct iwl_cfg iwl9560_2ac_cfg_qu_b0_jf_b0 = { IWL_DEVICE_22500, }; +const struct iwl_cfg iwl9560_2ac_160_cfg_qu_b0_jf_b0 = { + .name = "Intel(R) Wireless-AC 9560 160MHz", + .fw_name_pre = IWL_QU_B_JF_B_FW_PRE, + IWL_DEVICE_22500, +}; + const struct iwl_cfg killer1550i_2ac_cfg_qu_b0_jf_b0 = { .name = "Killer (R) Wireless-AC 1550i Wireless Network Adapter (9560NGW)", .fw_name_pre = IWL_QU_B_JF_B_FW_PRE, diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c index 09618f176f4f..113bcf7735a0 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c @@ -163,6 +163,12 @@ const struct iwl_cfg iwl9260_2ac_cfg = { IWL_DEVICE_9000, }; +const struct iwl_cfg iwl9260_2ac_160_cfg = { + .name = "Intel(R) Wireless-AC 9260 160MHz", + .fw_name_pre = IWL9260_FW_PRE, + IWL_DEVICE_9000, +}; + const struct iwl_cfg iwl9260_killer_2ac_cfg = { .name = "Killer (R) Wireless-AC 1550 Wireless Network Adapter (9260NGW)", .fw_name_pre = IWL9260_FW_PRE, @@ -211,6 +217,12 @@ const struct iwl_cfg iwl9560_2ac_cfg = { IWL_DEVICE_9000, }; +const struct iwl_cfg iwl9560_2ac_160_cfg = { + .name = "Intel(R) Wireless-AC 9560 160MHz", + .fw_name_pre = IWL9260_FW_PRE, + IWL_DEVICE_9000, +}; + const struct iwl_cfg iwl9560_2ac_cfg_soc = { .name = "Intel(R) Dual Band Wireless AC 9560", .fw_name_pre = IWL9000_FW_PRE, @@ -219,6 +231,14 @@ const struct iwl_cfg iwl9560_2ac_cfg_soc = { .soc_latency = 5000, }; +const struct iwl_cfg iwl9560_2ac_160_cfg_soc = { + .name = "Intel(R) Wireless-AC 9560 160MHz", + .fw_name_pre = IWL9000_FW_PRE, + IWL_DEVICE_9000, + .integrated = true, + .soc_latency = 5000, +}; + const struct iwl_cfg iwl9560_killer_2ac_cfg_soc = { .name = "Killer (R) Wireless-AC 1550i Wireless Network Adapter (9560NGW)", .fw_name_pre = IWL9000_FW_PRE, @@ -271,6 +291,15 @@ const struct iwl_cfg iwl9560_2ac_cfg_shared_clk = { .extra_phy_cfg_flags = FW_PHY_CFG_SHARED_CLK }; +const struct iwl_cfg iwl9560_2ac_160_cfg_shared_clk = { + .name = "Intel(R) Wireless-AC 9560 160MHz", + .fw_name_pre = IWL9000_FW_PRE, + IWL_DEVICE_9000, + .integrated = true, + .soc_latency = 5000, + .extra_phy_cfg_flags = FW_PHY_CFG_SHARED_CLK +}; + const struct iwl_cfg iwl9560_killer_2ac_cfg_shared_clk = { .name = "Killer (R) Wireless-AC 1550i Wireless Network Adapter (9560NGW)", .fw_name_pre = IWL9000_FW_PRE, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 7698cadd4ff9..ff942532fac3 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -545,20 +545,24 @@ extern const struct iwl_cfg iwl8275_2ac_cfg; extern const struct iwl_cfg iwl4165_2ac_cfg; extern const struct iwl_cfg iwl9160_2ac_cfg; extern const struct iwl_cfg iwl9260_2ac_cfg; +extern const struct iwl_cfg iwl9260_2ac_160_cfg; extern const struct iwl_cfg iwl9260_killer_2ac_cfg; extern const struct iwl_cfg iwl9270_2ac_cfg; extern const struct iwl_cfg iwl9460_2ac_cfg; extern const struct iwl_cfg iwl9560_2ac_cfg; +extern const struct iwl_cfg iwl9560_2ac_160_cfg; extern const struct iwl_cfg iwl9460_2ac_cfg_soc; extern const struct iwl_cfg iwl9461_2ac_cfg_soc; extern const struct iwl_cfg iwl9462_2ac_cfg_soc; extern const struct iwl_cfg iwl9560_2ac_cfg_soc; +extern const struct iwl_cfg iwl9560_2ac_160_cfg_soc; extern const struct iwl_cfg iwl9560_killer_2ac_cfg_soc; extern const struct iwl_cfg iwl9560_killer_s_2ac_cfg_soc; extern const struct iwl_cfg iwl9460_2ac_cfg_shared_clk; extern const struct iwl_cfg iwl9461_2ac_cfg_shared_clk; extern const struct iwl_cfg iwl9462_2ac_cfg_shared_clk; extern const struct iwl_cfg iwl9560_2ac_cfg_shared_clk; +extern const struct iwl_cfg iwl9560_2ac_160_cfg_shared_clk; extern const struct iwl_cfg iwl9560_killer_2ac_cfg_shared_clk; extern const struct iwl_cfg iwl9560_killer_s_2ac_cfg_shared_clk; extern const struct iwl_cfg iwl22000_2ac_cfg_hr; @@ -574,6 +578,7 @@ extern const struct iwl_cfg killer1650w_2ax_cfg; extern const struct iwl_cfg iwl9461_2ac_cfg_qu_b0_jf_b0; extern const struct iwl_cfg iwl9462_2ac_cfg_qu_b0_jf_b0; extern const struct iwl_cfg iwl9560_2ac_cfg_qu_b0_jf_b0; +extern const struct iwl_cfg iwl9560_2ac_160_cfg_qu_b0_jf_b0; extern const struct iwl_cfg killer1550i_2ac_cfg_qu_b0_jf_b0; extern const struct iwl_cfg killer1550s_2ac_cfg_qu_b0_jf_b0; extern const struct iwl_cfg iwl22000_2ax_cfg_jf; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 461135f8cef1..a22e47639a4e 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -513,10 +513,10 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24FD, 0x9074, iwl8265_2ac_cfg)}, /* 9000 Series */ - {IWL_PCI_DEVICE(0x02F0, 0x0030, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x0030, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x02F0, 0x0034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x0038, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x003C, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x0038, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x003C, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x02F0, 0x0060, iwl9461_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x02F0, 0x0064, iwl9461_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x02F0, 0x00A0, iwl9462_2ac_cfg_soc)}, @@ -531,17 +531,17 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x02F0, 0x02A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x02F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x02F0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x2030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x2034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x4030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x4034, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x2030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x2034, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x4030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x4034, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x02F0, 0x40A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x02F0, 0x4234, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x02F0, 0x42A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x06F0, 0x0030, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x0030, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x06F0, 0x0034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x06F0, 0x0038, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x06F0, 0x003C, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x0038, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x003C, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x06F0, 0x0060, iwl9461_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x06F0, 0x0064, iwl9461_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x06F0, 0x00A0, iwl9462_2ac_cfg_soc)}, @@ -556,21 +556,21 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x06F0, 0x02A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x06F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x06F0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x06F0, 0x2030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x06F0, 0x2034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x06F0, 0x4030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x06F0, 0x4034, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x2030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x2034, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x4030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x06F0, 0x4034, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x06F0, 0x40A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x06F0, 0x4234, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x06F0, 0x42A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2526, 0x0010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x0014, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x0018, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x001C, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0010, iwl9260_2ac_160_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0014, iwl9260_2ac_160_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0018, iwl9260_2ac_160_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x001C, iwl9260_2ac_160_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0030, iwl9560_2ac_160_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0034, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x0038, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x003C, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0038, iwl9560_2ac_160_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x003C, iwl9560_2ac_160_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0060, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0064, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x00A0, iwl9460_2ac_cfg)}, @@ -594,26 +594,26 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2526, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x1610, iwl9270_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x2030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2526, 0x2034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2526, 0x4010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x401C, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x4030, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x4034, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x2526, 0x2030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x2526, 0x2034, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x2526, 0x4010, iwl9260_2ac_160_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x401C, iwl9260_2ac_160_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x4030, iwl9560_2ac_160_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x4034, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x40A4, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x4234, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2526, 0x42A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2526, 0x8014, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x8010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0xA014, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x8014, iwl9260_2ac_160_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x8010, iwl9260_2ac_160_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0xA014, iwl9260_2ac_160_cfg)}, {IWL_PCI_DEVICE(0x271B, 0x0010, iwl9160_2ac_cfg)}, {IWL_PCI_DEVICE(0x271B, 0x0014, iwl9160_2ac_cfg)}, {IWL_PCI_DEVICE(0x271B, 0x0210, iwl9160_2ac_cfg)}, {IWL_PCI_DEVICE(0x271B, 0x0214, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x271C, 0x0214, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2720, 0x0034, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2720, 0x0038, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2720, 0x003C, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2720, 0x0034, iwl9560_2ac_160_cfg)}, + {IWL_PCI_DEVICE(0x2720, 0x0038, iwl9560_2ac_160_cfg)}, + {IWL_PCI_DEVICE(0x2720, 0x003C, iwl9560_2ac_160_cfg)}, {IWL_PCI_DEVICE(0x2720, 0x0060, iwl9461_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x0064, iwl9461_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x00A0, iwl9462_2ac_cfg_soc)}, @@ -631,17 +631,17 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2720, 0x1210, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2720, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x1552, iwl9560_killer_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x2030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x2034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x2720, 0x4030, iwl9560_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2720, 0x4034, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x2720, 0x2030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x2720, 0x2034, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x2720, 0x4030, iwl9560_2ac_160_cfg)}, + {IWL_PCI_DEVICE(0x2720, 0x4034, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x40A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x4234, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x42A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0030, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x0030, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x30DC, 0x0034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0038, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x003C, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x0038, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x003C, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9460_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x30DC, 0x0064, iwl9461_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x30DC, 0x00A0, iwl9462_2ac_cfg_soc)}, @@ -659,17 +659,17 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x30DC, 0x1210, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x30DC, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x30DC, 0x1552, iwl9560_killer_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x2030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x2034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x4030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x4034, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x2030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x2034, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x4030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x30DC, 0x4034, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x30DC, 0x40A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x30DC, 0x4234, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x30DC, 0x42A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_cfg_shared_clk)}, + {IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_160_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x0034, iwl9560_2ac_cfg_shared_clk)}, - {IWL_PCI_DEVICE(0x31DC, 0x0038, iwl9560_2ac_cfg_shared_clk)}, - {IWL_PCI_DEVICE(0x31DC, 0x003C, iwl9560_2ac_cfg_shared_clk)}, + {IWL_PCI_DEVICE(0x31DC, 0x0038, iwl9560_2ac_160_cfg_shared_clk)}, + {IWL_PCI_DEVICE(0x31DC, 0x003C, iwl9560_2ac_160_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x0060, iwl9460_2ac_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x0064, iwl9461_2ac_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x00A0, iwl9462_2ac_cfg_shared_clk)}, @@ -687,18 +687,18 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x31DC, 0x1210, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x31DC, 0x1551, iwl9560_killer_s_2ac_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x1552, iwl9560_killer_2ac_cfg_shared_clk)}, - {IWL_PCI_DEVICE(0x31DC, 0x2030, iwl9560_2ac_cfg_shared_clk)}, - {IWL_PCI_DEVICE(0x31DC, 0x2034, iwl9560_2ac_cfg_shared_clk)}, - {IWL_PCI_DEVICE(0x31DC, 0x4030, iwl9560_2ac_cfg_shared_clk)}, - {IWL_PCI_DEVICE(0x31DC, 0x4034, iwl9560_2ac_cfg_shared_clk)}, + {IWL_PCI_DEVICE(0x31DC, 0x2030, iwl9560_2ac_160_cfg_shared_clk)}, + {IWL_PCI_DEVICE(0x31DC, 0x2034, iwl9560_2ac_160_cfg_shared_clk)}, + {IWL_PCI_DEVICE(0x31DC, 0x4030, iwl9560_2ac_160_cfg_shared_clk)}, + {IWL_PCI_DEVICE(0x31DC, 0x4034, iwl9560_2ac_160_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x40A4, iwl9462_2ac_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x4234, iwl9560_2ac_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x42A4, iwl9462_2ac_cfg_shared_clk)}, - {IWL_PCI_DEVICE(0x34F0, 0x0030, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x34F0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, {IWL_PCI_DEVICE(0x34F0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x34F0, 0x0038, iwl9560_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x34F0, 0x003C, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x34F0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x34F0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, {IWL_PCI_DEVICE(0x34F0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)}, {IWL_PCI_DEVICE(0x34F0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)}, {IWL_PCI_DEVICE(0x34F0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, @@ -713,18 +713,18 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x34F0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, {IWL_PCI_DEVICE(0x34F0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)}, {IWL_PCI_DEVICE(0x34F0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x34F0, 0x2030, iwl9560_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x34F0, 0x2034, iwl9560_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x34F0, 0x4030, iwl9560_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x34F0, 0x4034, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x34F0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x34F0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x34F0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x34F0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, {IWL_PCI_DEVICE(0x34F0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, {IWL_PCI_DEVICE(0x34F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, {IWL_PCI_DEVICE(0x34F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0030, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0030, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x3DF0, 0x0034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0038, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x003C, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0038, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x3DF0, 0x003C, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x3DF0, 0x0060, iwl9461_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x3DF0, 0x0064, iwl9461_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x3DF0, 0x00A0, iwl9462_2ac_cfg_soc)}, @@ -742,17 +742,17 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x3DF0, 0x1210, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x3DF0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x3DF0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x2030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x2034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x4030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x4034, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x3DF0, 0x2030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x3DF0, 0x2034, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x3DF0, 0x4030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x3DF0, 0x4034, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x3DF0, 0x40A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x3DF0, 0x4234, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x3DF0, 0x42A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0030, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x43F0, 0x0030, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x43F0, 0x0034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0038, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x003C, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x43F0, 0x0038, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x43F0, 0x003C, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x43F0, 0x0060, iwl9461_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x43F0, 0x0064, iwl9461_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x43F0, 0x00A0, iwl9462_2ac_cfg_soc)}, @@ -770,19 +770,19 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x43F0, 0x1210, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x43F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x43F0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x2030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x2034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x4030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x4034, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x43F0, 0x2030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x43F0, 0x2034, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x43F0, 0x4030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x43F0, 0x4034, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x43F0, 0x40A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x43F0, 0x4234, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x43F0, 0x42A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl9460_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9460_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x0034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0038, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x9DF0, 0x003C, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0038, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x9DF0, 0x003C, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x0060, iwl9460_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x0064, iwl9461_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x00A0, iwl9462_2ac_cfg_soc)}, @@ -808,18 +808,18 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x9DF0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl9460_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x9DF0, 0x2030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x9DF0, 0x2034, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x9DF0, 0x2030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x9DF0, 0x2034, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl9460_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x9DF0, 0x4030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x9DF0, 0x4034, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x9DF0, 0x4030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0x9DF0, 0x4034, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x40A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x4234, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x42A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0030, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0030, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0xA0F0, 0x0034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0038, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x003C, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0038, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0xA0F0, 0x003C, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0xA0F0, 0x0060, iwl9461_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA0F0, 0x0064, iwl9461_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA0F0, 0x00A0, iwl9462_2ac_cfg_soc)}, @@ -837,17 +837,17 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0xA0F0, 0x1210, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0xA0F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA0F0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x2030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x2034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x4030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x4034, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0xA0F0, 0x2030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0xA0F0, 0x2034, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0xA0F0, 0x4030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0xA0F0, 0x4034, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0xA0F0, 0x40A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA0F0, 0x4234, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA0F0, 0x42A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x0034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA370, 0x0038, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA370, 0x003C, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0xA370, 0x0038, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0xA370, 0x003C, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x0060, iwl9460_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x0064, iwl9461_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x00A0, iwl9462_2ac_cfg_soc)}, @@ -865,10 +865,10 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0xA370, 0x1210, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0xA370, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x1552, iwl9560_killer_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA370, 0x2030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA370, 0x2034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA370, 0x4030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA370, 0x4034, iwl9560_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0xA370, 0x2030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0xA370, 0x2034, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0xA370, 0x4030, iwl9560_2ac_160_cfg_soc)}, + {IWL_PCI_DEVICE(0xA370, 0x4034, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x40A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x4234, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x42A4, iwl9462_2ac_cfg_soc)}, @@ -891,7 +891,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x06F0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0)}, {IWL_PCI_DEVICE(0x06F0, 0x4070, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x2720, 0x0000, iwl22560_2ax_cfg_hr)}, - {IWL_PCI_DEVICE(0x2720, 0x0030, iwl22000_2ac_cfg_hr_cdb)}, + {IWL_PCI_DEVICE(0x2720, 0x0030, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x0040, iwl22560_2ax_cfg_hr)}, {IWL_PCI_DEVICE(0x2720, 0x0070, iwl22000_2ac_cfg_hr_cdb)}, {IWL_PCI_DEVICE(0x2720, 0x0074, iwl22560_2ax_cfg_hr)}, From 81444538ac586613e7ac3f04a85be2c22bc8a550 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 20 Nov 2018 17:14:06 +0100 Subject: [PATCH 0296/1436] iwlwifi: mvm: document monitor mode reorder buffer bypass The reorder buffer is bypassed because the firmware won't have any BA sessions, document this. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 7b6f4585fec9..9fde322904f4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -658,6 +658,8 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, * This also covers the case of receiving a Block Ack Request * outside a BA session; we'll pass it to mac80211 and that * then sends a delBA action frame. + * This also covers pure monitor mode, in which case we won't + * have any BA sessions. */ if (baid == IWL_RX_REORDER_DATA_INVALID_BAID) return false; From 9bf13bee2d74a3b7bc0a59b1af7ad5f0a37f2176 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 20 Nov 2018 17:58:46 +0100 Subject: [PATCH 0297/1436] iwlwifi: mvm: include configured sniffer AID in radiotap In order to make more sense out of the captured radiotap data e.g. when the configured AID changes, add the currently configured AID to the radiotap data as a vendor extension field. This is made race-free by updating the included value from inside the RX path (using a notification wait) for the command response from the firmware, which thus means it's serialized with frame RX. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/debugfs.c | 44 ++++++++++++++++++- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 2 + drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 4 ++ drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 39 ++++++++++++++-- 4 files changed, 84 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index 91b46361ac9f..e136475a34f6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -1682,11 +1682,33 @@ iwl_dbgfs_send_echo_cmd_write(struct iwl_mvm *mvm, char *buf, return ret ?: count; } +struct iwl_mvm_sniffer_apply { + struct iwl_mvm *mvm; + u16 aid; +}; + +static bool iwl_mvm_sniffer_apply(struct iwl_notif_wait_data *notif_data, + struct iwl_rx_packet *pkt, void *data) +{ + struct iwl_mvm_sniffer_apply *apply = data; + + apply->mvm->cur_aid = cpu_to_le16(apply->aid); + + return true; +} + static ssize_t iwl_dbgfs_he_sniffer_params_write(struct iwl_mvm *mvm, char *buf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos) { + struct iwl_notification_wait wait; struct iwl_he_monitor_cmd he_mon_cmd = {}; + struct iwl_mvm_sniffer_apply apply = { + .mvm = mvm, + }; + u16 wait_cmds[] = { + iwl_cmd_id(HE_AIR_SNIFFER_CONFIG_CMD, DATA_PATH_GROUP, 0), + }; u32 aid; int ret; @@ -1702,10 +1724,30 @@ iwl_dbgfs_he_sniffer_params_write(struct iwl_mvm *mvm, char *buf, he_mon_cmd.aid = cpu_to_le16(aid); + apply.aid = aid; + mutex_lock(&mvm->mutex); + + /* + * Use the notification waiter to get our function triggered + * in sequence with other RX. This ensures that frames we get + * on the RX queue _before_ the new configuration is applied + * still have mvm->cur_aid pointing to the old AID, and that + * frames on the RX queue _after_ the firmware processed the + * new configuration (and sent the response, synchronously) + * get mvm->cur_aid correctly set to the new AID. + */ + iwl_init_notification_wait(&mvm->notif_wait, &wait, + wait_cmds, ARRAY_SIZE(wait_cmds), + iwl_mvm_sniffer_apply, &apply); + ret = iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(HE_AIR_SNIFFER_CONFIG_CMD, DATA_PATH_GROUP, 0), 0, sizeof(he_mon_cmd), &he_mon_cmd); + + /* no need to really wait, we already did anyway */ + iwl_remove_notification(&mvm->notif_wait, &wait); + mutex_unlock(&mvm->mutex); return ret ?: count; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index d4b05caabc74..531d2de131da 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1167,6 +1167,8 @@ static void iwl_mvm_restart_cleanup(struct iwl_mvm *mvm) iwl_mvm_stop_device(mvm); + mvm->cur_aid = 0; + mvm->scan_status = 0; mvm->ps_disabled = false; mvm->calibrating = false; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 5c9adf72691f..ac526b412aa6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1164,6 +1164,10 @@ struct iwl_mvm { /* does a monitor vif exist (only one can exist hence bool) */ bool monitor_on; + + /* sniffer data to include in radiotap */ + __le16 cur_aid; + #ifdef CONFIG_ACPI struct iwl_mvm_sar_profile sar_profiles[ACPI_SAR_PROFILE_NUM]; struct iwl_mvm_geo_profile geo_profiles[ACPI_NUM_GEO_PROFILES]; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 9fde322904f4..79860f6ac7ca 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -192,11 +192,40 @@ static void iwl_mvm_create_skb(struct sk_buff *skb, struct ieee80211_hdr *hdr, } } +static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm, + struct sk_buff *skb) +{ + struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); + struct ieee80211_vendor_radiotap *radiotap; + int size = sizeof(*radiotap) + sizeof(__le16); + + if (!mvm->cur_aid) + return; + + radiotap = skb_put(skb, size); + radiotap->align = 1; + /* Intel OUI */ + radiotap->oui[0] = 0xf6; + radiotap->oui[1] = 0x54; + radiotap->oui[2] = 0x25; + /* radiotap sniffer config sub-namespace */ + radiotap->subns = 1; + radiotap->present = 0x1; + radiotap->len = size - sizeof(*radiotap); + radiotap->pad = 0; + + /* fill the data now */ + memcpy(radiotap->data, &mvm->cur_aid, sizeof(mvm->cur_aid)); + + rx_status->flag |= RX_FLAG_RADIOTAP_VENDOR_DATA; +} + /* iwl_mvm_pass_packet_to_mac80211 - passes the packet for mac80211 */ static void iwl_mvm_pass_packet_to_mac80211(struct iwl_mvm *mvm, struct napi_struct *napi, struct sk_buff *skb, int queue, - struct ieee80211_sta *sta) + struct ieee80211_sta *sta, + bool csi) { struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); @@ -465,7 +494,7 @@ static void iwl_mvm_release_frames(struct iwl_mvm *mvm, while ((skb = __skb_dequeue(skb_list))) { iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, reorder_buf->queue, - sta); + sta, false); reorder_buf->num_stored--; } } @@ -1304,6 +1333,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, .d4 = desc->phy_data4, .info_type = IWL_RX_PHY_INFO_TYPE_NONE, }; + bool csi = false; if (unlikely(test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status))) return; @@ -1599,7 +1629,8 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, iwl_mvm_create_skb(skb, hdr, len, crypt_len, rxb); if (!iwl_mvm_reorder(mvm, napi, queue, sta, skb, desc)) - iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta); + iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, + sta, csi); out: rcu_read_unlock(); } @@ -1732,7 +1763,7 @@ void iwl_mvm_rx_monitor_ndp(struct iwl_mvm *mvm, struct napi_struct *napi, rx_status->rate_idx = rate; } - iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta); + iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta, false); out: rcu_read_unlock(); } From 487ec49c358a5c9d4fed386cee7f17a81572a7df Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Thu, 22 Nov 2018 08:01:30 +0200 Subject: [PATCH 0298/1436] iwlwifi: trigger dump on assert prior to setting the device up In the regular flow, when we receive an assert, ieee80211_reconfig is called which reconfig the driver using iwl_mvm_mac_start. iwl_mvm_mac_start is clearing the restart bit and does dump collection. Prior to setting the device up, ieee80211_reconfig does not call iwl_mvm_mac_start since there is nothing to reconfig and we miss the dump collection of the assert. solve it by checking the restart bit before we stop the device and trigger a dump collection in case it is set. note that we don't need to do it in the fmac case since in fmac assert flow in iwl_fmac_nic_error we call iwl_fw_dbg_collect_desc so we can be sure that there will a dump collection in iwl_fmac_stop_device. Signed-off-by: Shahar S Matityahu Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index ac526b412aa6..12e9ecc3ee27 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1997,6 +1997,16 @@ static inline u32 iwl_mvm_flushable_queues(struct iwl_mvm *mvm) static inline void iwl_mvm_stop_device(struct iwl_mvm *mvm) { lockdep_assert_held(&mvm->mutex); + /* If IWL_MVM_STATUS_HW_RESTART_REQUESTED bit is set then we received + * an assert. Since we failed to bring the interface up, mac80211 + * will not attempt to reconfig the device, + * which handles the dump collection in assert flow, + * so trigger dump collection here. + */ + if (test_and_clear_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, + &mvm->status)) + iwl_fw_dbg_collect_desc(&mvm->fwrt, &iwl_dump_desc_assert, + false, 0); /* calling this function without using dump_start/end since at this * point we already hold the op mode mutex */ From 40ecdd01d46e67fddff4407f7c37554d9640d1ea Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Wed, 7 Nov 2018 18:51:06 +0200 Subject: [PATCH 0299/1436] iwlwifi: mvm: update firmware when MU EDCA params change When MU EDCA params change, resend the STA_HE_CTXT command updating the MU EDCA params. Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 531d2de131da..c9effd7f0ef8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2252,6 +2252,12 @@ static void iwl_mvm_bss_info_changed_station(struct iwl_mvm *mvm, iwl_mvm_mac_ctxt_recalc_tsf_id(mvm, vif); } + /* Update MU EDCA params */ + if (changes & BSS_CHANGED_QOS && mvmvif->associated && + bss_conf->assoc && vif->bss_conf.he_support && + !iwlwifi_mod_params.disable_11ax) + iwl_mvm_cfg_he_sta(mvm, vif, mvmvif->ap_sta_id); + /* * If we're not associated yet, take the (new) BSSID before associating * so the firmware knows. If we're already associated, then use the old From 07a44b322397b1a5e8c96ceef8ce352fe79ac7ee Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Mon, 26 Nov 2018 11:19:00 +0200 Subject: [PATCH 0300/1436] iwlwifi: dbg_ini: allocate dram buffer with proper flags Use the same flags we use in the legacy buffer allocation. Missing __GFP_NOWARN will cause an unwanted warning incase we fail to allocate. Missing __GFP_ZERO will give a buffer initially filled with garbage which can make debug difficult in case our wrap count is zero. Signed-off-by: Shahar S Matityahu Fixes: d47902f9f71d ("iwlwifi: dbg: add apply point logic") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 917aafe1a9db..947f282c05a9 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -1697,8 +1697,10 @@ iwl_fw_dbg_buffer_allocation(struct iwl_fw_runtime *fwrt, IWL_FW_INI_LOCATION_DRAM_PATH) return; - virtual_addr = dma_alloc_coherent(fwrt->trans->dev, size, - &phys_addr, GFP_KERNEL); + virtual_addr = + dma_alloc_coherent(fwrt->trans->dev, size, &phys_addr, + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO | + __GFP_COMP); /* TODO: alloc fragments if needed */ if (!virtual_addr) From cd0fc658fbb77b07b9bcd2fa315acfda40e02539 Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Mon, 26 Nov 2018 11:02:54 +0200 Subject: [PATCH 0301/1436] iwlwifi: dbg_ini: properly handle ini user trigger in case of a user trigger while ini is enable we change FW_DBG_TRIGGER_USER to IWL_FW_TRIGGER_ID_USER_TRIGGER in iwl_fw_dbg_collect and then again we attempt to do so in _iwl_fw_error_ini_dump which causes to abort the dump. Fix it by removing the second check in _iwl_fw_error_ini_dump. Signed-off-by: Shahar S Matityahu Fixes: 7a14c23dcdee ("iwlwifi: dbg: dump data according to the new ini TLVs") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 947f282c05a9..07dc2919b0b4 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -1254,10 +1254,6 @@ _iwl_fw_error_ini_dump(struct iwl_fw_runtime *fwrt, if (id == FW_DBG_TRIGGER_FW_ASSERT) id = IWL_FW_TRIGGER_ID_FW_ASSERT; - else if (id == FW_DBG_TRIGGER_USER) - id = IWL_FW_TRIGGER_ID_USER_TRIGGER; - else if (id < FW_DBG_TRIGGER_MAX) - return NULL; if (WARN_ON(id >= ARRAY_SIZE(fwrt->dump.active_trigs))) return NULL; From b2aea95c183360e3a3add84e349915b87f3680b2 Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Mon, 26 Nov 2018 10:34:37 +0200 Subject: [PATCH 0302/1436] iwlwifi: dbg_ini: retrieve dump_delay value properly The driver uses ignore_consec instead of dump_delay. Fix it by using dump_delay as expected. Signed-off-by: Shahar S Matityahu Fixes: fe1b7d6c2888 ("iwlwifi: add support for triggering ini triggers") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 07dc2919b0b4..02ad87022e78 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -1489,7 +1489,7 @@ int iwl_fw_dbg_collect(struct iwl_fw_runtime *fwrt, if (WARN_ON(!fwrt->dump.active_trigs[id].active)) return -EINVAL; - delay = le32_to_cpu(fwrt->dump.active_trigs[id].conf->ignore_consec); + delay = le32_to_cpu(fwrt->dump.active_trigs[id].conf->dump_delay); occur = le32_to_cpu(fwrt->dump.active_trigs[id].conf->occurrences); if (!occur) return 0; From 55fbf0d21c443dc98a0c537d3db5a29be54c04de Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Mon, 26 Nov 2018 10:29:15 +0200 Subject: [PATCH 0303/1436] iwlwifi: dbg_ini: set dump mask BIT(n) instead of n The driver sets dump_mask value instead of BIT(value). fix it by updating dump_mask correctly. Signed-off-by: Shahar S Matityahu Fixes: 7a14c23dcdee ("iwlwifi: dbg: dump data according to the new ini TLVs") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 02ad87022e78..22efd94da6d3 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -1216,13 +1216,13 @@ static void iwl_fw_ini_dump_trigger(struct iwl_fw_runtime *fwrt, iwl_dump_prph_ini(fwrt->trans, data, reg); break; case IWL_FW_INI_REGION_DRAM_BUFFER: - *dump_mask |= IWL_FW_ERROR_DUMP_FW_MONITOR; + *dump_mask |= BIT(IWL_FW_ERROR_DUMP_FW_MONITOR); break; case IWL_FW_INI_REGION_PAGING: if (iwl_fw_dbg_is_paging_enabled(fwrt)) iwl_dump_paging(fwrt, data); else - *dump_mask |= IWL_FW_ERROR_DUMP_PAGING; + *dump_mask |= BIT(IWL_FW_ERROR_DUMP_PAGING); break; case IWL_FW_INI_REGION_TXF: iwl_fw_dump_txf(fwrt, data); From 01f377dc927da6e42cbffb00fc8d431c8341da3f Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Mon, 26 Nov 2018 11:33:09 +0200 Subject: [PATCH 0304/1436] iwiwifi: fix bad monitor buffer register addresses The monitor buffer register address is wrong. Set the right address Signed-off-by: Shahar S Matityahu Fixes: c2d202017da1 ("iwlwifi: pcie: add firmware monitor capabilities") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-prph.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h index 9d89b7d7f9fa..3aaa5f06461c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h @@ -358,12 +358,12 @@ /* FW monitor */ #define MON_BUFF_SAMPLE_CTL (0xa03c00) -#define MON_BUFF_BASE_ADDR (0xa03c3c) +#define MON_BUFF_BASE_ADDR (0xa03c1c) #define MON_BUFF_END_ADDR (0xa03c40) #define MON_BUFF_WRPTR (0xa03c44) #define MON_BUFF_CYCLE_CNT (0xa03c48) /* FW monitor family 8000 and on */ -#define MON_BUFF_BASE_ADDR_VER2 (0xa03c3c) +#define MON_BUFF_BASE_ADDR_VER2 (0xa03c1c) #define MON_BUFF_END_ADDR_VER2 (0xa03c20) #define MON_BUFF_WRPTR_VER2 (0xa03c24) #define MON_BUFF_CYCLE_CNT_VER2 (0xa03c28) From 693abe11aa6b27aed6eb8222162f8fb986325cef Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 29 Jan 2019 15:38:21 +0800 Subject: [PATCH 0305/1436] ALSA: hda/realtek - Fixed hp_pin no value Fix hp_pin always no value. [More notes on the changes: The hp_pin value that is referred in alc294_hp_init() is always zero at the moment the function gets called, hence this is actually useless as in the current code. And, this kind of init sequence should be called from the codec init callback, instead of the parser function. So, the first fix in this patch to move the call call into its own init_hook. OTOH, this function is needed to be called only once after the boot, and it'd take too long for invoking at each resume (where the init callback gets called). So we add a new flag and invoke this only once as an additional fix. The one case is still not covered, though: S4 resume. But this change itself won't lead to any regression in that regard, so we leave S4 issue as is for now and fix it later. -- tiwai ] Fixes: bde1a7459623 ("ALSA: hda/realtek - Fixed headphone issue for ALC700") Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 78 ++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b4f472157ebd..4139aced63f8 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -117,6 +117,7 @@ struct alc_spec { int codec_variant; /* flag for other variants */ unsigned int has_alc5505_dsp:1; unsigned int no_depop_delay:1; + unsigned int done_hp_init:1; /* for PLL fix */ hda_nid_t pll_nid; @@ -3372,6 +3373,48 @@ static void alc_default_shutup(struct hda_codec *codec) snd_hda_shutup_pins(codec); } +static void alc294_hp_init(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + int i, val; + + if (!hp_pin) + return; + + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); + + msleep(100); + + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); + + alc_update_coef_idx(codec, 0x6f, 0x000f, 0);/* Set HP depop to manual mode */ + alc_update_coefex_idx(codec, 0x58, 0x00, 0x8000, 0x8000); /* HP depop procedure start */ + + /* Wait for depop procedure finish */ + val = alc_read_coefex_idx(codec, 0x58, 0x01); + for (i = 0; i < 20 && val & 0x0080; i++) { + msleep(50); + val = alc_read_coefex_idx(codec, 0x58, 0x01); + } + /* Set HP depop to auto mode */ + alc_update_coef_idx(codec, 0x6f, 0x000f, 0x000b); + msleep(50); +} + +static void alc294_init(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + + if (!spec->done_hp_init) { + alc294_hp_init(codec); + spec->done_hp_init = true; + } + alc_default_init(codec); +} + static void alc5505_coef_set(struct hda_codec *codec, unsigned int index_reg, unsigned int val) { @@ -7373,37 +7416,6 @@ static void alc269_fill_coef(struct hda_codec *codec) alc_update_coef_idx(codec, 0x4, 0, 1<<11); } -static void alc294_hp_init(struct hda_codec *codec) -{ - struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; - int i, val; - - if (!hp_pin) - return; - - snd_hda_codec_write(codec, hp_pin, 0, - AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); - - msleep(100); - - snd_hda_codec_write(codec, hp_pin, 0, - AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); - - alc_update_coef_idx(codec, 0x6f, 0x000f, 0);/* Set HP depop to manual mode */ - alc_update_coefex_idx(codec, 0x58, 0x00, 0x8000, 0x8000); /* HP depop procedure start */ - - /* Wait for depop procedure finish */ - val = alc_read_coefex_idx(codec, 0x58, 0x01); - for (i = 0; i < 20 && val & 0x0080; i++) { - msleep(50); - val = alc_read_coefex_idx(codec, 0x58, 0x01); - } - /* Set HP depop to auto mode */ - alc_update_coef_idx(codec, 0x6f, 0x000f, 0x000b); - msleep(50); -} - /* */ static int patch_alc269(struct hda_codec *codec) @@ -7529,7 +7541,7 @@ static int patch_alc269(struct hda_codec *codec) spec->codec_variant = ALC269_TYPE_ALC294; spec->gen.mixer_nid = 0; /* ALC2x4 does not have any loopback mixer path */ alc_update_coef_idx(codec, 0x6b, 0x0018, (1<<4) | (1<<3)); /* UAJ MIC Vref control by verb */ - alc294_hp_init(codec); + spec->init_hook = alc294_init; break; case 0x10ec0300: spec->codec_variant = ALC269_TYPE_ALC300; @@ -7541,7 +7553,7 @@ static int patch_alc269(struct hda_codec *codec) spec->codec_variant = ALC269_TYPE_ALC700; spec->gen.mixer_nid = 0; /* ALC700 does not have any loopback mixer path */ alc_update_coef_idx(codec, 0x4a, 1 << 15, 0); /* Combo jack auto trigger control */ - alc294_hp_init(codec); + spec->init_hook = alc294_init; break; } From 00ae831dfe4474ef6029558f5eb3ef0332d80043 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 25 Jan 2019 11:59:01 -0800 Subject: [PATCH 0306/1436] x86/cpu: Add Atom Tremont (Jacobsville) Add the Atom Tremont model number to the Intel family list. [ Tony: Also update comment at head of file to say "_X" suffix is also used for microserver parts. ] Signed-off-by: Kan Liang Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: Andy Shevchenko Cc: Aristeu Rozanski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: linux-edac Cc: Mauro Carvalho Chehab Cc: Megha Dey Cc: Peter Zijlstra Cc: Qiuxu Zhuo Cc: Rajneesh Bhardwaj Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190125195902.17109-4-tony.luck@intel.com --- arch/x86/include/asm/intel-family.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 0dd6b0f4000e..d9a9993af882 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -6,7 +6,7 @@ * "Big Core" Processors (Branded as Core, Xeon, etc...) * * The "_X" parts are generally the EP and EX Xeons, or the - * "Extreme" ones, like Broadwell-E. + * "Extreme" ones, like Broadwell-E, or Atom microserver. * * While adding a new CPUID for a new microarchitecture, add a new * group to keep logically sorted out in chronological order. Within @@ -71,6 +71,7 @@ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */ #define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */ #define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ +#define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */ /* Xeon Phi */ From 9bd34c63f5536c490c152833c77fa47f59aeade3 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 22 Jan 2019 09:03:08 -0800 Subject: [PATCH 0307/1436] bus: ti-sysc: Fix timer handling with drop pm_runtime_irq_safe() Commit 84badc5ec5fc ("ARM: dts: omap4: Move l4 child devices to probe them with ti-sysc") started producing a warning for pwm-omap-dmtimer: WARNING: CPU: 0 PID: 77 at drivers/bus/omap_l3_noc.c:147 l3_interrupt_handler+0x2f8/0x388 44000000.ocp:L3 Custom Error: MASTER MPU TARGET L4PER2 (Idle): Data Access in Supervisor mode during Functional access ... __pm_runtime_idle omap_dm_timer_disable pwm_omap_dmtimer_start pwm_omap_dmtimer_enable pwm_apply_state pwm_vibrator_start pwm_vibrator_play_work This is because the timer that pwm-omap-dmtimer is using is now being probed with ti-sysc interconnect target module instead of omap_device and the ti-sysc quirk for SYSC_QUIRK_LEGACY_IDLE is not fully compatible with what omap_device has been doing. We could fix this by reverting the timer changes and have the timer probe again with omap_device. Or we could add more quirk handling to ti-sysc driver. But as these options don't work nicely as longer term solutions, let's just make timers probe with ti-sysc without any quirks. To do this, all we need to do is remove quirks for timers for ti-sysc, and drop the bogus pm_runtime_irq_safe() flag for timer-ti-dm. We should not use pm_runtime_irq_safe() anyways for drivers as it will take a permanent use count on the parent device blocking the parent devices from idling and has been forcing ti-sysc driver to use a quirk flag. Note that we will move the timer data to DEBUG section later on in clean-up patches. Fixes: 84badc5ec5fc ("ARM: dts: omap4: Move l4 child devices to probe them with ti-sysc") Cc: Andy Shevchenko Cc: Bartosz Golaszewski Cc: Daniel Lezcano Cc: H. Nikolaus Schaller Cc: Keerthy Cc: Ladislav Michl Cc: Pavel Machek Cc: Sebastian Reichel Cc: Tero Kristo Cc: Thierry Reding Cc: Thomas Gleixner Reported-by: H. Nikolaus Schaller Tested-By: Andreas Kemnade Tested-By: H. Nikolaus Schaller Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 4 ++-- drivers/clocksource/timer-ti-dm.c | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index e4fe954e63a9..12a0401339b8 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -879,10 +879,10 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = { SYSC_QUIRK("smartreflex", 0, -1, 0x38, -1, 0x00000000, 0xffffffff, SYSC_QUIRK_LEGACY_IDLE), SYSC_QUIRK("timer", 0, 0, 0x10, 0x14, 0x00000015, 0xffffffff, - SYSC_QUIRK_LEGACY_IDLE), + 0), /* Some timers on omap4 and later */ SYSC_QUIRK("timer", 0, 0, 0x10, -1, 0x4fff1301, 0xffffffff, - SYSC_QUIRK_LEGACY_IDLE), + 0), SYSC_QUIRK("uart", 0, 0x50, 0x54, 0x58, 0x00000052, 0xffffffff, SYSC_QUIRK_LEGACY_IDLE), /* Uarts on omap4 and later */ diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c index 3ecf84706640..23414dddc3ba 100644 --- a/drivers/clocksource/timer-ti-dm.c +++ b/drivers/clocksource/timer-ti-dm.c @@ -868,7 +868,6 @@ static int omap_dm_timer_probe(struct platform_device *pdev) timer->pdev = pdev; pm_runtime_enable(dev); - pm_runtime_irq_safe(dev); if (!timer->reserved) { ret = pm_runtime_get_sync(dev); From 0840242e887586268f665bf58d5e1a7d6ebf35ed Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 22 Jan 2019 08:21:01 -0800 Subject: [PATCH 0308/1436] ARM: dts: Configure clock parent for pwm vibra Commit 84badc5ec5fc ("ARM: dts: omap4: Move l4 child devices to probe them with ti-sysc") moved some omap4 timers to probe with ti-sysc interconnect target module. Turns out this broke pwm-omap-dmtimer for reparenting of the timer clock. With ti-sysc, we can now configure the clock sources in the dts with assigned-clocks and assigned-clock-parents. Fixes: 84badc5ec5fc ("ARM: dts: omap4: Move l4 child devices to probe them with ti-sysc") Cc: Bartosz Golaszewski Cc: Daniel Lezcano Cc: H. Nikolaus Schaller Cc: Keerthy Cc: Ladislav Michl Cc: Pavel Machek Cc: Sebastian Reichel Cc: Tero Kristo Cc: Thierry Reding Cc: Thomas Gleixner Reported-by: H. Nikolaus Schaller Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap4-droid4-xt894.dts | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts index 04758a2a87f0..67d77eee9433 100644 --- a/arch/arm/boot/dts/omap4-droid4-xt894.dts +++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts @@ -644,6 +644,17 @@ }; }; +/* Configure pwm clock source for timers 8 & 9 */ +&timer8 { + assigned-clocks = <&abe_clkctrl OMAP4_TIMER8_CLKCTRL 24>; + assigned-clock-parents = <&sys_clkin_ck>; +}; + +&timer9 { + assigned-clocks = <&l4_per_clkctrl OMAP4_TIMER9_CLKCTRL 24>; + assigned-clock-parents = <&sys_clkin_ck>; +}; + /* * As uart1 is wired to mdm6600 with rts and cts, we can use the cts pin for * uart1 wakeirq. From 9791ec7df0e7b4d80706ccea8f24b6542f6059e9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 Jan 2019 10:02:33 +0000 Subject: [PATCH 0309/1436] irqchip/gic-v3-its: Plug allocation race for devices sharing a DevID On systems or VMs where multiple devices share a single DevID (because they sit behind a PCI bridge, or because the HW is broken in funky ways), we reuse the save its_device structure in order to reflect this. It turns out that there is a distinct lack of locking when looking up the its_device, and two device being probed concurrently can result in double allocations. That's obviously not nice. A solution for this is to have a per-ITS mutex that serializes device allocation. A similar issue exists on the freeing side, which can run concurrently with the allocation. On top of now taking the appropriate lock, we also make sure that a shared device is never freed, as we have no way to currently track the life cycle of such object. Reported-by: Zheng Xiang Tested-by: Zheng Xiang Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 36181197d5e0..f25ec92f23ee 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -97,9 +97,14 @@ struct its_device; * The ITS structure - contains most of the infrastructure, with the * top-level MSI domain, the command queue, the collections, and the * list of devices writing to it. + * + * dev_alloc_lock has to be taken for device allocations, while the + * spinlock must be taken to parse data structures such as the device + * list. */ struct its_node { raw_spinlock_t lock; + struct mutex dev_alloc_lock; struct list_head entry; void __iomem *base; phys_addr_t phys_base; @@ -156,6 +161,7 @@ struct its_device { void *itt; u32 nr_ites; u32 device_id; + bool shared; }; static struct { @@ -2469,6 +2475,7 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev, struct its_device *its_dev; struct msi_domain_info *msi_info; u32 dev_id; + int err = 0; /* * We ignore "dev" entierely, and rely on the dev_id that has @@ -2491,6 +2498,7 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev, return -EINVAL; } + mutex_lock(&its->dev_alloc_lock); its_dev = its_find_device(its, dev_id); if (its_dev) { /* @@ -2498,18 +2506,22 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev, * another alias (PCI bridge of some sort). No need to * create the device. */ + its_dev->shared = true; pr_debug("Reusing ITT for devID %x\n", dev_id); goto out; } its_dev = its_create_device(its, dev_id, nvec, true); - if (!its_dev) - return -ENOMEM; + if (!its_dev) { + err = -ENOMEM; + goto out; + } pr_debug("ITT %d entries, %d bits\n", nvec, ilog2(nvec)); out: + mutex_unlock(&its->dev_alloc_lock); info->scratchpad[0].ptr = its_dev; - return 0; + return err; } static struct msi_domain_ops its_msi_domain_ops = { @@ -2613,6 +2625,7 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq, { struct irq_data *d = irq_domain_get_irq_data(domain, virq); struct its_device *its_dev = irq_data_get_irq_chip_data(d); + struct its_node *its = its_dev->its; int i; for (i = 0; i < nr_irqs; i++) { @@ -2627,8 +2640,14 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq, irq_domain_reset_irq_data(data); } - /* If all interrupts have been freed, start mopping the floor */ - if (bitmap_empty(its_dev->event_map.lpi_map, + mutex_lock(&its->dev_alloc_lock); + + /* + * If all interrupts have been freed, start mopping the + * floor. This is conditionned on the device not being shared. + */ + if (!its_dev->shared && + bitmap_empty(its_dev->event_map.lpi_map, its_dev->event_map.nr_lpis)) { its_lpi_free(its_dev->event_map.lpi_map, its_dev->event_map.lpi_base, @@ -2640,6 +2659,8 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq, its_free_device(its_dev); } + mutex_unlock(&its->dev_alloc_lock); + irq_domain_free_irqs_parent(domain, virq, nr_irqs); } @@ -3549,6 +3570,7 @@ static int __init its_probe_one(struct resource *res, } raw_spin_lock_init(&its->lock); + mutex_init(&its->dev_alloc_lock); INIT_LIST_HEAD(&its->entry); INIT_LIST_HEAD(&its->its_device_list); typer = gic_read_typer(its_base + GITS_TYPER); From 45725e0fc3e7fe52fedb94f59806ec50e9618682 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 Jan 2019 15:19:23 +0000 Subject: [PATCH 0310/1436] irqchip/gic-v3-its: Gracefully fail on LPI exhaustion In the unlikely event that we cannot find any available LPI in the system, we should gracefully return an error instead of carrying on with no LPI allocated at all. Fixes: 38dd7c494cf6 ("irqchip/gic-v3-its: Drop chunk allocation compatibility") Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index f25ec92f23ee..c3aba3fc818d 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1586,6 +1586,9 @@ static unsigned long *its_lpi_alloc(int nr_irqs, u32 *base, int *nr_ids) nr_irqs /= 2; } while (nr_irqs > 0); + if (!nr_irqs) + err = -ENOSPC; + if (err) goto out; From 2380a22b60ce6f995eac806e69c66e397b59d045 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 28 Jan 2019 16:59:35 +0100 Subject: [PATCH 0311/1436] irqchip/mmp: Only touch the PJ4 IRQ & FIQ bits on enable/disable Resetting bit 4 disables the interrupt delivery to the "secure processor" core. This breaks the keyboard on a OLPC XO 1.75 laptop, where the firmware running on the "secure processor" bit-bangs the PS/2 protocol over the GPIO lines. It is not clear what the rest of the bits are and Marvell was unhelpful when asked for documentation. Aside from the SP bit, there are probably priority bits. Leaving the unknown bits as the firmware set them up seems to be a wiser course of action compared to just turning them off. Signed-off-by: Lubomir Rintel Acked-by: Pavel Machek [maz: fixed-up subject and commit message] Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-mmp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c index 25f32e1d7764..3496b61a312a 100644 --- a/drivers/irqchip/irq-mmp.c +++ b/drivers/irqchip/irq-mmp.c @@ -34,6 +34,9 @@ #define SEL_INT_PENDING (1 << 6) #define SEL_INT_NUM_MASK 0x3f +#define MMP2_ICU_INT_ROUTE_PJ4_IRQ (1 << 5) +#define MMP2_ICU_INT_ROUTE_PJ4_FIQ (1 << 6) + struct icu_chip_data { int nr_irqs; unsigned int virq_base; @@ -190,7 +193,8 @@ static const struct mmp_intc_conf mmp_conf = { static const struct mmp_intc_conf mmp2_conf = { .conf_enable = 0x20, .conf_disable = 0x0, - .conf_mask = 0x7f, + .conf_mask = MMP2_ICU_INT_ROUTE_PJ4_IRQ | + MMP2_ICU_INT_ROUTE_PJ4_FIQ, }; static void __exception_irq_entry mmp_handle_irq(struct pt_regs *regs) From dc14eb12f6bb3e779c5461429c1889a339c67aab Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 9 Jan 2019 22:19:28 -0500 Subject: [PATCH 0312/1436] drm/amdgpu: Add missing power attribute to APU check Add missing power_average to visible check for power attributes for APUs. Was missed before. Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 6896dec97fc7..0ed41a9d2d77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1686,7 +1686,8 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, effective_mode &= ~S_IWUSR; if ((adev->flags & AMD_IS_APU) && - (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || + (attr == &sensor_dev_attr_power1_average.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr|| attr == &sensor_dev_attr_power1_cap.dev_attr.attr)) return 0; From afeff4c16edaa6275b903f82b0561406259aa3a3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Jan 2019 12:05:16 -0500 Subject: [PATCH 0313/1436] drm/radeon: check if device is root before getting pci speed caps Check if the device is root rather before attempting to see what speeds the pcie port supports. Fixes a crash with pci passthrough in a VM. Bug: https://bugs.freedesktop.org/show_bug.cgi?id=109366 Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/ci_dpm.c | 5 +++-- drivers/gpu/drm/radeon/si_dpm.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index d587779a80b4..a97294ac96d5 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -5676,7 +5676,7 @@ int ci_dpm_init(struct radeon_device *rdev) u16 data_offset, size; u8 frev, crev; struct ci_power_info *pi; - enum pci_bus_speed speed_cap; + enum pci_bus_speed speed_cap = PCI_SPEED_UNKNOWN; struct pci_dev *root = rdev->pdev->bus->self; int ret; @@ -5685,7 +5685,8 @@ int ci_dpm_init(struct radeon_device *rdev) return -ENOMEM; rdev->pm.dpm.priv = pi; - speed_cap = pcie_get_speed_cap(root); + if (!pci_is_root_bus(rdev->pdev->bus)) + speed_cap = pcie_get_speed_cap(root); if (speed_cap == PCI_SPEED_UNKNOWN) { pi->sys_pcie_mask = 0; } else { diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 8fb60b3af015..0a785ef0ab66 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -6899,7 +6899,7 @@ int si_dpm_init(struct radeon_device *rdev) struct ni_power_info *ni_pi; struct si_power_info *si_pi; struct atom_clock_dividers dividers; - enum pci_bus_speed speed_cap; + enum pci_bus_speed speed_cap = PCI_SPEED_UNKNOWN; struct pci_dev *root = rdev->pdev->bus->self; int ret; @@ -6911,7 +6911,8 @@ int si_dpm_init(struct radeon_device *rdev) eg_pi = &ni_pi->eg; pi = &eg_pi->rv7xx; - speed_cap = pcie_get_speed_cap(root); + if (!pci_is_root_bus(rdev->pdev->bus)) + speed_cap = pcie_get_speed_cap(root); if (speed_cap == PCI_SPEED_UNKNOWN) { si_pi->sys_pcie_mask = 0; } else { From 2f10d823739680d2477ce34437e8a08a53117f40 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 25 Jan 2019 15:55:33 -0600 Subject: [PATCH 0314/1436] drm/amd/powerplay: Fix missing break in switch Add missing break statement in order to prevent the code from falling through to the default case. The resoning for this is that pclk_vol_table is an automatic variable. So, it makes no sense to update it just before falling through to the default case and return -EINVAL. This bug was found thanks to the ongoing efforts to enabling -Wimplicit-fallthrough. Fixes: cd70f3d6e3fa ("drm/amd/powerplay: PP/DAL interface changes for dynamic clock switch") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c index f95c5f50eb0f..5273de3c5b98 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c @@ -1033,6 +1033,7 @@ static int smu10_get_clock_by_type_with_latency(struct pp_hwmgr *hwmgr, break; case amd_pp_dpp_clock: pclk_vol_table = pinfo->vdd_dep_on_dppclk; + break; default: return -EINVAL; } From 0604628bb03ae695f61b872cc59f1933a8379625 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 29 Jan 2019 09:15:02 +0100 Subject: [PATCH 0315/1436] netfilter: nf_tables: add NFTA_RULE_POSITION_ID to nla_policy Fixes: 75dd48e2e420a ("netfilter: nf_tables: Support RULE_ID reference in new rule") Reported-by: Cong Wang Signed-off-by: Florian Westphal Acked-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e92bedd09cde..7495f29d24e8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2239,6 +2239,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { [NFTA_RULE_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN }, [NFTA_RULE_ID] = { .type = NLA_U32 }, + [NFTA_RULE_POSITION_ID] = { .type = NLA_U32 }, }; static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, From dc30e70391376ba3987aeb856ae6d9c0706534f1 Mon Sep 17 00:00:00 2001 From: Yizhuo Date: Fri, 25 Jan 2019 22:32:20 -0800 Subject: [PATCH 0316/1436] ARM: OMAP2+: Variable "reg" in function omap4_dsi_mux_pads() could be uninitialized In function omap4_dsi_mux_pads(), local variable "reg" could be uninitialized if function regmap_read() returns -EINVAL. However, it will be used directly in the later context, which is potentially unsafe. Signed-off-by: Yizhuo Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/display.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index f86b72d1d59e..1444b4b4bd9f 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -83,6 +83,7 @@ static int omap4_dsi_mux_pads(int dsi_id, unsigned lanes) u32 enable_mask, enable_shift; u32 pipd_mask, pipd_shift; u32 reg; + int ret; if (dsi_id == 0) { enable_mask = OMAP4_DSI1_LANEENABLE_MASK; @@ -98,7 +99,11 @@ static int omap4_dsi_mux_pads(int dsi_id, unsigned lanes) return -ENODEV; } - regmap_read(omap4_dsi_mux_syscon, OMAP4_DSIPHY_SYSCON_OFFSET, ®); + ret = regmap_read(omap4_dsi_mux_syscon, + OMAP4_DSIPHY_SYSCON_OFFSET, + ®); + if (ret) + return ret; reg &= ~enable_mask; reg &= ~pipd_mask; From dcf300a69ac307053dfb35c2e33972e754a98bce Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 27 Jan 2019 23:28:33 +0200 Subject: [PATCH 0317/1436] MIPS: OCTEON: don't set octeon_dma_bar_type if PCI is disabled Don't set octeon_dma_bar_type if PCI is disabled. This avoids creation of the MSI irqchip later on, and saves a bit of memory. Signed-off-by: Aaro Koskinen Signed-off-by: Paul Burton Fixes: a214720cbf50 ("Disable MSI also when pcie-octeon.pcie_disable on") Cc: stable@vger.kernel.org # v3.3+ Cc: linux-mips@vger.kernel.org --- arch/mips/pci/pci-octeon.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c index 5017d5843c5a..fc29b85cfa92 100644 --- a/arch/mips/pci/pci-octeon.c +++ b/arch/mips/pci/pci-octeon.c @@ -568,6 +568,11 @@ static int __init octeon_pci_setup(void) if (octeon_has_feature(OCTEON_FEATURE_PCIE)) return 0; + if (!octeon_is_pci_host()) { + pr_notice("Not in host mode, PCI Controller not initialized\n"); + return 0; + } + /* Point pcibios_map_irq() to the PCI version of it */ octeon_pcibios_map_irq = octeon_pci_pcibios_map_irq; @@ -579,11 +584,6 @@ static int __init octeon_pci_setup(void) else octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_BIG; - if (!octeon_is_pci_host()) { - pr_notice("Not in host mode, PCI Controller not initialized\n"); - return 0; - } - /* PCI I/O and PCI MEM values */ set_io_port_base(OCTEON_PCI_IOSPACE_BASE); ioport_resource.start = 0; From 0648e50e548d881d025b9419a1a168753c8e2bf7 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 28 Jan 2019 22:21:17 +0000 Subject: [PATCH 0318/1436] MIPS: VDSO: Use same -m%-float cflag as the kernel proper The MIPS VDSO build currently doesn't provide the -msoft-float flag to the compiler as the kernel proper does. This results in an attempt to use the compiler's default floating point configuration, which can be problematic in cases where this is incompatible with the target CPU's -march= flag. For example decstation_defconfig fails to build using toolchains in which gcc was configured --with-fp-32=xx with the following error: LDS arch/mips/vdso/vdso.lds cc1: error: '-march=r3000' requires '-mfp32' make[2]: *** [scripts/Makefile.build:379: arch/mips/vdso/vdso.lds] Error 1 The kernel proper avoids this error because we build with the -msoft-float compiler flag, rather than using the compiler's default. Pass this flag through to the VDSO build so that it too becomes agnostic to the toolchain's floating point configuration. Note that this is filtered out from KBUILD_CFLAGS rather than simply always using -msoft-float such that if we switch the kernel to use -mno-float in the future the VDSO will automatically inherit the change. The VDSO doesn't actually include any floating point code, and its .MIPS.abiflags section is already manually generated to specify that it's compatible with any floating point ABI. As such this change should have no effect on the resulting VDSO, apart from fixing the build failure for affected toolchains. Signed-off-by: Paul Burton Reported-by: Kevin Hilman Reported-by: Guenter Roeck Tested-by: Kevin Hilman References: https://lore.kernel.org/linux-mips/1477843551-21813-1-git-send-email-linux@roeck-us.net/ References: https://kernelci.org/build/id/5c4e4ae059b5142a249ad004/logs/ Fixes: ebb5e78cc634 ("MIPS: Initial implementation of a VDSO") Cc: Maciej W. Rozycki Cc: linux-mips@vger.kernel.org Cc: stable@vger.kernel.org # v4.4+ --- arch/mips/vdso/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index f6fd340e39c2..314949b2261d 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -8,6 +8,7 @@ ccflags-vdso := \ $(filter -E%,$(KBUILD_CFLAGS)) \ $(filter -mmicromips,$(KBUILD_CFLAGS)) \ $(filter -march=%,$(KBUILD_CFLAGS)) \ + $(filter -m%-float,$(KBUILD_CFLAGS)) \ -D__VDSO__ ifdef CONFIG_CC_IS_CLANG From 67fc5dc8a541e8f458d7f08bf88ff55933bf9f9d Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 28 Jan 2019 23:16:22 +0000 Subject: [PATCH 0319/1436] MIPS: VDSO: Include $(ccflags-vdso) in o32,n32 .lds builds When generating vdso-o32.lds & vdso-n32.lds for use with programs running as compat ABIs under 64b kernels, we previously haven't included the compiler flags that are supposedly common to all ABIs - ie. those in the ccflags-vdso variable. This is problematic in cases where we need to provide the -m%-float flag in order to ensure that we don't attempt to use a floating point ABI that's incompatible with the target CPU & ABI. For example a toolchain using current gcc trunk configured --with-fp-32=xx fails to build a 64r6el_defconfig kernel with the following error: cc1: error: '-march=mips1' requires '-mfp32' make[2]: *** [arch/mips/vdso/Makefile:135: arch/mips/vdso/vdso-o32.lds] Error 1 Include $(ccflags-vdso) for the compat VDSO .lds builds, just as it is included for the native VDSO .lds & when compiling objects for the compat VDSOs. This ensures we consistently provide the -msoft-float flag amongst others, avoiding the problem by ensuring we're agnostic to the toolchain defaults. Signed-off-by: Paul Burton Fixes: ebb5e78cc634 ("MIPS: Initial implementation of a VDSO") Cc: linux-mips@vger.kernel.org Cc: Kevin Hilman Cc: Guenter Roeck Cc: Maciej W . Rozycki Cc: stable@vger.kernel.org # v4.4+ --- arch/mips/vdso/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 314949b2261d..0ede4deb8181 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -130,7 +130,7 @@ $(obj)/%-o32.o: $(src)/%.c FORCE $(call cmd,force_checksrc) $(call if_changed_rule,cc_o_c) -$(obj)/vdso-o32.lds: KBUILD_CPPFLAGS := -mabi=32 +$(obj)/vdso-o32.lds: KBUILD_CPPFLAGS := $(ccflags-vdso) -mabi=32 $(obj)/vdso-o32.lds: $(src)/vdso.lds.S FORCE $(call if_changed_dep,cpp_lds_S) @@ -170,7 +170,7 @@ $(obj)/%-n32.o: $(src)/%.c FORCE $(call cmd,force_checksrc) $(call if_changed_rule,cc_o_c) -$(obj)/vdso-n32.lds: KBUILD_CPPFLAGS := -mabi=n32 +$(obj)/vdso-n32.lds: KBUILD_CPPFLAGS := $(ccflags-vdso) -mabi=n32 $(obj)/vdso-n32.lds: $(src)/vdso.lds.S FORCE $(call if_changed_dep,cpp_lds_S) From ff9fb72bc07705c00795ca48631f7fffe24d2c6b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 23 Jan 2019 11:28:14 +0100 Subject: [PATCH 0320/1436] debugfs: return error values, not NULL When an error happens, debugfs should return an error pointer value, not NULL. This will prevent the totally theoretical error where a debugfs call fails due to lack of memory, returning NULL, and that dentry value is then passed to another debugfs call, which would end up succeeding, creating a file at the root of the debugfs tree, but would then be impossible to remove (because you can not remove the directory NULL). So, to make everyone happy, always return errors, this makes the users of debugfs much simpler (they do not have to ever check the return value), and everyone can rest easy. Reported-by: Gary R Hook Reported-by: Heiko Carstens Reported-by: Masami Hiramatsu Reported-by: Michal Hocko Reported-by: Sebastian Andrzej Siewior Reported-by: Ulf Hansson Reviewed-by: Masami Hiramatsu Reviewed-by: Sebastian Andrzej Siewior Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 41ef452c1fcf..b16f8035b1af 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -254,8 +254,8 @@ MODULE_ALIAS_FS("debugfs"); * @parent: a pointer to the parent dentry of the file. * * This function will return a pointer to a dentry if it succeeds. If the file - * doesn't exist or an error occurs, %NULL will be returned. The returned - * dentry must be passed to dput() when it is no longer needed. + * doesn't exist or an error occurs, %ERR_PTR(-ERROR) will be returned. The + * returned dentry must be passed to dput() when it is no longer needed. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. @@ -265,17 +265,17 @@ struct dentry *debugfs_lookup(const char *name, struct dentry *parent) struct dentry *dentry; if (IS_ERR(parent)) - return NULL; + return parent; if (!parent) parent = debugfs_mount->mnt_root; dentry = lookup_one_len_unlocked(name, parent, strlen(name)); if (IS_ERR(dentry)) - return NULL; + return dentry; if (!d_really_is_positive(dentry)) { dput(dentry); - return NULL; + return ERR_PTR(-EINVAL); } return dentry; } @@ -324,7 +324,7 @@ static struct dentry *failed_creating(struct dentry *dentry) inode_unlock(d_inode(dentry->d_parent)); dput(dentry); simple_release_fs(&debugfs_mount, &debugfs_mount_count); - return NULL; + return ERR_PTR(-ENOMEM); } static struct dentry *end_creating(struct dentry *dentry) @@ -347,7 +347,7 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode, dentry = start_creating(name, parent); if (IS_ERR(dentry)) - return NULL; + return dentry; inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) @@ -386,7 +386,8 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode, * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, %NULL will be returned. + * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be + * returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. @@ -464,7 +465,8 @@ EXPORT_SYMBOL_GPL(debugfs_create_file_unsafe); * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, %NULL will be returned. + * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be + * returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. @@ -495,7 +497,8 @@ EXPORT_SYMBOL_GPL(debugfs_create_file_size); * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, %NULL will be returned. + * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be + * returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. @@ -506,7 +509,7 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) struct inode *inode; if (IS_ERR(dentry)) - return NULL; + return dentry; inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) @@ -545,7 +548,7 @@ struct dentry *debugfs_create_automount(const char *name, struct inode *inode; if (IS_ERR(dentry)) - return NULL; + return dentry; inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) @@ -581,8 +584,8 @@ EXPORT_SYMBOL(debugfs_create_automount); * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the symbolic * link is to be removed (no automatic cleanup happens if your module is - * unloaded, you are responsible here.) If an error occurs, %NULL will be - * returned. + * unloaded, you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) + * will be returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. @@ -594,12 +597,12 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, struct inode *inode; char *link = kstrdup(target, GFP_KERNEL); if (!link) - return NULL; + return ERR_PTR(-ENOMEM); dentry = start_creating(name, parent); if (IS_ERR(dentry)) { kfree(link); - return NULL; + return dentry; } inode = debugfs_get_inode(dentry->d_sb); @@ -827,7 +830,9 @@ exit: if (dentry && !IS_ERR(dentry)) dput(dentry); unlock_rename(new_dir, old_dir); - return NULL; + if (IS_ERR(dentry)) + return dentry; + return ERR_PTR(-EINVAL); } EXPORT_SYMBOL_GPL(debugfs_rename); From 7b21b69ab203136fdc153c7707fa6c409e523c2e Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 27 Jan 2019 10:11:27 +0200 Subject: [PATCH 0321/1436] IB/uverbs: Fix OOPs in uverbs_user_mmap_disassociate The vma->vm_mm can become impossible to get before rdma_umap_close() is called, in this case we must not try to get an mm that is already undergoing process exit. In this case there is no need to wait for anything as the VMA will be destroyed by another thread soon and is already effectively 'unreachable' by userspace. BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 PGD 800000012bc50067 P4D 800000012bc50067 PUD 129db5067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 1 PID: 2050 Comm: bash Tainted: G W OE 4.20.0-rc6+ #3 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:__rb_erase_color+0xb9/0x280 Code: 84 17 01 00 00 48 3b 68 10 0f 84 15 01 00 00 48 89 58 08 48 89 de 48 89 ef 4c 89 e3 e8 90 84 22 00 e9 60 ff ff ff 48 8b 5d 10 03 01 0f 84 9c 00 00 00 48 8b 43 10 48 85 c0 74 09 f6 00 01 0f RSP: 0018:ffffbecfc090bab8 EFLAGS: 00010246 RAX: ffff97616346cf30 RBX: 0000000000000000 RCX: 0000000000000101 RDX: 0000000000000000 RSI: ffff97623b6ca828 RDI: ffff97621ef10828 RBP: ffff97621ef10828 R08: ffff97621ef10828 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff97623b6ca838 R13: ffffffffbb3fef50 R14: ffff97623b6ca828 R15: 0000000000000000 FS: 00007f7a5c31d740(0000) GS:ffff97623bb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000011255a000 CR4: 00000000000006e0 Call Trace: unlink_file_vma+0x3b/0x50 free_pgtables+0xa1/0x110 exit_mmap+0xca/0x1a0 ? mlx5_ib_dealloc_pd+0x28/0x30 [mlx5_ib] mmput+0x54/0x140 uverbs_user_mmap_disassociate+0xcc/0x160 [ib_uverbs] uverbs_destroy_ufile_hw+0xf7/0x120 [ib_uverbs] ib_uverbs_remove_one+0xea/0x240 [ib_uverbs] ib_unregister_device+0xfb/0x200 [ib_core] mlx5_ib_remove+0x51/0xe0 [mlx5_ib] mlx5_remove_device+0xc1/0xd0 [mlx5_core] mlx5_unregister_device+0x3d/0xb0 [mlx5_core] remove_one+0x2a/0x90 [mlx5_core] pci_device_remove+0x3b/0xc0 device_release_driver_internal+0x16d/0x240 unbind_store+0xb2/0x100 kernfs_fop_write+0x102/0x180 __vfs_write+0x36/0x1a0 ? __alloc_fd+0xa9/0x170 ? set_close_on_exec+0x49/0x70 vfs_write+0xad/0x1a0 ksys_write+0x52/0xc0 do_syscall_64+0x5b/0x180 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Cc: # 4.19 Fixes: 5f9794dc94f5 ("RDMA/ucontext: Add a core API for mmaping driver IO memory") Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_main.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 15add0688fbb..5f366838b7ff 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -967,11 +967,19 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) /* Get an arbitrary mm pointer that hasn't been cleaned yet */ mutex_lock(&ufile->umap_lock); - if (!list_empty(&ufile->umaps)) { - mm = list_first_entry(&ufile->umaps, - struct rdma_umap_priv, list) - ->vma->vm_mm; - mmget(mm); + while (!list_empty(&ufile->umaps)) { + int ret; + + priv = list_first_entry(&ufile->umaps, + struct rdma_umap_priv, list); + mm = priv->vma->vm_mm; + ret = mmget_not_zero(mm); + if (!ret) { + list_del_init(&priv->list); + mm = NULL; + continue; + } + break; } mutex_unlock(&ufile->umap_lock); if (!mm) From b677dfae5aa197afc5191755a76a8727ffca538a Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 3 Jan 2019 23:44:11 -0600 Subject: [PATCH 0322/1436] x86/boot/compressed/64: Set EFER.LME=1 in 32-bit trampoline before returning to long mode In some old AMD KVM implementation, guest's EFER.LME bit is cleared by KVM when the hypervsior detects that the guest sets CR0.PG to 0. This causes the guest OS to reboot when it tries to return from 32-bit trampoline code because the CPU is in incorrect state: CR4.PAE=1, CR0.PG=1, CS.L=1, but EFER.LME=0. As a precaution, set EFER.LME=1 as part of long mode activation procedure. This extra step won't cause any harm when Linux is booted on a bare-metal machine. Signed-off-by: Wei Huang Signed-off-by: Thomas Gleixner Acked-by: Kirill A. Shutemov Cc: bp@alien8.de Cc: hpa@zytor.com Link: https://lkml.kernel.org/r/20190104054411.12489-1-wei@redhat.com --- arch/x86/boot/compressed/head_64.S | 8 ++++++++ arch/x86/boot/compressed/pgtable.h | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 64037895b085..f105ae8651c9 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -600,6 +600,14 @@ ENTRY(trampoline_32bit_src) leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax movl %eax, %cr3 3: + /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ + pushl %ecx + movl $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + wrmsr + popl %ecx + /* Enable PAE and LA57 (if required) paging modes */ movl $X86_CR4_PAE, %eax cmpl $0, %edx diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h index 91f75638f6e6..6ff7e81b5628 100644 --- a/arch/x86/boot/compressed/pgtable.h +++ b/arch/x86/boot/compressed/pgtable.h @@ -6,7 +6,7 @@ #define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0 #define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE -#define TRAMPOLINE_32BIT_CODE_SIZE 0x60 +#define TRAMPOLINE_32BIT_CODE_SIZE 0x70 #define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE From 5ccd35287edae4107475a141a477a6a4ecbe1cab Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 22 Dec 2018 19:11:16 +0000 Subject: [PATCH 0323/1436] x86/fault: Fix sign-extend unintended sign extension show_ldttss() shifts desc.base2 by 24 bit, but base2 is 8 bits of a bitfield in a u16. Due to the really great idea of integer promotion in C99 base2 is promoted to an int, because that's the standard defined behaviour when all values which can be represented by base2 fit into an int. Now if bit 7 is set in desc.base2 the result of the shift left by 24 makes the resulting integer negative and the following conversion to unsigned long legitmately sign extends first causing the upper bits 32 bits to be set in the result. Fix this by casting desc.base2 to unsigned long before the shift. Detected by CoverityScan, CID#1475635 ("Unintended sign extension") [ tglx: Reworded the changelog a bit as I actually had to lookup the standard (again) to decode the original one. ] Fixes: a1a371c468f7 ("x86/fault: Decode page fault OOPSes better") Signed-off-by: Colin Ian King Signed-off-by: Thomas Gleixner Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Borislav Petkov Cc: "H . Peter Anvin" Cc: kernel-janitors@vger.kernel.org Link: https://lkml.kernel.org/r/20181222191116.21831-1-colin.king@canonical.com --- arch/x86/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 2ff25ad33233..9d5c75f02295 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -595,7 +595,7 @@ static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index) return; } - addr = desc.base0 | (desc.base1 << 16) | (desc.base2 << 24); + addr = desc.base0 | (desc.base1 << 16) | ((unsigned long)desc.base2 << 24); #ifdef CONFIG_X86_64 addr |= ((u64)desc.base3 << 32); #endif From 34d66caf251df91ff27b24a3a786810d29989eca Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 17 Jan 2019 02:10:59 -0800 Subject: [PATCH 0324/1436] x86/speculation: Remove redundant arch_smt_update() invocation With commit a74cfffb03b7 ("x86/speculation: Rework SMT state change"), arch_smt_update() is invoked from each individual CPU hotplug function. Therefore the extra arch_smt_update() call in the sysfs SMT control is redundant. Fixes: a74cfffb03b7 ("x86/speculation: Rework SMT state change") Signed-off-by: Zhenzhong Duan Signed-off-by: Thomas Gleixner Cc: Cc: Cc: Cc: Cc: Cc: Link: https://lkml.kernel.org/r/e2e064f2-e8ef-42ca-bf4f-76b612964752@default --- kernel/cpu.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 91d5c38eb7e5..c0c7f64573ed 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2090,10 +2090,8 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) */ cpuhp_offline_cpu_device(cpu); } - if (!ret) { + if (!ret) cpu_smt_control = ctrlval; - arch_smt_update(); - } cpu_maps_update_done(); return ret; } @@ -2104,7 +2102,6 @@ static int cpuhp_smt_enable(void) cpu_maps_update_begin(); cpu_smt_control = CPU_SMT_ENABLED; - arch_smt_update(); for_each_present_cpu(cpu) { /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) From 8fc75bed96bb94e23ca51bd9be4daf65c57697bf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jan 2019 15:52:55 -0500 Subject: [PATCH 0325/1436] NFS: Fix up return value on fatal errors in nfs_page_async_flush() Ensure that we return the fatal error value that caused us to exit nfs_page_async_flush(). Fixes: c373fff7bd25 ("NFSv4: Don't special case "launder"") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.12+ Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5a0bbf917a32..f12cb31a41e5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -621,11 +621,12 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, nfs_set_page_writeback(page); WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags)); - ret = 0; + ret = req->wb_context->error; /* If there is a fatal error that covers this write, just exit */ - if (nfs_error_is_fatal_on_server(req->wb_context->error)) + if (nfs_error_is_fatal_on_server(ret)) goto out_launder; + ret = 0; if (!nfs_pageio_add_request(pgio, req)) { ret = pgio->pg_error; /* @@ -635,9 +636,9 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, nfs_context_set_write_error(req->wb_context, ret); if (nfs_error_is_fatal_on_server(ret)) goto out_launder; - } + } else + ret = -EAGAIN; nfs_redirty_request(req); - ret = -EAGAIN; } else nfs_add_stats(page_file_mapping(page)->host, NFSIOS_WRITEPAGES, 1); From 58d15ed1203f4d858c339ea4d7dafa94bd2a56d3 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 29 Jan 2019 12:46:16 +1000 Subject: [PATCH 0326/1436] cifs: fix computation for MAX_SMB2_HDR_SIZE The size of the fixed part of the create response is 88 bytes not 56. Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/smb2pdu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 7a2d0a2255e6..e9cc6775df21 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -84,8 +84,8 @@ #define NUMBER_OF_SMB2_COMMANDS 0x0013 -/* 4 len + 52 transform hdr + 64 hdr + 56 create rsp */ -#define MAX_SMB2_HDR_SIZE 0x00b0 +/* 52 transform hdr + 64 hdr + 88 create rsp */ +#define MAX_SMB2_HDR_SIZE 204 #define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) #define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd) From c4627e66f73a28c5515b908c90c2bf7120086497 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 29 Jan 2019 12:46:17 +1000 Subject: [PATCH 0327/1436] cifs: limit amount of data we request for xattrs to CIFSMaxBufSize minus the various headers and blobs that will be part of the reply. or else we might trigger a session reconnect. Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/smb2ops.c | 4 +++- fs/cifs/smb2pdu.h | 15 +++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 153238fc4fa9..6f96e2292856 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -866,7 +866,9 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, FILE_READ_EA, FILE_FULL_EA_INFORMATION, SMB2_O_INFO_FILE, - SMB2_MAX_EA_BUF, + CIFSMaxBufSize - + MAX_SMB2_CREATE_RESPONSE_SIZE - + MAX_SMB2_CLOSE_RESPONSE_SIZE, &rsp_iov, &buftype, cifs_sb); if (rc) { /* diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index e9cc6775df21..538e2299805f 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -85,6 +85,7 @@ #define NUMBER_OF_SMB2_COMMANDS 0x0013 /* 52 transform hdr + 64 hdr + 88 create rsp */ +#define SMB2_TRANSFORM_HEADER_SIZE 52 #define MAX_SMB2_HDR_SIZE 204 #define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) @@ -648,6 +649,13 @@ struct smb2_create_req { __u8 Buffer[0]; } __packed; +/* + * Maximum size of a SMB2_CREATE response is 64 (smb2 header) + + * 88 (fixed part of create response) + 520 (path) + 150 (contexts) + + * 2 bytes of padding. + */ +#define MAX_SMB2_CREATE_RESPONSE_SIZE 824 + struct smb2_create_rsp { struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* Must be 89 */ @@ -996,6 +1004,11 @@ struct smb2_close_req { __u64 VolatileFileId; /* opaque endianness */ } __packed; +/* + * Maximum size of a SMB2_CLOSE response is 64 (smb2 header) + 60 (data) + */ +#define MAX_SMB2_CLOSE_RESPONSE_SIZE 124 + struct smb2_close_rsp { struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* 60 */ @@ -1398,8 +1411,6 @@ struct smb2_file_link_info { /* encoding of request for level 11 */ char FileName[0]; /* Name to be assigned to new link */ } __packed; /* level 11 Set */ -#define SMB2_MAX_EA_BUF 65536 - struct smb2_file_full_ea_info { /* encoding of response for level 15 */ __le32 next_entry_offset; __u8 flags; From 9bda8723da2d55b1de833b98cf802b88006e5b69 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 23 Jan 2019 17:12:09 -0800 Subject: [PATCH 0328/1436] CIFS: Fix possible oops and memory leaks in async IO Allocation of a page array for non-cached IO was separated from allocation of rdata and wdata structures and this introduced memory leaks and a possible null pointer dereference. This patch fixes these problems. Cc: Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/file.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 2c7689f3998d..659ce1b92c44 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2696,6 +2696,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, rc = cifs_write_allocate_pages(wdata->pages, nr_pages); if (rc) { + kvfree(wdata->pages); kfree(wdata); add_credits_and_wake_if(server, credits, 0); break; @@ -2707,6 +2708,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, if (rc) { for (i = 0; i < nr_pages; i++) put_page(wdata->pages[i]); + kvfree(wdata->pages); kfree(wdata); add_credits_and_wake_if(server, credits, 0); break; @@ -3386,8 +3388,12 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, } rc = cifs_read_allocate_pages(rdata, npages); - if (rc) - goto error; + if (rc) { + kvfree(rdata->pages); + kfree(rdata); + add_credits_and_wake_if(server, credits, 0); + break; + } rdata->tailsz = PAGE_SIZE; } @@ -3407,7 +3413,6 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, if (!rdata->cfile->invalidHandle || !(rc = cifs_reopen_file(rdata->cfile, true))) rc = server->ops->async_readv(rdata); -error: if (rc) { add_credits_and_wake_if(server, rdata->credits, 0); kref_put(&rdata->refcount, From 7d42e72fe8ee5ab70b1af843dd7d8615e6fb0abe Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 25 Jan 2019 11:38:53 -0800 Subject: [PATCH 0329/1436] CIFS: Fix trace command logging for SMB2 reads and writes Currently we log success once we send an async IO request to the server. Instead we need to analyse a response and then log success or failure for a particular command. Also fix argument list for read logging. Cc: # 4.18 Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 46 ++++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 2ff209ec4fab..4b5ab9c80cc3 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3241,8 +3241,17 @@ smb2_readv_callback(struct mid_q_entry *mid) rdata->mr = NULL; } #endif - if (rdata->result) + if (rdata->result) { cifs_stats_fail_inc(tcon, SMB2_READ_HE); + trace_smb3_read_err(0 /* xid */, + rdata->cfile->fid.persistent_fid, + tcon->tid, tcon->ses->Suid, rdata->offset, + rdata->bytes, rdata->result); + } else + trace_smb3_read_done(0 /* xid */, + rdata->cfile->fid.persistent_fid, + tcon->tid, tcon->ses->Suid, + rdata->offset, rdata->got_bytes); queue_work(cifsiod_wq, &rdata->work); DeleteMidQEntry(mid); @@ -3317,13 +3326,11 @@ smb2_async_readv(struct cifs_readdata *rdata) if (rc) { kref_put(&rdata->refcount, cifs_readdata_release); cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE); - trace_smb3_read_err(rc, 0 /* xid */, io_parms.persistent_fid, - io_parms.tcon->tid, io_parms.tcon->ses->Suid, - io_parms.offset, io_parms.length); - } else - trace_smb3_read_done(0 /* xid */, io_parms.persistent_fid, - io_parms.tcon->tid, io_parms.tcon->ses->Suid, - io_parms.offset, io_parms.length); + trace_smb3_read_err(0 /* xid */, io_parms.persistent_fid, + io_parms.tcon->tid, + io_parms.tcon->ses->Suid, + io_parms.offset, io_parms.length, rc); + } cifs_small_buf_release(buf); return rc; @@ -3367,10 +3374,11 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, if (rc != -ENODATA) { cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE); cifs_dbg(VFS, "Send error in read = %d\n", rc); + trace_smb3_read_err(xid, req->PersistentFileId, + io_parms->tcon->tid, ses->Suid, + io_parms->offset, io_parms->length, + rc); } - trace_smb3_read_err(rc, xid, req->PersistentFileId, - io_parms->tcon->tid, ses->Suid, - io_parms->offset, io_parms->length); free_rsp_buf(resp_buftype, rsp_iov.iov_base); return rc == -ENODATA ? 0 : rc; } else @@ -3459,8 +3467,17 @@ smb2_writev_callback(struct mid_q_entry *mid) wdata->mr = NULL; } #endif - if (wdata->result) + if (wdata->result) { cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); + trace_smb3_write_err(0 /* no xid */, + wdata->cfile->fid.persistent_fid, + tcon->tid, tcon->ses->Suid, wdata->offset, + wdata->bytes, wdata->result); + } else + trace_smb3_write_done(0 /* no xid */, + wdata->cfile->fid.persistent_fid, + tcon->tid, tcon->ses->Suid, + wdata->offset, wdata->bytes); queue_work(cifsiod_wq, &wdata->work); DeleteMidQEntry(mid); @@ -3602,10 +3619,7 @@ smb2_async_writev(struct cifs_writedata *wdata, wdata->bytes, rc); kref_put(&wdata->refcount, release); cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); - } else - trace_smb3_write_done(0 /* no xid */, req->PersistentFileId, - tcon->tid, tcon->ses->Suid, wdata->offset, - wdata->bytes); + } async_writev_out: cifs_small_buf_release(req); From 8e6e72aeceaaed5aeeb1cb43d3085de7ceb14f79 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Sat, 26 Jan 2019 12:21:32 -0800 Subject: [PATCH 0330/1436] CIFS: Do not count -ENODATA as failure for query directory Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French CC: Stable --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 4b5ab9c80cc3..d858dc04fdc3 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3845,8 +3845,8 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, rsp->sync_hdr.Status == STATUS_NO_MORE_FILES) { srch_inf->endOfSearch = true; rc = 0; - } - cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); + } else + cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); goto qdir_exit; } From 082aaa8700415f6471ec9c5ef0c8307ca214989a Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 18 Jan 2019 15:54:34 -0800 Subject: [PATCH 0331/1436] CIFS: Do not consider -ENODATA as stat failure for reads When doing reads beyound the end of a file the server returns error STATUS_END_OF_FILE error which is mapped to -ENODATA. Currently we report it as a failure which confuses read stats. Change it to not consider -ENODATA as failure for stat purposes. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French CC: Stable --- fs/cifs/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index d858dc04fdc3..ef52d6642431 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3241,7 +3241,7 @@ smb2_readv_callback(struct mid_q_entry *mid) rdata->mr = NULL; } #endif - if (rdata->result) { + if (rdata->result && rdata->result != -ENODATA) { cifs_stats_fail_inc(tcon, SMB2_READ_HE); trace_smb3_read_err(0 /* xid */, rdata->cfile->fid.persistent_fid, From 81f5c6f5db37bf2360b64c304b27b8f499b48367 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 29 Jan 2019 16:38:16 -0800 Subject: [PATCH 0332/1436] bpf: btf: allow typedef func_proto Current implementation does not allow typedef func_proto. But it is actually allowed. -bash-4.4$ cat t.c typedef int (f) (int); f *g; -bash-4.4$ clang -O2 -g -c -target bpf t.c -Xclang -target-feature -Xclang +dwarfris -bash-4.4$ pahole -JV t.o File t.o: [1] PTR (anon) type_id=2 [2] TYPEDEF f type_id=3 [3] FUNC_PROTO (anon) return=4 args=(4 (anon)) [4] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED -bash-4.4$ This patch related btf verifier to allow such (typedef func_proto) patterns. Fixes: 2667a2626f4d ("bpf: btf: Add BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO") Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- kernel/bpf/btf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index befe570be5ba..c57bd10340ed 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -1459,7 +1459,8 @@ static int btf_modifier_resolve(struct btf_verifier_env *env, /* "typedef void new_void", "const void"...etc */ if (!btf_type_is_void(next_type) && - !btf_type_is_fwd(next_type)) { + !btf_type_is_fwd(next_type) && + !btf_type_is_func_proto(next_type)) { btf_verifier_log_type(env, v->t, "Invalid type_id"); return -EINVAL; } From 32b750b925040565bbeff1fe2f7510e3dc71ce2e Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 29 Jan 2019 16:38:17 -0800 Subject: [PATCH 0333/1436] tools/bpf: fix test_btf for typedef func_proto case Fixed one test_btf raw test such that typedef func_proto is permitted now. Fixes: 78a2540e8945 ("tools/bpf: Add tests for BTF_KIND_FUNC_PROTO and BTF_KIND_FUNC") Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_btf.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index a0bd04befe87..91420fa83b08 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -1881,13 +1881,12 @@ static struct btf_raw_test raw_tests[] = { }, { - .descr = "func proto (CONST=>TYPEDEF=>FUNC_PROTO)", + .descr = "func proto (TYPEDEF=>FUNC_PROTO)", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - BTF_CONST_ENC(4), /* [3] */ - BTF_TYPEDEF_ENC(NAME_TBD, 5), /* [4] */ - BTF_FUNC_PROTO_ENC(0, 2), /* [5] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [4] */ BTF_FUNC_PROTO_ARG_ENC(0, 1), BTF_FUNC_PROTO_ARG_ENC(0, 2), BTF_END_RAW, @@ -1901,8 +1900,6 @@ static struct btf_raw_test raw_tests[] = { .key_type_id = 1, .value_type_id = 1, .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid type_id", }, { From 1575c083a78cc1137ad8bfa042d02eeb9dc86ef4 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 22 Jan 2019 13:57:19 -0600 Subject: [PATCH 0334/1436] tty: serial: 8250_mtk: Fix potential NULL pointer dereference There is a potential NULL pointer dereference in case devm_kzalloc() fails and returns NULL. Fix this by adding a NULL check on data->dma This bug was detected with the help of Coccinelle. Fixes: 85b5c1dd0456 ("serial: 8250-mtk: add uart DMA support") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_mtk.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index e2c407656fa6..c1fdbc0b6840 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -357,6 +357,9 @@ static int mtk8250_probe_of(struct platform_device *pdev, struct uart_port *p, if (dmacnt == 2) { data->dma = devm_kzalloc(&pdev->dev, sizeof(*data->dma), GFP_KERNEL); + if (!data->dma) + return -ENOMEM; + data->dma->fn = mtk8250_dma_filter; data->dma->rx_size = MTK_UART_RX_SIZE; data->dma->rxconf.src_maxburst = MTK_UART_RX_TRIGGER; From 824d17c57b0abbcb9128fb3f7327fae14761914b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 24 Jan 2019 23:51:21 +0200 Subject: [PATCH 0335/1436] serial: 8250_pci: Make PCI class test non fatal As has been reported the National Instruments serial cards have broken PCI class. The commit 7d8905d06405 ("serial: 8250_pci: Enable device after we check black list") made the PCI class check mandatory for the case when device is listed in a quirk list. Make PCI class test non fatal to allow broken card be enumerated. Fixes: 7d8905d06405 ("serial: 8250_pci: Enable device after we check black list") Cc: stable Reported-by: Guan Yung Tseng Tested-by: Guan Yung Tseng Tested-by: KHUENY.Gerhard Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index f80a300b5d68..48bd694a5fa1 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -3420,6 +3420,11 @@ static int serial_pci_guess_board(struct pci_dev *dev, struct pciserial_board *board) { int num_iomem, num_port, first_port = -1, i; + int rc; + + rc = serial_pci_is_class_communication(dev); + if (rc) + return rc; /* * Should we try to make guesses for multiport serial devices later? @@ -3647,10 +3652,6 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) board = &pci_boards[ent->driver_data]; - rc = serial_pci_is_class_communication(dev); - if (rc) - return rc; - rc = serial_pci_is_blacklisted(dev); if (rc) return rc; From 4d95987a32db53f3beca76f8c4c8309ef6a5f192 Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Mon, 28 Jan 2019 13:25:56 -0500 Subject: [PATCH 0336/1436] serial: sh-sci: Do not free irqs that have already been freed Since IRQs might be muxed on some parts, we need to pay attention when we are freeing them. Otherwise we get the ugly WARNING "Trying to free already-free IRQ 20". Fixes: 628c534ae735 ("serial: sh-sci: Improve support for separate TEI and DRI interrupts") Cc: stable Signed-off-by: Chris Brandt Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 8df0fd824520..64bbeb7d7e0c 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1921,7 +1921,7 @@ out_nomem: static void sci_free_irq(struct sci_port *port) { - int i; + int i, j; /* * Intentionally in reverse order so we iterate over the muxed @@ -1937,6 +1937,13 @@ static void sci_free_irq(struct sci_port *port) if (unlikely(irq < 0)) continue; + /* Check if already freed (irq was muxed) */ + for (j = 0; j < i; j++) + if (port->irqs[j] == irq) + j = i + 1; + if (j > i) + continue; + free_irq(port->irqs[i], port); kfree(port->irqstr[i]); From db4090920ba2d61a5827a23e441447926a02ffee Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 25 Jan 2019 20:10:15 +0000 Subject: [PATCH 0337/1436] ARM: iop32x/n2100: fix PCI IRQ mapping Booting 4.20 on a TheCUS N2100 results in a kernel oops while probing PCI, due to n2100_pci_map_irq() having been discarded during boot. Signed-off-by: Russell King Cc: stable@vger.kernel.org # 2.6.18+ Signed-off-by: Arnd Bergmann --- arch/arm/mach-iop32x/n2100.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/mach-iop32x/n2100.c b/arch/arm/mach-iop32x/n2100.c index 3b73813c6b04..23e8c93515d4 100644 --- a/arch/arm/mach-iop32x/n2100.c +++ b/arch/arm/mach-iop32x/n2100.c @@ -75,8 +75,7 @@ void __init n2100_map_io(void) /* * N2100 PCI. */ -static int __init -n2100_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +static int n2100_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq; From 46edb8d1322c1763dd04e179992f8e9996085047 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 22 Jan 2019 11:35:25 +0000 Subject: [PATCH 0338/1436] firmware: arm_scmi: provide the mandatory device release callback The device/driver model clearly mandates that bus driver that discover and allocate the device must set the release callback. This callback will be used to free the device after all references have gone away. scmi bus driver is missing the obvious callback which will result in the following warning if the device is unregistered: Device 'scmi_dev.1' does not have a release() function, it is broken and must be fixed. See Documentation/kobject.txt. WARNING at drivers/base/core.c:922 device_release+0x8c/0xa0 Hardware name: ARM LTD Juno Development Platform BIOS EDK II Jan 21 2019 Workqueue: events deferred_probe_work_func pstate: 60000005 (nZCv daif -PAN -UAO) pc : device_release+0x8c/0xa0 lr : device_release+0x8c/0xa0 Call trace: device_release+0x8c/0xa0 kobject_put+0x8c/0x208 device_unregister+0x30/0x78 scmi_device_destroy+0x28/0x50 scmi_probe+0x354/0x5b0 platform_drv_probe+0x58/0xa8 really_probe+0x2c4/0x3e8 driver_probe_device+0x12c/0x148 __device_attach_driver+0xac/0x150 bus_for_each_drv+0x78/0xd8 __device_attach+0xe0/0x168 device_initial_probe+0x24/0x30 bus_probe_device+0xa0/0xa8 deferred_probe_work_func+0x8c/0xe0 process_one_work+0x1f0/0x478 worker_thread+0x22c/0x450 kthread+0x134/0x138 ret_from_fork+0x10/0x1c ---[ end trace 420bdb7f6af50937 ]--- Fix the issue by providing scmi_device_release callback. We have everything required for device release already in scmi_device_destroy, so we just need to move freeing of the device to scmi_device_release. Fixes: 933c504424a2 ("firmware: arm_scmi: add scmi protocol bus to enumerate protocol devices") Signed-off-by: Sudeep Holla Cc: stable@vger.kernel.org # 4.17+ Signed-off-by: Arnd Bergmann --- drivers/firmware/arm_scmi/bus.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index 472c88ae1c0f..92f843eaf1e0 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -119,6 +119,11 @@ void scmi_driver_unregister(struct scmi_driver *driver) } EXPORT_SYMBOL_GPL(scmi_driver_unregister); +static void scmi_device_release(struct device *dev) +{ + kfree(to_scmi_dev(dev)); +} + struct scmi_device * scmi_device_create(struct device_node *np, struct device *parent, int protocol) { @@ -138,6 +143,7 @@ scmi_device_create(struct device_node *np, struct device *parent, int protocol) scmi_dev->dev.parent = parent; scmi_dev->dev.of_node = np; scmi_dev->dev.bus = &scmi_bus_type; + scmi_dev->dev.release = scmi_device_release; dev_set_name(&scmi_dev->dev, "scmi_dev.%d", id); retval = device_register(&scmi_dev->dev); @@ -156,9 +162,8 @@ free_mem: void scmi_device_destroy(struct scmi_device *scmi_dev) { scmi_handle_put(scmi_dev->handle); - device_unregister(&scmi_dev->dev); ida_simple_remove(&scmi_bus_id, scmi_dev->id); - kfree(scmi_dev); + device_unregister(&scmi_dev->dev); } void scmi_set_handle(struct scmi_device *scmi_dev) From d0f9f16788e15d9eb40f68b047732d49658c5a3a Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Wed, 16 Jan 2019 16:49:58 +0100 Subject: [PATCH 0339/1436] ARM: tango: Improve ARCH_MULTIPLATFORM compatibility Calling platform-specific code unconditionally blows up when running an ARCH_MULTIPLATFORM kernel on a different platform. Don't do it. Reported-by: Paolo Pisati Signed-off-by: Marc Gonzalez Acked-by: Pavel Machek Cc: stable@vger.kernel.org # v4.8+ Fixes: a30eceb7a59d ("ARM: tango: add Suspend-to-RAM support") Signed-off-by: Arnd Bergmann --- arch/arm/mach-tango/pm.c | 6 ++---- arch/arm/mach-tango/pm.h | 7 +++++++ arch/arm/mach-tango/setup.c | 2 ++ 3 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 arch/arm/mach-tango/pm.h diff --git a/arch/arm/mach-tango/pm.c b/arch/arm/mach-tango/pm.c index 028e50c6383f..a32c3b631484 100644 --- a/arch/arm/mach-tango/pm.c +++ b/arch/arm/mach-tango/pm.c @@ -3,6 +3,7 @@ #include #include #include "smc.h" +#include "pm.h" static int tango_pm_powerdown(unsigned long arg) { @@ -24,10 +25,7 @@ static const struct platform_suspend_ops tango_pm_ops = { .valid = suspend_valid_only_mem, }; -static int __init tango_pm_init(void) +void __init tango_pm_init(void) { suspend_set_ops(&tango_pm_ops); - return 0; } - -late_initcall(tango_pm_init); diff --git a/arch/arm/mach-tango/pm.h b/arch/arm/mach-tango/pm.h new file mode 100644 index 000000000000..35ea705a0ee2 --- /dev/null +++ b/arch/arm/mach-tango/pm.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifdef CONFIG_SUSPEND +void __init tango_pm_init(void); +#else +#define tango_pm_init NULL +#endif diff --git a/arch/arm/mach-tango/setup.c b/arch/arm/mach-tango/setup.c index 677dd7b5efd9..824f90737b04 100644 --- a/arch/arm/mach-tango/setup.c +++ b/arch/arm/mach-tango/setup.c @@ -2,6 +2,7 @@ #include #include #include "smc.h" +#include "pm.h" static void tango_l2c_write(unsigned long val, unsigned int reg) { @@ -15,4 +16,5 @@ DT_MACHINE_START(TANGO_DT, "Sigma Tango DT") .dt_compat = tango_dt_compat, .l2c_aux_mask = ~0, .l2c_write_sec = tango_l2c_write, + .init_late = tango_pm_init, MACHINE_END From 0b053bbf07867c9942d706e70040398df59de54b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 24 Jan 2019 11:18:48 +0100 Subject: [PATCH 0340/1436] MAINTAINERS: Add Andy and Darren as arch/x86/platform/ reviewers ... so that they can get CCed on platform patches. Signed-off-by: Borislav Petkov Acked-by: Andy Shevchenko Acked-by: Thomas Gleixner Cc: Darren Hart Cc: Andy Shevchenko Cc: x86@kernel.org Link: https://lkml.kernel.org/r/20190128113619.19025-1-bp@alien8.de --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 32d444476a90..a98a7325df8c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16631,6 +16631,15 @@ S: Maintained F: drivers/platform/x86/ F: drivers/platform/olpc/ +X86 PLATFORM DRIVERS - ARCH +R: Darren Hart +R: Andy Shevchenko +L: platform-driver-x86@vger.kernel.org +L: x86@kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/core +S: Maintained +F: arch/x86/platform + X86 VDSO M: Andy Lutomirski L: linux-kernel@vger.kernel.org From a1960e0f1639cb1f7a3d94521760fc73091f6640 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 30 Jan 2019 10:49:34 +0100 Subject: [PATCH 0341/1436] staging: speakup: fix tty-operation NULL derefs The send_xchar() and tiocmset() tty operations are optional. Add the missing sanity checks to prevent user-space triggerable NULL-pointer dereferences. Fixes: 6b9ad1c742bf ("staging: speakup: add send_xchar, tiocmset and input functionality for tty") Cc: stable # 4.13 Cc: Okash Khawaja Cc: Samuel Thibault Signed-off-by: Johan Hovold Reviewed-by: Samuel Thibault Signed-off-by: Greg Kroah-Hartman --- drivers/staging/speakup/spk_ttyio.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/staging/speakup/spk_ttyio.c b/drivers/staging/speakup/spk_ttyio.c index c92bbd05516e..005de0024dd4 100644 --- a/drivers/staging/speakup/spk_ttyio.c +++ b/drivers/staging/speakup/spk_ttyio.c @@ -265,7 +265,8 @@ static void spk_ttyio_send_xchar(char ch) return; } - speakup_tty->ops->send_xchar(speakup_tty, ch); + if (speakup_tty->ops->send_xchar) + speakup_tty->ops->send_xchar(speakup_tty, ch); mutex_unlock(&speakup_tty_mutex); } @@ -277,7 +278,8 @@ static void spk_ttyio_tiocmset(unsigned int set, unsigned int clear) return; } - speakup_tty->ops->tiocmset(speakup_tty, set, clear); + if (speakup_tty->ops->tiocmset) + speakup_tty->ops->tiocmset(speakup_tty, set, clear); mutex_unlock(&speakup_tty_mutex); } From 37ea7b630ae5cdea4e8ff381d9d23abfef5939e6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 30 Jan 2019 12:37:35 +0100 Subject: [PATCH 0342/1436] debugfs: debugfs_lookup() should return NULL if not found Lots of callers of debugfs_lookup() were just checking NULL to see if the file/directory was found or not. By changing this in ff9fb72bc077 ("debugfs: return error values, not NULL") we caused some subsystems to easily crash. Fixes: ff9fb72bc077 ("debugfs: return error values, not NULL") Reported-by: syzbot+b382ba6a802a3d242790@syzkaller.appspotmail.com Reported-by: Tetsuo Handa Cc: Omar Sandoval Cc: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index b16f8035b1af..29c68c5d44d5 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -254,8 +254,8 @@ MODULE_ALIAS_FS("debugfs"); * @parent: a pointer to the parent dentry of the file. * * This function will return a pointer to a dentry if it succeeds. If the file - * doesn't exist or an error occurs, %ERR_PTR(-ERROR) will be returned. The - * returned dentry must be passed to dput() when it is no longer needed. + * doesn't exist or an error occurs, %NULL will be returned. The returned + * dentry must be passed to dput() when it is no longer needed. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. @@ -265,17 +265,17 @@ struct dentry *debugfs_lookup(const char *name, struct dentry *parent) struct dentry *dentry; if (IS_ERR(parent)) - return parent; + return NULL; if (!parent) parent = debugfs_mount->mnt_root; dentry = lookup_one_len_unlocked(name, parent, strlen(name)); if (IS_ERR(dentry)) - return dentry; + return NULL; if (!d_really_is_positive(dentry)) { dput(dentry); - return ERR_PTR(-EINVAL); + return NULL; } return dentry; } From 75abec73de65a86a23019ee7a3569220ee344f37 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 17 Jan 2019 12:48:54 +0100 Subject: [PATCH 0343/1436] selftests: add binderfs selftests This adds the promised selftest for binderfs. It will verify the following things: - binderfs mounting works - binder device allocation works - performing a binder ioctl() request through a binderfs device works - binder device removal works - binder-control removal fails - binderfs unmounting works The tests are performed both privileged and unprivileged. The latter verifies that binderfs behaves correctly in user namespaces. Cc: Todd Kjos Signed-off-by: Christian Brauner Acked-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/Makefile | 1 + .../selftests/filesystems/binderfs/.gitignore | 1 + .../selftests/filesystems/binderfs/Makefile | 6 + .../filesystems/binderfs/binderfs_test.c | 275 ++++++++++++++++++ .../selftests/filesystems/binderfs/config | 3 + 5 files changed, 286 insertions(+) create mode 100644 tools/testing/selftests/filesystems/binderfs/.gitignore create mode 100644 tools/testing/selftests/filesystems/binderfs/Makefile create mode 100644 tools/testing/selftests/filesystems/binderfs/binderfs_test.c create mode 100644 tools/testing/selftests/filesystems/binderfs/config diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 1a2bd15c5b6e..400ee81a3043 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -10,6 +10,7 @@ TARGETS += drivers/dma-buf TARGETS += efivarfs TARGETS += exec TARGETS += filesystems +TARGETS += filesystems/binderfs TARGETS += firmware TARGETS += ftrace TARGETS += futex diff --git a/tools/testing/selftests/filesystems/binderfs/.gitignore b/tools/testing/selftests/filesystems/binderfs/.gitignore new file mode 100644 index 000000000000..8a5d9bf63dd4 --- /dev/null +++ b/tools/testing/selftests/filesystems/binderfs/.gitignore @@ -0,0 +1 @@ +binderfs_test diff --git a/tools/testing/selftests/filesystems/binderfs/Makefile b/tools/testing/selftests/filesystems/binderfs/Makefile new file mode 100644 index 000000000000..58cb659b56b4 --- /dev/null +++ b/tools/testing/selftests/filesystems/binderfs/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -I../../../../../usr/include/ +TEST_GEN_PROGS := binderfs_test + +include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c new file mode 100644 index 000000000000..8c2ed962e1c7 --- /dev/null +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../kselftest.h" + +static ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; +again: + ret = write(fd, buf, count); + if (ret < 0 && errno == EINTR) + goto again; + + return ret; +} + +static void write_to_file(const char *filename, const void *buf, size_t count, + int allowed_errno) +{ + int fd, saved_errno; + ssize_t ret; + + fd = open(filename, O_WRONLY | O_CLOEXEC); + if (fd < 0) + ksft_exit_fail_msg("%s - Failed to open file %s\n", + strerror(errno), filename); + + ret = write_nointr(fd, buf, count); + if (ret < 0) { + if (allowed_errno && (errno == allowed_errno)) { + close(fd); + return; + } + + goto on_error; + } + + if ((size_t)ret != count) + goto on_error; + + close(fd); + return; + +on_error: + saved_errno = errno; + close(fd); + errno = saved_errno; + + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to write to file %s\n", + strerror(errno), filename); + + ksft_exit_fail_msg("Failed to write to file %s\n", filename); +} + +static void change_to_userns(void) +{ + int ret; + uid_t uid; + gid_t gid; + /* {g,u}id_map files only allow a max of 4096 bytes written to them */ + char idmap[4096]; + + uid = getuid(); + gid = getgid(); + + ret = unshare(CLONE_NEWUSER); + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to unshare user namespace\n", + strerror(errno)); + + write_to_file("/proc/self/setgroups", "deny", strlen("deny"), ENOENT); + + ret = snprintf(idmap, sizeof(idmap), "0 %d 1", uid); + if (ret < 0 || (size_t)ret >= sizeof(idmap)) + ksft_exit_fail_msg("%s - Failed to prepare uid mapping\n", + strerror(errno)); + + write_to_file("/proc/self/uid_map", idmap, strlen(idmap), 0); + + ret = snprintf(idmap, sizeof(idmap), "0 %d 1", gid); + if (ret < 0 || (size_t)ret >= sizeof(idmap)) + ksft_exit_fail_msg("%s - Failed to prepare uid mapping\n", + strerror(errno)); + + write_to_file("/proc/self/gid_map", idmap, strlen(idmap), 0); + + ret = setgid(0); + if (ret) + ksft_exit_fail_msg("%s - Failed to setgid(0)\n", + strerror(errno)); + + ret = setuid(0); + if (ret) + ksft_exit_fail_msg("%s - Failed to setgid(0)\n", + strerror(errno)); +} + +static void change_to_mountns(void) +{ + int ret; + + ret = unshare(CLONE_NEWNS); + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to unshare mount namespace\n", + strerror(errno)); + + ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to mount / as private\n", + strerror(errno)); +} + +static void rmdir_protect_errno(const char *dir) +{ + int saved_errno = errno; + (void)rmdir(dir); + errno = saved_errno; +} + +static void __do_binderfs_test(void) +{ + int fd, ret, saved_errno; + size_t len; + ssize_t wret; + bool keep = false; + struct binderfs_device device = { 0 }; + struct binder_version version = { 0 }; + + change_to_mountns(); + + ret = mkdir("/dev/binderfs", 0755); + if (ret < 0) { + if (errno != EEXIST) + ksft_exit_fail_msg( + "%s - Failed to create binderfs mountpoint\n", + strerror(errno)); + + keep = true; + } + + ret = mount(NULL, "/dev/binderfs", "binder", 0, 0); + if (ret < 0) { + if (errno != ENODEV) + ksft_exit_fail_msg("%s - Failed to mount binderfs\n", + strerror(errno)); + + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_skip( + "The Android binderfs filesystem is not available\n"); + } + + /* binderfs mount test passed */ + ksft_inc_pass_cnt(); + + memcpy(device.name, "my-binder", strlen("my-binder")); + + fd = open("/dev/binderfs/binder-control", O_RDONLY | O_CLOEXEC); + if (fd < 0) + ksft_exit_fail_msg( + "%s - Failed to open binder-control device\n", + strerror(errno)); + + ret = ioctl(fd, BINDER_CTL_ADD, &device); + saved_errno = errno; + close(fd); + errno = saved_errno; + if (ret < 0) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg( + "%s - Failed to allocate new binder device\n", + strerror(errno)); + } + + ksft_print_msg( + "Allocated new binder device with major %d, minor %d, and name %s\n", + device.major, device.minor, device.name); + + /* binder device allocation test passed */ + ksft_inc_pass_cnt(); + + fd = open("/dev/binderfs/my-binder", O_CLOEXEC | O_RDONLY); + if (fd < 0) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg("%s - Failed to open my-binder device\n", + strerror(errno)); + } + + ret = ioctl(fd, BINDER_VERSION, &version); + saved_errno = errno; + close(fd); + errno = saved_errno; + if (ret < 0) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg( + "%s - Failed to open perform BINDER_VERSION request\n", + strerror(errno)); + } + + ksft_print_msg("Detected binder version: %d\n", + version.protocol_version); + + /* binder transaction with binderfs binder device passed */ + ksft_inc_pass_cnt(); + + ret = unlink("/dev/binderfs/my-binder"); + if (ret < 0) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg("%s - Failed to delete binder device\n", + strerror(errno)); + } + + /* binder device removal passed */ + ksft_inc_pass_cnt(); + + ret = unlink("/dev/binderfs/binder-control"); + if (!ret) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg("Managed to delete binder-control device\n"); + } else if (errno != EPERM) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg( + "%s - Failed to delete binder-control device but exited with unexpected error code\n", + strerror(errno)); + } + + /* binder-control device removal failed as expected */ + ksft_inc_xfail_cnt(); + +on_error: + ret = umount2("/dev/binderfs", MNT_DETACH); + keep ?: rmdir_protect_errno("/dev/binderfs"); + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to unmount binderfs\n", + strerror(errno)); + + /* binderfs unmount test passed */ + ksft_inc_pass_cnt(); +} + +static void binderfs_test_privileged() +{ + if (geteuid() != 0) + ksft_print_msg( + "Tests are not run as root. Skipping privileged tests\n"); + else + __do_binderfs_test(); +} + +static void binderfs_test_unprivileged() +{ + change_to_userns(); + __do_binderfs_test(); +} + +int main(int argc, char *argv[]) +{ + binderfs_test_privileged(); + binderfs_test_unprivileged(); + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/filesystems/binderfs/config b/tools/testing/selftests/filesystems/binderfs/config new file mode 100644 index 000000000000..02dd6cc9cf99 --- /dev/null +++ b/tools/testing/selftests/filesystems/binderfs/config @@ -0,0 +1,3 @@ +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDERFS=y +CONFIG_ANDROID_BINDER_IPC=y From 793c8232937610ae00bc174b87d7fc324346eaea Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 26 Jan 2019 11:23:20 +0100 Subject: [PATCH 0344/1436] binder: fix CONFIG_ANDROID_BINDER_DEVICES Several users have tried to only rely on binderfs to provide binder devices and set CONFIG_ANDROID_BINDER_DEVICES="" empty. This is a great use-case of binderfs and one that was always intended to work. However, this is currently not possible since setting CONFIG_ANDROID_BINDER_DEVICES="" emtpy will simply panic the kernel: kobject: (00000000028c2f79): attempted to be registered with empty name! WARNING: CPU: 7 PID: 1703 at lib/kobject.c:228 kobject_add_internal+0x288/0x2b0 Modules linked in: binder_linux(+) bridge stp llc ipmi_ssif gpio_ich dcdbas coretemp kvm_intel kvm irqbypass serio_raw input_leds lpc_ich i5100_edac mac_hid ipmi_si ipmi_devintf ipmi_msghandler sch_fq_codel ib_i CPU: 7 PID: 1703 Comm: modprobe Not tainted 5.0.0-rc2-brauner-binderfs #263 Hardware name: Dell DCS XS24-SC2 /XS24-SC2 , BIOS S59_3C20 04/07/2011 RIP: 0010:kobject_add_internal+0x288/0x2b0 Code: 12 95 48 c7 c7 78 63 3b 95 e8 77 35 71 ff e9 91 fe ff ff 0f 0b eb a7 0f 0b eb 9a 48 89 de 48 c7 c7 00 63 3b 95 e8 f8 95 6a ff <0f> 0b 41 bc ea ff ff ff e9 6d fe ff ff 41 bc fe ff ff ff e9 62 fe RSP: 0018:ffff973f84237a30 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff8b53e2472010 RCX: 0000000000000006 RDX: 0000000000000007 RSI: 0000000000000086 RDI: ffff8b53edbd63a0 RBP: ffff973f84237a60 R08: 0000000000000342 R09: 0000000000000004 R10: ffff973f84237af0 R11: 0000000000000001 R12: 0000000000000000 R13: ffff8b53e9f1a1e0 R14: 00000000e9f1a1e0 R15: 0000000000a00037 FS: 00007fbac36f7540(0000) GS:ffff8b53edbc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fbac364cfa7 CR3: 00000004a6d48000 CR4: 00000000000406e0 Call Trace: kobject_add+0x71/0xd0 ? _cond_resched+0x19/0x40 ? mutex_lock+0x12/0x40 device_add+0x12e/0x6b0 device_create_groups_vargs+0xe4/0xf0 device_create_with_groups+0x3f/0x60 ? _cond_resched+0x19/0x40 misc_register+0x140/0x180 binder_init+0x1ed/0x2d4 [binder_linux] ? trace_event_define_fields_binder_transaction_fd_send+0x8e/0x8e [binder_linux] do_one_initcall+0x4a/0x1c9 ? _cond_resched+0x19/0x40 ? kmem_cache_alloc_trace+0x151/0x1c0 do_init_module+0x5f/0x216 load_module+0x223d/0x2b20 __do_sys_finit_module+0xfc/0x120 ? __do_sys_finit_module+0xfc/0x120 __x64_sys_finit_module+0x1a/0x20 do_syscall_64+0x5a/0x120 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fbac3202839 Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 1f f6 2c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffd1494a908 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000055b629ebec60 RCX: 00007fbac3202839 RDX: 0000000000000000 RSI: 000055b629c20d2e RDI: 0000000000000003 RBP: 000055b629c20d2e R08: 0000000000000000 R09: 000055b629ec2310 R10: 0000000000000003 R11: 0000000000000246 R12: 0000000000000000 R13: 000055b629ebed70 R14: 0000000000040000 R15: 000055b629ebec60 So check for the empty string since strsep() will otherwise return the emtpy string which will cause kobject_add_internal() to panic when trying to add a kobject with an emtpy name. Fixes: ac4812c5ffbb ("binder: Support multiple /dev instances") Cc: Martijn Coenen Signed-off-by: Christian Brauner Acked-by: Todd Kjos Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index cdfc87629efb..57cf259de600 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -5898,21 +5898,23 @@ static int __init binder_init(void) &transaction_log_fops); } - /* - * Copy the module_parameter string, because we don't want to - * tokenize it in-place. - */ - device_names = kstrdup(binder_devices_param, GFP_KERNEL); - if (!device_names) { - ret = -ENOMEM; - goto err_alloc_device_names_failed; - } + if (strcmp(binder_devices_param, "") != 0) { + /* + * Copy the module_parameter string, because we don't want to + * tokenize it in-place. + */ + device_names = kstrdup(binder_devices_param, GFP_KERNEL); + if (!device_names) { + ret = -ENOMEM; + goto err_alloc_device_names_failed; + } - device_tmp = device_names; - while ((device_name = strsep(&device_tmp, ","))) { - ret = init_binder_device(device_name); - if (ret) - goto err_init_binder_device_failed; + device_tmp = device_names; + while ((device_name = strsep(&device_tmp, ","))) { + ret = init_binder_device(device_name); + if (ret) + goto err_init_binder_device_failed; + } } return ret; From da8ddba566ff0a883237dbc8c5dadef1ca769e19 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 23 Jan 2019 12:41:15 +0100 Subject: [PATCH 0345/1436] binderfs: respect limit on binder control creation We currently adhere to the reserved devices limit when creating new binderfs devices in binderfs instances not located in the inital ipc namespace. But it is still possible to rob the host instances of their 4 reserved devices by creating the maximum allowed number of devices in a single binderfs instance located in a non-initial ipc namespace and then mounting 4 separate binderfs instances in non-initial ipc namespaces. That happens because the limit is currently not respected for the creation of the initial binder-control device node. Block this nonsense by performing the same check in binderfs_binder_ctl_create() that we perform in binderfs_binder_device_create(). Fixes: 36bdf3cae09d ("binderfs: reserve devices for initial mount") Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 6a2185eb66c5..7a550104a722 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -395,6 +395,11 @@ static int binderfs_binder_ctl_create(struct super_block *sb) struct inode *inode = NULL; struct dentry *root = sb->s_root; struct binderfs_info *info = sb->s_fs_info; +#if defined(CONFIG_IPC_NS) + bool use_reserve = (info->ipc_ns == &init_ipc_ns); +#else + bool use_reserve = true; +#endif device = kzalloc(sizeof(*device), GFP_KERNEL); if (!device) @@ -413,7 +418,10 @@ static int binderfs_binder_ctl_create(struct super_block *sb) /* Reserve a new minor number for the new device. */ mutex_lock(&binderfs_minors_mutex); - minor = ida_alloc_max(&binderfs_minors, BINDERFS_MAX_MINOR, GFP_KERNEL); + minor = ida_alloc_max(&binderfs_minors, + use_reserve ? BINDERFS_MAX_MINOR : + BINDERFS_MAX_MINOR_CAPPED, + GFP_KERNEL); mutex_unlock(&binderfs_minors_mutex); if (minor < 0) { ret = minor; From efe814e90b98aed6d655b5a4092b9114b8b26e42 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 24 Jan 2019 14:45:02 +0200 Subject: [PATCH 0346/1436] mei: me: add ice lake point device id. Add icelake mei device id. Cc: Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 2 ++ drivers/misc/mei/pci-me.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 23739a60517f..bb1ee9834a02 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -139,6 +139,8 @@ #define MEI_DEV_ID_CNP_H 0xA360 /* Cannon Point H */ #define MEI_DEV_ID_CNP_H_4 0xA364 /* Cannon Point H 4 (iTouch) */ +#define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */ + /* * MEI HW Section */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index e89497f858ae..3ab946ad3257 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -105,6 +105,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_4, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)}, + /* required last entry */ {0, } }; From c4a46acf1db3ce547d290c29e55b3476c78dd76c Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 24 Jan 2019 14:45:03 +0200 Subject: [PATCH 0347/1436] samples: mei: use /dev/mei0 instead of /dev/mei The device was moved from misc device to character devices to support multiple mei devices. Cc: #v4.9+ Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- samples/mei/mei-amt-version.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/mei/mei-amt-version.c b/samples/mei/mei-amt-version.c index 33e67bd1dc34..32234481ad7d 100644 --- a/samples/mei/mei-amt-version.c +++ b/samples/mei/mei-amt-version.c @@ -117,7 +117,7 @@ static bool mei_init(struct mei *me, const uuid_le *guid, me->verbose = verbose; - me->fd = open("/dev/mei", O_RDWR); + me->fd = open("/dev/mei0", O_RDWR); if (me->fd == -1) { mei_err(me, "Cannot establish a handle to the Intel MEI driver\n"); goto err; From cee4c4d63ba7b0df9b2d2a6724c41b2a260d72ec Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Wed, 30 Jan 2019 10:12:26 +0200 Subject: [PATCH 0348/1436] mei: free read cb on ctrl_wr list flush There is a little window during disconnection flow when read cb is moved between lists and may be not freed. Remove moving read cbs explicitly during flash fixes this memory leak. Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/client.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 1fc8ea0f519b..ca4c9cc218a2 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -401,8 +401,11 @@ static void mei_io_list_flush_cl(struct list_head *head, struct mei_cl_cb *cb, *next; list_for_each_entry_safe(cb, next, head, list) { - if (cl == cb->cl) + if (cl == cb->cl) { list_del_init(&cb->list); + if (cb->fop_type == MEI_FOP_READ) + mei_io_cb_free(cb); + } } } From 49230b49c4398765feba809ac27ae09562cbead1 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 27 Jan 2019 21:54:50 +0200 Subject: [PATCH 0349/1436] staging: octeon: fix broken phylib usage Commit 2b3e88ea6528 ("net: phy: improve phy state checking") added checks for phylib usage, and this triggers with OCTEON ethernet and results in broken networking. Fix by replacing phy_start_aneg() with phy_start(). Fixes: 2b3e88ea6528 ("net: phy: improve phy state checking") Signed-off-by: Aaro Koskinen Signed-off-by: Greg Kroah-Hartman --- drivers/staging/octeon/ethernet-mdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/octeon/ethernet-mdio.c b/drivers/staging/octeon/ethernet-mdio.c index 2848fa71a33d..d6248eecf123 100644 --- a/drivers/staging/octeon/ethernet-mdio.c +++ b/drivers/staging/octeon/ethernet-mdio.c @@ -170,7 +170,7 @@ int cvm_oct_phy_setup_device(struct net_device *dev) return -ENODEV; priv->last_link = 0; - phy_start_aneg(phydev); + phy_start(phydev); return 0; no_phy: From d4104c5e783f5d053b97268fb92001d785de7dd5 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 29 Jan 2019 23:55:40 +0800 Subject: [PATCH 0350/1436] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei() As Al pointed out, " ... and while we are at it, what happens to unsigned int nameoff = le16_to_cpu(de[mid].nameoff); unsigned int matched = min(startprfx, endprfx); struct qstr dname = QSTR_INIT(data + nameoff, unlikely(mid >= ndirents - 1) ? maxsize - nameoff : le16_to_cpu(de[mid + 1].nameoff) - nameoff); /* string comparison without already matched prefix */ int ret = dirnamecmp(name, &dname, &matched); if le16_to_cpu(de[...].nameoff) is not monotonically increasing? I.e. what's to prevent e.g. (unsigned)-1 ending up in dname.len? Corrupted fs image shouldn't oops the kernel.. " Revisit the related lookup flow to address the issue. Fixes: d72d1ce60174 ("staging: erofs: add namei functions") Cc: # 4.19+ Suggested-by: Al Viro Signed-off-by: Gao Xiang Signed-off-by: Greg Kroah-Hartman --- drivers/staging/erofs/namei.c | 167 ++++++++++++++++++---------------- 1 file changed, 89 insertions(+), 78 deletions(-) diff --git a/drivers/staging/erofs/namei.c b/drivers/staging/erofs/namei.c index 5596c52e246d..a1300c420e63 100644 --- a/drivers/staging/erofs/namei.c +++ b/drivers/staging/erofs/namei.c @@ -15,74 +15,76 @@ #include -/* based on the value of qn->len is accurate */ -static inline int dirnamecmp(struct qstr *qn, - struct qstr *qd, unsigned int *matched) +struct erofs_qstr { + const unsigned char *name; + const unsigned char *end; +}; + +/* based on the end of qn is accurate and it must have the trailing '\0' */ +static inline int dirnamecmp(const struct erofs_qstr *qn, + const struct erofs_qstr *qd, + unsigned int *matched) { - unsigned int i = *matched, len = min(qn->len, qd->len); -loop: - if (unlikely(i >= len)) { - *matched = i; - if (qn->len < qd->len) { - /* - * actually (qn->len == qd->len) - * when qd->name[i] == '\0' - */ - return qd->name[i] == '\0' ? 0 : -1; + unsigned int i = *matched; + + /* + * on-disk error, let's only BUG_ON in the debugging mode. + * otherwise, it will return 1 to just skip the invalid name + * and go on (in consideration of the lookup performance). + */ + DBG_BUGON(qd->name > qd->end); + + /* qd could not have trailing '\0' */ + /* However it is absolutely safe if < qd->end */ + while (qd->name + i < qd->end && qd->name[i] != '\0') { + if (qn->name[i] != qd->name[i]) { + *matched = i; + return qn->name[i] > qd->name[i] ? 1 : -1; } - return (qn->len > qd->len); + ++i; } - - if (qn->name[i] != qd->name[i]) { - *matched = i; - return qn->name[i] > qd->name[i] ? 1 : -1; - } - - ++i; - goto loop; + *matched = i; + /* See comments in __d_alloc on the terminating NUL character */ + return qn->name[i] == '\0' ? 0 : 1; } -static struct erofs_dirent *find_target_dirent( - struct qstr *name, - u8 *data, int maxsize) +#define nameoff_from_disk(off, sz) (le16_to_cpu(off) & ((sz) - 1)) + +static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name, + u8 *data, + unsigned int dirblksize, + const int ndirents) { - unsigned int ndirents, head, back; + int head, back; unsigned int startprfx, endprfx; struct erofs_dirent *const de = (struct erofs_dirent *)data; - /* make sure that maxsize is valid */ - BUG_ON(maxsize < sizeof(struct erofs_dirent)); - - ndirents = le16_to_cpu(de->nameoff) / sizeof(*de); - - /* corrupted dir (may be unnecessary...) */ - BUG_ON(!ndirents); - head = 0; back = ndirents - 1; startprfx = endprfx = 0; while (head <= back) { - unsigned int mid = head + (back - head) / 2; - unsigned int nameoff = le16_to_cpu(de[mid].nameoff); + const int mid = head + (back - head) / 2; + const int nameoff = nameoff_from_disk(de[mid].nameoff, + dirblksize); unsigned int matched = min(startprfx, endprfx); - - struct qstr dname = QSTR_INIT(data + nameoff, - unlikely(mid >= ndirents - 1) ? - maxsize - nameoff : - le16_to_cpu(de[mid + 1].nameoff) - nameoff); + struct erofs_qstr dname = { + .name = data + nameoff, + .end = unlikely(mid >= ndirents - 1) ? + data + dirblksize : + data + nameoff_from_disk(de[mid + 1].nameoff, + dirblksize) + }; /* string comparison without already matched prefix */ int ret = dirnamecmp(name, &dname, &matched); - if (unlikely(!ret)) + if (unlikely(!ret)) { return de + mid; - else if (ret > 0) { + } else if (ret > 0) { head = mid + 1; startprfx = matched; - } else if (unlikely(mid < 1)) /* fix "mid" overflow */ - break; - else { + } else { back = mid - 1; endprfx = matched; } @@ -91,12 +93,13 @@ static struct erofs_dirent *find_target_dirent( return ERR_PTR(-ENOENT); } -static struct page *find_target_block_classic( - struct inode *dir, - struct qstr *name, int *_diff) +static struct page *find_target_block_classic(struct inode *dir, + struct erofs_qstr *name, + int *_diff, + int *_ndirents) { unsigned int startprfx, endprfx; - unsigned int head, back; + int head, back; struct address_space *const mapping = dir->i_mapping; struct page *candidate = ERR_PTR(-ENOENT); @@ -105,33 +108,34 @@ static struct page *find_target_block_classic( back = inode_datablocks(dir) - 1; while (head <= back) { - unsigned int mid = head + (back - head) / 2; + const int mid = head + (back - head) / 2; struct page *page = read_mapping_page(mapping, mid, NULL); - if (IS_ERR(page)) { -exact_out: - if (!IS_ERR(candidate)) /* valid candidate */ - put_page(candidate); - return page; - } else { - int diff; - unsigned int ndirents, matched; - struct qstr dname; + if (!IS_ERR(page)) { struct erofs_dirent *de = kmap_atomic(page); - unsigned int nameoff = le16_to_cpu(de->nameoff); + const int nameoff = nameoff_from_disk(de->nameoff, + EROFS_BLKSIZ); + const int ndirents = nameoff / sizeof(*de); + int diff; + unsigned int matched; + struct erofs_qstr dname; - ndirents = nameoff / sizeof(*de); - - /* corrupted dir (should have one entry at least) */ - BUG_ON(!ndirents || nameoff > PAGE_SIZE); + if (unlikely(!ndirents)) { + DBG_BUGON(1); + put_page(page); + page = ERR_PTR(-EIO); + goto out; + } matched = min(startprfx, endprfx); dname.name = (u8 *)de + nameoff; - dname.len = ndirents == 1 ? - /* since the rest of the last page is 0 */ - EROFS_BLKSIZ - nameoff - : le16_to_cpu(de[1].nameoff) - nameoff; + if (ndirents == 1) + dname.end = (u8 *)de + EROFS_BLKSIZ; + else + dname.end = (u8 *)de + + nameoff_from_disk(de[1].nameoff, + EROFS_BLKSIZ); /* string comparison without already matched prefix */ diff = dirnamecmp(name, &dname, &matched); @@ -139,7 +143,7 @@ exact_out: if (unlikely(!diff)) { *_diff = 0; - goto exact_out; + goto out; } else if (diff > 0) { head = mid + 1; startprfx = matched; @@ -147,35 +151,42 @@ exact_out: if (likely(!IS_ERR(candidate))) put_page(candidate); candidate = page; + *_ndirents = ndirents; } else { put_page(page); - if (unlikely(mid < 1)) /* fix "mid" overflow */ - break; - back = mid - 1; endprfx = matched; } + continue; } +out: /* free if the candidate is valid */ + if (!IS_ERR(candidate)) + put_page(candidate); + return page; } *_diff = 1; return candidate; } int erofs_namei(struct inode *dir, - struct qstr *name, - erofs_nid_t *nid, unsigned int *d_type) + struct qstr *name, + erofs_nid_t *nid, unsigned int *d_type) { - int diff; + int diff, ndirents; struct page *page; u8 *data; struct erofs_dirent *de; + struct erofs_qstr qn; if (unlikely(!dir->i_size)) return -ENOENT; + qn.name = name->name; + qn.end = name->name + name->len; + diff = 1; - page = find_target_block_classic(dir, name, &diff); + page = find_target_block_classic(dir, &qn, &diff, &ndirents); if (unlikely(IS_ERR(page))) return PTR_ERR(page); @@ -184,7 +195,7 @@ int erofs_namei(struct inode *dir, /* the target page has been mapped */ de = likely(diff) ? /* since the rest of the last page is 0 */ - find_target_dirent(name, data, EROFS_BLKSIZ) : + find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents) : (struct erofs_dirent *)data; if (likely(!IS_ERR(de))) { From 5aa608348f86d90f05b16dec93963801e2dd4ef5 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Tue, 29 Jan 2019 11:22:07 +0100 Subject: [PATCH 0351/1436] mic: vop: Fix broken virtqueues VOP is broken in mainline since commit 1ce9e6055fa0a9043 ("virtio_ring: introduce packed ring support"); attempting to use the virtqueues leads to various kernel crashes. I'm testing it with my not-yet-merged loopback patches, but even the in-tree MIC hardware cannot work. The problem is not in the referenced commit per se, but is due to the following hack in vop_find_vq() which depends on the layout of private structures in other source files, which that commit happened to change: /* * To reassign the used ring here we are directly accessing * struct vring_virtqueue which is a private data structure * in virtio_ring.c. At the minimum, a BUILD_BUG_ON() in * vring_new_virtqueue() would ensure that * (&vq->vring == (struct vring *) (&vq->vq + 1)); */ vr = (struct vring *)(vq + 1); vr->used = used; Fix vop by using __vring_new_virtqueue() to create the needed vring layout from the start, instead of attempting to patch in the used ring later. __vring_new_virtqueue() was added way back in commit 2a2d1382fe9dcc ("virtio: Add improved queue allocation API") in order to address mic's usecase, according to the commit message. Fixes: 1ce9e6055fa0 ("virtio_ring: introduce packed ring support") Signed-off-by: Vincent Whitchurch Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mic/vop/vop_main.c | 60 +++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c index 2bfa3a903bf9..2bd57c2ca02b 100644 --- a/drivers/misc/mic/vop/vop_main.c +++ b/drivers/misc/mic/vop/vop_main.c @@ -283,6 +283,26 @@ static void vop_del_vqs(struct virtio_device *dev) vop_del_vq(vq, idx++); } +static struct virtqueue *vop_new_virtqueue(unsigned int index, + unsigned int num, + struct virtio_device *vdev, + bool context, + void *pages, + bool (*notify)(struct virtqueue *vq), + void (*callback)(struct virtqueue *vq), + const char *name, + void *used) +{ + bool weak_barriers = false; + struct vring vring; + + vring_init(&vring, num, pages, MIC_VIRTIO_RING_ALIGN); + vring.used = used; + + return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, + notify, callback, name); +} + /* * This routine will assign vring's allocated in host/io memory. Code in * virtio_ring.c however continues to access this io memory as if it were local @@ -302,7 +322,6 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev, struct _mic_vring_info __iomem *info; void *used; int vr_size, _vr_size, err, magic; - struct vring *vr; u8 type = ioread8(&vdev->desc->type); if (index >= ioread8(&vdev->desc->num_vq)) @@ -322,17 +341,7 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev, return ERR_PTR(-ENOMEM); vdev->vr[index] = va; memset_io(va, 0x0, _vr_size); - vq = vring_new_virtqueue( - index, - le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN, - dev, - false, - ctx, - (void __force *)va, vop_notify, callback, name); - if (!vq) { - err = -ENOMEM; - goto unmap; - } + info = va + _vr_size; magic = ioread32(&info->magic); @@ -341,7 +350,6 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev, goto unmap; } - /* Allocate and reassign used ring now */ vdev->used_size[index] = PAGE_ALIGN(sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * le16_to_cpu(config.num)); @@ -351,8 +359,17 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev, err = -ENOMEM; dev_err(_vop_dev(vdev), "%s %d err %d\n", __func__, __LINE__, err); - goto del_vq; + goto unmap; } + + vq = vop_new_virtqueue(index, le16_to_cpu(config.num), dev, ctx, + (void __force *)va, vop_notify, callback, + name, used); + if (!vq) { + err = -ENOMEM; + goto free_used; + } + vdev->used[index] = dma_map_single(&vpdev->dev, used, vdev->used_size[index], DMA_BIDIRECTIONAL); @@ -360,26 +377,17 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev, err = -ENOMEM; dev_err(_vop_dev(vdev), "%s %d err %d\n", __func__, __LINE__, err); - goto free_used; + goto del_vq; } writeq(vdev->used[index], &vqconfig->used_address); - /* - * To reassign the used ring here we are directly accessing - * struct vring_virtqueue which is a private data structure - * in virtio_ring.c. At the minimum, a BUILD_BUG_ON() in - * vring_new_virtqueue() would ensure that - * (&vq->vring == (struct vring *) (&vq->vq + 1)); - */ - vr = (struct vring *)(vq + 1); - vr->used = used; vq->priv = vdev; return vq; +del_vq: + vring_del_virtqueue(vq); free_used: free_pages((unsigned long)used, get_order(vdev->used_size[index])); -del_vq: - vring_del_virtqueue(vq); unmap: vpdev->hw_ops->iounmap(vpdev, vdev->vr[index]); return ERR_PTR(err); From c418fd6c01fbc5516a2cd1eaf1df1ec86869028a Mon Sep 17 00:00:00 2001 From: Paul Elder Date: Wed, 30 Jan 2019 08:13:21 -0600 Subject: [PATCH 0352/1436] usb: gadget: musb: fix short isoc packets with inventra dma Handling short packets (length < max packet size) in the Inventra DMA engine in the MUSB driver causes the MUSB DMA controller to hang. An example of a problem that is caused by this problem is when streaming video out of a UVC gadget, only the first video frame is transferred. For short packets (mode-0 or mode-1 DMA), MUSB_TXCSR_TXPKTRDY must be set manually by the driver. This was previously done in musb_g_tx (musb_gadget.c), but incorrectly (all csr flags were cleared, and only MUSB_TXCSR_MODE and MUSB_TXCSR_TXPKTRDY were set). Fixing that problem allows some requests to be transferred correctly, but multiple requests were often put together in one USB packet, and caused problems if the packet size was not a multiple of 4. Instead, set MUSB_TXCSR_TXPKTRDY in dma_controller_irq (musbhsdma.c), just like host mode transfers. This topic was originally tackled by Nicolas Boichat [0] [1] and is discussed further at [2] as part of his GSoC project [3]. [0] https://groups.google.com/forum/?hl=en#!topic/beagleboard-gsoc/k8Azwfp75CU [1] https://gitorious.org/beagleboard-usbsniffer/beagleboard-usbsniffer-kernel/commit/b0be3b6cc195ba732189b04f1d43ec843c3e54c9?p=beagleboard-usbsniffer:beagleboard-usbsniffer-kernel.git;a=patch;h=b0be3b6cc195ba732189b04f1d43ec843c3e54c9 [2] http://beagleboard-usbsniffer.blogspot.com/2010/07/musb-isochronous-transfers-fixed.html [3] http://elinux.org/BeagleBoard/GSoC/USBSniffer Fixes: 550a7375fe72 ("USB: Add MUSB and TUSB support") Signed-off-by: Paul Elder Signed-off-by: Bin Liu Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_gadget.c | 13 +------------ drivers/usb/musb/musbhsdma.c | 21 +++++++++++---------- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index eae8b1b1b45b..ffe462a657b1 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -452,13 +452,10 @@ void musb_g_tx(struct musb *musb, u8 epnum) } if (request) { - u8 is_dma = 0; - bool short_packet = false; trace_musb_req_tx(req); if (dma && (csr & MUSB_TXCSR_DMAENAB)) { - is_dma = 1; csr |= MUSB_TXCSR_P_WZC_BITS; csr &= ~(MUSB_TXCSR_DMAENAB | MUSB_TXCSR_P_UNDERRUN | MUSB_TXCSR_TXPKTRDY | MUSB_TXCSR_AUTOSET); @@ -476,16 +473,8 @@ void musb_g_tx(struct musb *musb, u8 epnum) */ if ((request->zero && request->length) && (request->length % musb_ep->packet_sz == 0) - && (request->actual == request->length)) - short_packet = true; + && (request->actual == request->length)) { - if ((musb_dma_inventra(musb) || musb_dma_ux500(musb)) && - (is_dma && (!dma->desired_mode || - (request->actual & - (musb_ep->packet_sz - 1))))) - short_packet = true; - - if (short_packet) { /* * On DMA completion, FIFO may not be * available yet... diff --git a/drivers/usb/musb/musbhsdma.c b/drivers/usb/musb/musbhsdma.c index a688f7f87829..5fc6825745f2 100644 --- a/drivers/usb/musb/musbhsdma.c +++ b/drivers/usb/musb/musbhsdma.c @@ -346,12 +346,10 @@ static irqreturn_t dma_controller_irq(int irq, void *private_data) channel->status = MUSB_DMA_STATUS_FREE; /* completed */ - if ((devctl & MUSB_DEVCTL_HM) - && (musb_channel->transmit) - && ((channel->desired_mode == 0) - || (channel->actual_len & - (musb_channel->max_packet_sz - 1))) - ) { + if (musb_channel->transmit && + (!channel->desired_mode || + (channel->actual_len % + musb_channel->max_packet_sz))) { u8 epnum = musb_channel->epnum; int offset = musb->io.ep_offset(epnum, MUSB_TXCSR); @@ -363,11 +361,14 @@ static irqreturn_t dma_controller_irq(int irq, void *private_data) */ musb_ep_select(mbase, epnum); txcsr = musb_readw(mbase, offset); - txcsr &= ~(MUSB_TXCSR_DMAENAB + if (channel->desired_mode == 1) { + txcsr &= ~(MUSB_TXCSR_DMAENAB | MUSB_TXCSR_AUTOSET); - musb_writew(mbase, offset, txcsr); - /* Send out the packet */ - txcsr &= ~MUSB_TXCSR_DMAMODE; + musb_writew(mbase, offset, txcsr); + /* Send out the packet */ + txcsr &= ~MUSB_TXCSR_DMAMODE; + txcsr |= MUSB_TXCSR_DMAENAB; + } txcsr |= MUSB_TXCSR_TXPKTRDY; musb_writew(mbase, offset, txcsr); } From 2e3c18d0ada16f145087b2687afcad1748c0827c Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 30 Jan 2019 22:21:45 +0900 Subject: [PATCH 0353/1436] block: pass no-op callback to INIT_WORK(). syzbot is hitting flush_work() warning caused by commit 4d43d395fed12463 ("workqueue: Try to catch flush_work() without INIT_WORK().") [1]. Although that commit did not expect INIT_WORK(NULL) case, calling flush_work() without setting a valid callback should be avoided anyway. Fix this problem by setting a no-op callback instead of NULL. [1] https://syzkaller.appspot.com/bug?id=e390366bc48bc82a7c668326e0663be3b91cbd29 Signed-off-by: Tetsuo Handa Reported-and-tested-by: syzbot Cc: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index 3c5f61ceeb67..6b78ec56a4f2 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -462,6 +462,10 @@ static void blk_rq_timed_out_timer(struct timer_list *t) kblockd_schedule_work(&q->timeout_work); } +static void blk_timeout_work(struct work_struct *work) +{ +} + /** * blk_alloc_queue_node - allocate a request queue * @gfp_mask: memory allocation flags @@ -505,7 +509,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) timer_setup(&q->backing_dev_info->laptop_mode_wb_timer, laptop_mode_timer_fn, 0); timer_setup(&q->timeout, blk_rq_timed_out_timer, 0); - INIT_WORK(&q->timeout_work, NULL); + INIT_WORK(&q->timeout_work, blk_timeout_work); INIT_LIST_HEAD(&q->icq_list); #ifdef CONFIG_BLK_CGROUP INIT_LIST_HEAD(&q->blkg_list); From 85bd6e61f34dffa8ec2dc75ff3c02ee7b2f1cbce Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Wed, 30 Jan 2019 17:01:56 +0800 Subject: [PATCH 0354/1436] blk-mq: fix a hung issue when fsync Florian reported a io hung issue when fsync(). It should be triggered by following race condition. data + post flush a flush blk_flush_complete_seq case REQ_FSEQ_DATA blk_flush_queue_rq issued to driver blk_mq_dispatch_rq_list try to issue a flush req failed due to NON-NCQ command .queue_rq return BLK_STS_DEV_RESOURCE request completion req->end_io // doesn't check RESTART mq_flush_data_end_io case REQ_FSEQ_POSTFLUSH blk_kick_flush do nothing because previous flush has not been completed blk_mq_run_hw_queue insert rq to hctx->dispatch due to RESTART is still set, do nothing To fix this, replace the blk_mq_run_hw_queue in mq_flush_data_end_io with blk_mq_sched_restart to check and clear the RESTART flag. Fixes: bd166ef1 (blk-mq-sched: add framework for MQ capable IO schedulers) Reported-by: Florian Stecker Tested-by: Florian Stecker Signed-off-by: Jianchao Wang Signed-off-by: Jens Axboe --- block/blk-flush.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-flush.c b/block/blk-flush.c index a3fc7191c694..6e0f2d97fc6d 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -335,7 +335,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error) blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error); spin_unlock_irqrestore(&fq->mq_flush_lock, flags); - blk_mq_run_hw_queue(hctx, true); + blk_mq_sched_restart(hctx); } /** From 8950dcd83ae7d62bdc2a60507949acebd85399f2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 24 Jan 2019 10:31:32 +0800 Subject: [PATCH 0355/1436] iommu/vt-d: Leave scalable mode default off Commit 765b6a98c1de3 ("iommu/vt-d: Enumerate the scalable mode capability") enables VT-d scalable mode if hardware advertises the capability. As we will bring up different features and use cases to upstream in different patch series, it will leave some intermediate kernel versions which support partial features. Hence, end user might run into problems when they use such kernels on bare metals or virtualization environments. This leaves scalable mode default off and end users could turn it on with "intel-iommu=sm_on" only when they have clear ideas about which scalable features are supported in the kernel. Cc: Liu Yi L Cc: Jacob Pan Suggested-by: Ashok Raj Suggested-by: Kevin Tian Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- Documentation/admin-guide/kernel-parameters.txt | 7 +++---- drivers/iommu/intel-iommu.c | 8 ++++---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b799bcf67d7b..858b6c0b9a15 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1696,12 +1696,11 @@ By default, super page will be supported if Intel IOMMU has the capability. With this option, super page will not be supported. - sm_off [Default Off] - By default, scalable mode will be supported if the + sm_on [Default Off] + By default, scalable mode will be disabled even if the hardware advertises that it has support for the scalable mode translation. With this option set, scalable mode - will not be used even on hardware which claims to support - it. + will be used on hardware which claims to support it. tboot_noforce [Default Off] Do not force the Intel IOMMU enabled under tboot. By default, tboot will force Intel IOMMU on, which diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 1457f931218e..78188bf7e90d 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -363,7 +363,7 @@ static int dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; -static int intel_iommu_sm = 1; +static int intel_iommu_sm; static int iommu_identity_mapping; #define IDENTMAP_ALL 1 @@ -456,9 +456,9 @@ static int __init intel_iommu_setup(char *str) } else if (!strncmp(str, "sp_off", 6)) { pr_info("Disable supported super page\n"); intel_iommu_superpage = 0; - } else if (!strncmp(str, "sm_off", 6)) { - pr_info("Intel-IOMMU: disable scalable mode support\n"); - intel_iommu_sm = 0; + } else if (!strncmp(str, "sm_on", 5)) { + pr_info("Intel-IOMMU: scalable mode supported\n"); + intel_iommu_sm = 1; } else if (!strncmp(str, "tboot_noforce", 13)) { printk(KERN_INFO "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); From 92900e5160a5444d47dd376bc40066b709fbb5a6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 27 Jan 2019 04:58:00 +0000 Subject: [PATCH 0356/1436] btrfs: fix potential oops in device_list_add alloc_fs_devices() can return ERR_PTR(-ENOMEM), so dereferencing its result before the check for IS_ERR() is a bad idea. Fixes: d1a63002829a4 ("btrfs: add members to fs_devices to track fsid changes") Reviewed-by: Nikolay Borisov Reviewed-by: Anand Jain Signed-off-by: Al Viro Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3e4f8f88353e..15561926ab32 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -957,11 +957,11 @@ static noinline struct btrfs_device *device_list_add(const char *path, else fs_devices = alloc_fs_devices(disk_super->fsid, NULL); - fs_devices->fsid_change = fsid_change_in_progress; - if (IS_ERR(fs_devices)) return ERR_CAST(fs_devices); + fs_devices->fsid_change = fsid_change_in_progress; + mutex_lock(&fs_devices->device_list_mutex); list_add(&fs_devices->fs_list, &fs_uuids); From c7cc64a98512ffc41df86d14a414eb3b09bf7481 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 23 Jan 2019 17:09:16 +0100 Subject: [PATCH 0357/1436] btrfs: clean up pending block groups when transaction commit aborts The fstests generic/475 stresses transaction aborts and can reveal space accounting or use-after-free bugs regarding block goups. In this case the pending block groups that remain linked to the structures after transaction commit aborts in the middle. The corrupted slabs lead to failures in following tests, eg. generic/476 [ 8172.752887] BUG: unable to handle kernel NULL pointer dereference at 0000000000000058 [ 8172.755799] #PF error: [normal kernel read fault] [ 8172.757571] PGD 661ae067 P4D 661ae067 PUD 3db8e067 PMD 0 [ 8172.759000] Oops: 0000 [#1] PREEMPT SMP [ 8172.760209] CPU: 0 PID: 39 Comm: kswapd0 Tainted: G W 5.0.0-rc2-default #408 [ 8172.762495] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626cc-prebuilt.qemu-project.org 04/01/2014 [ 8172.765772] RIP: 0010:shrink_page_list+0x2f9/0xe90 [ 8172.770453] RSP: 0018:ffff967f00663b18 EFLAGS: 00010287 [ 8172.771184] RAX: 0000000000000000 RBX: ffff967f00663c20 RCX: 0000000000000000 [ 8172.772850] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8c0620ab20e0 [ 8172.774629] RBP: ffff967f00663dd8 R08: 0000000000000000 R09: 0000000000000000 [ 8172.776094] R10: ffff8c0620ab22f8 R11: ffff8c063f772688 R12: ffff967f00663b78 [ 8172.777533] R13: ffff8c063f625600 R14: ffff8c063f625608 R15: dead000000000200 [ 8172.778886] FS: 0000000000000000(0000) GS:ffff8c063d400000(0000) knlGS:0000000000000000 [ 8172.780545] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 8172.781787] CR2: 0000000000000058 CR3: 000000004e962000 CR4: 00000000000006f0 [ 8172.783547] Call Trace: [ 8172.784112] shrink_inactive_list+0x194/0x410 [ 8172.784747] shrink_node_memcg.constprop.85+0x3a5/0x6a0 [ 8172.785472] shrink_node+0x62/0x1e0 [ 8172.786011] balance_pgdat+0x216/0x460 [ 8172.786577] kswapd+0xe3/0x4a0 [ 8172.787085] ? finish_wait+0x80/0x80 [ 8172.787795] ? balance_pgdat+0x460/0x460 [ 8172.788799] kthread+0x116/0x130 [ 8172.789640] ? kthread_create_on_node+0x60/0x60 [ 8172.790323] ret_from_fork+0x24/0x30 [ 8172.794253] CR2: 0000000000000058 or accounting errors at umount time: [ 8159.537251] WARNING: CPU: 2 PID: 19031 at fs/btrfs/extent-tree.c:5987 btrfs_free_block_groups+0x3d5/0x410 [btrfs] [ 8159.543325] CPU: 2 PID: 19031 Comm: umount Tainted: G W 5.0.0-rc2-default #408 [ 8159.545472] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626cc-prebuilt.qemu-project.org 04/01/2014 [ 8159.548155] RIP: 0010:btrfs_free_block_groups+0x3d5/0x410 [btrfs] [ 8159.554030] RSP: 0018:ffff967f079cbde8 EFLAGS: 00010206 [ 8159.555144] RAX: 0000000001000000 RBX: ffff8c06366cf800 RCX: 0000000000000000 [ 8159.556730] RDX: 0000000000000002 RSI: 0000000000000001 RDI: ffff8c06255ad800 [ 8159.558279] RBP: ffff8c0637ac0000 R08: 0000000000000001 R09: 0000000000000000 [ 8159.559797] R10: 0000000000000000 R11: 0000000000000001 R12: ffff8c0637ac0108 [ 8159.561296] R13: ffff8c0637ac0158 R14: 0000000000000000 R15: dead000000000100 [ 8159.562852] FS: 00007f7f693b9fc0(0000) GS:ffff8c063d800000(0000) knlGS:0000000000000000 [ 8159.564839] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 8159.566160] CR2: 00007f7f68fab7b0 CR3: 000000000aec7000 CR4: 00000000000006e0 [ 8159.567898] Call Trace: [ 8159.568597] close_ctree+0x17f/0x350 [btrfs] [ 8159.569628] generic_shutdown_super+0x64/0x100 [ 8159.570808] kill_anon_super+0x14/0x30 [ 8159.571857] btrfs_kill_super+0x12/0xa0 [btrfs] [ 8159.573063] deactivate_locked_super+0x29/0x60 [ 8159.574234] cleanup_mnt+0x3b/0x70 [ 8159.575176] task_work_run+0x98/0xc0 [ 8159.576177] exit_to_usermode_loop+0x83/0x90 [ 8159.577315] do_syscall_64+0x15b/0x180 [ 8159.578339] entry_SYSCALL_64_after_hwframe+0x49/0xbe This fix is based on 2 Josef's patches that used sideefects of btrfs_create_pending_block_groups, this fix introduces the helper that does what we need. CC: stable@vger.kernel.org # 4.4+ CC: Josef Bacik Reviewed-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f15cf46f1b9d..4ec2b660d014 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1871,6 +1871,21 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err) kmem_cache_free(btrfs_trans_handle_cachep, trans); } +/* + * Release reserved delayed ref space of all pending block groups of the + * transaction and remove them from the list + */ +static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans) +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + struct btrfs_block_group_cache *block_group, *tmp; + + list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { + btrfs_delayed_refs_rsv_release(fs_info, 1); + list_del_init(&block_group->bg_list); + } +} + static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) { /* @@ -2262,6 +2277,7 @@ scrub_continue: btrfs_scrub_continue(fs_info); cleanup_transaction: btrfs_trans_release_metadata(trans); + btrfs_cleanup_pending_block_groups(trans); btrfs_trans_release_chunk_metadata(trans); trans->block_rsv = NULL; btrfs_warn(fs_info, "Skipping commit of aborted transaction."); From 532b618bdf237250d6d4566536d4b6ce3d0a31fe Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 30 Jan 2019 07:54:12 -0600 Subject: [PATCH 0358/1436] btrfs: On error always free subvol_name in btrfs_mount The subvol_name is allocated in btrfs_parse_subvol_options and is consumed and freed in mount_subvol. Add a free to the error paths that don't call mount_subvol so that it is guaranteed that subvol_name is freed when an error happens. Fixes: 312c89fbca06 ("btrfs: cleanup btrfs_mount() using btrfs_mount_root()") Cc: stable@vger.kernel.org # v4.19+ Reviewed-by: Nikolay Borisov Signed-off-by: "Eric W. Biederman" Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 368a5b9e6c13..74023786a735 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1677,6 +1677,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, flags | SB_RDONLY, device_name, data); if (IS_ERR(mnt_root)) { root = ERR_CAST(mnt_root); + kfree(subvol_name); goto out; } @@ -1686,12 +1687,14 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, if (error < 0) { root = ERR_PTR(error); mntput(mnt_root); + kfree(subvol_name); goto out; } } } if (IS_ERR(mnt_root)) { root = ERR_CAST(mnt_root); + kfree(subvol_name); goto out; } From 6e11ea9de9576a644045ffdc2067c09bc2012eda Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 30 Jan 2019 10:55:17 +0000 Subject: [PATCH 0359/1436] drm/amdgpu: Transfer fences to dmabuf importer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amdgpu only uses shared-fences internally, but dmabuf importers rely on implicit write hazard tracking via the reservation_object.fence_excl. For example, the importer use the write hazard for timing a page flip to only occur after the exporter has finished flushing its write into the surface. As such, on exporting a dmabuf, we must either flush all outstanding fences (for we do not know which are writes and should have been exclusive) or alternatively create a new exclusive fence that is the composite of all the existing shared fences, and so will only be signaled when all earlier fences are signaled (ensuring that we can not be signaled before the completion of any earlier write). v2: reservation_object is already locked by amdgpu_bo_reserve() v3: Replace looping with get_fences_rcu and special case the promotion of a single shared fence directly to an exclusive fence, bypassing the fence array. v4: Drop the fence array ref after assigning to reservation_object Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107341 Testcase: igt/amd_prime/amd-to-i915 References: 8e94a46c1770 ("drm/amdgpu: Attach exclusive fence to prime exported bo's. (v5)") Signed-off-by: Chris Wilson Cc: Alex Deucher Cc: "Christian König" Reviewed-by: "Christian König" Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 59 ++++++++++++++++++++--- 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 71913a18d142..a38e0fb4a6fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -38,6 +38,7 @@ #include "amdgpu_gem.h" #include #include +#include /** * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table @@ -187,6 +188,48 @@ error: return ERR_PTR(ret); } +static int +__reservation_object_make_exclusive(struct reservation_object *obj) +{ + struct dma_fence **fences; + unsigned int count; + int r; + + if (!reservation_object_get_list(obj)) /* no shared fences to convert */ + return 0; + + r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences); + if (r) + return r; + + if (count == 0) { + /* Now that was unexpected. */ + } else if (count == 1) { + reservation_object_add_excl_fence(obj, fences[0]); + dma_fence_put(fences[0]); + kfree(fences); + } else { + struct dma_fence_array *array; + + array = dma_fence_array_create(count, fences, + dma_fence_context_alloc(1), 0, + false); + if (!array) + goto err_fences_put; + + reservation_object_add_excl_fence(obj, &array->base); + dma_fence_put(&array->base); + } + + return 0; + +err_fences_put: + while (count--) + dma_fence_put(fences[count]); + kfree(fences); + return -ENOMEM; +} + /** * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation * @dma_buf: Shared DMA buffer @@ -218,16 +261,16 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, if (attach->dev->driver != adev->dev->driver) { /* - * Wait for all shared fences to complete before we switch to future - * use of exclusive fence on this prime shared bo. + * We only create shared fences for internal use, but importers + * of the dmabuf rely on exclusive fences for implicitly + * tracking write hazards. As any of the current fences may + * correspond to a write, we need to convert all existing + * fences on the reservation object into a single exclusive + * fence. */ - r = reservation_object_wait_timeout_rcu(bo->tbo.resv, - true, false, - MAX_SCHEDULE_TIMEOUT); - if (unlikely(r < 0)) { - DRM_DEBUG_PRIME("Fence wait failed: %li\n", r); + r = __reservation_object_make_exclusive(bo->tbo.resv); + if (r) goto error_unreserve; - } } /* pin buffer into GTT */ From 798badf8467f41af6dfbf5621e1fc966c80446fd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 30 Jan 2019 19:17:53 +0100 Subject: [PATCH 0360/1436] Revert "staging: erofs: keep corrupted fs from crashing kernel in erofs_namei()" This reverts commit d4104c5e783f5d053b97268fb92001d785de7dd5. Turns out it still needs some more work, I merged it to soon :( Reported-by: Gao Xiang Reported-by: Dan Carpenter Cc: Al Viro Signed-off-by: Greg Kroah-Hartman --- drivers/staging/erofs/namei.c | 167 ++++++++++++++++------------------ 1 file changed, 78 insertions(+), 89 deletions(-) diff --git a/drivers/staging/erofs/namei.c b/drivers/staging/erofs/namei.c index a1300c420e63..5596c52e246d 100644 --- a/drivers/staging/erofs/namei.c +++ b/drivers/staging/erofs/namei.c @@ -15,76 +15,74 @@ #include -struct erofs_qstr { - const unsigned char *name; - const unsigned char *end; -}; - -/* based on the end of qn is accurate and it must have the trailing '\0' */ -static inline int dirnamecmp(const struct erofs_qstr *qn, - const struct erofs_qstr *qd, - unsigned int *matched) +/* based on the value of qn->len is accurate */ +static inline int dirnamecmp(struct qstr *qn, + struct qstr *qd, unsigned int *matched) { - unsigned int i = *matched; - - /* - * on-disk error, let's only BUG_ON in the debugging mode. - * otherwise, it will return 1 to just skip the invalid name - * and go on (in consideration of the lookup performance). - */ - DBG_BUGON(qd->name > qd->end); - - /* qd could not have trailing '\0' */ - /* However it is absolutely safe if < qd->end */ - while (qd->name + i < qd->end && qd->name[i] != '\0') { - if (qn->name[i] != qd->name[i]) { - *matched = i; - return qn->name[i] > qd->name[i] ? 1 : -1; + unsigned int i = *matched, len = min(qn->len, qd->len); +loop: + if (unlikely(i >= len)) { + *matched = i; + if (qn->len < qd->len) { + /* + * actually (qn->len == qd->len) + * when qd->name[i] == '\0' + */ + return qd->name[i] == '\0' ? 0 : -1; } - ++i; + return (qn->len > qd->len); } - *matched = i; - /* See comments in __d_alloc on the terminating NUL character */ - return qn->name[i] == '\0' ? 0 : 1; + + if (qn->name[i] != qd->name[i]) { + *matched = i; + return qn->name[i] > qd->name[i] ? 1 : -1; + } + + ++i; + goto loop; } -#define nameoff_from_disk(off, sz) (le16_to_cpu(off) & ((sz) - 1)) - -static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name, - u8 *data, - unsigned int dirblksize, - const int ndirents) +static struct erofs_dirent *find_target_dirent( + struct qstr *name, + u8 *data, int maxsize) { - int head, back; + unsigned int ndirents, head, back; unsigned int startprfx, endprfx; struct erofs_dirent *const de = (struct erofs_dirent *)data; + /* make sure that maxsize is valid */ + BUG_ON(maxsize < sizeof(struct erofs_dirent)); + + ndirents = le16_to_cpu(de->nameoff) / sizeof(*de); + + /* corrupted dir (may be unnecessary...) */ + BUG_ON(!ndirents); + head = 0; back = ndirents - 1; startprfx = endprfx = 0; while (head <= back) { - const int mid = head + (back - head) / 2; - const int nameoff = nameoff_from_disk(de[mid].nameoff, - dirblksize); + unsigned int mid = head + (back - head) / 2; + unsigned int nameoff = le16_to_cpu(de[mid].nameoff); unsigned int matched = min(startprfx, endprfx); - struct erofs_qstr dname = { - .name = data + nameoff, - .end = unlikely(mid >= ndirents - 1) ? - data + dirblksize : - data + nameoff_from_disk(de[mid + 1].nameoff, - dirblksize) - }; + + struct qstr dname = QSTR_INIT(data + nameoff, + unlikely(mid >= ndirents - 1) ? + maxsize - nameoff : + le16_to_cpu(de[mid + 1].nameoff) - nameoff); /* string comparison without already matched prefix */ int ret = dirnamecmp(name, &dname, &matched); - if (unlikely(!ret)) { + if (unlikely(!ret)) return de + mid; - } else if (ret > 0) { + else if (ret > 0) { head = mid + 1; startprfx = matched; - } else { + } else if (unlikely(mid < 1)) /* fix "mid" overflow */ + break; + else { back = mid - 1; endprfx = matched; } @@ -93,13 +91,12 @@ static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name, return ERR_PTR(-ENOENT); } -static struct page *find_target_block_classic(struct inode *dir, - struct erofs_qstr *name, - int *_diff, - int *_ndirents) +static struct page *find_target_block_classic( + struct inode *dir, + struct qstr *name, int *_diff) { unsigned int startprfx, endprfx; - int head, back; + unsigned int head, back; struct address_space *const mapping = dir->i_mapping; struct page *candidate = ERR_PTR(-ENOENT); @@ -108,34 +105,33 @@ static struct page *find_target_block_classic(struct inode *dir, back = inode_datablocks(dir) - 1; while (head <= back) { - const int mid = head + (back - head) / 2; + unsigned int mid = head + (back - head) / 2; struct page *page = read_mapping_page(mapping, mid, NULL); - if (!IS_ERR(page)) { - struct erofs_dirent *de = kmap_atomic(page); - const int nameoff = nameoff_from_disk(de->nameoff, - EROFS_BLKSIZ); - const int ndirents = nameoff / sizeof(*de); + if (IS_ERR(page)) { +exact_out: + if (!IS_ERR(candidate)) /* valid candidate */ + put_page(candidate); + return page; + } else { int diff; - unsigned int matched; - struct erofs_qstr dname; + unsigned int ndirents, matched; + struct qstr dname; + struct erofs_dirent *de = kmap_atomic(page); + unsigned int nameoff = le16_to_cpu(de->nameoff); - if (unlikely(!ndirents)) { - DBG_BUGON(1); - put_page(page); - page = ERR_PTR(-EIO); - goto out; - } + ndirents = nameoff / sizeof(*de); + + /* corrupted dir (should have one entry at least) */ + BUG_ON(!ndirents || nameoff > PAGE_SIZE); matched = min(startprfx, endprfx); dname.name = (u8 *)de + nameoff; - if (ndirents == 1) - dname.end = (u8 *)de + EROFS_BLKSIZ; - else - dname.end = (u8 *)de + - nameoff_from_disk(de[1].nameoff, - EROFS_BLKSIZ); + dname.len = ndirents == 1 ? + /* since the rest of the last page is 0 */ + EROFS_BLKSIZ - nameoff + : le16_to_cpu(de[1].nameoff) - nameoff; /* string comparison without already matched prefix */ diff = dirnamecmp(name, &dname, &matched); @@ -143,7 +139,7 @@ static struct page *find_target_block_classic(struct inode *dir, if (unlikely(!diff)) { *_diff = 0; - goto out; + goto exact_out; } else if (diff > 0) { head = mid + 1; startprfx = matched; @@ -151,42 +147,35 @@ static struct page *find_target_block_classic(struct inode *dir, if (likely(!IS_ERR(candidate))) put_page(candidate); candidate = page; - *_ndirents = ndirents; } else { put_page(page); + if (unlikely(mid < 1)) /* fix "mid" overflow */ + break; + back = mid - 1; endprfx = matched; } - continue; } -out: /* free if the candidate is valid */ - if (!IS_ERR(candidate)) - put_page(candidate); - return page; } *_diff = 1; return candidate; } int erofs_namei(struct inode *dir, - struct qstr *name, - erofs_nid_t *nid, unsigned int *d_type) + struct qstr *name, + erofs_nid_t *nid, unsigned int *d_type) { - int diff, ndirents; + int diff; struct page *page; u8 *data; struct erofs_dirent *de; - struct erofs_qstr qn; if (unlikely(!dir->i_size)) return -ENOENT; - qn.name = name->name; - qn.end = name->name + name->len; - diff = 1; - page = find_target_block_classic(dir, &qn, &diff, &ndirents); + page = find_target_block_classic(dir, name, &diff); if (unlikely(IS_ERR(page))) return PTR_ERR(page); @@ -195,7 +184,7 @@ int erofs_namei(struct inode *dir, /* the target page has been mapped */ de = likely(diff) ? /* since the rest of the last page is 0 */ - find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents) : + find_target_dirent(name, data, EROFS_BLKSIZ) : (struct erofs_dirent *)data; if (likely(!IS_ERR(de))) { From 63ff03ab786ab1bc6cca01d48eacd22c95b9b3eb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Jan 2019 22:43:17 +0100 Subject: [PATCH 0361/1436] Revert "socket: fix struct ifreq size in compat ioctl" This reverts commit 1cebf8f143c2 ("socket: fix struct ifreq size in compat ioctl"), it's a bugfix for another commit that I'll revert next. This is not a 'perfect' revert, I'm keeping some coding style intact rather than revert to the state with indentation errors. Cc: stable@vger.kernel.org Fixes: 1cebf8f143c2 ("socket: fix struct ifreq size in compat ioctl") Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/socket.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/net/socket.c b/net/socket.c index e89884e2197b..63b53af7379b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -941,8 +941,7 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) EXPORT_SYMBOL(dlci_ioctl_set); static long sock_do_ioctl(struct net *net, struct socket *sock, - unsigned int cmd, unsigned long arg, - unsigned int ifreq_size) + unsigned int cmd, unsigned long arg) { int err; void __user *argp = (void __user *)arg; @@ -968,11 +967,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, } else { struct ifreq ifr; bool need_copyout; - if (copy_from_user(&ifr, argp, ifreq_size)) + if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) return -EFAULT; err = dev_ioctl(net, cmd, &ifr, &need_copyout); if (!err && need_copyout) - if (copy_to_user(argp, &ifr, ifreq_size)) + if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) return -EFAULT; } return err; @@ -1071,8 +1070,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) err = open_related_ns(&net->ns, get_net_ns); break; default: - err = sock_do_ioctl(net, sock, cmd, arg, - sizeof(struct ifreq)); + err = sock_do_ioctl(net, sock, cmd, arg); break; } return err; @@ -2780,8 +2778,7 @@ static int do_siocgstamp(struct net *net, struct socket *sock, int err; set_fs(KERNEL_DS); - err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv, - sizeof(struct compat_ifreq)); + err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv); set_fs(old_fs); if (!err) err = compat_put_timeval(&ktv, up); @@ -2797,8 +2794,7 @@ static int do_siocgstampns(struct net *net, struct socket *sock, int err; set_fs(KERNEL_DS); - err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts, - sizeof(struct compat_ifreq)); + err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts); set_fs(old_fs); if (!err) err = compat_put_timespec(&kts, up); @@ -3109,8 +3105,7 @@ static int routing_ioctl(struct net *net, struct socket *sock, } set_fs(KERNEL_DS); - ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r, - sizeof(struct compat_ifreq)); + ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); set_fs(old_fs); out: @@ -3223,8 +3218,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCBONDSETHWADDR: case SIOCBONDCHANGEACTIVE: case SIOCGIFNAME: - return sock_do_ioctl(net, sock, cmd, arg, - sizeof(struct compat_ifreq)); + return sock_do_ioctl(net, sock, cmd, arg); } return -ENOIOCTLCMD; From 37ac39bdddc528c998a9f36db36937de923fdf2a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Jan 2019 22:43:18 +0100 Subject: [PATCH 0362/1436] Revert "kill dev_ifsioc()" This reverts commit bf4405737f9f ("kill dev_ifsioc()"). This wasn't really unused as implied by the original commit, it still handles the copy to/from user differently, and the commit thus caused issues such as https://bugzilla.kernel.org/show_bug.cgi?id=199469 and https://bugzilla.kernel.org/show_bug.cgi?id=202273 However, deviating from a strict revert, rename dev_ifsioc() to compat_ifreq_ioctl() to be clearer as to its purpose and add a comment. Cc: stable@vger.kernel.org Fixes: bf4405737f9f ("kill dev_ifsioc()") Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/socket.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/net/socket.c b/net/socket.c index 63b53af7379b..fbf80f9fb057 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2990,6 +2990,53 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, return dev_ioctl(net, cmd, &ifreq, NULL); } +static int compat_ifreq_ioctl(struct net *net, struct socket *sock, + unsigned int cmd, + struct compat_ifreq __user *uifr32) +{ + struct ifreq __user *uifr; + int err; + + /* Handle the fact that while struct ifreq has the same *layout* on + * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data, + * which are handled elsewhere, it still has different *size* due to + * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit, + * resulting in struct ifreq being 32 and 40 bytes respectively). + * As a result, if the struct happens to be at the end of a page and + * the next page isn't readable/writable, we get a fault. To prevent + * that, copy back and forth to the full size. + */ + + uifr = compat_alloc_user_space(sizeof(*uifr)); + if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) + return -EFAULT; + + err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); + + if (!err) { + switch (cmd) { + case SIOCGIFFLAGS: + case SIOCGIFMETRIC: + case SIOCGIFMTU: + case SIOCGIFMEM: + case SIOCGIFHWADDR: + case SIOCGIFINDEX: + case SIOCGIFADDR: + case SIOCGIFBRDADDR: + case SIOCGIFDSTADDR: + case SIOCGIFNETMASK: + case SIOCGIFPFLAGS: + case SIOCGIFTXQLEN: + case SIOCGMIIPHY: + case SIOCGMIIREG: + if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) + err = -EFAULT; + break; + } + } + return err; +} + static int compat_sioc_ifmap(struct net *net, unsigned int cmd, struct compat_ifreq __user *uifr32) { @@ -3209,6 +3256,8 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: + return compat_ifreq_ioctl(net, sock, cmd, argp); + case SIOCSARP: case SIOCGARP: case SIOCDARP: From c6c9fee35dc27362b7bac34b2fc9f5b8ace2e22c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Jan 2019 22:43:19 +0100 Subject: [PATCH 0363/1436] net: socket: fix SIOCGIFNAME in compat As reported by Robert O'Callahan in https://bugzilla.kernel.org/show_bug.cgi?id=202273 reverting the previous changes in this area broke the SIOCGIFNAME ioctl in compat again (I'd previously fixed it after his previous report of breakage in https://bugzilla.kernel.org/show_bug.cgi?id=199469). This is obviously because I fixed SIOCGIFNAME more or less by accident. Fix it explicitly now by making it pass through the restored compat translation code. Cc: stable@vger.kernel.org Fixes: 4cf808e7ac32 ("kill dev_ifname32()") Reported-by: Robert O'Callahan Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/socket.c b/net/socket.c index fbf80f9fb057..473ac8d7c54e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3029,6 +3029,7 @@ static int compat_ifreq_ioctl(struct net *net, struct socket *sock, case SIOCGIFTXQLEN: case SIOCGMIIPHY: case SIOCGMIIREG: + case SIOCGIFNAME: if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) err = -EFAULT; break; @@ -3252,6 +3253,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCSIFTXQLEN: case SIOCBRADDIF: case SIOCBRDELIF: + case SIOCGIFNAME: case SIOCSIFNAME: case SIOCGMIIPHY: case SIOCGMIIREG: @@ -3266,7 +3268,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: case SIOCBONDCHANGEACTIVE: - case SIOCGIFNAME: return sock_do_ioctl(net, sock, cmd, arg); } From 98406133dd9cb9f195676eab540c270dceca879a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Jan 2019 22:43:20 +0100 Subject: [PATCH 0364/1436] net: socket: make bond ioctls go through compat_ifreq_ioctl() Same story as before, these use struct ifreq and thus need to be read with the shorter version to not cause faults. Cc: stable@vger.kernel.org Fixes: f92d4fc95341 ("kill bond_ioctl()") Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/socket.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/socket.c b/net/socket.c index 473ac8d7c54e..d80d87a395ea 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3258,16 +3258,16 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: + case SIOCBONDENSLAVE: + case SIOCBONDRELEASE: + case SIOCBONDSETHWADDR: + case SIOCBONDCHANGEACTIVE: return compat_ifreq_ioctl(net, sock, cmd, argp); case SIOCSARP: case SIOCGARP: case SIOCDARP: case SIOCATMARK: - case SIOCBONDENSLAVE: - case SIOCBONDRELEASE: - case SIOCBONDSETHWADDR: - case SIOCBONDCHANGEACTIVE: return sock_do_ioctl(net, sock, cmd, arg); } From b284909abad48b07d3071a9fc9b5692b3e64914b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 30 Jan 2019 07:13:58 -0600 Subject: [PATCH 0365/1436] cpu/hotplug: Fix "SMT disabled by BIOS" detection for KVM With the following commit: 73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS") ... the hotplug code attempted to detect when SMT was disabled by BIOS, in which case it reported SMT as permanently disabled. However, that code broke a virt hotplug scenario, where the guest is booted with only primary CPU threads, and a sibling is brought online later. The problem is that there doesn't seem to be a way to reliably distinguish between the HW "SMT disabled by BIOS" case and the virt "sibling not yet brought online" case. So the above-mentioned commit was a bit misguided, as it permanently disabled SMT for both cases, preventing future virt sibling hotplugs. Going back and reviewing the original problems which were attempted to be solved by that commit, when SMT was disabled in BIOS: 1) /sys/devices/system/cpu/smt/control showed "on" instead of "notsupported"; and 2) vmx_vm_init() was incorrectly showing the L1TF_MSG_SMT warning. I'd propose that we instead consider #1 above to not actually be a problem. Because, at least in the virt case, it's possible that SMT wasn't disabled by BIOS and a sibling thread could be brought online later. So it makes sense to just always default the smt control to "on" to allow for that possibility (assuming cpuid indicates that the CPU supports SMT). The real problem is #2, which has a simple fix: change vmx_vm_init() to query the actual current SMT state -- i.e., whether any siblings are currently online -- instead of looking at the SMT "control" sysfs value. So fix it by: a) reverting the original "fix" and its followup fix: 73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS") bc2d8d262cba ("cpu/hotplug: Fix SMT supported evaluation") and b) changing vmx_vm_init() to query the actual current SMT state -- instead of the sysfs control value -- to determine whether the L1TF warning is needed. This also requires the 'sched_smt_present' variable to exported, instead of 'cpu_smt_control'. Fixes: 73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS") Reported-by: Igor Mammedov Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Cc: Joe Mario Cc: Jiri Kosina Cc: Peter Zijlstra Cc: kvm@vger.kernel.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/e3a85d585da28cc333ecbc1e78ee9216e6da9396.1548794349.git.jpoimboe@redhat.com --- arch/x86/kernel/cpu/bugs.c | 2 +- arch/x86/kvm/vmx/vmx.c | 3 ++- include/linux/cpu.h | 2 -- kernel/cpu.c | 33 ++++----------------------------- kernel/sched/fair.c | 1 + kernel/smp.c | 2 -- 6 files changed, 8 insertions(+), 35 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1de0f4170178..01874d54f4fd 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -71,7 +71,7 @@ void __init check_bugs(void) * identify_boot_cpu() initialized SMT support information, let the * core code know. */ - cpu_smt_check_topology_early(); + cpu_smt_check_topology(); if (!IS_ENABLED(CONFIG_SMP)) { pr_info("CPU: "); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4341175339f3..95d618045001 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -6823,7 +6824,7 @@ static int vmx_vm_init(struct kvm *kvm) * Warn upon starting the first VM in a potentially * insecure environment. */ - if (cpu_smt_control == CPU_SMT_ENABLED) + if (sched_smt_active()) pr_warn_once(L1TF_MSG_SMT); if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER) pr_warn_once(L1TF_MSG_L1D); diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 218df7f4d3e1..5041357d0297 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -180,12 +180,10 @@ enum cpuhp_smt_control { #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) extern enum cpuhp_smt_control cpu_smt_control; extern void cpu_smt_disable(bool force); -extern void cpu_smt_check_topology_early(void); extern void cpu_smt_check_topology(void); #else # define cpu_smt_control (CPU_SMT_ENABLED) static inline void cpu_smt_disable(bool force) { } -static inline void cpu_smt_check_topology_early(void) { } static inline void cpu_smt_check_topology(void) { } #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index c0c7f64573ed..d1c6d152da89 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -376,9 +376,6 @@ void __weak arch_smt_update(void) { } #ifdef CONFIG_HOTPLUG_SMT enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; -EXPORT_SYMBOL_GPL(cpu_smt_control); - -static bool cpu_smt_available __read_mostly; void __init cpu_smt_disable(bool force) { @@ -397,25 +394,11 @@ void __init cpu_smt_disable(bool force) /* * The decision whether SMT is supported can only be done after the full - * CPU identification. Called from architecture code before non boot CPUs - * are brought up. - */ -void __init cpu_smt_check_topology_early(void) -{ - if (!topology_smt_supported()) - cpu_smt_control = CPU_SMT_NOT_SUPPORTED; -} - -/* - * If SMT was disabled by BIOS, detect it here, after the CPUs have been - * brought online. This ensures the smt/l1tf sysfs entries are consistent - * with reality. cpu_smt_available is set to true during the bringup of non - * boot CPUs when a SMT sibling is detected. Note, this may overwrite - * cpu_smt_control's previous setting. + * CPU identification. Called from architecture code. */ void __init cpu_smt_check_topology(void) { - if (!cpu_smt_available) + if (!topology_smt_supported()) cpu_smt_control = CPU_SMT_NOT_SUPPORTED; } @@ -428,18 +411,10 @@ early_param("nosmt", smt_cmdline_disable); static inline bool cpu_smt_allowed(unsigned int cpu) { - if (topology_is_primary_thread(cpu)) + if (cpu_smt_control == CPU_SMT_ENABLED) return true; - /* - * If the CPU is not a 'primary' thread and the booted_once bit is - * set then the processor has SMT support. Store this information - * for the late check of SMT support in cpu_smt_check_topology(). - */ - if (per_cpu(cpuhp_state, cpu).booted_once) - cpu_smt_available = true; - - if (cpu_smt_control == CPU_SMT_ENABLED) + if (topology_is_primary_thread(cpu)) return true; /* diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 50aa2aba69bd..310d0637fe4b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5980,6 +5980,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p #ifdef CONFIG_SCHED_SMT DEFINE_STATIC_KEY_FALSE(sched_smt_present); +EXPORT_SYMBOL_GPL(sched_smt_present); static inline void set_idle_cores(int cpu, int val) { diff --git a/kernel/smp.c b/kernel/smp.c index 163c451af42e..f4cf1b0bb3b8 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -584,8 +584,6 @@ void __init smp_init(void) num_nodes, (num_nodes > 1 ? "s" : ""), num_cpus, (num_cpus > 1 ? "s" : "")); - /* Final decision about SMT support */ - cpu_smt_check_topology(); /* Any cleanup work */ smp_cpus_done(setup_max_cpus); } From e15aa3b2b1388c399c1a2ce08550d2cc4f7e3e14 Mon Sep 17 00:00:00 2001 From: Mathias Thore Date: Mon, 28 Jan 2019 10:07:47 +0100 Subject: [PATCH 0366/1436] ucc_geth: Reset BQL queue when stopping device After a timeout event caused by for example a broadcast storm, when the MAC and PHY are reset, the BQL TX queue needs to be reset as well. Otherwise, the device will exhibit severe performance issues even after the storm has ended. Co-authored-by: David Gounaris Signed-off-by: Mathias Thore Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/ucc_geth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index c3d539e209ed..eb3e65e8868f 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -1879,6 +1879,8 @@ static void ucc_geth_free_tx(struct ucc_geth_private *ugeth) u16 i, j; u8 __iomem *bd; + netdev_reset_queue(ugeth->ndev); + ug_info = ugeth->ug_info; uf_info = &ug_info->uf_info; From 1dbd449c9943e3145148cc893c2461b72ba6fef0 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 30 Jan 2019 13:52:36 -0500 Subject: [PATCH 0367/1436] fs/dcache: Fix incorrect nr_dentry_unused accounting in shrink_dcache_sb() The nr_dentry_unused per-cpu counter tracks dentries in both the LRU lists and the shrink lists where the DCACHE_LRU_LIST bit is set. The shrink_dcache_sb() function moves dentries from the LRU list to a shrink list and subtracts the dentry count from nr_dentry_unused. This is incorrect as the nr_dentry_unused count will also be decremented in shrink_dentry_list() via d_shrink_del(). To fix this double decrement, the decrement in the shrink_dcache_sb() function is taken out. Fixes: 4e717f5c1083 ("list_lru: remove special case function list_lru_dispose_all." Cc: stable@kernel.org Signed-off-by: Waiman Long Reviewed-by: Dave Chinner Signed-off-by: Linus Torvalds --- fs/dcache.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 2593153471cf..44e5652b2664 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1188,15 +1188,11 @@ static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, */ void shrink_dcache_sb(struct super_block *sb) { - long freed; - do { LIST_HEAD(dispose); - freed = list_lru_walk(&sb->s_dentry_lru, + list_lru_walk(&sb->s_dentry_lru, dentry_lru_isolate_shrink, &dispose, 1024); - - this_cpu_sub(nr_dentry_unused, freed); shrink_dentry_list(&dispose); } while (list_lru_count(&sb->s_dentry_lru) > 0); } From 7d10f70fc198877b43d92bdcd7604279788b9568 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 30 Jan 2019 13:52:37 -0500 Subject: [PATCH 0368/1436] fs: Don't need to put list_lru into its own cacheline The list_lru structure is essentially just a pointer to a table of per-node LRU lists. Even if CONFIG_MEMCG_KMEM is defined, the list field is just used for LRU list registration and shrinker_id is set at initialization. Those fields won't need to be touched that often. So there is no point to make the list_lru structures to sit in their own cachelines. Signed-off-by: Waiman Long Reviewed-by: Dave Chinner Signed-off-by: Linus Torvalds --- include/linux/fs.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 811c77743dad..29d8e2cfed0e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1479,11 +1479,12 @@ struct super_block { struct user_namespace *s_user_ns; /* - * Keep the lru lists last in the structure so they always sit on their - * own individual cachelines. + * The list_lru structure is essentially just a pointer to a table + * of per-node lru lists, each of which has its own spinlock. + * There is no need to put them into separate cachelines. */ - struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; - struct list_lru s_inode_lru ____cacheline_aligned_in_smp; + struct list_lru s_dentry_lru; + struct list_lru s_inode_lru; struct rcu_head rcu; struct work_struct destroy_work; From af0c9af1b3f66052c369d08be3f60fa9a9559e48 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 30 Jan 2019 13:52:38 -0500 Subject: [PATCH 0369/1436] fs/dcache: Track & report number of negative dentries The current dentry number tracking code doesn't distinguish between positive & negative dentries. It just reports the total number of dentries in the LRU lists. As excessive number of negative dentries can have an impact on system performance, it will be wise to track the number of positive and negative dentries separately. This patch adds tracking for the total number of negative dentries in the system LRU lists and reports it in the 5th field in the /proc/sys/fs/dentry-state file. The number, however, does not include negative dentries that are in flight but not in the LRU yet as well as those in the shrinker lists which are on the way out anyway. The number of positive dentries in the LRU lists can be roughly found by subtracting the number of negative dentries from the unused count. Matthew Wilcox had confirmed that since the introduction of the dentry_stat structure in 2.1.60, the dummy array was there, probably for future extension. They were not replacements of pre-existing fields. So no sane applications that read the value of /proc/sys/fs/dentry-state will do dummy thing if the last 2 fields of the sysctl parameter are not zero. IOW, it will be safe to use one of the dummy array entry for negative dentry count. Signed-off-by: Waiman Long Signed-off-by: Linus Torvalds --- Documentation/sysctl/fs.txt | 24 +++++++++++++++--------- fs/dcache.c | 32 ++++++++++++++++++++++++++++++++ include/linux/dcache.h | 7 ++++--- 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 819caf8ca05f..58649bd4fcfc 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -56,26 +56,32 @@ of any kernel data structures. dentry-state: -From linux/fs/dentry.c: +From linux/include/linux/dcache.h: -------------------------------------------------------------- -struct { +struct dentry_stat_t dentry_stat { int nr_dentry; int nr_unused; int age_limit; /* age in seconds */ int want_pages; /* pages requested by system */ - int dummy[2]; -} dentry_stat = {0, 0, 45, 0,}; --------------------------------------------------------------- + int nr_negative; /* # of unused negative dentries */ + int dummy; /* Reserved for future use */ +}; +-------------------------------------------------------------- + +Dentries are dynamically allocated and deallocated. + +nr_dentry shows the total number of dentries allocated (active ++ unused). nr_unused shows the number of dentries that are not +actively used, but are saved in the LRU list for future reuse. -Dentries are dynamically allocated and deallocated, and -nr_dentry seems to be 0 all the time. Hence it's safe to -assume that only nr_unused, age_limit and want_pages are -used. Nr_unused seems to be exactly what its name says. Age_limit is the age in seconds after which dcache entries can be reclaimed when memory is short and want_pages is nonzero when shrink_dcache_pages() has been called and the dcache isn't pruned yet. +nr_negative shows the number of unused dentries that are also +negative dentries which do not mapped to actual files. + ============================================================== dquot-max & dquot-nr: diff --git a/fs/dcache.c b/fs/dcache.c index 44e5652b2664..aac41adf4743 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -119,6 +119,7 @@ struct dentry_stat_t dentry_stat = { static DEFINE_PER_CPU(long, nr_dentry); static DEFINE_PER_CPU(long, nr_dentry_unused); +static DEFINE_PER_CPU(long, nr_dentry_negative); #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) @@ -152,11 +153,22 @@ static long get_nr_dentry_unused(void) return sum < 0 ? 0 : sum; } +static long get_nr_dentry_negative(void) +{ + int i; + long sum = 0; + + for_each_possible_cpu(i) + sum += per_cpu(nr_dentry_negative, i); + return sum < 0 ? 0 : sum; +} + int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { dentry_stat.nr_dentry = get_nr_dentry(); dentry_stat.nr_unused = get_nr_dentry_unused(); + dentry_stat.nr_negative = get_nr_dentry_negative(); return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } #endif @@ -317,6 +329,8 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry) flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); WRITE_ONCE(dentry->d_flags, flags); dentry->d_inode = NULL; + if (dentry->d_flags & DCACHE_LRU_LIST) + this_cpu_inc(nr_dentry_negative); } static void dentry_free(struct dentry *dentry) @@ -371,6 +385,11 @@ static void dentry_unlink_inode(struct dentry * dentry) * The per-cpu "nr_dentry_unused" counters are updated with * the DCACHE_LRU_LIST bit. * + * The per-cpu "nr_dentry_negative" counters are only updated + * when deleted from or added to the per-superblock LRU list, not + * from/to the shrink list. That is to avoid an unneeded dec/inc + * pair when moving from LRU to shrink list in select_collect(). + * * These helper functions make sure we always follow the * rules. d_lock must be held by the caller. */ @@ -380,6 +399,8 @@ static void d_lru_add(struct dentry *dentry) D_FLAG_VERIFY(dentry, 0); dentry->d_flags |= DCACHE_LRU_LIST; this_cpu_inc(nr_dentry_unused); + if (d_is_negative(dentry)) + this_cpu_inc(nr_dentry_negative); WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); } @@ -388,6 +409,8 @@ static void d_lru_del(struct dentry *dentry) D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); dentry->d_flags &= ~DCACHE_LRU_LIST; this_cpu_dec(nr_dentry_unused); + if (d_is_negative(dentry)) + this_cpu_dec(nr_dentry_negative); WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); } @@ -418,6 +441,8 @@ static void d_lru_isolate(struct list_lru_one *lru, struct dentry *dentry) D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); dentry->d_flags &= ~DCACHE_LRU_LIST; this_cpu_dec(nr_dentry_unused); + if (d_is_negative(dentry)) + this_cpu_dec(nr_dentry_negative); list_lru_isolate(lru, &dentry->d_lru); } @@ -426,6 +451,8 @@ static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry, { D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); dentry->d_flags |= DCACHE_SHRINK_LIST; + if (d_is_negative(dentry)) + this_cpu_dec(nr_dentry_negative); list_lru_isolate_move(lru, &dentry->d_lru, list); } @@ -1816,6 +1843,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) WARN_ON(d_in_lookup(dentry)); spin_lock(&dentry->d_lock); + /* + * Decrement negative dentry count if it was in the LRU list. + */ + if (dentry->d_flags & DCACHE_LRU_LIST) + this_cpu_dec(nr_dentry_negative); hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index ef4b70f64f33..60996e64c579 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -62,9 +62,10 @@ extern const struct qstr slash_name; struct dentry_stat_t { long nr_dentry; long nr_unused; - long age_limit; /* age in seconds */ - long want_pages; /* pages requested by system */ - long dummy[2]; + long age_limit; /* age in seconds */ + long want_pages; /* pages requested by system */ + long nr_negative; /* # of unused negative dentries */ + long dummy; /* Reserved for future use */ }; extern struct dentry_stat_t dentry_stat; From 15efb47dc560849d0c07db96fdad5121f2cf736e Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Wed, 30 Jan 2019 18:26:02 +0100 Subject: [PATCH 0370/1436] PM-runtime: Fix deadlock with ktime_get() A deadlock has been seen when swicthing clocksources which use PM-runtime. The call path is: change_clocksource ... write_seqcount_begin ... timekeeping_update ... sh_cmt_clocksource_enable ... rpm_resume pm_runtime_mark_last_busy ktime_get do read_seqcount_begin while read_seqcount_retry .... write_seqcount_end Although we should be safe because we haven't yet changed the clocksource at that time, we can't do that because of seqcount protection. Use ktime_get_mono_fast_ns() instead which is lock safe for such cases. With ktime_get_mono_fast_ns, the timestamp is not guaranteed to be monotonic across an update and as a result can goes backward. According to update_fast_timekeeper() description: "In the worst case, this can result is a slightly wrong timestamp (a few nanoseconds)". For PM-runtime autosuspend, this means only that the suspend decision may be slightly suboptimal. Fixes: 8234f6734c5d ("PM-runtime: Switch autosuspend over to using hrtimers") Reported-by: Biju Das Signed-off-by: Vincent Guittot Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 10 +++++----- include/linux/pm_runtime.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 457be03b744d..0ea2139c50d8 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -130,7 +130,7 @@ u64 pm_runtime_autosuspend_expiration(struct device *dev) { int autosuspend_delay; u64 last_busy, expires = 0; - u64 now = ktime_to_ns(ktime_get()); + u64 now = ktime_get_mono_fast_ns(); if (!dev->power.use_autosuspend) goto out; @@ -909,7 +909,7 @@ static enum hrtimer_restart pm_suspend_timer_fn(struct hrtimer *timer) * If 'expires' is after the current time, we've been called * too early. */ - if (expires > 0 && expires < ktime_to_ns(ktime_get())) { + if (expires > 0 && expires < ktime_get_mono_fast_ns()) { dev->power.timer_expires = 0; rpm_suspend(dev, dev->power.timer_autosuspends ? (RPM_ASYNC | RPM_AUTO) : RPM_ASYNC); @@ -928,7 +928,7 @@ static enum hrtimer_restart pm_suspend_timer_fn(struct hrtimer *timer) int pm_schedule_suspend(struct device *dev, unsigned int delay) { unsigned long flags; - ktime_t expires; + u64 expires; int retval; spin_lock_irqsave(&dev->power.lock, flags); @@ -945,8 +945,8 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay) /* Other scheduled or pending requests need to be canceled. */ pm_runtime_cancel_pending(dev); - expires = ktime_add(ktime_get(), ms_to_ktime(delay)); - dev->power.timer_expires = ktime_to_ns(expires); + expires = ktime_get_mono_fast_ns() + (u64)delay * NSEC_PER_MSEC; + dev->power.timer_expires = expires; dev->power.timer_autosuspends = 0; hrtimer_start(&dev->power.suspend_timer, expires, HRTIMER_MODE_ABS); diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 54af4eef169f..fed5be706bc9 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -105,7 +105,7 @@ static inline bool pm_runtime_callbacks_present(struct device *dev) static inline void pm_runtime_mark_last_busy(struct device *dev) { - WRITE_ONCE(dev->power.last_busy, ktime_to_ns(ktime_get())); + WRITE_ONCE(dev->power.last_busy, ktime_get_mono_fast_ns()); } static inline bool pm_runtime_is_irq_safe(struct device *dev) From 1617971c6616c87185cbc78fa1a86dfc70dd16b6 Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Wed, 30 Jan 2019 08:28:22 -0800 Subject: [PATCH 0371/1436] cpuidle: poll_state: Fix default time limit The default time is declared in units of microsecnds, but is used as nanoseconds, resulting in significant accounting errors for idle state 0 time when all idle states deeper than 0 are disabled. Under these unusual conditions, we don't really care about the poll time limit anyhow. Fixes: 800fb34a99ce ("cpuidle: poll_state: Disregard disable idle states") Signed-off-by: Doug Smythies Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/poll_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c index b17d153e724f..23a1b27579a5 100644 --- a/drivers/cpuidle/poll_state.c +++ b/drivers/cpuidle/poll_state.c @@ -21,7 +21,7 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev, local_irq_enable(); if (!current_set_polling_and_test()) { unsigned int loop_count = 0; - u64 limit = TICK_USEC; + u64 limit = TICK_NSEC; int i; for (i = 1; i < drv->state_count; i++) { From feaf5c796b3f0240f10d0d6d0b686715fd58a05b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 28 Jan 2019 22:23:48 +0100 Subject: [PATCH 0372/1436] net: ip_gre: always reports o_key to userspace Erspan protocol (version 1 and 2) relies on o_key to configure session id header field. However TUNNEL_KEY bit is cleared in erspan_xmit since ERSPAN protocol does not set the key field of the external GRE header and so the configured o_key is not reported to userspace. The issue can be triggered with the following reproducer: $ip link add erspan1 type erspan local 192.168.0.1 remote 192.168.0.2 \ key 1 seq erspan_ver 1 $ip link set erspan1 up $ip -d link sh erspan1 erspan1@NONE: mtu 1450 qdisc pfifo_fast state UNKNOWN mode DEFAULT link/ether 52:aa:99:95:9a:b5 brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 1500 erspan remote 192.168.0.2 local 192.168.0.1 ttl inherit ikey 0.0.0.1 iseq oseq erspan_index 0 Fix the issue adding TUNNEL_KEY bit to the o_flags parameter in ipgre_fill_info Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 20a64fe6254b..3978f807fa8b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1455,12 +1455,17 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm *p = &t->parms; + __be16 o_flags = p->o_flags; + + if ((t->erspan_ver == 1 || t->erspan_ver == 2) && + !t->collect_md) + o_flags |= TUNNEL_KEY; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || nla_put_be16(skb, IFLA_GRE_OFLAGS, - gre_tnl_flags_to_gre_flags(p->o_flags)) || + gre_tnl_flags_to_gre_flags(o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || From c706863bc8902d0c2d1a5a27ac8e1ead5d06b79d Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 28 Jan 2019 22:23:49 +0100 Subject: [PATCH 0373/1436] net: ip6_gre: always reports o_key to userspace As Erspan_v4, Erspan_v6 protocol relies on o_key to configure session id header field. However TUNNEL_KEY bit is cleared in ip6erspan_tunnel_xmit since ERSPAN protocol does not set the key field of the external GRE header and so the configured o_key is not reported to userspace. The issue can be triggered with the following reproducer: $ip link add ip6erspan1 type ip6erspan local 2000::1 remote 2000::2 \ key 1 seq erspan_ver 1 $ip link set ip6erspan1 up ip -d link sh ip6erspan1 ip6erspan1@NONE: mtu 1422 qdisc noop state DOWN mode DEFAULT link/ether ba:ff:09:24:c3:0e brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 1500 ip6erspan remote 2000::2 local 2000::1 encaplimit 4 flowlabel 0x00000 ikey 0.0.0.1 iseq oseq Fix the issue adding TUNNEL_KEY bit to the o_flags parameter in ip6gre_fill_info Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4416368dbd49..801a9a0c217e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -2098,12 +2098,17 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct __ip6_tnl_parm *p = &t->parms; + __be16 o_flags = p->o_flags; + + if ((p->erspan_ver == 1 || p->erspan_ver == 2) && + !p->collect_md) + o_flags |= TUNNEL_KEY; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || nla_put_be16(skb, IFLA_GRE_OFLAGS, - gre_tnl_flags_to_gre_flags(p->o_flags)) || + gre_tnl_flags_to_gre_flags(o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) || From 8be4d9a492f88b96d4d3a06c6cbedbc40ca14c83 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 29 Jan 2019 09:45:53 +0900 Subject: [PATCH 0374/1436] virtio_net: Don't enable NAPI when interface is down Commit 4e09ff536284 ("virtio-net: disable NAPI only when enabled during XDP set") tried to fix inappropriate NAPI enabling/disabling when !netif_running(), but was not complete. On error path virtio_net could enable NAPI even when !netif_running(). This can cause enabling NAPI twice on virtnet_open(), which would trigger BUG_ON() in napi_enable(). Fixes: 4941d472bf95b ("virtio-net: do not reset during XDP set") Signed-off-by: Toshiaki Makita Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 8fadd8eaf601..8e4c5d432206 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2430,8 +2430,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, return 0; err: - for (i = 0; i < vi->max_queue_pairs; i++) - virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + if (netif_running(dev)) { + for (i = 0; i < vi->max_queue_pairs; i++) + virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + } if (prog) bpf_prog_sub(prog, vi->max_queue_pairs - 1); return err; From 534da5e856334fb54cb0272a9fb3afec28ea3aed Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 29 Jan 2019 09:45:54 +0900 Subject: [PATCH 0375/1436] virtio_net: Don't call free_old_xmit_skbs for xdp_frames When napi_tx is enabled, virtnet_poll_cleantx() called free_old_xmit_skbs() even for xdp send queue. This is bogus since the queue has xdp_frames, not sk_buffs, thus mangled device tx bytes counters because skb->len is meaningless value, and even triggered oops due to general protection fault on freeing them. Since xdp send queues do not aquire locks, old xdp_frames should be freed only in virtnet_xdp_xmit(), so just skip free_old_xmit_skbs() for xdp send queues. Similarly virtnet_poll_tx() called free_old_xmit_skbs(). This NAPI handler is called even without calling start_xmit() because cb for tx is by default enabled. Once the handler is called, it enabled the cb again, and then the handler would be called again. We don't need this handler for XDP, so don't enable cb as well as not calling free_old_xmit_skbs(). Also, we need to disable tx NAPI when disabling XDP, so virtnet_poll_tx() can safely access curr_queue_pairs and xdp_queue_pairs, which are not atomically updated while disabling XDP. Fixes: b92f1e6751a6 ("virtio-net: transmit napi") Fixes: 7b0411ef4aa6 ("virtio-net: clean tx descriptors from rx napi") Signed-off-by: Toshiaki Makita Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 49 +++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 8e4c5d432206..046f955495a8 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1358,6 +1358,16 @@ static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) u64_stats_update_end(&sq->stats.syncp); } +static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) +{ + if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) + return false; + else if (q < vi->curr_queue_pairs) + return true; + else + return false; +} + static void virtnet_poll_cleantx(struct receive_queue *rq) { struct virtnet_info *vi = rq->vq->vdev->priv; @@ -1365,7 +1375,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq) struct send_queue *sq = &vi->sq[index]; struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); - if (!sq->napi.weight) + if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) return; if (__netif_tx_trylock(txq)) { @@ -1442,8 +1452,16 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) { struct send_queue *sq = container_of(napi, struct send_queue, napi); struct virtnet_info *vi = sq->vq->vdev->priv; - struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq)); + unsigned int index = vq2txq(sq->vq); + struct netdev_queue *txq; + if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { + /* We don't need to enable cb for XDP */ + napi_complete_done(napi, 0); + return 0; + } + + txq = netdev_get_tx_queue(vi->dev, index); __netif_tx_lock(txq, raw_smp_processor_id()); free_old_xmit_skbs(sq, true); __netif_tx_unlock(txq); @@ -2402,9 +2420,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, } /* Make sure NAPI is not using any XDP TX queues for RX. */ - if (netif_running(dev)) - for (i = 0; i < vi->max_queue_pairs; i++) + if (netif_running(dev)) { + for (i = 0; i < vi->max_queue_pairs; i++) { napi_disable(&vi->rq[i].napi); + virtnet_napi_tx_disable(&vi->sq[i].napi); + } + } netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); err = _virtnet_set_queues(vi, curr_qp + xdp_qp); @@ -2423,16 +2444,22 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, } if (old_prog) bpf_prog_put(old_prog); - if (netif_running(dev)) + if (netif_running(dev)) { virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + virtnet_napi_tx_enable(vi, vi->sq[i].vq, + &vi->sq[i].napi); + } } return 0; err: if (netif_running(dev)) { - for (i = 0; i < vi->max_queue_pairs; i++) + for (i = 0; i < vi->max_queue_pairs; i++) { virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + virtnet_napi_tx_enable(vi, vi->sq[i].vq, + &vi->sq[i].napi); + } } if (prog) bpf_prog_sub(prog, vi->max_queue_pairs - 1); @@ -2615,16 +2642,6 @@ static void free_receive_page_frags(struct virtnet_info *vi) put_page(vi->rq[i].alloc_frag.page); } -static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) -{ - if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) - return false; - else if (q < vi->curr_queue_pairs) - return true; - else - return false; -} - static void free_unused_bufs(struct virtnet_info *vi) { void *buf; From 188313c137c4f76afd0862f50dbc185b198b9e2a Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 29 Jan 2019 09:45:55 +0900 Subject: [PATCH 0376/1436] virtio_net: Fix not restoring real_num_rx_queues When _virtnet_set_queues() failed we did not restore real_num_rx_queues. Fix this by placing the change of real_num_rx_queues after _virtnet_set_queues(). This order is also in line with virtnet_set_channels(). Fixes: 4941d472bf95 ("virtio-net: do not reset during XDP set") Signed-off-by: Toshiaki Makita Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 046f955495a8..0e1a36908419 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2427,10 +2427,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, } } - netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); err = _virtnet_set_queues(vi, curr_qp + xdp_qp); if (err) goto err; + netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); vi->xdp_queue_pairs = xdp_qp; for (i = 0; i < vi->max_queue_pairs; i++) { From 1667c08a9d31c7cdf09f4890816bfbf20b685495 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 29 Jan 2019 09:45:56 +0900 Subject: [PATCH 0377/1436] virtio_net: Fix out of bounds access of sq When XDP is disabled, curr_queue_pairs + smp_processor_id() can be larger than max_queue_pairs. There is no guarantee that we have enough XDP send queues dedicated for each cpu when XDP is disabled, so do not count drops on sq in that case. Fixes: 5b8f3c8d30a6 ("virtio_net: Add XDP related stats") Signed-off-by: Toshiaki Makita Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0e1a36908419..669b65c232a2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -491,20 +491,17 @@ static int virtnet_xdp_xmit(struct net_device *dev, int ret, err; int i; - sq = virtnet_xdp_sq(vi); - - if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { - ret = -EINVAL; - drops = n; - goto out; - } - /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this * indicate XDP resources have been successfully allocated. */ xdp_prog = rcu_dereference(rq->xdp_prog); - if (!xdp_prog) { - ret = -ENXIO; + if (!xdp_prog) + return -ENXIO; + + sq = virtnet_xdp_sq(vi); + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { + ret = -EINVAL; drops = n; goto out; } From 03aa6d34868c07b2b1b8b2db080602d7ec528173 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 29 Jan 2019 09:45:57 +0900 Subject: [PATCH 0378/1436] virtio_net: Don't process redirected XDP frames when XDP is disabled Commit 8dcc5b0ab0ec ("virtio_net: fix ndo_xdp_xmit crash towards dev not ready for XDP") tried to avoid access to unexpected sq while XDP is disabled, but was not complete. There was a small window which causes out of bounds sq access in virtnet_xdp_xmit() while disabling XDP. An example case of - curr_queue_pairs = 6 (2 for SKB and 4 for XDP) - online_cpu_num = xdp_queue_paris = 4 when XDP is enabled: CPU 0 CPU 1 (Disabling XDP) (Processing redirected XDP frames) virtnet_xdp_xmit() virtnet_xdp_set() _virtnet_set_queues() set curr_queue_pairs (2) check if rq->xdp_prog is not NULL virtnet_xdp_sq(vi) qp = curr_queue_pairs - xdp_queue_pairs + smp_processor_id() = 2 - 4 + 1 = -1 sq = &vi->sq[qp] // out of bounds access set xdp_queue_pairs (0) rq->xdp_prog = NULL Basically we should not change curr_queue_pairs and xdp_queue_pairs while someone can read the values. Thus, when disabling XDP, assign NULL to rq->xdp_prog first, and wait for RCU grace period, then change xxx_queue_pairs. Note that we need to keep the current order when enabling XDP though. - v2: Make rcu_assign_pointer/synchronize_net conditional instead of _virtnet_set_queues. Fixes: 186b3c998c50 ("virtio-net: support XDP_REDIRECT") Signed-off-by: Toshiaki Makita Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 669b65c232a2..cea52e47dc65 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2410,6 +2410,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, return -ENOMEM; } + old_prog = rtnl_dereference(vi->rq[0].xdp_prog); + if (!prog && !old_prog) + return 0; + if (prog) { prog = bpf_prog_add(prog, vi->max_queue_pairs - 1); if (IS_ERR(prog)) @@ -2424,21 +2428,30 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, } } + if (!prog) { + for (i = 0; i < vi->max_queue_pairs; i++) { + rcu_assign_pointer(vi->rq[i].xdp_prog, prog); + if (i == 0) + virtnet_restore_guest_offloads(vi); + } + synchronize_net(); + } + err = _virtnet_set_queues(vi, curr_qp + xdp_qp); if (err) goto err; netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); vi->xdp_queue_pairs = xdp_qp; - for (i = 0; i < vi->max_queue_pairs; i++) { - old_prog = rtnl_dereference(vi->rq[i].xdp_prog); - rcu_assign_pointer(vi->rq[i].xdp_prog, prog); - if (i == 0) { - if (!old_prog) + if (prog) { + for (i = 0; i < vi->max_queue_pairs; i++) { + rcu_assign_pointer(vi->rq[i].xdp_prog, prog); + if (i == 0 && !old_prog) virtnet_clear_guest_offloads(vi); - if (!prog) - virtnet_restore_guest_offloads(vi); } + } + + for (i = 0; i < vi->max_queue_pairs; i++) { if (old_prog) bpf_prog_put(old_prog); if (netif_running(dev)) { @@ -2451,6 +2464,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, return 0; err: + if (!prog) { + virtnet_clear_guest_offloads(vi); + for (i = 0; i < vi->max_queue_pairs; i++) + rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); + } + if (netif_running(dev)) { for (i = 0; i < vi->max_queue_pairs; i++) { virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); From 07b344f494ddda9f061b396407c96df8c46c82b5 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 29 Jan 2019 09:45:58 +0900 Subject: [PATCH 0379/1436] virtio_net: Use xdp_return_frame to free xdp_frames on destroying vqs put_page() can work as a fallback for freeing xdp_frames, but the appropriate way is to use xdp_return_frame(). Fixes: cac320c850ef ("virtio_net: convert to use generic xdp_frame and xdp_return_frame API") Signed-off-by: Toshiaki Makita Acked-by: Jason Wang Acked-by: Jesper Dangaard Brouer Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index cea52e47dc65..1d454cee641c 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2669,7 +2669,7 @@ static void free_unused_bufs(struct virtnet_info *vi) if (!is_xdp_raw_buffer_queue(vi, i)) dev_kfree_skb(buf); else - put_page(virt_to_head_page(buf)); + xdp_return_frame(buf); } } From 5050471d35d1316ba32dfcbb409978337eb9e75e Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 29 Jan 2019 09:45:59 +0900 Subject: [PATCH 0380/1436] virtio_net: Differentiate sk_buff and xdp_frame on freeing We do not reset or free up unused buffers when enabling/disabling XDP, so it can happen that xdp_frames are freed after disabling XDP or sk_buffs are freed after enabling XDP on xdp tx queues. Thus we need to handle both forms (xdp_frames and sk_buffs) regardless of XDP setting. One way to trigger this problem is to disable XDP when napi_tx is enabled. In that case, virtnet_xdp_set() calls virtnet_napi_enable() which kicks NAPI. The NAPI handler will call virtnet_poll_cleantx() which invokes free_old_xmit_skbs() for queues which have been used by XDP. Note that even with this change we need to keep skipping free_old_xmit_skbs() from NAPI handlers when XDP is enabled, because XDP tx queues do not aquire queue locks. - v2: Use napi_consume_skb() instead of dev_consume_skb_any() Fixes: 4941d472bf95 ("virtio-net: do not reset during XDP set") Signed-off-by: Toshiaki Makita Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 54 +++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 1d454cee641c..259448182272 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -57,6 +57,8 @@ module_param(napi_tx, bool, 0644); #define VIRTIO_XDP_TX BIT(0) #define VIRTIO_XDP_REDIR BIT(1) +#define VIRTIO_XDP_FLAG BIT(0) + /* RX packet size EWMA. The average packet size is used to determine the packet * buffer size when refilling RX rings. As the entire RX ring may be refilled * at once, the weight is chosen so that the EWMA will be insensitive to short- @@ -252,6 +254,21 @@ struct padded_vnet_hdr { char padding[4]; }; +static bool is_xdp_frame(void *ptr) +{ + return (unsigned long)ptr & VIRTIO_XDP_FLAG; +} + +static void *xdp_to_ptr(struct xdp_frame *ptr) +{ + return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG); +} + +static struct xdp_frame *ptr_to_xdp(void *ptr) +{ + return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); +} + /* Converting between virtqueue no. and kernel tx/rx queue no. * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq */ @@ -462,7 +479,8 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, sg_init_one(sq->sg, xdpf->data, xdpf->len); - err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC); + err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf), + GFP_ATOMIC); if (unlikely(err)) return -ENOSPC; /* Caller handle free/refcnt */ @@ -482,13 +500,13 @@ static int virtnet_xdp_xmit(struct net_device *dev, { struct virtnet_info *vi = netdev_priv(dev); struct receive_queue *rq = vi->rq; - struct xdp_frame *xdpf_sent; struct bpf_prog *xdp_prog; struct send_queue *sq; unsigned int len; int drops = 0; int kicks = 0; int ret, err; + void *ptr; int i; /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this @@ -507,8 +525,12 @@ static int virtnet_xdp_xmit(struct net_device *dev, } /* Free up any pending old buffers before queueing new ones. */ - while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) - xdp_return_frame(xdpf_sent); + while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { + if (likely(is_xdp_frame(ptr))) + xdp_return_frame(ptr_to_xdp(ptr)); + else + napi_consume_skb(ptr, false); + } for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; @@ -1329,18 +1351,26 @@ static int virtnet_receive(struct receive_queue *rq, int budget, static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) { - struct sk_buff *skb; unsigned int len; unsigned int packets = 0; unsigned int bytes = 0; + void *ptr; - while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) { - pr_debug("Sent skb %p\n", skb); + while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { + if (likely(!is_xdp_frame(ptr))) { + struct sk_buff *skb = ptr; - bytes += skb->len; + pr_debug("Sent skb %p\n", skb); + + bytes += skb->len; + napi_consume_skb(skb, in_napi); + } else { + struct xdp_frame *frame = ptr_to_xdp(ptr); + + bytes += frame->len; + xdp_return_frame(frame); + } packets++; - - napi_consume_skb(skb, in_napi); } /* Avoid overhead when no packets have been processed @@ -2666,10 +2696,10 @@ static void free_unused_bufs(struct virtnet_info *vi) for (i = 0; i < vi->max_queue_pairs; i++) { struct virtqueue *vq = vi->sq[i].vq; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { - if (!is_xdp_raw_buffer_queue(vi, i)) + if (!is_xdp_frame(buf)) dev_kfree_skb(buf); else - xdp_return_frame(buf); + xdp_return_frame(ptr_to_xdp(buf)); } } From ef489749aae508e6f17886775c075f12ff919fb1 Mon Sep 17 00:00:00 2001 From: Yohei Kanemaru Date: Tue, 29 Jan 2019 15:52:34 +0900 Subject: [PATCH 0381/1436] ipv6: sr: clear IP6CB(skb) on SRH ip4ip6 encapsulation skb->cb may contain data from previous layers (in an observed case IPv4 with L3 Master Device). In the observed scenario, the data in IPCB(skb)->frags was misinterpreted as IP6CB(skb)->frag_max_size, eventually caused an unexpected IPv6 fragmentation in ip6_fragment() through ip6_finish_output(). This patch clears IP6CB(skb), which potentially contains garbage data, on the SRH ip4ip6 encapsulation. Fixes: 32d99d0b6702 ("ipv6: sr: add support for ip4ip6 encapsulation") Signed-off-by: Yohei Kanemaru Signed-off-by: David S. Miller --- net/ipv6/seg6_iptunnel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 8181ee7e1e27..ee5403cbe655 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -146,6 +146,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) } else { ip6_flow_hdr(hdr, 0, flowlabel); hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); + + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); } hdr->nexthdr = NEXTHDR_ROUTING; From 3a03cb8456cc1d61c467a5375e0a10e5207b948c Mon Sep 17 00:00:00 2001 From: George Amanakis Date: Tue, 29 Jan 2019 22:50:13 -0500 Subject: [PATCH 0382/1436] tun: move the call to tun_set_real_num_queues Call tun_set_real_num_queues() after the increment of tun->numqueues since the former depends on it. Otherwise, the number of queues is not correctly accounted for, which results to warnings similar to: "vnet0 selects TX queue 11, but real number of TX queues is 11". Fixes: 0b7959b62573 ("tun: publish tfile after it's fully initialized") Reported-and-tested-by: George Amanakis Signed-off-by: George Amanakis Signed-off-by: Stanislav Fomichev Signed-off-by: David S. Miller --- drivers/net/tun.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 18656c4094b3..fed298c0cb39 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -866,8 +866,6 @@ static int tun_attach(struct tun_struct *tun, struct file *file, if (rtnl_dereference(tun->xdp_prog)) sock_set_flag(&tfile->sk, SOCK_XDP); - tun_set_real_num_queues(tun); - /* device is allowed to go away first, so no need to hold extra * refcnt. */ @@ -879,6 +877,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, rcu_assign_pointer(tfile->tun, tun); rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); tun->numqueues++; + tun_set_real_num_queues(tun); out: return err; } From 4522a70db7aa5e77526a4079628578599821b193 Mon Sep 17 00:00:00 2001 From: Jacob Wen Date: Wed, 30 Jan 2019 14:55:14 +0800 Subject: [PATCH 0383/1436] l2tp: fix reading optional fields of L2TPv3 Use pskb_may_pull() to make sure the optional fields are in skb linear parts, so we can safely read them later. It's easy to reproduce the issue with a net driver that supports paged skb data. Just create a L2TPv3 over IP tunnel and then generates some network traffic. Once reproduced, rx err in /sys/kernel/debug/l2tp/tunnels will increase. Changes in v4: 1. s/l2tp_v3_pull_opt/l2tp_v3_ensure_opt_in_linear/ 2. s/tunnel->version != L2TP_HDR_VER_2/tunnel->version == L2TP_HDR_VER_3/ 3. Add 'Fixes' in commit messages. Changes in v3: 1. To keep consistency, move the code out of l2tp_recv_common. 2. Use "net" instead of "net-next", since this is a bug fix. Changes in v2: 1. Only fix L2TPv3 to make code simple. To fix both L2TPv3 and L2TPv2, we'd better refactor l2tp_recv_common. It's complicated to do so. 2. Reloading pointers after pskb_may_pull Fixes: f7faffa3ff8e ("l2tp: Add L2TPv3 protocol support") Fixes: 0d76751fad77 ("l2tp: Add L2TPv3 IP encapsulation (no UDP) support") Fixes: a32e0eec7042 ("l2tp: introduce L2TPv3 IP encapsulation support for IPv6") Signed-off-by: Jacob Wen Acked-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 ++++ net/l2tp/l2tp_core.h | 20 ++++++++++++++++++++ net/l2tp/l2tp_ip.c | 3 +++ net/l2tp/l2tp_ip6.c | 3 +++ 4 files changed, 30 insertions(+) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 26f1d435696a..dd5ba0c11ab3 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -884,6 +884,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) goto error; } + if (tunnel->version == L2TP_HDR_VER_3 && + l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto error; + l2tp_recv_common(session, skb, ptr, optr, hdrflags, length); l2tp_session_dec_refcount(session); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 9c9afe94d389..b2ce90260c35 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -301,6 +301,26 @@ static inline bool l2tp_tunnel_uses_xfrm(const struct l2tp_tunnel *tunnel) } #endif +static inline int l2tp_v3_ensure_opt_in_linear(struct l2tp_session *session, struct sk_buff *skb, + unsigned char **ptr, unsigned char **optr) +{ + int opt_len = session->peer_cookie_len + l2tp_get_l2specific_len(session); + + if (opt_len > 0) { + int off = *ptr - *optr; + + if (!pskb_may_pull(skb, off + opt_len)) + return -1; + + if (skb->data != *optr) { + *optr = skb->data; + *ptr = skb->data + off; + } + } + + return 0; +} + #define l2tp_printk(ptr, type, func, fmt, ...) \ do { \ if (((ptr)->debug) & (type)) \ diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 35f6f86d4dcc..d4c60523c549 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -165,6 +165,9 @@ static int l2tp_ip_recv(struct sk_buff *skb) print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } + if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto discard_sess; + l2tp_recv_common(session, skb, ptr, optr, 0, skb->len); l2tp_session_dec_refcount(session); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 237f1a4a0b0c..0ae6899edac0 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -178,6 +178,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb) print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } + if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto discard_sess; + l2tp_recv_common(session, skb, ptr, optr, 0, skb->len); l2tp_session_dec_refcount(session); From d5256083f62e2720f75bb3c5a928a0afe47d6bc3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Jan 2019 12:49:48 +0100 Subject: [PATCH 0384/1436] ipvlan, l3mdev: fix broken l3s mode wrt local routes While implementing ipvlan l3 and l3s mode for kubernetes CNI plugin, I ran into the issue that while l3 mode is working fine, l3s mode does not have any connectivity to kube-apiserver and hence all pods end up in Error state as well. The ipvlan master device sits on top of a bond device and hostns traffic to kube-apiserver (also running in hostns) is DNATed from 10.152.183.1:443 to 139.178.29.207:37573 where the latter is the address of the bond0. While in l3 mode, a curl to https://10.152.183.1:443 or to https://139.178.29.207:37573 works fine from hostns, neither of them do in case of l3s. In the latter only a curl to https://127.0.0.1:37573 appeared to work where for local addresses of bond0 I saw kernel suddenly starting to emit ARP requests to query HW address of bond0 which remained unanswered and neighbor entries in INCOMPLETE state. These ARP requests only happen while in l3s. Debugging this further, I found the issue is that l3s mode is piggy- backing on l3 master device, and in this case local routes are using l3mdev_master_dev_rcu(dev) instead of net->loopback_dev as per commit f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev if relevant") and 5f02ce24c269 ("net: l3mdev: Allow the l3mdev to be a loopback"). I found that reverting them back into using the net->loopback_dev fixed ipvlan l3s connectivity and got everything working for the CNI. Now judging from 4fbae7d83c98 ("ipvlan: Introduce l3s mode") and the l3mdev paper in [0] the only sole reason why ipvlan l3s is relying on l3 master device is to get the l3mdev_ip_rcv() receive hook for setting the dst entry of the input route without adding its own ipvlan specific hacks into the receive path, however, any l3 domain semantics beyond just that are breaking l3s operation. Note that ipvlan also has the ability to dynamically switch its internal operation from l3 to l3s for all ports via ipvlan_set_port_mode() at runtime. In any case, l3 vs l3s soley distinguishes itself by 'de-confusing' netfilter through switching skb->dev to ipvlan slave device late in NF_INET_LOCAL_IN before handing the skb to L4. Minimal fix taken here is to add a IFF_L3MDEV_RX_HANDLER flag which, if set from ipvlan setup, gets us only the wanted l3mdev_l3_rcv() hook without any additional l3mdev semantics on top. This should also have minimal impact since dev->priv_flags is already hot in cache. With this set, l3s mode is working fine and I also get things like masquerading pod traffic on the ipvlan master properly working. [0] https://netdevconf.org/1.2/papers/ahern-what-is-l3mdev-paper.pdf Fixes: f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev if relevant") Fixes: 5f02ce24c269 ("net: l3mdev: Allow the l3mdev to be a loopback") Fixes: 4fbae7d83c98 ("ipvlan: Introduce l3s mode") Signed-off-by: Daniel Borkmann Cc: Mahesh Bandewar Cc: David Ahern Cc: Florian Westphal Cc: Martynas Pumputis Acked-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_main.c | 6 +++--- include/linux/netdevice.h | 8 ++++++++ include/net/l3mdev.h | 3 ++- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 19bdde60680c..7cdac77d0c68 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -100,12 +100,12 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval, err = ipvlan_register_nf_hook(read_pnet(&port->pnet)); if (!err) { mdev->l3mdev_ops = &ipvl_l3mdev_ops; - mdev->priv_flags |= IFF_L3MDEV_MASTER; + mdev->priv_flags |= IFF_L3MDEV_RX_HANDLER; } else goto fail; } else if (port->mode == IPVLAN_MODE_L3S) { /* Old mode was L3S */ - mdev->priv_flags &= ~IFF_L3MDEV_MASTER; + mdev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER; ipvlan_unregister_nf_hook(read_pnet(&port->pnet)); mdev->l3mdev_ops = NULL; } @@ -167,7 +167,7 @@ static void ipvlan_port_destroy(struct net_device *dev) struct sk_buff *skb; if (port->mode == IPVLAN_MODE_L3S) { - dev->priv_flags &= ~IFF_L3MDEV_MASTER; + dev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER; ipvlan_unregister_nf_hook(dev_net(dev)); dev->l3mdev_ops = NULL; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1377d085ef99..86dbb3e29139 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1483,6 +1483,7 @@ struct net_device_ops { * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook * @IFF_FAILOVER: device is a failover master device * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device + * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1514,6 +1515,7 @@ enum netdev_priv_flags { IFF_NO_RX_HANDLER = 1<<26, IFF_FAILOVER = 1<<27, IFF_FAILOVER_SLAVE = 1<<28, + IFF_L3MDEV_RX_HANDLER = 1<<29, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1544,6 +1546,7 @@ enum netdev_priv_flags { #define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER #define IFF_FAILOVER IFF_FAILOVER #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE +#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER /** * struct net_device - The DEVICE structure. @@ -4549,6 +4552,11 @@ static inline bool netif_supports_nofcs(struct net_device *dev) return dev->priv_flags & IFF_SUPP_NOFCS; } +static inline bool netif_has_l3_rx_handler(const struct net_device *dev) +{ + return dev->priv_flags & IFF_L3MDEV_RX_HANDLER; +} + static inline bool netif_is_l3_master(const struct net_device *dev) { return dev->priv_flags & IFF_L3MDEV_MASTER; diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 78fa0ac4613c..5175fd63cd82 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -153,7 +153,8 @@ struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) if (netif_is_l3_slave(skb->dev)) master = netdev_master_upper_dev_get_rcu(skb->dev); - else if (netif_is_l3_master(skb->dev)) + else if (netif_is_l3_master(skb->dev) || + netif_has_l3_rx_handler(skb->dev)) master = skb->dev; if (master && master->l3mdev_ops->l3mdev_l3_rcv) From 4ec5302fa906ec9d86597b236f62315bacdb9622 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 30 Jan 2019 15:54:19 +0100 Subject: [PATCH 0385/1436] net: stmmac: Fallback to Platform Data clock in Watchdog conversion If we don't have DT then stmmac_clk will not be available. Let's add a new Platform Data field so that we can specify the refclk by this mean. This way we can still use the coalesce command in PCI based setups. Signed-off-by: Jose Abreu Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 14 ++++++++++---- include/linux/stmmac.h | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index d1f61c25d82b..5d85742a2be0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -721,8 +721,11 @@ static u32 stmmac_usec2riwt(u32 usec, struct stmmac_priv *priv) { unsigned long clk = clk_get_rate(priv->plat->stmmac_clk); - if (!clk) - return 0; + if (!clk) { + clk = priv->plat->clk_ref_rate; + if (!clk) + return 0; + } return (usec * (clk / 1000000)) / 256; } @@ -731,8 +734,11 @@ static u32 stmmac_riwt2usec(u32 riwt, struct stmmac_priv *priv) { unsigned long clk = clk_get_rate(priv->plat->stmmac_clk); - if (!clk) - return 0; + if (!clk) { + clk = priv->plat->clk_ref_rate; + if (!clk) + return 0; + } return (riwt * 256) / (clk / 1000000); } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 7ddfc65586b0..4335bd771ce5 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -184,6 +184,7 @@ struct plat_stmmacenet_data { struct clk *pclk; struct clk *clk_ptp_ref; unsigned int clk_ptp_rate; + unsigned int clk_ref_rate; struct reset_control *stmmac_rst; struct stmmac_axi *axi; int has_gmac4; From c5acdbee22a1b200dde07effd26fd1f649e9ab8a Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 30 Jan 2019 15:54:20 +0100 Subject: [PATCH 0386/1436] net: stmmac: Send TSO packets always from Queue 0 The number of TSO enabled channels in HW can be different than the number of total channels. There is no way to determined, at runtime, the number of TSO capable channels and its safe to assume that if TSO is enabled then at least channel 0 will be TSO capable. Lets always send TSO packets from Queue 0. Signed-off-by: Jose Abreu Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5afba69981cf..6656008068de 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3025,8 +3025,17 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) /* Manage oversized TCP frames for GMAC4 device */ if (skb_is_gso(skb) && priv->tso) { - if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) + if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { + /* + * There is no way to determine the number of TSO + * capable Queues. Let's use always the Queue 0 + * because if TSO is supported then at least this + * one will be capable. + */ + skb_set_queue_mapping(skb, 0); + return stmmac_tso_xmit(skb, dev); + } } if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) { From e2cd682deb231ba6f80524bb84e57e7138261149 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 30 Jan 2019 15:54:21 +0100 Subject: [PATCH 0387/1436] net: stmmac: Disable EEE mode earlier in XMIT callback In stmmac xmit callback we use a different flow for TSO packets but TSO xmit callback is not disabling the EEE mode. Fix this by disabling earlier the EEE mode, i.e. before calling the TSO xmit callback. Signed-off-by: Jose Abreu Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6656008068de..685d20472358 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3023,6 +3023,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) tx_q = &priv->tx_queue[queue]; + if (priv->tx_path_in_lpi_mode) + stmmac_disable_eee_mode(priv); + /* Manage oversized TCP frames for GMAC4 device */ if (skb_is_gso(skb) && priv->tso) { if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { @@ -3050,9 +3053,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_BUSY; } - if (priv->tx_path_in_lpi_mode) - stmmac_disable_eee_mode(priv); - entry = tx_q->cur_tx; first_entry = entry; WARN_ON(tx_q->tx_skbuff[first_entry]); From a07ddce4df807e41a85245e769b6f6f14f0c6db0 Mon Sep 17 00:00:00 2001 From: Kyle Tso Date: Wed, 30 Jan 2019 11:13:53 +0800 Subject: [PATCH 0388/1436] usb: typec: tcpm: Correct the PPS out_volt calculation When Sink negotiates PPS, the voltage range of selected PPS APDO might not cover the previous voltage (out_volt). If the previous out_volt is lower than the new min_volt, the output voltage in RDO might be set to an invalid value. For instance, supposed that the previous voltage is 5V, and the new voltage range in the APDO is 7V-12V. Then the output voltage in the RDO should not be set to 5V which is lower than the possible min_volt 7V. Fix this by choosing the maximal value between the previous voltage and the new min_volt first. And ensure that this value will not exceed the new max_volt. The new out_volt will fall within the new voltage range while being the closest value compared to the previous out_volt. Signed-off-by: Kyle Tso Reviewed-by: Adam Thomson Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Fixes: c710d0bb76ff0 ("usb: typec: tcpm: Extend the matching rules on PPS APDO selection") Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 4bc29b586698..f1c39a3c7534 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -2297,7 +2297,8 @@ static unsigned int tcpm_pd_select_pps_apdo(struct tcpm_port *port) pdo_pps_apdo_max_voltage(snk)); port->pps_data.max_curr = min_pps_apdo_current(src, snk); port->pps_data.out_volt = min(port->pps_data.max_volt, - port->pps_data.out_volt); + max(port->pps_data.min_volt, + port->pps_data.out_volt)); port->pps_data.op_curr = min(port->pps_data.max_curr, port->pps_data.op_curr); } From 579b9239c1f38665b21e8d0e6ee83ecc96dbd6bb Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 23 Jan 2019 11:51:38 +0530 Subject: [PATCH 0389/1436] powerpc/radix: Fix kernel crash with mremap() With support for split pmd lock, we use pmd page pmd_huge_pte pointer to store the deposited page table. In those config when we move page tables we need to make sure we move the deposited page table to the correct pmd page. Otherwise this can result in crash when we withdraw of deposited page table because we can find the pmd_huge_pte NULL. eg: __split_huge_pmd+0x1070/0x1940 __split_huge_pmd+0xe34/0x1940 (unreliable) vma_adjust_trans_huge+0x110/0x1c0 __vma_adjust+0x2b4/0x9b0 __split_vma+0x1b8/0x280 __do_munmap+0x13c/0x550 sys_mremap+0x220/0x7e0 system_call+0x5c/0x70 Fixes: 675d995297d4 ("powerpc/book3s64: Enable split pmd ptlock.") Cc: stable@vger.kernel.org # v4.18+ Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 22 +++++++------------- arch/powerpc/mm/pgtable-book3s64.c | 22 ++++++++++++++++++++ 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 2e6ada28da64..c9bfe526ca9d 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1258,21 +1258,13 @@ extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, #define pmd_move_must_withdraw pmd_move_must_withdraw struct spinlock; -static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, - struct spinlock *old_pmd_ptl, - struct vm_area_struct *vma) -{ - if (radix_enabled()) - return false; - /* - * Archs like ppc64 use pgtable to store per pmd - * specific information. So when we switch the pmd, - * we should also withdraw and deposit the pgtable - */ - return true; -} - - +extern int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, + struct spinlock *old_pmd_ptl, + struct vm_area_struct *vma); +/* + * Hash translation mode use the deposited table to store hash pte + * slot information. + */ #define arch_needs_pgtable_deposit arch_needs_pgtable_deposit static inline bool arch_needs_pgtable_deposit(void) { diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index f3c31f5e1026..ecd31569a120 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -400,3 +400,25 @@ void arch_report_meminfo(struct seq_file *m) atomic_long_read(&direct_pages_count[MMU_PAGE_1G]) << 20); } #endif /* CONFIG_PROC_FS */ + +/* + * For hash translation mode, we use the deposited table to store hash slot + * information and they are stored at PTRS_PER_PMD offset from related pmd + * location. Hence a pmd move requires deposit and withdraw. + * + * For radix translation with split pmd ptl, we store the deposited table in the + * pmd page. Hence if we have different pmd page we need to withdraw during pmd + * move. + * + * With hash we use deposited table always irrespective of anon or not. + * With radix we use deposited table only for anonymous mapping. + */ +int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, + struct spinlock *old_pmd_ptl, + struct vm_area_struct *vma) +{ + if (radix_enabled()) + return (new_pmd_ptl != old_pmd_ptl) && vma_is_anonymous(vma); + + return true; +} From 13c15e07eedf26092054c8c71f2f47edb8388310 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 24 Jan 2019 15:20:07 +0100 Subject: [PATCH 0390/1436] mtd: spinand: Handle the case where PROGRAM LOAD does not reset the cache Looks like PROGRAM LOAD (AKA write cache) does not necessarily reset the cache content to 0xFF (depends on vendor implementation), so we must fill the page cache entirely even if we only want to program the data portion of the page, otherwise we might corrupt the BBM or user data previously programmed in OOB area. Fixes: 7529df465248 ("mtd: nand: Add core infrastructure to support SPI NANDs") Reported-by: Stefan Roese Cc: Signed-off-by: Boris Brezillon Tested-by: Stefan Roese Reviewed-by: Stefan Roese Acked-by: Miquel Raynal --- drivers/mtd/nand/spi/core.c | 42 ++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 479c2f2cf17f..8bf37da19663 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -304,24 +304,30 @@ static int spinand_write_to_cache_op(struct spinand_device *spinand, struct nand_device *nand = spinand_to_nand(spinand); struct mtd_info *mtd = nanddev_to_mtd(nand); struct nand_page_io_req adjreq = *req; - unsigned int nbytes = 0; - void *buf = NULL; + void *buf = spinand->databuf; + unsigned int nbytes; u16 column = 0; int ret; - memset(spinand->databuf, 0xff, - nanddev_page_size(nand) + - nanddev_per_page_oobsize(nand)); + /* + * Looks like PROGRAM LOAD (AKA write cache) does not necessarily reset + * the cache content to 0xFF (depends on vendor implementation), so we + * must fill the page cache entirely even if we only want to program + * the data portion of the page, otherwise we might corrupt the BBM or + * user data previously programmed in OOB area. + */ + nbytes = nanddev_page_size(nand) + nanddev_per_page_oobsize(nand); + memset(spinand->databuf, 0xff, nbytes); + adjreq.dataoffs = 0; + adjreq.datalen = nanddev_page_size(nand); + adjreq.databuf.out = spinand->databuf; + adjreq.ooblen = nanddev_per_page_oobsize(nand); + adjreq.ooboffs = 0; + adjreq.oobbuf.out = spinand->oobbuf; - if (req->datalen) { + if (req->datalen) memcpy(spinand->databuf + req->dataoffs, req->databuf.out, req->datalen); - adjreq.dataoffs = 0; - adjreq.datalen = nanddev_page_size(nand); - adjreq.databuf.out = spinand->databuf; - nbytes = adjreq.datalen; - buf = spinand->databuf; - } if (req->ooblen) { if (req->mode == MTD_OPS_AUTO_OOB) @@ -332,14 +338,6 @@ static int spinand_write_to_cache_op(struct spinand_device *spinand, else memcpy(spinand->oobbuf + req->ooboffs, req->oobbuf.out, req->ooblen); - - adjreq.ooblen = nanddev_per_page_oobsize(nand); - adjreq.ooboffs = 0; - nbytes += nanddev_per_page_oobsize(nand); - if (!buf) { - buf = spinand->oobbuf; - column = nanddev_page_size(nand); - } } spinand_cache_op_adjust_colum(spinand, &adjreq, &column); @@ -370,8 +368,8 @@ static int spinand_write_to_cache_op(struct spinand_device *spinand, /* * We need to use the RANDOM LOAD CACHE operation if there's - * more than one iteration, because the LOAD operation resets - * the cache to 0xff. + * more than one iteration, because the LOAD operation might + * reset the cache to 0xff. */ if (nbytes) { column = op.addr.val; From c3c7dbf4887ab3ed9d611cd1f6e16937f8700743 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 24 Jan 2019 15:46:54 +0100 Subject: [PATCH 0391/1436] mtd: spinand: Fix the error/cleanup path in spinand_init() The manufacturer specific initialization has already been done when block unlocking takes place, and if anything goes wrong during this procedure we should call spinand_manufacturer_cleanup(). Fixes: 7529df465248 ("mtd: nand: Add core infrastructure to support SPI NANDs") Cc: Signed-off-by: Boris Brezillon Acked-by: Miquel Raynal --- drivers/mtd/nand/spi/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 8bf37da19663..fa87ae28cdfe 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -1016,11 +1016,11 @@ static int spinand_init(struct spinand_device *spinand) for (i = 0; i < nand->memorg.ntargets; i++) { ret = spinand_select_target(spinand, i); if (ret) - goto err_free_bufs; + goto err_manuf_cleanup; ret = spinand_lock_block(spinand, BL_ALL_UNLOCKED); if (ret) - goto err_free_bufs; + goto err_manuf_cleanup; } ret = nanddev_init(nand, &spinand_ops, THIS_MODULE); From 455e7b387b441ddd1da1df35251c179fe7805b53 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 27 Jan 2019 18:21:42 -0800 Subject: [PATCH 0392/1436] mtd: rawnand: fix kernel-doc warnings Fix kernel-doc warnings in drivers/mtd/nand/raw: ../drivers/mtd/nand/raw/nand_base.c:420: warning: Function parameter or member 'chip' not described in 'nand_fill_oob' ../drivers/mtd/nand/raw/nand_bbt.c:173: warning: Function parameter or member 'this' not described in 'read_bbt' ../drivers/mtd/nand/raw/nand_bbt.c:173: warning: Excess function parameter 'chip' description in 'read_bbt' Fixes: 0813621ba898a ("mtd: rawnand: Stop passing mtd_info objects to internal functions") Signed-off-by: Randy Dunlap Cc: Boris Brezillon Cc: Miquel Raynal Cc: Richard Weinberger Cc: linux-mtd@lists.infradead.org Acked-by: Miquel Raynal Signed-off-by: Boris Brezillon --- drivers/mtd/nand/raw/nand_base.c | 1 + drivers/mtd/nand/raw/nand_bbt.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index cca4b24d2ffa..839494ac457c 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -410,6 +410,7 @@ static int nand_check_wp(struct nand_chip *chip) /** * nand_fill_oob - [INTERN] Transfer client buffer to oob + * @chip: NAND chip object * @oob: oob data buffer * @len: oob data write length * @ops: oob ops structure diff --git a/drivers/mtd/nand/raw/nand_bbt.c b/drivers/mtd/nand/raw/nand_bbt.c index 1b722fe9213c..19a2b563acdf 100644 --- a/drivers/mtd/nand/raw/nand_bbt.c +++ b/drivers/mtd/nand/raw/nand_bbt.c @@ -158,7 +158,7 @@ static u32 add_marker_len(struct nand_bbt_descr *td) /** * read_bbt - [GENERIC] Read the bad block table starting from page - * @chip: NAND chip object + * @this: NAND chip object * @buf: temporary buffer * @page: the starting page * @num: the number of bbt descriptors to read From 89e3a5682edaa4e5bb334719afb180256ac7bf78 Mon Sep 17 00:00:00 2001 From: Jeremy Soller Date: Wed, 30 Jan 2019 16:12:31 -0700 Subject: [PATCH 0393/1436] ALSA: hda/realtek - Headset microphone support for System76 darp5 On the System76 Darter Pro (darp5), there is a headset microphone input attached to 0x1a that does not have a jack detect. In order to get it working, the pin configuration needs to be set correctly, and the ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC fixup needs to be applied. This is similar to the MIC_NO_PRESENCE fixups for some Dell laptops, except we have a separate microphone jack that is already configured correctly. Signed-off-by: Jeremy Soller Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4139aced63f8..f2523fab5fed 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5634,6 +5634,7 @@ enum { ALC294_FIXUP_ASUS_HEADSET_MIC, ALC294_FIXUP_ASUS_SPK, ALC225_FIXUP_HEADSET_JACK, + ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE, }; static const struct hda_fixup alc269_fixups[] = { @@ -6580,6 +6581,15 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_jack, }, + [ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1a, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -6758,6 +6768,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1558, 0x1325, "System76 Darter Pro (darp5)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE), From 56841070ccc87b463ac037d2d1f2beb8e5e35f0c Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Thu, 31 Jan 2019 11:19:43 +0000 Subject: [PATCH 0394/1436] irqchip/gic-v3-its: Fix ITT_entry_size accessor According to ARM IHI 0069C (ID070116), we should use GITS_TYPER's bits [7:4] as ITT_entry_size instead of [8:4]. Although this is pretty annoying, it only results in a potential over-allocation of memory, and nothing bad happens. Fixes: 3dfa576bfb45 ("irqchip/gic-v3-its: Add probing for VLPI properties") Signed-off-by: Zenghui Yu [maz: massaged subject and commit message] Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 071b4cbdf010..c848a7cc502e 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -319,7 +319,7 @@ #define GITS_TYPER_PLPIS (1UL << 0) #define GITS_TYPER_VLPIS (1UL << 1) #define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 -#define GITS_TYPER_ITT_ENTRY_SIZE(r) ((((r) >> GITS_TYPER_ITT_ENTRY_SIZE_SHIFT) & 0x1f) + 1) +#define GITS_TYPER_ITT_ENTRY_SIZE(r) ((((r) >> GITS_TYPER_ITT_ENTRY_SIZE_SHIFT) & 0xf) + 1) #define GITS_TYPER_IDBITS_SHIFT 8 #define GITS_TYPER_DEVBITS_SHIFT 13 #define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) From 2c1cf00eeacb784781cf1c9896b8af001246d339 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 31 Jan 2019 13:57:58 +0100 Subject: [PATCH 0395/1436] relay: check return of create_buf_file() properly If create_buf_file() returns an error, don't try to reference it later as a valid dentry pointer. This problem was exposed when debugfs started to return errors instead of just NULL for some calls when they do not succeed properly. Also, the check for WARN_ON(dentry) was just wrong :) Reported-by: Kees Cook Reported-and-tested-by: syzbot+16c3a70e1e9b29346c43@syzkaller.appspotmail.com Reported-by: Tetsuo Handa Cc: Andrew Morton Cc: David Rientjes Fixes: ff9fb72bc077 ("debugfs: return error values, not NULL") Signed-off-by: Greg Kroah-Hartman --- kernel/relay.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/relay.c b/kernel/relay.c index 04f248644e06..9e0f52375487 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -428,6 +428,8 @@ static struct dentry *relay_create_buf_file(struct rchan *chan, dentry = chan->cb->create_buf_file(tmpname, chan->parent, S_IRUSR, buf, &chan->is_global); + if (IS_ERR(dentry)) + dentry = NULL; kfree(tmpname); @@ -461,7 +463,7 @@ static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu) dentry = chan->cb->create_buf_file(NULL, NULL, S_IRUSR, buf, &chan->is_global); - if (WARN_ON(dentry)) + if (IS_ERR_OR_NULL(dentry)) goto free_buf; } From d339adc12a4f885b572c5412e4869af8939db854 Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Thu, 31 Jan 2019 13:46:07 +0100 Subject: [PATCH 0396/1436] CIFS: fix use-after-free of the lease keys The request buffers are freed right before copying the pointers. Use the func args instead which are identical and still valid. Simple reproducer (requires KASAN enabled) on a cifs mount: echo foo > foo ; tail -f foo & rm foo Cc: # 4.20 Fixes: 179e44d49c2f ("smb3: add tracepoint for sending lease break responses to server") Signed-off-by: Aurelien Aptel Signed-off-by: Steve French Reviewed-by: Paulo Alcantara --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index ef52d6642431..77b3aaa39b35 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -4441,8 +4441,8 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, rc = cifs_send_recv(xid, ses, &rqst, &resp_buf_type, flags, &rsp_iov); cifs_small_buf_release(req); - please_key_low = (__u64 *)req->LeaseKey; - please_key_high = (__u64 *)(req->LeaseKey+8); + please_key_low = (__u64 *)lease_key; + please_key_high = (__u64 *)(lease_key+8); if (rc) { cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE); trace_smb3_lease_err(le32_to_cpu(lease_state), tcon->tid, From b9b9378b49030d1aeca2387660fcd1ac1f306cad Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 29 Jan 2019 17:27:33 -0600 Subject: [PATCH 0397/1436] cifs: update internal module version number To 2.17 Signed-off-by: Steve French --- fs/cifs/cifsfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index d1f9c2f3f575..7652551a1fc4 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -150,5 +150,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.16" +#define CIFS_VERSION "2.17" #endif /* _CIFSFS_H */ From 36991ca68db9dd43bac7f3519f080ee3939263ef Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 23 Jan 2019 14:48:54 +0100 Subject: [PATCH 0398/1436] blk-mq: protect debugfs_create_files() from failures If debugfs were to return a non-NULL error for a debugfs call, using that pointer later in debugfs_create_files() would crash. Fix that by properly checking the pointer before referencing it. Reported-by: Michal Hocko Reported-and-tested-by: syzbot+b382ba6a802a3d242790@syzkaller.appspotmail.com Reported-by: Tetsuo Handa Signed-off-by: Greg Kroah-Hartman --- block/blk-mq-debugfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 90d68760af08..777638f597aa 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -838,6 +838,9 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = { static bool debugfs_create_files(struct dentry *parent, void *data, const struct blk_mq_debugfs_attr *attr) { + if (IS_ERR_OR_NULL(parent)) + return false; + d_inode(parent)->i_private = data; for (; attr->name; attr++) { From 3864be551405b582ae38fbcdfdbb6e49052f5f6d Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 30 Jan 2019 14:38:11 +0200 Subject: [PATCH 0399/1436] iwlwifi: mvm: fix merge damage in iwl_mvm_rx_mpdu_mq() A call to iwl_mvm_add_rtap_sniffer_config() was missing due to a merge damage when I submitted the patch mentioned below. And this causes the following compilation warning: drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c:195:13: warning: 'iwl_mvm_add_rtap_sniffer_config' defined but not used [-Wunused-function] static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fix it by adding the if block that calls this function. Fixes: 9bf13bee2d74 ("iwlwifi: mvm: include configured sniffer AID in radiotap") Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 79860f6ac7ca..2c56f73d688e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -1476,6 +1476,9 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, rx_status->ampdu_reference = mvm->ampdu_ref; } + if (unlikely(mvm->monitor_on)) + iwl_mvm_add_rtap_sniffer_config(mvm, skb); + rcu_read_lock(); if (desc->status & cpu_to_le16(IWL_RX_MPDU_STATUS_SRC_STA_FOUND)) { From 32a66374487bbd78115747f0084cea5e8fca4c80 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Sat, 26 Jan 2019 10:38:29 -0600 Subject: [PATCH 0400/1436] fpga: stratix10-soc: fix wrong of_node_put() in init function After finding a "firmware" dt node stratix10 tries to match it's compatible string with it. To do so it's calling of_find_matching_node() which already takes care of decreasing the refcount on the "firmware" node. We are then incorrectly decreasing the refcount on that node again. This patch removes the unwarranted call to of_node_put(). Fixes: e7eef1d7633a ("fpga: add intel stratix10 soc fpga manager driver") Signed-off-by: Nicolas Saenz Julienne Acked-by: Alan Tull Acked-by: Moritz Fischer [atull: remove unnecessary braces] Signed-off-by: Greg Kroah-Hartman --- drivers/fpga/stratix10-soc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/fpga/stratix10-soc.c b/drivers/fpga/stratix10-soc.c index a1a09e04fab8..13851b3d1c56 100644 --- a/drivers/fpga/stratix10-soc.c +++ b/drivers/fpga/stratix10-soc.c @@ -508,14 +508,11 @@ static int __init s10_init(void) return -ENODEV; np = of_find_matching_node(fw_np, s10_of_match); - if (!np) { - of_node_put(fw_np); + if (!np) return -ENODEV; - } of_node_put(np); ret = of_platform_populate(fw_np, s10_of_match, NULL, NULL); - of_node_put(fw_np); if (ret) return ret; From 9a6d5488002fdca7134a0e59b0ae252f61042810 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 30 Jan 2019 08:41:40 -0700 Subject: [PATCH 0401/1436] ide: ensure atapi sense request aren't preempted There's an issue with how sense requests are handled in IDE. If ide-cd encounters an error, it queues a sense request. With how IDE request handling is done, this is the next request we need to handle. But it's impossible to guarantee this, as another request could come in between the sense being queued, and ->queue_rq() being run and handling it. If that request ALSO fails, then we attempt to doubly queue the single sense request we have. Since we only support one active request at the time, defer request processing when a sense request is queued. Fixes: 600335205b8d "ide: convert to blk-mq" Reported-by: He Zhe Tested-by: He Zhe Signed-off-by: Jens Axboe --- drivers/ide/ide-atapi.c | 9 +++++- drivers/ide/ide-io.c | 61 +++++++++++++++++++++-------------------- drivers/ide/ide-park.c | 2 ++ drivers/ide/ide-probe.c | 23 +++++++++++----- include/linux/ide.h | 2 ++ 5 files changed, 59 insertions(+), 38 deletions(-) diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index da58020a144e..33a28cde126c 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -235,21 +235,28 @@ EXPORT_SYMBOL_GPL(ide_prep_sense); int ide_queue_sense_rq(ide_drive_t *drive, void *special) { - struct request *sense_rq = drive->sense_rq; + ide_hwif_t *hwif = drive->hwif; + struct request *sense_rq; + unsigned long flags; + + spin_lock_irqsave(&hwif->lock, flags); /* deferred failure from ide_prep_sense() */ if (!drive->sense_rq_armed) { printk(KERN_WARNING PFX "%s: error queuing a sense request\n", drive->name); + spin_unlock_irqrestore(&hwif->lock, flags); return -ENOMEM; } + sense_rq = drive->sense_rq; ide_req(sense_rq)->special = special; drive->sense_rq_armed = false; drive->hwif->rq = NULL; ide_insert_request_head(drive, sense_rq); + spin_unlock_irqrestore(&hwif->lock, flags); return 0; } EXPORT_SYMBOL_GPL(ide_queue_sense_rq); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 8445b484ae69..b137f27a34d5 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -68,8 +68,10 @@ int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error, } if (!blk_update_request(rq, error, nr_bytes)) { - if (rq == drive->sense_rq) + if (rq == drive->sense_rq) { drive->sense_rq = NULL; + drive->sense_rq_active = false; + } __blk_mq_end_request(rq, error); return 0; @@ -451,16 +453,11 @@ void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq) blk_mq_delay_run_hw_queue(q->queue_hw_ctx[0], 3); } -/* - * Issue a new request to a device. - */ -blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx, - const struct blk_mq_queue_data *bd) +blk_status_t ide_issue_rq(ide_drive_t *drive, struct request *rq, + bool local_requeue) { - ide_drive_t *drive = hctx->queue->queuedata; - ide_hwif_t *hwif = drive->hwif; + ide_hwif_t *hwif = drive->hwif; struct ide_host *host = hwif->host; - struct request *rq = bd->rq; ide_startstop_t startstop; if (!blk_rq_is_passthrough(rq) && !(rq->rq_flags & RQF_DONTPREP)) { @@ -474,8 +471,6 @@ blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx, if (ide_lock_host(host, hwif)) return BLK_STS_DEV_RESOURCE; - blk_mq_start_request(rq); - spin_lock_irq(&hwif->lock); if (!ide_lock_port(hwif)) { @@ -510,18 +505,6 @@ repeat: hwif->cur_dev = drive; drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED); - /* - * we know that the queue isn't empty, but this can happen - * if ->prep_rq() decides to kill a request - */ - if (!rq) { - rq = bd->rq; - if (!rq) { - ide_unlock_port(hwif); - goto out; - } - } - /* * Sanity: don't accept a request that isn't a PM request * if we are currently power managed. This is very important as @@ -560,9 +543,12 @@ repeat: } } else { plug_device: + if (local_requeue) + list_add(&rq->queuelist, &drive->rq_list); spin_unlock_irq(&hwif->lock); ide_unlock_host(host); - ide_requeue_and_plug(drive, rq); + if (!local_requeue) + ide_requeue_and_plug(drive, rq); return BLK_STS_OK; } @@ -573,6 +559,26 @@ out: return BLK_STS_OK; } +/* + * Issue a new request to a device. + */ +blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + ide_drive_t *drive = hctx->queue->queuedata; + ide_hwif_t *hwif = drive->hwif; + + spin_lock_irq(&hwif->lock); + if (drive->sense_rq_active) { + spin_unlock_irq(&hwif->lock); + return BLK_STS_DEV_RESOURCE; + } + spin_unlock_irq(&hwif->lock); + + blk_mq_start_request(bd->rq); + return ide_issue_rq(drive, bd->rq, false); +} + static int drive_is_ready(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; @@ -893,13 +899,8 @@ EXPORT_SYMBOL_GPL(ide_pad_transfer); void ide_insert_request_head(ide_drive_t *drive, struct request *rq) { - ide_hwif_t *hwif = drive->hwif; - unsigned long flags; - - spin_lock_irqsave(&hwif->lock, flags); + drive->sense_rq_active = true; list_add_tail(&rq->queuelist, &drive->rq_list); - spin_unlock_irqrestore(&hwif->lock, flags); - kblockd_schedule_work(&drive->rq_work); } EXPORT_SYMBOL_GPL(ide_insert_request_head); diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index 102aa3bc3e7f..8af7af6001eb 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -54,7 +54,9 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) scsi_req(rq)->cmd[0] = REQ_UNPARK_HEADS; scsi_req(rq)->cmd_len = 1; ide_req(rq)->type = ATA_PRIV_MISC; + spin_lock_irq(&hwif->lock); ide_insert_request_head(drive, rq); + spin_unlock_irq(&hwif->lock); out: return; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 63627be0811a..5aeaca24a28f 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1159,18 +1159,27 @@ static void drive_rq_insert_work(struct work_struct *work) ide_drive_t *drive = container_of(work, ide_drive_t, rq_work); ide_hwif_t *hwif = drive->hwif; struct request *rq; + blk_status_t ret; LIST_HEAD(list); + blk_mq_quiesce_queue(drive->queue); + + ret = BLK_STS_OK; spin_lock_irq(&hwif->lock); - if (!list_empty(&drive->rq_list)) - list_splice_init(&drive->rq_list, &list); + while (!list_empty(&drive->rq_list)) { + rq = list_first_entry(&drive->rq_list, struct request, queuelist); + list_del_init(&rq->queuelist); + + spin_unlock_irq(&hwif->lock); + ret = ide_issue_rq(drive, rq, true); + spin_lock_irq(&hwif->lock); + } spin_unlock_irq(&hwif->lock); - while (!list_empty(&list)) { - rq = list_first_entry(&list, struct request, queuelist); - list_del_init(&rq->queuelist); - blk_execute_rq_nowait(drive->queue, rq->rq_disk, rq, true, NULL); - } + blk_mq_unquiesce_queue(drive->queue); + + if (ret != BLK_STS_OK) + kblockd_schedule_work(&drive->rq_work); } static const u8 ide_hwif_to_major[] = diff --git a/include/linux/ide.h b/include/linux/ide.h index e7d29ae633cd..971cf76a78a0 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -615,6 +615,7 @@ struct ide_drive_s { /* current sense rq and buffer */ bool sense_rq_armed; + bool sense_rq_active; struct request *sense_rq; struct request_sense sense_data; @@ -1219,6 +1220,7 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); extern void ide_timer_expiry(struct timer_list *t); extern irqreturn_t ide_intr(int irq, void *dev_id); extern blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); +extern blk_status_t ide_issue_rq(ide_drive_t *, struct request *, bool); extern void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq); void ide_init_disk(struct gendisk *, ide_drive_t *); From 912139cfbfa6a2bc1da052314d2c29338dae1f6a Mon Sep 17 00:00:00 2001 From: Thomas Lendacky Date: Thu, 31 Jan 2019 14:33:06 +0000 Subject: [PATCH 0402/1436] x86/microcode/amd: Don't falsely trick the late loading mechanism The load_microcode_amd() function searches for microcode patches and attempts to apply a microcode patch if it is of different level than the currently installed level. While the processor won't actually load a level that is less than what is already installed, the logic wrongly returns UCODE_NEW thus signaling to its caller reload_store() that a late loading should be attempted. If the file-system contains an older microcode revision than what is currently running, such a late microcode reload can result in these misleading messages: x86/CPU: CPU features have changed after loading microcode, but might not take effect. x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update. These messages were issued on a system where SME/SEV are not enabled by the BIOS (MSR C001_0010[23] = 0b) because during boot, early_detect_mem_encrypt() is called and cleared the SME and SEV features in this case. However, after the wrong late load attempt, get_cpu_cap() is called and reloads the SME and SEV feature bits, resulting in the messages. Update the microcode level check to not attempt microcode loading if the current level is greater than(!) and not only equal to the current patch level. [ bp: massage commit message. ] Fixes: 2613f36ed965 ("x86/microcode: Attempt late loading only when new microcode is present") Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/154894518427.9406.8246222496874202773.stgit@tlendack-t1.amdoffice.net --- arch/x86/kernel/cpu/microcode/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 51adde0a0f1a..e1f3ba19ba54 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -855,7 +855,7 @@ load_microcode_amd(bool save, u8 family, const u8 *data, size_t size) if (!p) { return ret; } else { - if (boot_cpu_data.microcode == p->patch_id) + if (boot_cpu_data.microcode >= p->patch_id) return ret; ret = UCODE_NEW; From 34aaaac815d166daef361f49529f4c6b77da49f1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 14 Jan 2019 15:48:34 +0000 Subject: [PATCH 0403/1436] ieee802154: mcr20a: fix indentation, remove tabs The are a couple of statments that are one level too deep, fix this by removing tabs. Signed-off-by: Colin Ian King Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/mcr20a.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index 44de81e5f140..c589f5ae75bb 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -905,9 +905,9 @@ mcr20a_irq_clean_complete(void *context) } break; case (DAR_IRQSTS1_RXIRQ | DAR_IRQSTS1_SEQIRQ): - /* rx is starting */ - dev_dbg(printdev(lp), "RX is starting\n"); - mcr20a_handle_rx(lp); + /* rx is starting */ + dev_dbg(printdev(lp), "RX is starting\n"); + mcr20a_handle_rx(lp); break; case (DAR_IRQSTS1_RXIRQ | DAR_IRQSTS1_TXIRQ | DAR_IRQSTS1_SEQIRQ): if (lp->is_tx) { From 05672636b339c557feb6a98b2f2034be790aa4fb Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 27 Jan 2019 12:15:49 +0100 Subject: [PATCH 0404/1436] mt76x0: eeprom: fix chan_vs_power map in mt76x0_get_power_info Report correct eeprom per channel power value. Fix chan_vs_power map in mt76x0_get_power_info routine Fixes: f2a2e819d672 ("mt76x0: remove eeprom dependency from mt76x0_get_power_info") Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo --- .../wireless/mediatek/mt76/mt76x0/eeprom.c | 40 +++++++++---------- .../wireless/mediatek/mt76/mt76x0/eeprom.h | 2 +- .../net/wireless/mediatek/mt76/mt76x0/phy.c | 10 ++--- 3 files changed, 25 insertions(+), 27 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c index 497e762978cc..b2cabce1d74d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c @@ -212,24 +212,24 @@ void mt76x0_get_tx_power_per_rate(struct mt76x02_dev *dev) mt76x02_add_rate_power_offset(t, delta); } -void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info) +void mt76x0_get_power_info(struct mt76x02_dev *dev, s8 *tp) { struct mt76x0_chan_map { u8 chan; u8 offset; } chan_map[] = { - { 2, 0 }, { 4, 1 }, { 6, 2 }, { 8, 3 }, - { 10, 4 }, { 12, 5 }, { 14, 6 }, { 38, 0 }, - { 44, 1 }, { 48, 2 }, { 54, 3 }, { 60, 4 }, - { 64, 5 }, { 102, 6 }, { 108, 7 }, { 112, 8 }, - { 118, 9 }, { 124, 10 }, { 128, 11 }, { 134, 12 }, - { 140, 13 }, { 151, 14 }, { 157, 15 }, { 161, 16 }, - { 167, 17 }, { 171, 18 }, { 173, 19 }, + { 2, 0 }, { 4, 2 }, { 6, 4 }, { 8, 6 }, + { 10, 8 }, { 12, 10 }, { 14, 12 }, { 38, 0 }, + { 44, 2 }, { 48, 4 }, { 54, 6 }, { 60, 8 }, + { 64, 10 }, { 102, 12 }, { 108, 14 }, { 112, 16 }, + { 118, 18 }, { 124, 20 }, { 128, 22 }, { 134, 24 }, + { 140, 26 }, { 151, 28 }, { 157, 30 }, { 161, 32 }, + { 167, 34 }, { 171, 36 }, { 175, 38 }, }; struct ieee80211_channel *chan = dev->mt76.chandef.chan; u8 offset, addr; + int i, idx = 0; u16 data; - int i; if (mt76x0_tssi_enabled(dev)) { s8 target_power; @@ -239,14 +239,14 @@ void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info) else data = mt76x02_eeprom_get(dev, MT_EE_2G_TARGET_POWER); target_power = (data & 0xff) - dev->mt76.rate_power.ofdm[7]; - info[0] = target_power + mt76x0_get_delta(dev); - info[1] = 0; + *tp = target_power + mt76x0_get_delta(dev); return; } for (i = 0; i < ARRAY_SIZE(chan_map); i++) { - if (chan_map[i].chan <= chan->hw_value) { + if (chan->hw_value <= chan_map[i].chan) { + idx = (chan->hw_value == chan_map[i].chan); offset = chan_map[i].offset; break; } @@ -258,13 +258,16 @@ void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info) addr = MT_EE_TX_POWER_DELTA_BW80 + offset; } else { switch (chan->hw_value) { + case 42: + offset = 2; + break; case 58: offset = 8; break; case 106: offset = 14; break; - case 112: + case 122: offset = 20; break; case 155: @@ -277,14 +280,9 @@ void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info) } data = mt76x02_eeprom_get(dev, addr); - - info[0] = data; - if (!info[0] || info[0] > 0x3f) - info[0] = 5; - - info[1] = data >> 8; - if (!info[1] || info[1] > 0x3f) - info[1] = 5; + *tp = data >> (8 * idx); + if (*tp < 0 || *tp > 0x3f) + *tp = 5; } static int mt76x0_check_eeprom(struct mt76x02_dev *dev) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.h b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.h index ee9ade9f3c8b..42b259f90b6d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.h @@ -26,7 +26,7 @@ struct mt76x02_dev; int mt76x0_eeprom_init(struct mt76x02_dev *dev); void mt76x0_read_rx_gain(struct mt76x02_dev *dev); void mt76x0_get_tx_power_per_rate(struct mt76x02_dev *dev); -void mt76x0_get_power_info(struct mt76x02_dev *dev, u8 *info); +void mt76x0_get_power_info(struct mt76x02_dev *dev, s8 *tp); static inline s8 s6_to_s8(u32 val) { diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c b/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c index 1eb1a802ed20..b6166703ad76 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c @@ -845,17 +845,17 @@ static void mt76x0_phy_tssi_calibrate(struct mt76x02_dev *dev) void mt76x0_phy_set_txpower(struct mt76x02_dev *dev) { struct mt76_rate_power *t = &dev->mt76.rate_power; - u8 info[2]; + s8 info; mt76x0_get_tx_power_per_rate(dev); - mt76x0_get_power_info(dev, info); + mt76x0_get_power_info(dev, &info); - mt76x02_add_rate_power_offset(t, info[0]); + mt76x02_add_rate_power_offset(t, info); mt76x02_limit_rate_power(t, dev->mt76.txpower_conf); dev->mt76.txpower_cur = mt76x02_get_max_rate_power(t); - mt76x02_add_rate_power_offset(t, -info[0]); + mt76x02_add_rate_power_offset(t, -info); - mt76x02_phy_set_txpower(dev, info[0], info[1]); + mt76x02_phy_set_txpower(dev, info, info); } void mt76x0_phy_calibrate(struct mt76x02_dev *dev, bool power_on) From 2c2008a63e482654ab137c84d3c61c03b75e7df6 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 29 Jan 2019 15:12:01 -0800 Subject: [PATCH 0405/1436] ath10k: correct bus type for WCN3990 WCN3990 is SNOC, not PCI. This prevents probing WCN3990. Fixes: 367c899f622c ("ath10k: add bus type check in ath10k_init_hw_params") Signed-off-by: Brian Norris Reviewed-by: Bjorn Andersson Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 399b501f3c3c..e8891f5fc83a 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -548,7 +548,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { { .id = WCN3990_HW_1_0_DEV_VERSION, .dev_id = 0, - .bus = ATH10K_BUS_PCI, + .bus = ATH10K_BUS_SNOC, .name = "wcn3990 hw1.0", .continuous_frag_desc = true, .tx_chain_mask = 0x7, From 91c524708de6207f59dd3512518d8a1c7b434ee3 Mon Sep 17 00:00:00 2001 From: Jacob Wen Date: Thu, 31 Jan 2019 15:18:56 +0800 Subject: [PATCH 0406/1436] l2tp: copy 4 more bytes to linear part if necessary The size of L2TPv2 header with all optional fields is 14 bytes. l2tp_udp_recv_core only moves 10 bytes to the linear part of a skb. This may lead to l2tp_recv_common read data outside of a skb. This patch make sure that there is at least 14 bytes in the linear part of a skb to meet the maximum need of l2tp_udp_recv_core and l2tp_recv_common. The minimum size of both PPP HDLC-like frame and Ethernet frame is larger than 14 bytes, so we are safe to do so. Also remove L2TP_HDR_SIZE_NOSEQ, it is unused now. Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Suggested-by: Guillaume Nault Signed-off-by: Jacob Wen Acked-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index dd5ba0c11ab3..fed6becc5daf 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -83,8 +83,7 @@ #define L2TP_SLFLAG_S 0x40000000 #define L2TP_SL_SEQ_MASK 0x00ffffff -#define L2TP_HDR_SIZE_SEQ 10 -#define L2TP_HDR_SIZE_NOSEQ 6 +#define L2TP_HDR_SIZE_MAX 14 /* Default trace flags */ #define L2TP_DEFAULT_DEBUG_FLAGS 0 @@ -808,7 +807,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) __skb_pull(skb, sizeof(struct udphdr)); /* Short packet? */ - if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) { + if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) { l2tp_info(tunnel, L2TP_MSG_DATA, "%s: recv short packet (len=%d)\n", tunnel->name, skb->len); From fc42a689c4c097859e5bd37b5ea11b60dc426df6 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 30 Jan 2019 10:42:30 -0800 Subject: [PATCH 0407/1436] lib/test_rhashtable: Make test_insert_dup() allocate its hash table dynamically The test_insert_dup() function from lib/test_rhashtable.c passes a pointer to a stack object to rhltable_init(). Allocate the hash table dynamically to avoid that the following is reported with object debugging enabled: ODEBUG: object (ptrval) is on stack (ptrval), but NOT annotated. WARNING: CPU: 0 PID: 1 at lib/debugobjects.c:368 __debug_object_init+0x312/0x480 Modules linked in: EIP: __debug_object_init+0x312/0x480 Call Trace: ? debug_object_init+0x1a/0x20 ? __init_work+0x16/0x30 ? rhashtable_init+0x1e1/0x460 ? sched_clock_cpu+0x57/0xe0 ? rhltable_init+0xb/0x20 ? test_insert_dup+0x32/0x20f ? trace_hardirqs_on+0x38/0xf0 ? ida_dump+0x10/0x10 ? jhash+0x130/0x130 ? my_hashfn+0x30/0x30 ? test_rht_init+0x6aa/0xab4 ? ida_dump+0x10/0x10 ? test_rhltable+0xc5c/0xc5c ? do_one_initcall+0x67/0x28e ? trace_hardirqs_off+0x22/0xe0 ? restore_all_kernel+0xf/0x70 ? trace_hardirqs_on_thunk+0xc/0x10 ? restore_all_kernel+0xf/0x70 ? kernel_init_freeable+0x142/0x213 ? rest_init+0x230/0x230 ? kernel_init+0x10/0x110 ? schedule_tail_wrapper+0x9/0xc ? ret_from_fork+0x19/0x24 Cc: Thomas Graf Cc: Herbert Xu Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Bart Van Assche Acked-by: Herbert Xu Signed-off-by: David S. Miller --- lib/test_rhashtable.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 6a8ac7626797..e52f8cafe227 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -541,38 +541,45 @@ static unsigned int __init print_ht(struct rhltable *rhlt) static int __init test_insert_dup(struct test_obj_rhl *rhl_test_objects, int cnt, bool slow) { - struct rhltable rhlt; + struct rhltable *rhlt; unsigned int i, ret; const char *key; int err = 0; - err = rhltable_init(&rhlt, &test_rht_params_dup); - if (WARN_ON(err)) + rhlt = kmalloc(sizeof(*rhlt), GFP_KERNEL); + if (WARN_ON(!rhlt)) + return -EINVAL; + + err = rhltable_init(rhlt, &test_rht_params_dup); + if (WARN_ON(err)) { + kfree(rhlt); return err; + } for (i = 0; i < cnt; i++) { rhl_test_objects[i].value.tid = i; - key = rht_obj(&rhlt.ht, &rhl_test_objects[i].list_node.rhead); + key = rht_obj(&rhlt->ht, &rhl_test_objects[i].list_node.rhead); key += test_rht_params_dup.key_offset; if (slow) { - err = PTR_ERR(rhashtable_insert_slow(&rhlt.ht, key, + err = PTR_ERR(rhashtable_insert_slow(&rhlt->ht, key, &rhl_test_objects[i].list_node.rhead)); if (err == -EAGAIN) err = 0; } else - err = rhltable_insert(&rhlt, + err = rhltable_insert(rhlt, &rhl_test_objects[i].list_node, test_rht_params_dup); if (WARN(err, "error %d on element %d/%d (%s)\n", err, i, cnt, slow? "slow" : "fast")) goto skip_print; } - ret = print_ht(&rhlt); + ret = print_ht(rhlt); WARN(ret != cnt, "missing rhltable elements (%d != %d, %s)\n", ret, cnt, slow? "slow" : "fast"); skip_print: - rhltable_destroy(&rhlt); + rhltable_destroy(rhlt); + kfree(rhlt); return 0; } From 6fa19f5637a6c22bc0999596bcc83bdcac8a4fa6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 31 Jan 2019 08:47:10 -0800 Subject: [PATCH 0408/1436] rds: fix refcount bug in rds_sock_addref syzbot was able to catch a bug in rds [1] The issue here is that the socket might be found in a hash table but that its refcount has already be set to 0 by another cpu. We need to use refcount_inc_not_zero() to be safe here. [1] refcount_t: increment on 0; use-after-free. WARNING: CPU: 1 PID: 23129 at lib/refcount.c:153 refcount_inc_checked lib/refcount.c:153 [inline] WARNING: CPU: 1 PID: 23129 at lib/refcount.c:153 refcount_inc_checked+0x61/0x70 lib/refcount.c:151 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 23129 Comm: syz-executor3 Not tainted 5.0.0-rc4+ #53 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1db/0x2d0 lib/dump_stack.c:113 panic+0x2cb/0x65c kernel/panic.c:214 __warn.cold+0x20/0x48 kernel/panic.c:571 report_bug+0x263/0x2b0 lib/bug.c:186 fixup_bug arch/x86/kernel/traps.c:178 [inline] fixup_bug arch/x86/kernel/traps.c:173 [inline] do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:271 do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:290 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:973 RIP: 0010:refcount_inc_checked lib/refcount.c:153 [inline] RIP: 0010:refcount_inc_checked+0x61/0x70 lib/refcount.c:151 Code: 1d 51 63 c8 06 31 ff 89 de e8 eb 1b f2 fd 84 db 75 dd e8 a2 1a f2 fd 48 c7 c7 60 9f 81 88 c6 05 31 63 c8 06 01 e8 af 65 bb fd <0f> 0b eb c1 90 66 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 41 54 49 RSP: 0018:ffff8880a0cbf1e8 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffc90006113000 RDX: 000000000001047d RSI: ffffffff81685776 RDI: 0000000000000005 RBP: ffff8880a0cbf1f8 R08: ffff888097c9e100 R09: ffffed1015ce5021 R10: ffffed1015ce5020 R11: ffff8880ae728107 R12: ffff8880723c20c0 R13: ffff8880723c24b0 R14: dffffc0000000000 R15: ffffed1014197e64 sock_hold include/net/sock.h:647 [inline] rds_sock_addref+0x19/0x20 net/rds/af_rds.c:675 rds_find_bound+0x97c/0x1080 net/rds/bind.c:82 rds_recv_incoming+0x3be/0x1430 net/rds/recv.c:362 rds_loop_xmit+0xf3/0x2a0 net/rds/loop.c:96 rds_send_xmit+0x1355/0x2a10 net/rds/send.c:355 rds_sendmsg+0x323c/0x44e0 net/rds/send.c:1368 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg+0xdd/0x130 net/socket.c:631 __sys_sendto+0x387/0x5f0 net/socket.c:1788 __do_sys_sendto net/socket.c:1800 [inline] __se_sys_sendto net/socket.c:1796 [inline] __x64_sys_sendto+0xe1/0x1a0 net/socket.c:1796 do_syscall_64+0x1a3/0x800 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x458089 Code: 6d b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 3b b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fc266df8c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000006 RCX: 0000000000458089 RDX: 0000000000000000 RSI: 00000000204b3fff RDI: 0000000000000005 RBP: 000000000073bf00 R08: 00000000202b4000 R09: 0000000000000010 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fc266df96d4 R13: 00000000004c56e4 R14: 00000000004d94a8 R15: 00000000ffffffff Fixes: cc4dfb7f70a3 ("rds: fix two RCU related problems") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Sowmini Varadhan Cc: Santosh Shilimkar Cc: rds-devel@oss.oracle.com Cc: Cong Wang Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/bind.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/rds/bind.c b/net/rds/bind.c index 762d2c6788a3..17c9d9f0c848 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -78,10 +78,10 @@ struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port, __rds_create_bind_key(key, addr, port, scope_id); rcu_read_lock(); rs = rhashtable_lookup(&bind_hash_table, key, ht_parms); - if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) - rds_sock_addref(rs); - else + if (rs && (sock_flag(rds_rs_to_sk(rs), SOCK_DEAD) || + !refcount_inc_not_zero(&rds_rs_to_sk(rs)->sk_refcnt))) rs = NULL; + rcu_read_unlock(); rdsdebug("returning rs %p for %pI6c:%u\n", rs, addr, From fedb5760648a291e949f2380d383b5b2d2749b5e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 31 Jan 2019 17:43:16 +0800 Subject: [PATCH 0409/1436] serial: fix race between flush_to_ldisc and tty_open There still is a race window after the commit b027e2298bd588 ("tty: fix data race between tty_init_dev and flush of buf"), and we encountered this crash issue if receive_buf call comes before tty initialization completes in tty_open and tty->driver_data may be NULL. CPU0 CPU1 ---- ---- tty_open tty_init_dev tty_ldisc_unlock schedule flush_to_ldisc receive_buf tty_port_default_receive_buf tty_ldisc_receive_buf n_tty_receive_buf_common __receive_buf uart_flush_chars uart_start /*tty->driver_data is NULL*/ tty->ops->open /*init tty->driver_data*/ it can be fixed by extending ldisc semaphore lock in tty_init_dev to driver_data initialized completely after tty->ops->open(), but this will lead to get lock on one function and unlock in some other function, and hard to maintain, so fix this race only by checking tty->driver_data when receiving, and return if tty->driver_data is NULL, and n_tty_receive_buf_common maybe calls uart_unthrottle, so add the same check. Because the tty layer knows nothing about the driver associated with the device, the tty layer can not do anything here, it is up to the tty driver itself to check for this type of race. Fix up the serial driver to correctly check to see if it is finished binding with the device when being called, and if not, abort the tty calls. [Description and problem report and testing from Li RongQing, I rewrote the patch to be in the serial layer, not in the tty core - gregkh] Reported-by: Li RongQing Tested-by: Li RongQing Signed-off-by: Wang Li Signed-off-by: Zhang Yu Signed-off-by: Li RongQing Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 5c01bb6d1c24..556f50aa1b58 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -130,6 +130,9 @@ static void uart_start(struct tty_struct *tty) struct uart_port *port; unsigned long flags; + if (!state) + return; + port = uart_port_lock(state, flags); __uart_start(tty); uart_port_unlock(port, flags); @@ -727,6 +730,9 @@ static void uart_unthrottle(struct tty_struct *tty) upstat_t mask = UPSTAT_SYNC_FIFO; struct uart_port *port; + if (!state) + return; + port = uart_port_ref(state); if (!port) return; From e74c98ca2d6ae4376cc15fa2a22483430909d96b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 30 Jan 2019 21:30:36 +0100 Subject: [PATCH 0410/1436] gfs2: Revert "Fix loop in gfs2_rbm_find" This reverts commit 2d29f6b96d8f80322ed2dd895bca590491c38d34. It turns out that the fix can lead to a ~20 percent performance regression in initial writes to the page cache according to iozone. Let's revert this for now to have more time for a proper fix. Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson Signed-off-by: Linus Torvalds --- fs/gfs2/rgrp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 831d7cb5a49c..17a8d3b43990 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1780,9 +1780,9 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, goto next_iter; } if (ret == -E2BIG) { - n += rbm->bii - initial_bii; rbm->bii = 0; rbm->offset = 0; + n += (rbm->bii - initial_bii); goto res_covered_end_of_rgrp; } return ret; From 5e66e35aab335b83d9ffb220d8a3a13986a7a60e Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 31 Jan 2019 14:31:48 -0500 Subject: [PATCH 0411/1436] bnxt_en: Disable interrupts when allocating CP rings or NQs. When calling firmware to allocate a CP ring or NQ, an interrupt associated with that ring may be generated immediately before the doorbell is even setup after the firmware call returns. When servicing the interrupt, the driver may crash when trying to access the doorbell. Fix it by disabling interrupt on that vector until the doorbell is set up. Fixes: 697197e5a173 ("bnxt_en: Re-structure doorbells.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6a512871176b..8bc7e495b027 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4973,12 +4973,18 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp) struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; struct bnxt_ring_struct *ring = &cpr->cp_ring_struct; u32 map_idx = ring->map_idx; + unsigned int vector; + vector = bp->irq_tbl[map_idx].vector; + disable_irq_nosync(vector); rc = hwrm_ring_alloc_send_msg(bp, ring, type, map_idx); - if (rc) + if (rc) { + enable_irq(vector); goto err_out; + } bnxt_set_db(bp, &cpr->cp_db, type, map_idx, ring->fw_ring_id); bnxt_db_nq(bp, &cpr->cp_db, cpr->cp_raw_cons); + enable_irq(vector); bp->grp_info[i].cp_fw_ring_id = ring->fw_ring_id; if (!i) { From a6093ad7fc4962099d2d723bcca72f8175b58c82 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Thu, 31 Jan 2019 14:59:50 -0600 Subject: [PATCH 0412/1436] PCI: imx: Fix probe failure without power domain On chips without a separate power domain for PCI (such as 6q/6qp) the imx6_pcie_attach_pd() function incorrectly returns an error. Fix by returning 0 if dev_pm_domain_attach_by_name() does not find anything. Fixes: 3f7cceeab895 ("PCI: imx: Add multi-pd support") Reported-by: Lukas F.Hartmann Signed-off-by: Leonard Crestez [lorenzo.pieralisi@arm.com: updated commit log] Signed-off-by: Lorenzo Pieralisi --- drivers/pci/controller/dwc/pci-imx6.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 52e47dac028f..ac5f6ae0b254 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -310,6 +310,9 @@ static int imx6_pcie_attach_pd(struct device *dev) imx6_pcie->pd_pcie = dev_pm_domain_attach_by_name(dev, "pcie"); if (IS_ERR(imx6_pcie->pd_pcie)) return PTR_ERR(imx6_pcie->pd_pcie); + /* Do nothing when power domain missing */ + if (!imx6_pcie->pd_pcie) + return 0; link = device_link_add(dev, imx6_pcie->pd_pcie, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME | From a4ace4fa20072dfe60ab48ba227e50bc2d69c246 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Thu, 31 Jan 2019 14:59:56 -0600 Subject: [PATCH 0413/1436] PCI: imx: Fix checking pd_pcie_phy device link addition The check on the device_link_add() return value is wrong; this leads to erroneous code execution, so fix it. Fixes: 3f7cceeab895 ("PCI: imx: Add multi-pd support") Signed-off-by: Leonard Crestez [lorenzo.pieralisi@arm.com: updated commit log] Signed-off-by: Lorenzo Pieralisi --- drivers/pci/controller/dwc/pci-imx6.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index ac5f6ae0b254..80f843030e36 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -326,13 +326,13 @@ static int imx6_pcie_attach_pd(struct device *dev) if (IS_ERR(imx6_pcie->pd_pcie_phy)) return PTR_ERR(imx6_pcie->pd_pcie_phy); - device_link_add(dev, imx6_pcie->pd_pcie_phy, + link = device_link_add(dev, imx6_pcie->pd_pcie_phy, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE); - if (IS_ERR(link)) { - dev_err(dev, "Failed to add device_link to pcie_phy pd: %ld\n", PTR_ERR(link)); - return PTR_ERR(link); + if (!link) { + dev_err(dev, "Failed to add device_link to pcie_phy pd.\n"); + return -EINVAL; } return 0; From 65dbb423cf28232fed1732b779249d6164c5999b Mon Sep 17 00:00:00 2001 From: Koen Vandeputte Date: Thu, 31 Jan 2019 15:00:01 -0600 Subject: [PATCH 0414/1436] ARM: cns3xxx: Fix writing to wrong PCI config registers after alignment Originally, cns3xxx used its own functions for mapping, reading and writing config registers. Commit 802b7c06adc7 ("ARM: cns3xxx: Convert PCI to use generic config accessors") removed the internal PCI config write function in favor of the generic one: cns3xxx_pci_write_config() --> pci_generic_config_write() cns3xxx_pci_write_config() expected aligned addresses, being produced by cns3xxx_pci_map_bus() while the generic one pci_generic_config_write() actually expects the real address as both the function and hardware are capable of byte-aligned writes. This currently leads to pci_generic_config_write() writing to the wrong registers. For instance, upon ath9k module loading: - driver ath9k gets loaded - The driver wants to write value 0xA8 to register PCI_LATENCY_TIMER, located at 0x0D - cns3xxx_pci_map_bus() aligns the address to 0x0C - pci_generic_config_write() effectively writes 0xA8 into register 0x0C (CACHE_LINE_SIZE) Fix the bug by removing the alignment in the cns3xxx mapping function. Fixes: 802b7c06adc7 ("ARM: cns3xxx: Convert PCI to use generic config accessors") Signed-off-by: Koen Vandeputte [lorenzo.pieralisi@arm.com: updated commit log] Signed-off-by: Lorenzo Pieralisi Acked-by: Krzysztof Halasa Acked-by: Tim Harvey Acked-by: Arnd Bergmann CC: stable@vger.kernel.org # v4.0+ CC: Bjorn Helgaas CC: Olof Johansson CC: Robin Leblon CC: Rob Herring CC: Russell King --- arch/arm/mach-cns3xxx/pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-cns3xxx/pcie.c b/arch/arm/mach-cns3xxx/pcie.c index 318394ed5c7a..5e11ad3164e0 100644 --- a/arch/arm/mach-cns3xxx/pcie.c +++ b/arch/arm/mach-cns3xxx/pcie.c @@ -83,7 +83,7 @@ static void __iomem *cns3xxx_pci_map_bus(struct pci_bus *bus, } else /* remote PCI bus */ base = cnspci->cfg1_regs + ((busno & 0xf) << 20); - return base + (where & 0xffc) + (devfn << 12); + return base + where + (devfn << 12); } static int cns3xxx_pci_read_config(struct pci_bus *bus, unsigned int devfn, From 432dd7064aa1c030a488745917cfa4ebc6c8c060 Mon Sep 17 00:00:00 2001 From: Koen Vandeputte Date: Thu, 31 Jan 2019 15:00:11 -0600 Subject: [PATCH 0415/1436] ARM: cns3xxx: Use actual size reads for PCIe commit 802b7c06adc7 ("ARM: cns3xxx: Convert PCI to use generic config accessors") reimplemented cns3xxx_pci_read_config() using pci_generic_config_read32(), which preserved the property of only doing 32-bit reads. It also replaced cns3xxx_pci_write_config() with pci_generic_config_write(), so it changed writes from always being 32 bits to being the actual size, which works just fine. Given that: - The documentation does not mention that only 32 bit access is allowed. - Writes are already executed using the actual size - Extensive testing shows that 8b, 16b and 32b reads work as intended Allow read access of any size by replacing pci_generic_config_read32() with the pci_generic_config_read() accessors. Fixes: 802b7c06adc7 ("ARM: cns3xxx: Convert PCI to use generic config accessors") Suggested-by: Bjorn Helgaas Signed-off-by: Koen Vandeputte [lorenzo.pieralisi@arm.com: updated commit log] Signed-off-by: Lorenzo Pieralisi Acked-by: Krzysztof Halasa Acked-by: Arnd Bergmann CC: Krzysztof Halasa CC: Olof Johansson CC: Robin Leblon CC: Rob Herring CC: Russell King CC: Tim Harvey --- arch/arm/mach-cns3xxx/pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-cns3xxx/pcie.c b/arch/arm/mach-cns3xxx/pcie.c index 5e11ad3164e0..95a11d5b3587 100644 --- a/arch/arm/mach-cns3xxx/pcie.c +++ b/arch/arm/mach-cns3xxx/pcie.c @@ -93,7 +93,7 @@ static int cns3xxx_pci_read_config(struct pci_bus *bus, unsigned int devfn, u32 mask = (0x1ull << (size * 8)) - 1; int shift = (where % 4) * 8; - ret = pci_generic_config_read32(bus, devfn, where, size, val); + ret = pci_generic_config_read(bus, devfn, where, size, val); if (ret == PCIBIOS_SUCCESSFUL && !bus->number && !devfn && (where & 0xffc) == PCI_CLASS_REVISION) From f14bcc0add3abecceca1a3fe538c4ec9566893f3 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Thu, 31 Jan 2019 15:00:18 -0600 Subject: [PATCH 0416/1436] Revert "PCI: armada8k: Add support for gpio controlled reset signal" Revert commit 3d71746c42 ("PCI: armada8k: Add support for gpio controlled reset signal"). That commit breaks boot on Macchiatobin board when a Mellanox NIC is present in the PCIe slot. It turns out that full reset cycle requires first comphy serdes initialization. Reset signal toggle without comphy initialization makes access to PCI configuration registers stall indefinitely. U-Boot toggles the Macchiatobin PCIe reset line already at boot, after initializing the comphy serdes. So while commit 3d71746c42 ("PCI: armada8k: Add support for gpio controlled reset signal") enables PCIe on platforms that U-Boot does not touch the reset line (like Clearfog GT-8K), it breaks PCIe (and boot) on the Macchiatobin board. Revert commit 3d71746c42 ("PCI: armada8k: Add support for gpio controlled reset signal") entirely to fix the Macchiatobin regression. Reported-by: Sven Auhagen Signed-off-by: Baruch Siach Signed-off-by: Lorenzo Pieralisi --- drivers/pci/controller/dwc/pcie-armada8k.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-armada8k.c b/drivers/pci/controller/dwc/pcie-armada8k.c index b171b6bc15c8..0c389a30ef5d 100644 --- a/drivers/pci/controller/dwc/pcie-armada8k.c +++ b/drivers/pci/controller/dwc/pcie-armada8k.c @@ -22,7 +22,6 @@ #include #include #include -#include #include "pcie-designware.h" @@ -30,7 +29,6 @@ struct armada8k_pcie { struct dw_pcie *pci; struct clk *clk; struct clk *clk_reg; - struct gpio_desc *reset_gpio; }; #define PCIE_VENDOR_REGS_OFFSET 0x8000 @@ -139,12 +137,6 @@ static int armada8k_pcie_host_init(struct pcie_port *pp) struct dw_pcie *pci = to_dw_pcie_from_pp(pp); struct armada8k_pcie *pcie = to_armada8k_pcie(pci); - if (pcie->reset_gpio) { - /* assert and then deassert the reset signal */ - gpiod_set_value_cansleep(pcie->reset_gpio, 1); - msleep(100); - gpiod_set_value_cansleep(pcie->reset_gpio, 0); - } dw_pcie_setup_rc(pp); armada8k_pcie_establish_link(pcie); @@ -257,14 +249,6 @@ static int armada8k_pcie_probe(struct platform_device *pdev) goto fail_clkreg; } - /* Get reset gpio signal and hold asserted (logically high) */ - pcie->reset_gpio = devm_gpiod_get_optional(dev, "reset", - GPIOD_OUT_HIGH); - if (IS_ERR(pcie->reset_gpio)) { - ret = PTR_ERR(pcie->reset_gpio); - goto fail_clkreg; - } - platform_set_drvdata(pdev, pcie); ret = armada8k_add_pcie_port(pcie, pdev); From 1bb54c4071f585ebef56ce8fdfe6026fa2cbcddd Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Thu, 31 Jan 2019 10:19:33 +0100 Subject: [PATCH 0417/1436] bpf, selftests: fix handling of sparse CPU allocations Previously, bpf_num_possible_cpus() had a bug when calculating a number of possible CPUs in the case of sparse CPU allocations, as it was considering only the first range or element of /sys/devices/system/cpu/possible. E.g. in the case of "0,2-3" (CPU 1 is not available), the function returned 1 instead of 3. This patch fixes the function by making it parse all CPU ranges and elements. Signed-off-by: Martynas Pumputis Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/bpf_util.h | 32 +++++++++++++++++--------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index 315a44fa32af..84fd6f1bf33e 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -13,7 +13,7 @@ static inline unsigned int bpf_num_possible_cpus(void) unsigned int start, end, possible_cpus = 0; char buff[128]; FILE *fp; - int n; + int len, n, i, j = 0; fp = fopen(fcpu, "r"); if (!fp) { @@ -21,17 +21,27 @@ static inline unsigned int bpf_num_possible_cpus(void) exit(1); } - while (fgets(buff, sizeof(buff), fp)) { - n = sscanf(buff, "%u-%u", &start, &end); - if (n == 0) { - printf("Failed to retrieve # possible CPUs!\n"); - exit(1); - } else if (n == 1) { - end = start; - } - possible_cpus = start == 0 ? end + 1 : 0; - break; + if (!fgets(buff, sizeof(buff), fp)) { + printf("Failed to read %s!\n", fcpu); + exit(1); } + + len = strlen(buff); + for (i = 0; i <= len; i++) { + if (buff[i] == ',' || buff[i] == '\0') { + buff[i] = '\0'; + n = sscanf(&buff[j], "%u-%u", &start, &end); + if (n <= 0) { + printf("Failed to retrieve # possible CPUs!\n"); + exit(1); + } else if (n == 1) { + end = start; + } + possible_cpus += end - start + 1; + j = i + 1; + } + } + fclose(fp); return possible_cpus; From 6cab5e90ab2bd323c9f3811b6c70a4687df51e27 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 28 Jan 2019 18:43:34 -0800 Subject: [PATCH 0418/1436] bpf: run bpf programs with preemption disabled Disabled preemption is necessary for proper access to per-cpu maps from BPF programs. But the sender side of socket filters didn't have preemption disabled: unix_dgram_sendmsg->sk_filter->sk_filter_trim_cap->bpf_prog_run_save_cb->BPF_PROG_RUN and a combination of af_packet with tun device didn't disable either: tpacket_snd->packet_direct_xmit->packet_pick_tx_queue->ndo_select_queue-> tun_select_queue->tun_ebpf_select_queue->bpf_prog_run_clear_cb->BPF_PROG_RUN Disable preemption before executing BPF programs (both classic and extended). Reported-by: Jann Horn Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/linux/filter.h | 21 ++++++++++++++++++--- kernel/bpf/cgroup.c | 2 +- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index ad106d845b22..e532fcc6e4b5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -591,8 +591,8 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb) return qdisc_skb_cb(skb)->data; } -static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, - struct sk_buff *skb) +static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog, + struct sk_buff *skb) { u8 *cb_data = bpf_skb_cb(skb); u8 cb_saved[BPF_SKB_CB_LEN]; @@ -611,15 +611,30 @@ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, return res; } +static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, + struct sk_buff *skb) +{ + u32 res; + + preempt_disable(); + res = __bpf_prog_run_save_cb(prog, skb); + preempt_enable(); + return res; +} + static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, struct sk_buff *skb) { u8 *cb_data = bpf_skb_cb(skb); + u32 res; if (unlikely(prog->cb_access)) memset(cb_data, 0, BPF_SKB_CB_LEN); - return BPF_PROG_RUN(prog, skb); + preempt_disable(); + res = BPF_PROG_RUN(prog, skb); + preempt_enable(); + return res; } static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index ab612fe9862f..d17d05570a3f 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -572,7 +572,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, bpf_compute_and_save_data_end(skb, &saved_data_end); ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, - bpf_prog_run_save_cb); + __bpf_prog_run_save_cb); bpf_restore_data_end(skb, saved_data_end); __skb_pull(skb, offset); skb->sk = save_sk; From a89fac57b5d080771efd4d71feaae19877cf68f0 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 30 Jan 2019 18:12:43 -0800 Subject: [PATCH 0419/1436] bpf: fix lockdep false positive in percpu_freelist Lockdep warns about false positive: [ 12.492084] 00000000e6b28347 (&head->lock){+...}, at: pcpu_freelist_push+0x2a/0x40 [ 12.492696] but this lock was taken by another, HARDIRQ-safe lock in the past: [ 12.493275] (&rq->lock){-.-.} [ 12.493276] [ 12.493276] [ 12.493276] and interrupts could create inverse lock ordering between them. [ 12.493276] [ 12.494435] [ 12.494435] other info that might help us debug this: [ 12.494979] Possible interrupt unsafe locking scenario: [ 12.494979] [ 12.495518] CPU0 CPU1 [ 12.495879] ---- ---- [ 12.496243] lock(&head->lock); [ 12.496502] local_irq_disable(); [ 12.496969] lock(&rq->lock); [ 12.497431] lock(&head->lock); [ 12.497890] [ 12.498104] lock(&rq->lock); [ 12.498368] [ 12.498368] *** DEADLOCK *** [ 12.498368] [ 12.498837] 1 lock held by dd/276: [ 12.499110] #0: 00000000c58cb2ee (rcu_read_lock){....}, at: trace_call_bpf+0x5e/0x240 [ 12.499747] [ 12.499747] the shortest dependencies between 2nd lock and 1st lock: [ 12.500389] -> (&rq->lock){-.-.} { [ 12.500669] IN-HARDIRQ-W at: [ 12.500934] _raw_spin_lock+0x2f/0x40 [ 12.501373] scheduler_tick+0x4c/0xf0 [ 12.501812] update_process_times+0x40/0x50 [ 12.502294] tick_periodic+0x27/0xb0 [ 12.502723] tick_handle_periodic+0x1f/0x60 [ 12.503203] timer_interrupt+0x11/0x20 [ 12.503651] __handle_irq_event_percpu+0x43/0x2c0 [ 12.504167] handle_irq_event_percpu+0x20/0x50 [ 12.504674] handle_irq_event+0x37/0x60 [ 12.505139] handle_level_irq+0xa7/0x120 [ 12.505601] handle_irq+0xa1/0x150 [ 12.506018] do_IRQ+0x77/0x140 [ 12.506411] ret_from_intr+0x0/0x1d [ 12.506834] _raw_spin_unlock_irqrestore+0x53/0x60 [ 12.507362] __setup_irq+0x481/0x730 [ 12.507789] setup_irq+0x49/0x80 [ 12.508195] hpet_time_init+0x21/0x32 [ 12.508644] x86_late_time_init+0xb/0x16 [ 12.509106] start_kernel+0x390/0x42a [ 12.509554] secondary_startup_64+0xa4/0xb0 [ 12.510034] IN-SOFTIRQ-W at: [ 12.510305] _raw_spin_lock+0x2f/0x40 [ 12.510772] try_to_wake_up+0x1c7/0x4e0 [ 12.511220] swake_up_locked+0x20/0x40 [ 12.511657] swake_up_one+0x1a/0x30 [ 12.512070] rcu_process_callbacks+0xc5/0x650 [ 12.512553] __do_softirq+0xe6/0x47b [ 12.512978] irq_exit+0xc3/0xd0 [ 12.513372] smp_apic_timer_interrupt+0xa9/0x250 [ 12.513876] apic_timer_interrupt+0xf/0x20 [ 12.514343] default_idle+0x1c/0x170 [ 12.514765] do_idle+0x199/0x240 [ 12.515159] cpu_startup_entry+0x19/0x20 [ 12.515614] start_kernel+0x422/0x42a [ 12.516045] secondary_startup_64+0xa4/0xb0 [ 12.516521] INITIAL USE at: [ 12.516774] _raw_spin_lock_irqsave+0x38/0x50 [ 12.517258] rq_attach_root+0x16/0xd0 [ 12.517685] sched_init+0x2f2/0x3eb [ 12.518096] start_kernel+0x1fb/0x42a [ 12.518525] secondary_startup_64+0xa4/0xb0 [ 12.518986] } [ 12.519132] ... key at: [] __key.71384+0x0/0x8 [ 12.519649] ... acquired at: [ 12.519892] pcpu_freelist_pop+0x7b/0xd0 [ 12.520221] bpf_get_stackid+0x1d2/0x4d0 [ 12.520563] ___bpf_prog_run+0x8b4/0x11a0 [ 12.520887] [ 12.521008] -> (&head->lock){+...} { [ 12.521292] HARDIRQ-ON-W at: [ 12.521539] _raw_spin_lock+0x2f/0x40 [ 12.521950] pcpu_freelist_push+0x2a/0x40 [ 12.522396] bpf_get_stackid+0x494/0x4d0 [ 12.522828] ___bpf_prog_run+0x8b4/0x11a0 [ 12.523296] INITIAL USE at: [ 12.523537] _raw_spin_lock+0x2f/0x40 [ 12.523944] pcpu_freelist_populate+0xc0/0x120 [ 12.524417] htab_map_alloc+0x405/0x500 [ 12.524835] __do_sys_bpf+0x1a3/0x1a90 [ 12.525253] do_syscall_64+0x4a/0x180 [ 12.525659] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 12.526167] } [ 12.526311] ... key at: [] __key.13130+0x0/0x8 [ 12.526812] ... acquired at: [ 12.527047] __lock_acquire+0x521/0x1350 [ 12.527371] lock_acquire+0x98/0x190 [ 12.527680] _raw_spin_lock+0x2f/0x40 [ 12.527994] pcpu_freelist_push+0x2a/0x40 [ 12.528325] bpf_get_stackid+0x494/0x4d0 [ 12.528645] ___bpf_prog_run+0x8b4/0x11a0 [ 12.528970] [ 12.529092] [ 12.529092] stack backtrace: [ 12.529444] CPU: 0 PID: 276 Comm: dd Not tainted 5.0.0-rc3-00018-g2fa53f892422 #475 [ 12.530043] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 [ 12.530750] Call Trace: [ 12.530948] dump_stack+0x5f/0x8b [ 12.531248] check_usage_backwards+0x10c/0x120 [ 12.531598] ? ___bpf_prog_run+0x8b4/0x11a0 [ 12.531935] ? mark_lock+0x382/0x560 [ 12.532229] mark_lock+0x382/0x560 [ 12.532496] ? print_shortest_lock_dependencies+0x180/0x180 [ 12.532928] __lock_acquire+0x521/0x1350 [ 12.533271] ? find_get_entry+0x17f/0x2e0 [ 12.533586] ? find_get_entry+0x19c/0x2e0 [ 12.533902] ? lock_acquire+0x98/0x190 [ 12.534196] lock_acquire+0x98/0x190 [ 12.534482] ? pcpu_freelist_push+0x2a/0x40 [ 12.534810] _raw_spin_lock+0x2f/0x40 [ 12.535099] ? pcpu_freelist_push+0x2a/0x40 [ 12.535432] pcpu_freelist_push+0x2a/0x40 [ 12.535750] bpf_get_stackid+0x494/0x4d0 [ 12.536062] ___bpf_prog_run+0x8b4/0x11a0 It has been explained that is a false positive here: https://lkml.org/lkml/2018/7/25/756 Recap: - stackmap uses pcpu_freelist - The lock in pcpu_freelist is a percpu lock - stackmap is only used by tracing bpf_prog - A tracing bpf_prog cannot be run if another bpf_prog has already been running (ensured by the percpu bpf_prog_active counter). Eric pointed out that this lockdep splats stops other legit lockdep splats in selftests/bpf/test_progs.c. Fix this by calling local_irq_save/restore for stackmap. Another false positive had also been worked around by calling local_irq_save in commit 89ad2fa3f043 ("bpf: fix lockdep splat"). That commit added unnecessary irq_save/restore to fast path of bpf hash map. irqs are already disabled at that point, since htab is holding per bucket spin_lock with irqsave. Let's reduce overhead for htab by introducing __pcpu_freelist_push/pop function w/o irqsave and convert pcpu_freelist_push/pop to irqsave to be used elsewhere (right now only in stackmap). It stops lockdep false positive in stackmap with a bit of acceptable overhead. Fixes: 557c0c6e7df8 ("bpf: convert stackmap to pre-allocation") Reported-by: Naresh Kamboju Reported-by: Eric Dumazet Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/hashtab.c | 4 ++-- kernel/bpf/percpu_freelist.c | 41 +++++++++++++++++++++++++----------- kernel/bpf/percpu_freelist.h | 4 ++++ 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 4b7c76765d9d..f9274114c88d 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -686,7 +686,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) } if (htab_is_prealloc(htab)) { - pcpu_freelist_push(&htab->freelist, &l->fnode); + __pcpu_freelist_push(&htab->freelist, &l->fnode); } else { atomic_dec(&htab->count); l->htab = htab; @@ -748,7 +748,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, } else { struct pcpu_freelist_node *l; - l = pcpu_freelist_pop(&htab->freelist); + l = __pcpu_freelist_pop(&htab->freelist); if (!l) return ERR_PTR(-E2BIG); l_new = container_of(l, struct htab_elem, fnode); diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c index 673fa6fe2d73..0c1b4ba9e90e 100644 --- a/kernel/bpf/percpu_freelist.c +++ b/kernel/bpf/percpu_freelist.c @@ -28,8 +28,8 @@ void pcpu_freelist_destroy(struct pcpu_freelist *s) free_percpu(s->freelist); } -static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head, - struct pcpu_freelist_node *node) +static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head, + struct pcpu_freelist_node *node) { raw_spin_lock(&head->lock); node->next = head->first; @@ -37,12 +37,22 @@ static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head, raw_spin_unlock(&head->lock); } -void pcpu_freelist_push(struct pcpu_freelist *s, +void __pcpu_freelist_push(struct pcpu_freelist *s, struct pcpu_freelist_node *node) { struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist); - __pcpu_freelist_push(head, node); + ___pcpu_freelist_push(head, node); +} + +void pcpu_freelist_push(struct pcpu_freelist *s, + struct pcpu_freelist_node *node) +{ + unsigned long flags; + + local_irq_save(flags); + __pcpu_freelist_push(s, node); + local_irq_restore(flags); } void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, @@ -63,7 +73,7 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, for_each_possible_cpu(cpu) { again: head = per_cpu_ptr(s->freelist, cpu); - __pcpu_freelist_push(head, buf); + ___pcpu_freelist_push(head, buf); i++; buf += elem_size; if (i == nr_elems) @@ -74,14 +84,12 @@ again: local_irq_restore(flags); } -struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) +struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s) { struct pcpu_freelist_head *head; struct pcpu_freelist_node *node; - unsigned long flags; int orig_cpu, cpu; - local_irq_save(flags); orig_cpu = cpu = raw_smp_processor_id(); while (1) { head = per_cpu_ptr(s->freelist, cpu); @@ -89,16 +97,25 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) node = head->first; if (node) { head->first = node->next; - raw_spin_unlock_irqrestore(&head->lock, flags); + raw_spin_unlock(&head->lock); return node; } raw_spin_unlock(&head->lock); cpu = cpumask_next(cpu, cpu_possible_mask); if (cpu >= nr_cpu_ids) cpu = 0; - if (cpu == orig_cpu) { - local_irq_restore(flags); + if (cpu == orig_cpu) return NULL; - } } } + +struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) +{ + struct pcpu_freelist_node *ret; + unsigned long flags; + + local_irq_save(flags); + ret = __pcpu_freelist_pop(s); + local_irq_restore(flags); + return ret; +} diff --git a/kernel/bpf/percpu_freelist.h b/kernel/bpf/percpu_freelist.h index 3049aae8ea1e..c3960118e617 100644 --- a/kernel/bpf/percpu_freelist.h +++ b/kernel/bpf/percpu_freelist.h @@ -22,8 +22,12 @@ struct pcpu_freelist_node { struct pcpu_freelist_node *next; }; +/* pcpu_freelist_* do spin_lock_irqsave. */ void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *); +/* __pcpu_freelist_* do spin_lock only. caller must disable irqs. */ +void __pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); +struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *); void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, u32 nr_elems); int pcpu_freelist_init(struct pcpu_freelist *); From e16ec34039c701594d55d08a5aa49ee3e1abc821 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 30 Jan 2019 18:12:44 -0800 Subject: [PATCH 0420/1436] bpf: fix potential deadlock in bpf_prog_register Lockdep found a potential deadlock between cpu_hotplug_lock, bpf_event_mutex, and cpuctx_mutex: [ 13.007000] WARNING: possible circular locking dependency detected [ 13.007587] 5.0.0-rc3-00018-g2fa53f892422-dirty #477 Not tainted [ 13.008124] ------------------------------------------------------ [ 13.008624] test_progs/246 is trying to acquire lock: [ 13.009030] 0000000094160d1d (tracepoints_mutex){+.+.}, at: tracepoint_probe_register_prio+0x2d/0x300 [ 13.009770] [ 13.009770] but task is already holding lock: [ 13.010239] 00000000d663ef86 (bpf_event_mutex){+.+.}, at: bpf_probe_register+0x1d/0x60 [ 13.010877] [ 13.010877] which lock already depends on the new lock. [ 13.010877] [ 13.011532] [ 13.011532] the existing dependency chain (in reverse order) is: [ 13.012129] [ 13.012129] -> #4 (bpf_event_mutex){+.+.}: [ 13.012582] perf_event_query_prog_array+0x9b/0x130 [ 13.013016] _perf_ioctl+0x3aa/0x830 [ 13.013354] perf_ioctl+0x2e/0x50 [ 13.013668] do_vfs_ioctl+0x8f/0x6a0 [ 13.014003] ksys_ioctl+0x70/0x80 [ 13.014320] __x64_sys_ioctl+0x16/0x20 [ 13.014668] do_syscall_64+0x4a/0x180 [ 13.015007] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 13.015469] [ 13.015469] -> #3 (&cpuctx_mutex){+.+.}: [ 13.015910] perf_event_init_cpu+0x5a/0x90 [ 13.016291] perf_event_init+0x1b2/0x1de [ 13.016654] start_kernel+0x2b8/0x42a [ 13.016995] secondary_startup_64+0xa4/0xb0 [ 13.017382] [ 13.017382] -> #2 (pmus_lock){+.+.}: [ 13.017794] perf_event_init_cpu+0x21/0x90 [ 13.018172] cpuhp_invoke_callback+0xb3/0x960 [ 13.018573] _cpu_up+0xa7/0x140 [ 13.018871] do_cpu_up+0xa4/0xc0 [ 13.019178] smp_init+0xcd/0xd2 [ 13.019483] kernel_init_freeable+0x123/0x24f [ 13.019878] kernel_init+0xa/0x110 [ 13.020201] ret_from_fork+0x24/0x30 [ 13.020541] [ 13.020541] -> #1 (cpu_hotplug_lock.rw_sem){++++}: [ 13.021051] static_key_slow_inc+0xe/0x20 [ 13.021424] tracepoint_probe_register_prio+0x28c/0x300 [ 13.021891] perf_trace_event_init+0x11f/0x250 [ 13.022297] perf_trace_init+0x6b/0xa0 [ 13.022644] perf_tp_event_init+0x25/0x40 [ 13.023011] perf_try_init_event+0x6b/0x90 [ 13.023386] perf_event_alloc+0x9a8/0xc40 [ 13.023754] __do_sys_perf_event_open+0x1dd/0xd30 [ 13.024173] do_syscall_64+0x4a/0x180 [ 13.024519] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 13.024968] [ 13.024968] -> #0 (tracepoints_mutex){+.+.}: [ 13.025434] __mutex_lock+0x86/0x970 [ 13.025764] tracepoint_probe_register_prio+0x2d/0x300 [ 13.026215] bpf_probe_register+0x40/0x60 [ 13.026584] bpf_raw_tracepoint_open.isra.34+0xa4/0x130 [ 13.027042] __do_sys_bpf+0x94f/0x1a90 [ 13.027389] do_syscall_64+0x4a/0x180 [ 13.027727] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 13.028171] [ 13.028171] other info that might help us debug this: [ 13.028171] [ 13.028807] Chain exists of: [ 13.028807] tracepoints_mutex --> &cpuctx_mutex --> bpf_event_mutex [ 13.028807] [ 13.029666] Possible unsafe locking scenario: [ 13.029666] [ 13.030140] CPU0 CPU1 [ 13.030510] ---- ---- [ 13.030875] lock(bpf_event_mutex); [ 13.031166] lock(&cpuctx_mutex); [ 13.031645] lock(bpf_event_mutex); [ 13.032135] lock(tracepoints_mutex); [ 13.032441] [ 13.032441] *** DEADLOCK *** [ 13.032441] [ 13.032911] 1 lock held by test_progs/246: [ 13.033239] #0: 00000000d663ef86 (bpf_event_mutex){+.+.}, at: bpf_probe_register+0x1d/0x60 [ 13.033909] [ 13.033909] stack backtrace: [ 13.034258] CPU: 1 PID: 246 Comm: test_progs Not tainted 5.0.0-rc3-00018-g2fa53f892422-dirty #477 [ 13.034964] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 [ 13.035657] Call Trace: [ 13.035859] dump_stack+0x5f/0x8b [ 13.036130] print_circular_bug.isra.37+0x1ce/0x1db [ 13.036526] __lock_acquire+0x1158/0x1350 [ 13.036852] ? lock_acquire+0x98/0x190 [ 13.037154] lock_acquire+0x98/0x190 [ 13.037447] ? tracepoint_probe_register_prio+0x2d/0x300 [ 13.037876] __mutex_lock+0x86/0x970 [ 13.038167] ? tracepoint_probe_register_prio+0x2d/0x300 [ 13.038600] ? tracepoint_probe_register_prio+0x2d/0x300 [ 13.039028] ? __mutex_lock+0x86/0x970 [ 13.039337] ? __mutex_lock+0x24a/0x970 [ 13.039649] ? bpf_probe_register+0x1d/0x60 [ 13.039992] ? __bpf_trace_sched_wake_idle_without_ipi+0x10/0x10 [ 13.040478] ? tracepoint_probe_register_prio+0x2d/0x300 [ 13.040906] tracepoint_probe_register_prio+0x2d/0x300 [ 13.041325] bpf_probe_register+0x40/0x60 [ 13.041649] bpf_raw_tracepoint_open.isra.34+0xa4/0x130 [ 13.042068] ? __might_fault+0x3e/0x90 [ 13.042374] __do_sys_bpf+0x94f/0x1a90 [ 13.042678] do_syscall_64+0x4a/0x180 [ 13.042975] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 13.043382] RIP: 0033:0x7f23b10a07f9 [ 13.045155] RSP: 002b:00007ffdef42fdd8 EFLAGS: 00000202 ORIG_RAX: 0000000000000141 [ 13.045759] RAX: ffffffffffffffda RBX: 00007ffdef42ff70 RCX: 00007f23b10a07f9 [ 13.046326] RDX: 0000000000000070 RSI: 00007ffdef42fe10 RDI: 0000000000000011 [ 13.046893] RBP: 00007ffdef42fdf0 R08: 0000000000000038 R09: 00007ffdef42fe10 [ 13.047462] R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000000 [ 13.048029] R13: 0000000000000016 R14: 00007f23b1db4690 R15: 0000000000000000 Since tracepoints_mutex will be taken in tracepoint_probe_register/unregister() there is no need to take bpf_event_mutex too. bpf_event_mutex is protecting modifications to prog array used in kprobe/perf bpf progs. bpf_raw_tracepoints don't need to take this mutex. Fixes: c4f6699dfcb8 ("bpf: introduce BPF_RAW_TRACEPOINT") Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/trace/bpf_trace.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 8b068adb9da1..f1a86a0d881d 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1204,22 +1204,12 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog * int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog) { - int err; - - mutex_lock(&bpf_event_mutex); - err = __bpf_probe_register(btp, prog); - mutex_unlock(&bpf_event_mutex); - return err; + return __bpf_probe_register(btp, prog); } int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) { - int err; - - mutex_lock(&bpf_event_mutex); - err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog); - mutex_unlock(&bpf_event_mutex); - return err; + return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog); } int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, From 7c4cd051add3d00bbff008a133c936c515eaa8fe Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 30 Jan 2019 18:12:45 -0800 Subject: [PATCH 0421/1436] bpf: Fix syscall's stackmap lookup potential deadlock The map_lookup_elem used to not acquiring spinlock in order to optimize the reader. It was true until commit 557c0c6e7df8 ("bpf: convert stackmap to pre-allocation") The syscall's map_lookup_elem(stackmap) calls bpf_stackmap_copy(). bpf_stackmap_copy() may find the elem no longer needed after the copy is done. If that is the case, pcpu_freelist_push() saves this elem for reuse later. This push requires a spinlock. If a tracing bpf_prog got run in the middle of the syscall's map_lookup_elem(stackmap) and this tracing bpf_prog is calling bpf_get_stackid(stackmap) which also requires the same pcpu_freelist's spinlock, it may end up with a dead lock situation as reported by Eric Dumazet in https://patchwork.ozlabs.org/patch/1030266/ The situation is the same as the syscall's map_update_elem() which needs to acquire the pcpu_freelist's spinlock and could race with tracing bpf_prog. Hence, this patch fixes it by protecting bpf_stackmap_copy() with this_cpu_inc(bpf_prog_active) to prevent tracing bpf_prog from running. A later syscall's map_lookup_elem commit f1a2e44a3aec ("bpf: add queue and stack maps") also acquires a spinlock and races with tracing bpf_prog similarly. Hence, this patch is forward looking and protects the majority of the map lookups. bpf_map_offload_lookup_elem() is the exception since it is for network bpf_prog only (i.e. never called by tracing bpf_prog). Fixes: 557c0c6e7df8 ("bpf: convert stackmap to pre-allocation") Reported-by: Eric Dumazet Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/syscall.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b155cd17c1bd..8577bb7f8be6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -713,8 +713,13 @@ static int map_lookup_elem(union bpf_attr *attr) if (bpf_map_is_dev_bound(map)) { err = bpf_map_offload_lookup_elem(map, key, value); - } else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || - map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { + goto done; + } + + preempt_disable(); + this_cpu_inc(bpf_prog_active); + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { err = bpf_percpu_hash_copy(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { err = bpf_percpu_array_copy(map, key, value); @@ -744,7 +749,10 @@ static int map_lookup_elem(union bpf_attr *attr) } rcu_read_unlock(); } + this_cpu_dec(bpf_prog_active); + preempt_enable(); +done: if (err) goto free_value; From 5a3840a470c41ec0b85cd36ca80370330656b163 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Thu, 31 Jan 2019 12:53:47 +1100 Subject: [PATCH 0422/1436] powerpc/papr_scm: Use the correct bind address When binding an SCM volume to a physical address the hypervisor has the option to return early with a continue token with the expectation that the guest will resume the bind operation until it completes. A quirk of this interface is that the bind address will only be returned by the first bind h-call and the subsequent calls will return 0xFFFF_FFFF_FFFF_FFFF for the bind address. We currently do not save the address returned by the first h-call. As a result we will use the junk address as the base of the bound region if the hypervisor decides to split the bind across multiple h-calls. This bug was found when testing with very large SCM volumes where the bind process would take more time than they hypervisor's internal h-call time limit would allow. This patch fixes the issue by saving the bind address from the first call. Cc: stable@vger.kernel.org Fixes: b5beae5e224f ("powerpc/pseries: Add driver for PAPR SCM regions") Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/papr_scm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 7d6457ab5d34..bba281b1fe1b 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -43,6 +43,7 @@ static int drc_pmem_bind(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; uint64_t rc, token; + uint64_t saved = 0; /* * When the hypervisor cannot map all the requested memory in a single @@ -56,6 +57,8 @@ static int drc_pmem_bind(struct papr_scm_priv *p) rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0, p->blocks, BIND_ANY_ADDR, token); token = ret[0]; + if (!saved) + saved = ret[1]; cond_resched(); } while (rc == H_BUSY); @@ -64,7 +67,7 @@ static int drc_pmem_bind(struct papr_scm_priv *p) return -ENXIO; } - p->bound_addr = ret[1]; + p->bound_addr = saved; dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res); From f2ce6ed3dcc837af8ddb4076c71f5d370e65f6af Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Fri, 25 Jan 2019 13:55:58 +0000 Subject: [PATCH 0423/1436] arm64: dts: imx8mq: Fix boot from eMMC The boot from eMMC is currently broken on the NXP i.MX8MQ EVK board. When trying to boot from eMMC it fails with: ... [ 1.271938] mmc1: Tuning failed, falling back to fixed sampling clock [ 1.287429] print_req_error: I/O error, dev mmcblk1, sector 1 flags 0 [ 1.306833] mmc1: Tuning failed, falling back to fixed sampling clock [ 1.322325] print_req_error: I/O error, dev mmcblk1, sector 2 flags 0 [ 1.329559] Buffer I/O error on dev mmcblk1, logical block 0, async page read [ 1.336714] mmcblk1: unable to read partition table ... The problem is the result of a partial misconfiguration of the pins and the missing assigned clock rate. Fixes: 9079aca4aacd ("arm64: add support for i.MX8M EVK board") Signed-off-by: Carlo Caione Tested-by: Chris Spencer Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mq-evk.dts | 44 ++++++++++---------- arch/arm64/boot/dts/freescale/imx8mq.dtsi | 2 + 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mq-evk.dts b/arch/arm64/boot/dts/freescale/imx8mq-evk.dts index 64acccc4bfcb..f74b13aa5aa5 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mq-evk.dts @@ -227,34 +227,34 @@ pinctrl_usdhc1_100mhz: usdhc1-100grp { fsl,pins = < - MX8MQ_IOMUXC_SD1_CLK_USDHC1_CLK 0x85 - MX8MQ_IOMUXC_SD1_CMD_USDHC1_CMD 0xc5 - MX8MQ_IOMUXC_SD1_DATA0_USDHC1_DATA0 0xc5 - MX8MQ_IOMUXC_SD1_DATA1_USDHC1_DATA1 0xc5 - MX8MQ_IOMUXC_SD1_DATA2_USDHC1_DATA2 0xc5 - MX8MQ_IOMUXC_SD1_DATA3_USDHC1_DATA3 0xc5 - MX8MQ_IOMUXC_SD1_DATA4_USDHC1_DATA4 0xc5 - MX8MQ_IOMUXC_SD1_DATA5_USDHC1_DATA5 0xc5 - MX8MQ_IOMUXC_SD1_DATA6_USDHC1_DATA6 0xc5 - MX8MQ_IOMUXC_SD1_DATA7_USDHC1_DATA7 0xc5 - MX8MQ_IOMUXC_SD1_STROBE_USDHC1_STROBE 0x85 + MX8MQ_IOMUXC_SD1_CLK_USDHC1_CLK 0x8d + MX8MQ_IOMUXC_SD1_CMD_USDHC1_CMD 0xcd + MX8MQ_IOMUXC_SD1_DATA0_USDHC1_DATA0 0xcd + MX8MQ_IOMUXC_SD1_DATA1_USDHC1_DATA1 0xcd + MX8MQ_IOMUXC_SD1_DATA2_USDHC1_DATA2 0xcd + MX8MQ_IOMUXC_SD1_DATA3_USDHC1_DATA3 0xcd + MX8MQ_IOMUXC_SD1_DATA4_USDHC1_DATA4 0xcd + MX8MQ_IOMUXC_SD1_DATA5_USDHC1_DATA5 0xcd + MX8MQ_IOMUXC_SD1_DATA6_USDHC1_DATA6 0xcd + MX8MQ_IOMUXC_SD1_DATA7_USDHC1_DATA7 0xcd + MX8MQ_IOMUXC_SD1_STROBE_USDHC1_STROBE 0x8d MX8MQ_IOMUXC_SD1_RESET_B_USDHC1_RESET_B 0xc1 >; }; pinctrl_usdhc1_200mhz: usdhc1-200grp { fsl,pins = < - MX8MQ_IOMUXC_SD1_CLK_USDHC1_CLK 0x87 - MX8MQ_IOMUXC_SD1_CMD_USDHC1_CMD 0xc7 - MX8MQ_IOMUXC_SD1_DATA0_USDHC1_DATA0 0xc7 - MX8MQ_IOMUXC_SD1_DATA1_USDHC1_DATA1 0xc7 - MX8MQ_IOMUXC_SD1_DATA2_USDHC1_DATA2 0xc7 - MX8MQ_IOMUXC_SD1_DATA3_USDHC1_DATA3 0xc7 - MX8MQ_IOMUXC_SD1_DATA4_USDHC1_DATA4 0xc7 - MX8MQ_IOMUXC_SD1_DATA5_USDHC1_DATA5 0xc7 - MX8MQ_IOMUXC_SD1_DATA6_USDHC1_DATA6 0xc7 - MX8MQ_IOMUXC_SD1_DATA7_USDHC1_DATA7 0xc7 - MX8MQ_IOMUXC_SD1_STROBE_USDHC1_STROBE 0x87 + MX8MQ_IOMUXC_SD1_CLK_USDHC1_CLK 0x9f + MX8MQ_IOMUXC_SD1_CMD_USDHC1_CMD 0xdf + MX8MQ_IOMUXC_SD1_DATA0_USDHC1_DATA0 0xdf + MX8MQ_IOMUXC_SD1_DATA1_USDHC1_DATA1 0xdf + MX8MQ_IOMUXC_SD1_DATA2_USDHC1_DATA2 0xdf + MX8MQ_IOMUXC_SD1_DATA3_USDHC1_DATA3 0xdf + MX8MQ_IOMUXC_SD1_DATA4_USDHC1_DATA4 0xdf + MX8MQ_IOMUXC_SD1_DATA5_USDHC1_DATA5 0xdf + MX8MQ_IOMUXC_SD1_DATA6_USDHC1_DATA6 0xdf + MX8MQ_IOMUXC_SD1_DATA7_USDHC1_DATA7 0xdf + MX8MQ_IOMUXC_SD1_STROBE_USDHC1_STROBE 0x9f MX8MQ_IOMUXC_SD1_RESET_B_USDHC1_RESET_B 0xc1 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 8e9d6d5ed7b2..b6d31499fb43 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -360,6 +360,8 @@ <&clk IMX8MQ_CLK_NAND_USDHC_BUS>, <&clk IMX8MQ_CLK_USDHC1_ROOT>; clock-names = "ipg", "ahb", "per"; + assigned-clocks = <&clk IMX8MQ_CLK_USDHC1>; + assigned-clock-rates = <400000000>; fsl,tuning-start-tap = <20>; fsl,tuning-step = <2>; bus-width = <4>; From 8fdd60f2ae3682caf2a7258626abc21eb4711892 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 31 Jan 2019 23:41:11 -0500 Subject: [PATCH 0424/1436] Revert "ext4: use ext4_write_inode() when fsyncing w/o a journal" This reverts commit ad211f3e94b314a910d4af03178a0b52a7d1ee0a. As Jan Kara pointed out, this change was unsafe since it means we lose the call to sync_mapping_buffers() in the nojournal case. The original point of the commit was avoid taking the inode mutex (since it causes a lockdep warning in generic/113); but we need the mutex in order to call sync_mapping_buffers(). The real fix to this problem was discussed here: https://lore.kernel.org/lkml/20181025150540.259281-4-bvanassche@acm.org The proposed patch was to fix a syzbot complaint, but the problem can also demonstrated via "kvm-xfstests -c nojournal generic/113". Multiple solutions were discused in the e-mail thread, but none have landed in the kernel as of this writing. Anyway, commit ad211f3e94b314 is absolutely the wrong way to suppress the lockdep, so revert it. Fixes: ad211f3e94b314a910d4af03178a0b52a7d1ee0a ("ext4: use ext4_write_inode() when fsyncing w/o a journal") Signed-off-by: Theodore Ts'o Reported: Jan Kara --- fs/ext4/fsync.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 712f00995390..5508baa11bb6 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -116,16 +116,8 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; } - ret = file_write_and_wait_range(file, start, end); - if (ret) - return ret; - if (!journal) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL - }; - - ret = ext4_write_inode(inode, &wbc); + ret = __generic_file_fsync(file, start, end, datasync); if (!ret) ret = ext4_sync_parent(inode); if (test_opt(inode->i_sb, BARRIER)) @@ -133,6 +125,9 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; } + ret = file_write_and_wait_range(file, start, end); + if (ret) + return ret; /* * data=writeback,ordered: * The caller's filemap_fdatawrite()/wait will sync the data. From 1f66b7ea8c4c99dba1e4e4a85ac532eee00af908 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Thu, 17 Jan 2019 15:47:54 -0500 Subject: [PATCH 0425/1436] drm/amd/display: Fix fclk idle state [Why] The earlier change 'Fix 6x4K displays' led to fclk value idling at higher DPM level. [How] Apply the fix only to respective multi-display configuration. Signed-off-by: Roman Li Reviewed-by: Feifei Xu Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c index afd287f08bc9..19801bdba0d2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c @@ -591,7 +591,15 @@ static void dce11_pplib_apply_display_requirements( dc, context->bw.dce.sclk_khz); - pp_display_cfg->min_dcfclock_khz = pp_display_cfg->min_engine_clock_khz; + /* + * As workaround for >4x4K lightup set dcfclock to min_engine_clock value. + * This is not required for less than 5 displays, + * thus don't request decfclk in dc to avoid impact + * on power saving. + * + */ + pp_display_cfg->min_dcfclock_khz = (context->stream_count > 4)? + pp_display_cfg->min_engine_clock_khz : 0; pp_display_cfg->min_engine_clock_deep_sleep_khz = context->bw.dce.sclk_deep_sleep_khz; From 12292519d919ecde92e7e7c8acbcdb9f0c7c6013 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Wed, 30 Jan 2019 12:53:29 -0600 Subject: [PATCH 0426/1436] drm/amdgpu: Implement doorbell self-ring for NBIO 7.4 Fixes doorbell reflection on Vega20. Change-Id: I0495139d160a9032dff5977289b1eec11c16f781 Signed-off-by: Jay Cornwall Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 4cd31a276dcd..186db182f924 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -93,7 +93,20 @@ static void nbio_v7_4_enable_doorbell_aperture(struct amdgpu_device *adev, static void nbio_v7_4_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, bool enable) { + u32 tmp = 0; + if (enable) { + tmp = REG_SET_FIELD(tmp, DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_EN, 1) | + REG_SET_FIELD(tmp, DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_MODE, 1) | + REG_SET_FIELD(tmp, DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_SIZE, 0); + + WREG32_SOC15(NBIO, 0, mmDOORBELL_SELFRING_GPA_APER_BASE_LOW, + lower_32_bits(adev->doorbell.base)); + WREG32_SOC15(NBIO, 0, mmDOORBELL_SELFRING_GPA_APER_BASE_HIGH, + upper_32_bits(adev->doorbell.base)); + } + + WREG32_SOC15(NBIO, 0, mmDOORBELL_SELFRING_GPA_APER_CNTL, tmp); } static void nbio_v7_4_ih_doorbell_range(struct amdgpu_device *adev, From 7e4545d372b560df10fa47281ef0783a479ce435 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Wed, 30 Jan 2019 19:50:04 +0800 Subject: [PATCH 0427/1436] drm/amdgpu: fix the incorrect external id for raven series This patch fixes the incorrect external id that kernel reports to user mode driver. Raven2's rev_id is starts from 0x8, so its external id (0x81) should start from rev_id + 0x79 (0x81 - 0x8). And Raven's rev_id should be 0x21 while rev_id == 1. Reported-by: Crystal Jin Signed-off-by: Huang Rui Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 8849b74078d6..9b639974c70c 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -729,11 +729,13 @@ static int soc15_common_early_init(void *handle) case CHIP_RAVEN: adev->asic_funcs = &soc15_asic_funcs; if (adev->rev_id >= 0x8) - adev->external_rev_id = adev->rev_id + 0x81; + adev->external_rev_id = adev->rev_id + 0x79; else if (adev->pdev->device == 0x15d8) adev->external_rev_id = adev->rev_id + 0x41; + else if (adev->rev_id == 1) + adev->external_rev_id = adev->rev_id + 0x20; else - adev->external_rev_id = 0x1; + adev->external_rev_id = adev->rev_id + 0x01; if (adev->rev_id >= 0x8) { adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | From bda2ab56356b9acdfab150f31c4bac9846253092 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 30 Jan 2019 09:47:00 +0100 Subject: [PATCH 0428/1436] mtd: Remove a debug trace in mtdpart.c Commit 2b6f0090a333 ("mtd: Check add_mtd_device() ret code") contained a leftover of the debug session that led to this bug fix. Remove this pr_info(). Fixes: 2b6f0090a333 ("mtd: Check add_mtd_device() ret code") Signed-off-by: Boris Brezillon --- drivers/mtd/mtdpart.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 60104e1079c5..e6d9467f6be0 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -632,7 +632,6 @@ err_remove_part: mutex_unlock(&mtd_partitions_mutex); free_partition(new); - pr_info("%s:%i\n", __func__, __LINE__); return ret; } From 9d0f50b80222dc273e67e4e14410fcfa4130a90c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 29 Jan 2019 11:10:57 +0100 Subject: [PATCH 0429/1436] mac80211: ensure that mgmt tx skbs have tailroom for encryption Some drivers use IEEE80211_KEY_FLAG_SW_MGMT_TX to indicate that management frames need to be software encrypted. Since normal data packets are still encrypted by the hardware, crypto_tx_tailroom_needed_cnt gets decremented after key upload to hw. This can lead to passing skbs to ccmp_encrypt_skb, which don't have the necessary tailroom for software encryption. Change the code to add tailroom for encrypted management packets, even if crypto_tx_tailroom_needed_cnt is 0. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f170d6c6629a..928f13a208b0 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1938,9 +1938,16 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, int head_need, bool may_encrypt) { struct ieee80211_local *local = sdata->local; + struct ieee80211_hdr *hdr; + bool enc_tailroom; int tail_need = 0; - if (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt) { + hdr = (struct ieee80211_hdr *) skb->data; + enc_tailroom = may_encrypt && + (sdata->crypto_tx_tailroom_needed_cnt || + ieee80211_is_mgmt(hdr->frame_control)); + + if (enc_tailroom) { tail_need = IEEE80211_ENCRYPT_TAILROOM; tail_need -= skb_tailroom(skb); tail_need = max_t(int, tail_need, 0); @@ -1948,8 +1955,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, if (skb_cloned(skb) && (!ieee80211_hw_check(&local->hw, SUPPORTS_CLONED_SKBS) || - !skb_clone_writable(skb, ETH_HLEN) || - (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt))) + !skb_clone_writable(skb, ETH_HLEN) || enc_tailroom)) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); else if (head_need || tail_need) I802_DEBUG_INC(local->tx_expand_skb_head); From e005bd7ddea06784c1eb91ac5bb6b171a94f3b05 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 1 Feb 2019 11:09:54 +0100 Subject: [PATCH 0430/1436] cfg80211: call disconnect_wk when AP stops Since we now prevent regulatory restore during STA disconnect if concurrent AP interfaces are active, we need to reschedule this check when the AP state changes. This fixes never doing a restore when an AP is the last interface to stop. Or to put it another way: we need to re-check after anything we check here changes. Cc: stable@vger.kernel.org Fixes: 113f3aaa81bd ("cfg80211: Prevent regulatory restore during STA disconnect in concurrent interfaces") Signed-off-by: Johannes Berg --- net/wireless/ap.c | 2 ++ net/wireless/core.h | 2 ++ net/wireless/sme.c | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 882d97bdc6bf..550ac9d827fe 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -41,6 +41,8 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, cfg80211_sched_dfs_chan_update(rdev); } + schedule_work(&cfg80211_disconnect_work); + return err; } diff --git a/net/wireless/core.h b/net/wireless/core.h index c5d6f3418601..f6b40563dc63 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -445,6 +445,8 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev); bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range, u32 center_freq_khz, u32 bw_khz); +extern struct work_struct cfg80211_disconnect_work; + /** * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable * @wiphy: the wiphy to validate against diff --git a/net/wireless/sme.c b/net/wireless/sme.c index f741d8376a46..7d34cb884840 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -667,7 +667,7 @@ static void disconnect_work(struct work_struct *work) rtnl_unlock(); } -static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); +DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); /* From d561aa0a70bb2e1dd85fde98b6a5561e4175ac3e Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 1 Feb 2019 16:51:10 +0800 Subject: [PATCH 0431/1436] ALSA: hda/realtek - Fix lose hp_pins for disable auto mute When auto_mute = no or spec->suppress_auto_mute = 1, cfg->hp_pins will lose value. Add this patch to find hp_pins value. I add fixed for ALC282 ALC225 ALC256 ALC294 and alc_default_init() alc_default_shutup(). Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 45 +++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f2523fab5fed..5b1a71fb909c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2963,6 +2963,11 @@ static void alc282_init(struct hda_codec *codec) bool hp_pin_sense; int coef78; + if (!spec->gen.autocfg.hp_outs && spec->gen.suppress_auto_mute) { + if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) + hp_pin = spec->gen.autocfg.line_out_pins[0]; + } + alc282_restore_default_value(codec); if (!hp_pin) @@ -3000,6 +3005,11 @@ static void alc282_shutup(struct hda_codec *codec) bool hp_pin_sense; int coef78; + if (!spec->gen.autocfg.hp_outs && spec->gen.suppress_auto_mute) { + if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) + hp_pin = spec->gen.autocfg.line_out_pins[0]; + } + if (!hp_pin) { alc269_shutup(codec); return; @@ -3159,6 +3169,11 @@ static void alc256_init(struct hda_codec *codec) hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; bool hp_pin_sense; + if (!spec->gen.autocfg.hp_outs && spec->gen.suppress_auto_mute) { + if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) + hp_pin = spec->gen.autocfg.line_out_pins[0]; + } + if (!hp_pin) return; @@ -3195,6 +3210,11 @@ static void alc256_shutup(struct hda_codec *codec) hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; bool hp_pin_sense; + if (!spec->gen.autocfg.hp_outs && spec->gen.suppress_auto_mute) { + if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) + hp_pin = spec->gen.autocfg.line_out_pins[0]; + } + if (!hp_pin) { alc269_shutup(codec); return; @@ -3231,6 +3251,11 @@ static void alc225_init(struct hda_codec *codec) hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; bool hp1_pin_sense, hp2_pin_sense; + if (!spec->gen.autocfg.hp_outs && spec->gen.suppress_auto_mute) { + if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) + hp_pin = spec->gen.autocfg.line_out_pins[0]; + } + if (!hp_pin) return; @@ -3274,6 +3299,11 @@ static void alc225_shutup(struct hda_codec *codec) hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; bool hp1_pin_sense, hp2_pin_sense; + if (!spec->gen.autocfg.hp_outs && spec->gen.suppress_auto_mute) { + if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) + hp_pin = spec->gen.autocfg.line_out_pins[0]; + } + if (!hp_pin) { alc269_shutup(codec); return; @@ -3318,6 +3348,11 @@ static void alc_default_init(struct hda_codec *codec) hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; bool hp_pin_sense; + if (!spec->gen.autocfg.hp_outs && spec->gen.suppress_auto_mute) { + if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) + hp_pin = spec->gen.autocfg.line_out_pins[0]; + } + if (!hp_pin) return; @@ -3347,6 +3382,11 @@ static void alc_default_shutup(struct hda_codec *codec) hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; bool hp_pin_sense; + if (!spec->gen.autocfg.hp_outs && spec->gen.suppress_auto_mute) { + if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) + hp_pin = spec->gen.autocfg.line_out_pins[0]; + } + if (!hp_pin) { alc269_shutup(codec); return; @@ -3379,6 +3419,11 @@ static void alc294_hp_init(struct hda_codec *codec) hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; int i, val; + if (!spec->gen.autocfg.hp_outs && spec->gen.suppress_auto_mute) { + if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) + hp_pin = spec->gen.autocfg.line_out_pins[0]; + } + if (!hp_pin) return; From 35a39f98567d8d3f1cea48f0f30de1a7e736b644 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 1 Feb 2019 11:19:50 +0100 Subject: [PATCH 0432/1436] ALSA: hda/realtek - Use a common helper for hp pin reference Replace the open-codes in many places with a new common helper for performing the same thing: referring to the primary headphone pin. This eventually fixes the potentially missing headphone pin on some weird devices, too. Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 96 +++++++++-------------------------- 1 file changed, 24 insertions(+), 72 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5b1a71fb909c..6df758adff84 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -515,6 +515,15 @@ static void alc_auto_init_amp(struct hda_codec *codec, int type) } } +/* get a primary headphone pin if available */ +static hda_nid_t alc_get_hp_pin(struct alc_spec *spec) +{ + if (spec->gen.autocfg.hp_pins[0]) + return spec->gen.autocfg.hp_pins[0]; + if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) + return spec->gen.autocfg.line_out_pins[0]; + return 0; +} /* * Realtek SSID verification @@ -725,9 +734,7 @@ do_sku: * 15 : 1 --> enable the function "Mute internal speaker * when the external headphone out jack is plugged" */ - if (!spec->gen.autocfg.hp_pins[0] && - !(spec->gen.autocfg.line_out_pins[0] && - spec->gen.autocfg.line_out_type == AUTO_PIN_HP_OUT)) { + if (!alc_get_hp_pin(spec)) { hda_nid_t nid; tmp = (ass >> 11) & 0x3; /* HP to chassis */ nid = ports[tmp]; @@ -2959,15 +2966,10 @@ static void alc282_restore_default_value(struct hda_codec *codec) static void alc282_init(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp_pin_sense; int coef78; - if (!spec->gen.autocfg.hp_outs && spec->gen.suppress_auto_mute) { - if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) - hp_pin = spec->gen.autocfg.line_out_pins[0]; - } - alc282_restore_default_value(codec); if (!hp_pin) @@ -3001,15 +3003,10 @@ static void alc282_init(struct hda_codec *codec) static void alc282_shutup(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp_pin_sense; int coef78; - if (!spec->gen.autocfg.hp_outs && spec->gen.suppress_auto_mute) { - if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) - hp_pin = spec->gen.autocfg.line_out_pins[0]; - } - if (!hp_pin) { alc269_shutup(codec); return; @@ -3084,14 +3081,9 @@ static void alc283_restore_default_value(struct hda_codec *codec) static void alc283_init(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp_pin_sense; - if (!spec->gen.autocfg.hp_outs) { - if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) - hp_pin = spec->gen.autocfg.line_out_pins[0]; - } - alc283_restore_default_value(codec); if (!hp_pin) @@ -3125,14 +3117,9 @@ static void alc283_init(struct hda_codec *codec) static void alc283_shutup(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp_pin_sense; - if (!spec->gen.autocfg.hp_outs) { - if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) - hp_pin = spec->gen.autocfg.line_out_pins[0]; - } - if (!hp_pin) { alc269_shutup(codec); return; @@ -3166,14 +3153,9 @@ static void alc283_shutup(struct hda_codec *codec) static void alc256_init(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp_pin_sense; - if (!spec->gen.autocfg.hp_outs && spec->gen.suppress_auto_mute) { - if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) - hp_pin = spec->gen.autocfg.line_out_pins[0]; - } - if (!hp_pin) return; @@ -3207,14 +3189,9 @@ static void alc256_init(struct hda_codec *codec) static void alc256_shutup(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp_pin_sense; - if (!spec->gen.autocfg.hp_outs && spec->gen.suppress_auto_mute) { - if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) - hp_pin = spec->gen.autocfg.line_out_pins[0]; - } - if (!hp_pin) { alc269_shutup(codec); return; @@ -3248,14 +3225,9 @@ static void alc256_shutup(struct hda_codec *codec) static void alc225_init(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp1_pin_sense, hp2_pin_sense; - if (!spec->gen.autocfg.hp_outs && spec->gen.suppress_auto_mute) { - if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) - hp_pin = spec->gen.autocfg.line_out_pins[0]; - } - if (!hp_pin) return; @@ -3296,14 +3268,9 @@ static void alc225_init(struct hda_codec *codec) static void alc225_shutup(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp1_pin_sense, hp2_pin_sense; - if (!spec->gen.autocfg.hp_outs && spec->gen.suppress_auto_mute) { - if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) - hp_pin = spec->gen.autocfg.line_out_pins[0]; - } - if (!hp_pin) { alc269_shutup(codec); return; @@ -3345,14 +3312,9 @@ static void alc225_shutup(struct hda_codec *codec) static void alc_default_init(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp_pin_sense; - if (!spec->gen.autocfg.hp_outs && spec->gen.suppress_auto_mute) { - if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) - hp_pin = spec->gen.autocfg.line_out_pins[0]; - } - if (!hp_pin) return; @@ -3379,14 +3341,9 @@ static void alc_default_init(struct hda_codec *codec) static void alc_default_shutup(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp_pin_sense; - if (!spec->gen.autocfg.hp_outs && spec->gen.suppress_auto_mute) { - if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) - hp_pin = spec->gen.autocfg.line_out_pins[0]; - } - if (!hp_pin) { alc269_shutup(codec); return; @@ -3416,14 +3373,9 @@ static void alc_default_shutup(struct hda_codec *codec) static void alc294_hp_init(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); int i, val; - if (!spec->gen.autocfg.hp_outs && spec->gen.suppress_auto_mute) { - if (spec->gen.autocfg.line_out_type == AC_JACK_HP_OUT) - hp_pin = spec->gen.autocfg.line_out_pins[0]; - } - if (!hp_pin) return; @@ -4825,7 +4777,7 @@ static void alc_update_headset_mode(struct hda_codec *codec) struct alc_spec *spec = codec->spec; hda_nid_t mux_pin = spec->gen.imux_pins[spec->gen.cur_mux[0]]; - hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + hda_nid_t hp_pin = alc_get_hp_pin(spec); int new_headset_mode; @@ -5104,7 +5056,7 @@ static void alc_fixup_tpt470_dock(struct hda_codec *codec, static void alc_shutup_dell_xps13(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - int hp_pin = spec->gen.autocfg.hp_pins[0]; + int hp_pin = alc_get_hp_pin(spec); /* Prevent pop noises when headphones are plugged in */ snd_hda_codec_write(codec, hp_pin, 0, @@ -5197,7 +5149,7 @@ static void alc271_hp_gate_mic_jack(struct hda_codec *codec, if (action == HDA_FIXUP_ACT_PROBE) { int mic_pin = find_ext_mic_pin(codec); - int hp_pin = spec->gen.autocfg.hp_pins[0]; + int hp_pin = alc_get_hp_pin(spec); if (snd_BUG_ON(!mic_pin || !hp_pin)) return; From 305a0ade180981686eec1f92aa6252a7c6ebb1cf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Jan 2019 17:46:03 +0100 Subject: [PATCH 0433/1436] ALSA: hda - Serialize codec registrations In the current code, the codec registration may happen both at the codec bind time and the end of the controller probe time. In a rare occasion, they race with each other, leading to Oops due to the still uninitialized card device. This patch introduces a simple flag to prevent the codec registration at the codec bind time as long as the controller probe is going on. The controller probe invokes snd_card_register() that does the whole registration task, and we don't need to register each piece beforehand. Cc: Signed-off-by: Takashi Iwai --- include/sound/hda_codec.h | 1 + sound/pci/hda/hda_bind.c | 3 ++- sound/pci/hda/hda_intel.c | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h index 7fa48b100936..cc7c8d42d4fd 100644 --- a/include/sound/hda_codec.h +++ b/include/sound/hda_codec.h @@ -68,6 +68,7 @@ struct hda_bus { unsigned int response_reset:1; /* controller was reset */ unsigned int in_reset:1; /* during reset operation */ unsigned int no_response_fallback:1; /* don't fallback at RIRB error */ + unsigned int bus_probing :1; /* during probing process */ int primary_dig_out_type; /* primary digital out PCM type */ unsigned int mixer_assigned; /* codec addr for mixer name */ diff --git a/sound/pci/hda/hda_bind.c b/sound/pci/hda/hda_bind.c index 9174f1b3a987..1ec706ced75c 100644 --- a/sound/pci/hda/hda_bind.c +++ b/sound/pci/hda/hda_bind.c @@ -115,7 +115,8 @@ static int hda_codec_driver_probe(struct device *dev) err = snd_hda_codec_build_controls(codec); if (err < 0) goto error_module; - if (codec->card->registered) { + /* only register after the bus probe finished; otherwise it's racy */ + if (!codec->bus->bus_probing && codec->card->registered) { err = snd_card_register(codec->card); if (err < 0) goto error_module; diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index e784130ea4e0..e5c49003e75f 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2185,6 +2185,7 @@ static int azx_probe_continue(struct azx *chip) int dev = chip->dev_index; int err; + to_hda_bus(bus)->bus_probing = 1; hda->probe_continued = 1; /* bind with i915 if needed */ @@ -2269,6 +2270,7 @@ out_free: if (err < 0) hda->init_failed = 1; complete_all(&hda->probe_wait); + to_hda_bus(bus)->bus_probing = 0; return err; } From c2e28ef7711ffcb083474ee5f154264c6ec1ec07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomislav=20Po=C5=BEega?= Date: Thu, 27 Dec 2018 15:05:25 +0100 Subject: [PATCH 0434/1436] rt2x00: reduce tx power to nominal level on RT6352 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current implementation of RT6352 support provides too high tx power at least on iPA/eLNA devices. Reduce amplification of variable gain amplifier by 6dB to match board target power of 17dBm. Transmited signal strength with this patch is similar to that of stock firmware or pandorabox firmware. Throughput measured with iperf improves. Device tested: Xiaomi Miwifi Mini. Signed-off-by: Tomislav Požega Signed-off-by: Kalle Valo --- drivers/net/wireless/ralink/rt2x00/rt2800lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index 0e95555aec62..7f813f6f8792 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -5477,7 +5477,7 @@ static int rt2800_init_registers(struct rt2x00_dev *rt2x00dev) rt2800_register_write(rt2x00dev, TX_SW_CFG2, 0x00000000); rt2800_register_write(rt2x00dev, MIMO_PS_CFG, 0x00000002); rt2800_register_write(rt2x00dev, TX_PIN_CFG, 0x00150F0F); - rt2800_register_write(rt2x00dev, TX_ALC_VGA3, 0x06060606); + rt2800_register_write(rt2x00dev, TX_ALC_VGA3, 0x00000000); rt2800_register_write(rt2x00dev, TX0_BB_GAIN_ATTEN, 0x0); rt2800_register_write(rt2x00dev, TX1_BB_GAIN_ATTEN, 0x0); rt2800_register_write(rt2x00dev, TX0_RF_GAIN_ATTEN, 0x6C6C666C); From db040dfa53e2905111e400b28d0bd6f38c78d348 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 8 Jan 2019 11:26:32 -0600 Subject: [PATCH 0435/1436] qtnfmac: use struct_size() in kzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; void *entry[]; }; instance = kzalloc(sizeof(struct foo) + sizeof(void *) * count, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kzalloc(struct_size(instance, entry, count), GFP_KERNEL); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Reviewed-by: Sergey Matyukevich Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/commands.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 659e7649fe22..cf386f579060 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -914,9 +914,8 @@ qtnf_cmd_resp_proc_hw_info(struct qtnf_bus *bus, if (WARN_ON(resp->n_reg_rules > NL80211_MAX_SUPP_REG_RULES)) return -E2BIG; - hwinfo->rd = kzalloc(sizeof(*hwinfo->rd) - + sizeof(struct ieee80211_reg_rule) - * resp->n_reg_rules, GFP_KERNEL); + hwinfo->rd = kzalloc(struct_size(hwinfo->rd, reg_rules, + resp->n_reg_rules), GFP_KERNEL); if (!hwinfo->rd) return -ENOMEM; From 434256833d8eb988cb7f3b8a41699e2fe48d9332 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 8 Jan 2019 11:42:21 -0600 Subject: [PATCH 0436/1436] libertas: add checks for the return value of sysfs_create_group sysfs_create_group() could fail. The fix checkes its return values and issue error messages if it fails. Signed-off-by: Kangjie Lu Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/libertas/mesh.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/marvell/libertas/mesh.c b/drivers/net/wireless/marvell/libertas/mesh.c index b0cb16ef8d1d..2315fdff56c2 100644 --- a/drivers/net/wireless/marvell/libertas/mesh.c +++ b/drivers/net/wireless/marvell/libertas/mesh.c @@ -797,7 +797,12 @@ static void lbs_persist_config_init(struct net_device *dev) { int ret; ret = sysfs_create_group(&(dev->dev.kobj), &boot_opts_group); + if (ret) + pr_err("failed to create boot_opts_group.\n"); + ret = sysfs_create_group(&(dev->dev.kobj), &mesh_ie_group); + if (ret) + pr_err("failed to create mesh_ie_group.\n"); } static void lbs_persist_config_remove(struct net_device *dev) From 46b87976d7a8a75759a13a6db0e1a5f166011968 Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Tue, 8 Jan 2019 22:49:23 +0000 Subject: [PATCH 0437/1436] rtl8723ae: Take the FW LPS mode handling out This appears to trigger a firmware bug and causes severe problems with rtl8723ae PCI devices. When the power save mode is activated for longer periods of time the firmware stops to receive any packets. This problem was exposed by commit 873ffe154ae0 ("rtlwifi: Fix logic error in enter/exit power-save mode"). Previously the power save mode was only active rarely and only for a short time so that the problem was not noticeable. Signed-off-by: Bernd Edlinger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c index 07b82700d1de..3103151dda2b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c @@ -266,8 +266,8 @@ static struct rtl_hal_ops rtl8723e_hal_ops = { static struct rtl_mod_params rtl8723e_mod_params = { .sw_crypto = false, .inactiveps = true, - .swctrl_lps = false, - .fwctrl_lps = true, + .swctrl_lps = true, + .fwctrl_lps = false, .aspm_support = 1, .debug_level = 0, .debug_mask = 0, @@ -395,8 +395,8 @@ module_param_named(disable_watchdog, rtl8723e_mod_params.disable_watchdog, bool, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n"); -MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n"); -MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); +MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 1)\n"); +MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 0)\n"); MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 0)\n"); MODULE_PARM_DESC(aspm, "Set to 1 to enable ASPM (default 1)\n"); MODULE_PARM_DESC(debug_level, "Set debug level (0-5) (default 0)"); From 5bb5385fbf3e31e72ee81e05c14f958dab2d53b6 Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Tue, 8 Jan 2019 22:49:40 +0000 Subject: [PATCH 0438/1436] rtl8723ae: Dont use old data for input gain control When no beacon was received, the value in dm.undec_sm_pwdb is most likely out of date and should not be used to adjust the input path. Assume instead that the signal level is low. Fix the state machine in rtl8723e_dm_cck_packet_detection_thresh which did not clear pre_cck_fa_state when changing cur_cck_pd_state from CCK_PD_STAGE_LOWRSSI/CCK_FA_STAGE_LOW to CCK_PD_STAGE_HIGHRSSI and back again to CCK_PD_STAGE_LOWRSSI/CCK_FA_STAGE_LOW, the register RCCK0_CCA not written to 0x83 on the second change. Explicitly initialize pre_cck_fa_state/cur_cck_fa_state in rtl_dm_diginit. Signed-off-by: Bernd Edlinger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/core.c | 2 ++ drivers/net/wireless/realtek/rtlwifi/rtl8723ae/dm.c | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 4bf7967590ca..ce23339bf9fb 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -1957,5 +1957,7 @@ void rtl_dm_diginit(struct ieee80211_hw *hw, u32 cur_igvalue) dm_digtable->bt30_cur_igi = 0x32; dm_digtable->pre_cck_pd_state = CCK_PD_STAGE_MAX; dm_digtable->cur_cck_pd_state = CCK_PD_STAGE_LOWRSSI; + dm_digtable->pre_cck_fa_state = 0; + dm_digtable->cur_cck_fa_state = 0; } EXPORT_SYMBOL(rtl_dm_diginit); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/dm.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/dm.c index 42a6fba90ba9..902b94466c55 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/dm.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/dm.c @@ -151,8 +151,14 @@ static u8 rtl8723e_dm_initial_gain_min_pwdb(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct dig_t *dm_digtable = &rtlpriv->dm_digtable; + struct rtl_mac *mac = rtl_mac(rtlpriv); long rssi_val_min = 0; + if (mac->link_state == MAC80211_LINKED && + mac->opmode == NL80211_IFTYPE_STATION && + rtlpriv->link_info.bcn_rx_inperiod == 0) + return 0; + if ((dm_digtable->curmultista_cstate == DIG_MULTISTA_CONNECT) && (dm_digtable->cursta_cstate == DIG_STA_CONNECT)) { if (rtlpriv->dm.entry_min_undec_sm_pwdb != 0) @@ -417,6 +423,8 @@ static void rtl8723e_dm_cck_packet_detection_thresh(struct ieee80211_hw *hw) } else { rtl_set_bbreg(hw, RCCK0_CCA, MASKBYTE2, 0xcd); rtl_set_bbreg(hw, RCCK0_SYSTEM, MASKBYTE1, 0x47); + dm_digtable->pre_cck_fa_state = 0; + dm_digtable->cur_cck_fa_state = 0; } dm_digtable->pre_cck_pd_state = dm_digtable->cur_cck_pd_state; From 28484b6b37ba4a500a2913e5b64456a5717259cf Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Tue, 8 Jan 2019 22:49:52 +0000 Subject: [PATCH 0439/1436] rtl8723ae: Re-introduce the adaptive rate control This re-introduces the function rtl8723e_dm_refresh_rate_adaptive_mask. This function was present in a previous version of the code base, it works just fine for me -- as long as it is not using stale data. Unlike the original version of this function it avoids using dm.undec_sm_pwdb when no beacon was received. Fixed a style nit in rtl8723e_dm_init_rate_adaptive_mask. Signed-off-by: Bernd Edlinger Signed-off-by: Kalle Valo --- .../wireless/realtek/rtlwifi/rtl8723ae/dm.c | 87 ++++++++++++++++++- 1 file changed, 85 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/dm.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/dm.c index 902b94466c55..acfd54c6f8dd 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/dm.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/dm.c @@ -673,7 +673,7 @@ void rtl8723e_dm_check_txpower_tracking(struct ieee80211_hw *hw) void rtl8723e_dm_init_rate_adaptive_mask(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); - struct rate_adaptive *p_ra = &(rtlpriv->ra); + struct rate_adaptive *p_ra = &rtlpriv->ra; p_ra->ratr_state = DM_RATR_STA_INIT; p_ra->pre_ratr_state = DM_RATR_STA_INIT; @@ -685,6 +685,89 @@ void rtl8723e_dm_init_rate_adaptive_mask(struct ieee80211_hw *hw) } +void rtl8723e_dm_refresh_rate_adaptive_mask(struct ieee80211_hw *hw) +{ + struct rtl_priv *rtlpriv = rtl_priv(hw); + struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw)); + struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); + struct rate_adaptive *p_ra = &rtlpriv->ra; + u32 low_rssithresh_for_ra, high_rssithresh_for_ra; + struct ieee80211_sta *sta = NULL; + + if (is_hal_stop(rtlhal)) { + RT_TRACE(rtlpriv, COMP_RATE, DBG_LOUD, + " driver is going to unload\n"); + return; + } + + if (!rtlpriv->dm.useramask) { + RT_TRACE(rtlpriv, COMP_RATE, DBG_LOUD, + " driver does not control rate adaptive mask\n"); + return; + } + + if (mac->link_state == MAC80211_LINKED && + mac->opmode == NL80211_IFTYPE_STATION) { + switch (p_ra->pre_ratr_state) { + case DM_RATR_STA_HIGH: + high_rssithresh_for_ra = 50; + low_rssithresh_for_ra = 20; + break; + case DM_RATR_STA_MIDDLE: + high_rssithresh_for_ra = 55; + low_rssithresh_for_ra = 20; + break; + case DM_RATR_STA_LOW: + high_rssithresh_for_ra = 60; + low_rssithresh_for_ra = 25; + break; + default: + high_rssithresh_for_ra = 50; + low_rssithresh_for_ra = 20; + break; + } + + if (rtlpriv->link_info.bcn_rx_inperiod == 0) + switch (p_ra->pre_ratr_state) { + case DM_RATR_STA_HIGH: + default: + p_ra->ratr_state = DM_RATR_STA_MIDDLE; + break; + case DM_RATR_STA_MIDDLE: + case DM_RATR_STA_LOW: + p_ra->ratr_state = DM_RATR_STA_LOW; + break; + } + else if (rtlpriv->dm.undec_sm_pwdb > high_rssithresh_for_ra) + p_ra->ratr_state = DM_RATR_STA_HIGH; + else if (rtlpriv->dm.undec_sm_pwdb > low_rssithresh_for_ra) + p_ra->ratr_state = DM_RATR_STA_MIDDLE; + else + p_ra->ratr_state = DM_RATR_STA_LOW; + + if (p_ra->pre_ratr_state != p_ra->ratr_state) { + RT_TRACE(rtlpriv, COMP_RATE, DBG_LOUD, + "RSSI = %ld\n", + rtlpriv->dm.undec_sm_pwdb); + RT_TRACE(rtlpriv, COMP_RATE, DBG_LOUD, + "RSSI_LEVEL = %d\n", p_ra->ratr_state); + RT_TRACE(rtlpriv, COMP_RATE, DBG_LOUD, + "PreState = %d, CurState = %d\n", + p_ra->pre_ratr_state, p_ra->ratr_state); + + rcu_read_lock(); + sta = rtl_find_sta(hw, mac->bssid); + if (sta) + rtlpriv->cfg->ops->update_rate_tbl(hw, sta, + p_ra->ratr_state, + true); + rcu_read_unlock(); + + p_ra->pre_ratr_state = p_ra->ratr_state; + } + } +} + void rtl8723e_dm_rf_saving(struct ieee80211_hw *hw, u8 bforce_in_normal) { struct rtl_priv *rtlpriv = rtl_priv(hw); @@ -834,7 +917,7 @@ void rtl8723e_dm_watchdog(struct ieee80211_hw *hw) rtl8723e_dm_dynamic_bb_powersaving(hw); rtl8723e_dm_dynamic_txpower(hw); rtl8723e_dm_check_txpower_tracking(hw); - /* rtl92c_dm_refresh_rate_adaptive_mask(hw); */ + rtl8723e_dm_refresh_rate_adaptive_mask(hw); rtl8723e_dm_bt_coexist(hw); rtl8723e_dm_check_edca_turbo(hw); } From 051337d412d2127735b65d7752e99a7478c96db3 Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Tue, 8 Jan 2019 22:50:09 +0000 Subject: [PATCH 0440/1436] rtlwifi: Don't clear num_rx_inperiod too early This patch moves the clearing of rtlpriv->link_info.num_rx_inperiod in rtl_watchdog_wq_callback a few lines down. This is necessary since it is still used in the "AP off" detection code block. Moved clearing of rtlpriv->link_info.num_rx_inperiod as well for consistency. Signed-off-by: Bernd Edlinger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index ef9b502ce576..7aa68fe5d791 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -2172,8 +2172,6 @@ label_lps_done: ; } - rtlpriv->link_info.num_rx_inperiod = 0; - rtlpriv->link_info.num_tx_inperiod = 0; for (tid = 0; tid <= 7; tid++) rtlpriv->link_info.tidtx_inperiod[tid] = 0; @@ -2236,6 +2234,8 @@ label_lps_done: rtlpriv->btcoexist.btc_info.in_4way = false; } + rtlpriv->link_info.num_rx_inperiod = 0; + rtlpriv->link_info.num_tx_inperiod = 0; rtlpriv->link_info.bcn_rx_inperiod = 0; /* <6> scan list */ From 1a0f547831dcee5364d77f603d6ce4a08737a50d Mon Sep 17 00:00:00 2001 From: Hemantkumar Suthar Date: Thu, 10 Jan 2019 08:45:54 +0000 Subject: [PATCH 0441/1436] mwifiex: add support for sd8977 chipset This patch adds support for 8977 chipset to mwifiex with SDIO interface. Register offsets and supported feature flags are updated. Firmware image used will be mrvl/sd8977_uapsta.bin. Signed-off-by: Hemantkumar Suthar Signed-off-by: Rakesh Parmar Signed-off-by: Cathy Luo Signed-off-by: Ganapathi Bhat Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/Kconfig | 2 +- drivers/net/wireless/marvell/mwifiex/sdio.c | 5 ++ drivers/net/wireless/marvell/mwifiex/sdio.h | 70 ++++++++++++++++++++ 3 files changed, 76 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/Kconfig b/drivers/net/wireless/marvell/mwifiex/Kconfig index 279167ddd293..524fd565cb2a 100644 --- a/drivers/net/wireless/marvell/mwifiex/Kconfig +++ b/drivers/net/wireless/marvell/mwifiex/Kconfig @@ -9,7 +9,7 @@ config MWIFIEX mwifiex. config MWIFIEX_SDIO - tristate "Marvell WiFi-Ex Driver for SD8786/SD8787/SD8797/SD8887/SD8897/SD8997" + tristate "Marvell WiFi-Ex Driver for SD8786/SD8787/SD8797/SD8887/SD8897/SD8977/SD8997" depends on MWIFIEX && MMC select FW_LOADER select WANT_DEV_COREDUMP diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index d49fbd58afa7..a85648342d15 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -489,6 +489,8 @@ static void mwifiex_sdio_coredump(struct device *dev) #define SDIO_DEVICE_ID_MARVELL_8887 (0x9135) /* Device ID for SD8801 */ #define SDIO_DEVICE_ID_MARVELL_8801 (0x9139) +/* Device ID for SD8977 */ +#define SDIO_DEVICE_ID_MARVELL_8977 (0x9145) /* Device ID for SD8997 */ #define SDIO_DEVICE_ID_MARVELL_8997 (0x9141) @@ -507,6 +509,8 @@ static const struct sdio_device_id mwifiex_ids[] = { .driver_data = (unsigned long)&mwifiex_sdio_sd8887}, {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8801), .driver_data = (unsigned long)&mwifiex_sdio_sd8801}, + {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8977), + .driver_data = (unsigned long)&mwifiex_sdio_sd8977}, {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8997), .driver_data = (unsigned long)&mwifiex_sdio_sd8997}, {}, @@ -2726,4 +2730,5 @@ MODULE_FIRMWARE(SD8787_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8797_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8897_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8887_DEFAULT_FW_NAME); +MODULE_FIRMWARE(SD8977_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8997_DEFAULT_FW_NAME); diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.h b/drivers/net/wireless/marvell/mwifiex/sdio.h index dccf7fd1aef3..912de2cde8d9 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.h +++ b/drivers/net/wireless/marvell/mwifiex/sdio.h @@ -36,6 +36,7 @@ #define SD8897_DEFAULT_FW_NAME "mrvl/sd8897_uapsta.bin" #define SD8887_DEFAULT_FW_NAME "mrvl/sd8887_uapsta.bin" #define SD8801_DEFAULT_FW_NAME "mrvl/sd8801_uapsta.bin" +#define SD8977_DEFAULT_FW_NAME "mrvl/sd8977_uapsta.bin" #define SD8997_DEFAULT_FW_NAME "mrvl/sd8997_uapsta.bin" #define BLOCK_MODE 1 @@ -371,6 +372,59 @@ static const struct mwifiex_sdio_card_reg mwifiex_reg_sd8897 = { 0x59, 0x5c, 0x5d}, }; +static const struct mwifiex_sdio_card_reg mwifiex_reg_sd8977 = { + .start_rd_port = 0, + .start_wr_port = 0, + .base_0_reg = 0xF8, + .base_1_reg = 0xF9, + .poll_reg = 0x5C, + .host_int_enable = UP_LD_HOST_INT_MASK | DN_LD_HOST_INT_MASK | + CMD_PORT_UPLD_INT_MASK | CMD_PORT_DNLD_INT_MASK, + .host_int_rsr_reg = 0x4, + .host_int_status_reg = 0x0C, + .host_int_mask_reg = 0x08, + .status_reg_0 = 0xE8, + .status_reg_1 = 0xE9, + .sdio_int_mask = 0xff, + .data_port_mask = 0xffffffff, + .io_port_0_reg = 0xE4, + .io_port_1_reg = 0xE5, + .io_port_2_reg = 0xE6, + .max_mp_regs = 196, + .rd_bitmap_l = 0x10, + .rd_bitmap_u = 0x11, + .rd_bitmap_1l = 0x12, + .rd_bitmap_1u = 0x13, + .wr_bitmap_l = 0x14, + .wr_bitmap_u = 0x15, + .wr_bitmap_1l = 0x16, + .wr_bitmap_1u = 0x17, + .rd_len_p0_l = 0x18, + .rd_len_p0_u = 0x19, + .card_misc_cfg_reg = 0xd8, + .card_cfg_2_1_reg = 0xd9, + .cmd_rd_len_0 = 0xc0, + .cmd_rd_len_1 = 0xc1, + .cmd_rd_len_2 = 0xc2, + .cmd_rd_len_3 = 0xc3, + .cmd_cfg_0 = 0xc4, + .cmd_cfg_1 = 0xc5, + .cmd_cfg_2 = 0xc6, + .cmd_cfg_3 = 0xc7, + .fw_dump_host_ready = 0xcc, + .fw_dump_ctrl = 0xf0, + .fw_dump_start = 0xf1, + .fw_dump_end = 0xf8, + .func1_dump_reg_start = 0x10, + .func1_dump_reg_end = 0x17, + .func1_scratch_reg = 0xe8, + .func1_spec_reg_num = 13, + .func1_spec_reg_table = {0x08, 0x58, 0x5C, 0x5D, + 0x60, 0x61, 0x62, 0x64, + 0x65, 0x66, 0x68, 0x69, + 0x6a}, +}; + static const struct mwifiex_sdio_card_reg mwifiex_reg_sd8997 = { .start_rd_port = 0, .start_wr_port = 0, @@ -532,6 +586,22 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8897 = { .can_ext_scan = true, }; +static const struct mwifiex_sdio_device mwifiex_sdio_sd8977 = { + .firmware = SD8977_DEFAULT_FW_NAME, + .reg = &mwifiex_reg_sd8977, + .max_ports = 32, + .mp_agg_pkt_limit = 16, + .tx_buf_size = MWIFIEX_TX_DATA_BUF_SIZE_4K, + .mp_tx_agg_buf_size = MWIFIEX_MP_AGGR_BUF_SIZE_MAX, + .mp_rx_agg_buf_size = MWIFIEX_MP_AGGR_BUF_SIZE_MAX, + .supports_sdio_new_mode = true, + .has_control_mask = false, + .can_dump_fw = true, + .fw_dump_enh = true, + .can_auto_tdls = false, + .can_ext_scan = true, +}; + static const struct mwifiex_sdio_device mwifiex_sdio_sd8997 = { .firmware = SD8997_DEFAULT_FW_NAME, .reg = &mwifiex_reg_sd8997, From 126824f5ce671b4f2d1d01aa1fa7ff4de7174869 Mon Sep 17 00:00:00 2001 From: Andrey Shevchenko Date: Mon, 14 Jan 2019 09:39:35 +0000 Subject: [PATCH 0442/1436] qtnfmac: support EBUSY errcode for QLINK protocol Add support of EBUSY error code for remote procedures over QLINK protocol. Signed-off-by: Andrey Shevchenko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/commands.c | 2 ++ drivers/net/wireless/quantenna/qtnfmac/qlink.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index cf386f579060..6575247c8b6b 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -72,6 +72,8 @@ static int qtnf_cmd_resp_result_decode(enum qlink_cmd_result qcode) return -EADDRINUSE; case QLINK_CMD_RESULT_EADDRNOTAVAIL: return -EADDRNOTAVAIL; + case QLINK_CMD_RESULT_EBUSY: + return -EBUSY; default: return -EFAULT; } diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index 8d62addea895..f9c7f87afaf8 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -733,6 +733,7 @@ enum qlink_cmd_result { QLINK_CMD_RESULT_EALREADY, QLINK_CMD_RESULT_EADDRINUSE, QLINK_CMD_RESULT_EADDRNOTAVAIL, + QLINK_CMD_RESULT_EBUSY, }; /** From d1365e794eb31f74d7326f0c60a2aa13b2d70ac8 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 14 Jan 2019 09:39:36 +0000 Subject: [PATCH 0443/1436] qtnfmac: fix INTx interrupt handling In the current implementation INTx interrupt is deasserted after the control path processing. However this may lead to missed interrupts from the wireless card. For instance, this may happen as a result of control path activity, when another interrupt arrives before INTx is deasserted. Signed-off-by: Sergey Matyukevich Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c b/drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c index 598edb814421..cbcda57105f3 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c +++ b/drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c @@ -559,6 +559,9 @@ static irqreturn_t qtnf_pcie_topaz_interrupt(int irq, void *data) if (!priv->msi_enabled && !qtnf_topaz_intx_asserted(ts)) return IRQ_NONE; + if (!priv->msi_enabled) + qtnf_deassert_intx(ts); + priv->pcie_irq_count++; qtnf_shm_ipc_irq_handler(&priv->shm_ipc_ep_in); @@ -571,9 +574,6 @@ static irqreturn_t qtnf_pcie_topaz_interrupt(int irq, void *data) tasklet_hi_schedule(&priv->reclaim_tq); - if (!priv->msi_enabled) - qtnf_deassert_intx(ts); - return IRQ_HANDLED; } From de624a355d928b591af32f52198540b8f4b7048c Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 14 Jan 2019 09:39:38 +0000 Subject: [PATCH 0444/1436] qtnfmac: add support for 4addr mode Advertise WIPHY_FLAG_4ADDR_STATION capability to wireless core. Send use4addr interface change flag to firmware in change_virtual_intf cfg80211 callback. In order to enable adding wireless station interface to bridge one should turn on 4addr mode using the following command: $ iw dev wlan0 set 4addr on $ brctl addif br0 wlan0 If this commands succeeds, then interface can be added to bridge. Note that when wireless interface is added to bridge, wpa_supplicant should be started with appropriate -b parameter, e.g: $ wpa_supplicant -Dnl80211 -iwlan0 -c/path/to/wpa.conf -b br0 Signed-off-by: Sergey Matyukevich Signed-off-by: Kalle Valo --- .../net/wireless/quantenna/qtnfmac/cfg80211.c | 21 ++++++++++++------- .../net/wireless/quantenna/qtnfmac/commands.c | 14 ++++++++----- .../net/wireless/quantenna/qtnfmac/commands.h | 6 ++++-- drivers/net/wireless/quantenna/qtnfmac/core.c | 4 +++- .../net/wireless/quantenna/qtnfmac/qlink.h | 3 ++- 5 files changed, 31 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 51b33ec78fac..a43f8120df3c 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -122,7 +122,8 @@ qtnf_change_virtual_intf(struct wiphy *wiphy, struct vif_params *params) { struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); - u8 *mac_addr; + u8 *mac_addr = NULL; + int use4addr = 0; int ret; ret = qtnf_validate_iface_combinations(wiphy, vif, type); @@ -132,14 +133,14 @@ qtnf_change_virtual_intf(struct wiphy *wiphy, return ret; } - if (params) + if (params) { mac_addr = params->macaddr; - else - mac_addr = NULL; + use4addr = params->use_4addr; + } qtnf_scan_done(vif->mac, true); - ret = qtnf_cmd_send_change_intf_type(vif, type, mac_addr); + ret = qtnf_cmd_send_change_intf_type(vif, type, use4addr, mac_addr); if (ret) { pr_err("VIF%u.%u: failed to change type to %d\n", vif->mac->macid, vif->vifid, type); @@ -190,6 +191,7 @@ static struct wireless_dev *qtnf_add_virtual_intf(struct wiphy *wiphy, struct qtnf_wmac *mac; struct qtnf_vif *vif; u8 *mac_addr = NULL; + int use4addr = 0; int ret; mac = wiphy_priv(wiphy); @@ -225,10 +227,12 @@ static struct wireless_dev *qtnf_add_virtual_intf(struct wiphy *wiphy, return ERR_PTR(-ENOTSUPP); } - if (params) + if (params) { mac_addr = params->macaddr; + use4addr = params->use_4addr; + } - ret = qtnf_cmd_send_add_intf(vif, type, mac_addr); + ret = qtnf_cmd_send_add_intf(vif, type, use4addr, mac_addr); if (ret) { pr_err("VIF%u.%u: failed to add VIF %pM\n", mac->macid, vif->vifid, mac_addr); @@ -1107,7 +1111,8 @@ int qtnf_wiphy_register(struct qtnf_hw_info *hw_info, struct qtnf_wmac *mac) wiphy->flags |= WIPHY_FLAG_HAVE_AP_SME | WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD | WIPHY_FLAG_AP_UAPSD | - WIPHY_FLAG_HAS_CHANNEL_SWITCH; + WIPHY_FLAG_HAS_CHANNEL_SWITCH | + WIPHY_FLAG_4ADDR_STATION; wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT; if (hw_info->hw_capab & QLINK_HW_CAPAB_DFS_OFFLOAD) diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 6575247c8b6b..d03b3d03f4cc 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -734,6 +734,7 @@ out: static int qtnf_cmd_send_add_change_intf(struct qtnf_vif *vif, enum nl80211_iftype iftype, + int use4addr, u8 *mac_addr, enum qlink_cmd_type cmd_type) { @@ -751,6 +752,7 @@ static int qtnf_cmd_send_add_change_intf(struct qtnf_vif *vif, qtnf_bus_lock(vif->mac->bus); cmd = (struct qlink_cmd_manage_intf *)cmd_skb->data; + cmd->intf_info.use4addr = use4addr; switch (iftype) { case NL80211_IFTYPE_AP: @@ -786,17 +788,19 @@ out: return ret; } -int qtnf_cmd_send_add_intf(struct qtnf_vif *vif, - enum nl80211_iftype iftype, u8 *mac_addr) +int qtnf_cmd_send_add_intf(struct qtnf_vif *vif, enum nl80211_iftype iftype, + int use4addr, u8 *mac_addr) { - return qtnf_cmd_send_add_change_intf(vif, iftype, mac_addr, + return qtnf_cmd_send_add_change_intf(vif, iftype, use4addr, mac_addr, QLINK_CMD_ADD_INTF); } int qtnf_cmd_send_change_intf_type(struct qtnf_vif *vif, - enum nl80211_iftype iftype, u8 *mac_addr) + enum nl80211_iftype iftype, + int use4addr, + u8 *mac_addr) { - return qtnf_cmd_send_add_change_intf(vif, iftype, mac_addr, + return qtnf_cmd_send_add_change_intf(vif, iftype, use4addr, mac_addr, QLINK_CMD_CHANGE_INTF); } diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.h b/drivers/net/wireless/quantenna/qtnfmac/commands.h index 1ac41156c192..1c25e7905e9a 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.h +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.h @@ -26,9 +26,11 @@ void qtnf_cmd_send_deinit_fw(struct qtnf_bus *bus); int qtnf_cmd_get_hw_info(struct qtnf_bus *bus); int qtnf_cmd_get_mac_info(struct qtnf_wmac *mac); int qtnf_cmd_send_add_intf(struct qtnf_vif *vif, enum nl80211_iftype iftype, - u8 *mac_addr); + int use4addr, u8 *mac_addr); int qtnf_cmd_send_change_intf_type(struct qtnf_vif *vif, - enum nl80211_iftype iftype, u8 *mac_addr); + enum nl80211_iftype iftype, + int use4addr, + u8 *mac_addr); int qtnf_cmd_send_del_intf(struct qtnf_vif *vif); int qtnf_cmd_band_info_get(struct qtnf_wmac *mac, struct ieee80211_supported_band *band); diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.c b/drivers/net/wireless/quantenna/qtnfmac/core.c index 5d18a4a917c9..29258acfa8dc 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.c +++ b/drivers/net/wireless/quantenna/qtnfmac/core.c @@ -195,6 +195,7 @@ static int qtnf_netdev_set_mac_address(struct net_device *ndev, void *addr) qtnf_scan_done(vif->mac, true); ret = qtnf_cmd_send_change_intf_type(vif, vif->wdev.iftype, + vif->wdev.use_4addr, sa->sa_data); if (ret) @@ -545,7 +546,8 @@ static int qtnf_core_mac_attach(struct qtnf_bus *bus, unsigned int macid) goto error; } - ret = qtnf_cmd_send_add_intf(vif, vif->wdev.iftype, vif->mac_addr); + ret = qtnf_cmd_send_add_intf(vif, vif->wdev.iftype, + vif->wdev.use_4addr, vif->mac_addr); if (ret) { pr_err("MAC%u: failed to add VIF\n", macid); goto error; diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index f9c7f87afaf8..a78cb9e05068 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -105,7 +105,8 @@ struct qlink_intf_info { __le16 if_type; __le16 vlanid; u8 mac_addr[ETH_ALEN]; - u8 rsvd[2]; + u8 use4addr; + u8 rsvd[1]; } __packed; enum qlink_sta_flags { From 9fe504a1317250ab97275e6d0a6a9de39fb60b82 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 14 Jan 2019 09:39:40 +0000 Subject: [PATCH 0445/1436] qtnfmac: switch to 32bit values for RTS/FRAG thresholds Host wireless stack uses u32 type for RTS/FRAG threshold values. Switch to u32 in driver: pass u32 values to firmware and let firmware properly adapt these values according to its internal representation. Signed-off-by: Sergey Matyukevich Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/commands.c | 8 ++++---- drivers/net/wireless/quantenna/qtnfmac/qlink.h | 4 ++-- drivers/net/wireless/quantenna/qtnfmac/qlink_util.h | 11 +++++++++++ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index d03b3d03f4cc..74dcbcfcd694 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -1563,11 +1563,11 @@ static int qtnf_cmd_resp_proc_phy_params(struct qtnf_wmac *mac, switch (tlv_type) { case QTN_TLV_ID_FRAG_THRESH: phy_thr = (void *)tlv; - mac_info->frag_thr = (u32)le16_to_cpu(phy_thr->thr); + mac_info->frag_thr = le32_to_cpu(phy_thr->thr); break; case QTN_TLV_ID_RTS_THRESH: phy_thr = (void *)tlv; - mac_info->rts_thr = (u32)le16_to_cpu(phy_thr->thr); + mac_info->rts_thr = le32_to_cpu(phy_thr->thr); break; case QTN_TLV_ID_SRETRY_LIMIT: limit = (void *)tlv; @@ -1815,10 +1815,10 @@ int qtnf_cmd_send_update_phy_params(struct qtnf_wmac *mac, u32 changed) qtnf_bus_lock(mac->bus); if (changed & WIPHY_PARAM_FRAG_THRESHOLD) - qtnf_cmd_skb_put_tlv_u16(cmd_skb, QTN_TLV_ID_FRAG_THRESH, + qtnf_cmd_skb_put_tlv_u32(cmd_skb, QTN_TLV_ID_FRAG_THRESH, wiphy->frag_threshold); if (changed & WIPHY_PARAM_RTS_THRESHOLD) - qtnf_cmd_skb_put_tlv_u16(cmd_skb, QTN_TLV_ID_RTS_THRESH, + qtnf_cmd_skb_put_tlv_u32(cmd_skb, QTN_TLV_ID_RTS_THRESH, wiphy->rts_threshold); if (changed & WIPHY_PARAM_COVERAGE_CLASS) qtnf_cmd_skb_put_tlv_u8(cmd_skb, QTN_TLV_ID_COVERAGE_CLASS, diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index a78cb9e05068..ace52e9d421d 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -19,7 +19,7 @@ #include -#define QLINK_PROTO_VER 11 +#define QLINK_PROTO_VER 12 #define QLINK_MACID_RSVD 0xFF #define QLINK_VIFID_RSVD 0xFF @@ -1184,7 +1184,7 @@ struct qlink_iface_limit_record { struct qlink_tlv_frag_rts_thr { struct qlink_tlv_hdr hdr; - __le16 thr; + __le32 thr; } __packed; struct qlink_tlv_rlimit { diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h index 960d5d97492f..fc87827cb49c 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h @@ -69,6 +69,17 @@ static inline void qtnf_cmd_skb_put_tlv_u16(struct sk_buff *skb, memcpy(hdr->val, &tmp, sizeof(tmp)); } +static inline void qtnf_cmd_skb_put_tlv_u32(struct sk_buff *skb, + u16 tlv_id, u32 value) +{ + struct qlink_tlv_hdr *hdr = skb_put(skb, sizeof(*hdr) + sizeof(value)); + __le32 tmp = cpu_to_le32(value); + + hdr->type = cpu_to_le16(tlv_id); + hdr->len = cpu_to_le16(sizeof(value)); + memcpy(hdr->val, &tmp, sizeof(tmp)); +} + u16 qlink_iface_type_to_nl_mask(u16 qlink_type); u8 qlink_chan_width_mask_to_nl(u16 qlink_mask); void qlink_chandef_q2cfg(struct wiphy *wiphy, From f3c8bd46c774153f1d08f10a7baafb2946efa8fa Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 14 Jan 2019 09:39:42 +0000 Subject: [PATCH 0446/1436] qtnfmac: do not reject retry changes in driver Do not reject RETRY changes in driver. This decision should belong to firmware. Signed-off-by: Sergey Matyukevich Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 5 ----- drivers/net/wireless/quantenna/qtnfmac/commands.c | 8 ++++++++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index a43f8120df3c..1b02096934f8 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -363,11 +363,6 @@ static int qtnf_set_wiphy_params(struct wiphy *wiphy, u32 changed) return -EFAULT; } - if (changed & (WIPHY_PARAM_RETRY_LONG | WIPHY_PARAM_RETRY_SHORT)) { - pr_err("MAC%u: can't modify retry params\n", mac->macid); - return -EOPNOTSUPP; - } - ret = qtnf_cmd_send_update_phy_params(mac, changed); if (ret) pr_err("MAC%u: failed to update PHY params\n", mac->macid); diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 74dcbcfcd694..0cb76acbec99 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -1824,6 +1824,14 @@ int qtnf_cmd_send_update_phy_params(struct qtnf_wmac *mac, u32 changed) qtnf_cmd_skb_put_tlv_u8(cmd_skb, QTN_TLV_ID_COVERAGE_CLASS, wiphy->coverage_class); + if (changed & WIPHY_PARAM_RETRY_LONG) + qtnf_cmd_skb_put_tlv_u8(cmd_skb, QTN_TLV_ID_LRETRY_LIMIT, + wiphy->retry_long); + + if (changed & WIPHY_PARAM_RETRY_SHORT) + qtnf_cmd_skb_put_tlv_u8(cmd_skb, QTN_TLV_ID_SRETRY_LIMIT, + wiphy->retry_short); + ret = qtnf_cmd_send(mac->bus, cmd_skb); if (ret) goto out; From ff233cb515031d95550958c5797a70222749e9a3 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 14 Jan 2019 09:39:45 +0000 Subject: [PATCH 0447/1436] qtnfmac: convert to SPDX license identifiers Replace textual license with SPDX-License-Identifier. Add an SPDX-License-Identifier for the Makefile. Signed-off-by: Sergey Matyukevich Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/Makefile | 1 + drivers/net/wireless/quantenna/qtnfmac/bus.h | 17 ++--------------- .../net/wireless/quantenna/qtnfmac/cfg80211.c | 17 ++--------------- .../net/wireless/quantenna/qtnfmac/cfg80211.h | 17 ++--------------- .../net/wireless/quantenna/qtnfmac/commands.c | 16 ++-------------- .../net/wireless/quantenna/qtnfmac/commands.h | 16 ++-------------- drivers/net/wireless/quantenna/qtnfmac/core.c | 17 ++--------------- drivers/net/wireless/quantenna/qtnfmac/core.h | 17 ++--------------- drivers/net/wireless/quantenna/qtnfmac/debug.c | 17 ++--------------- drivers/net/wireless/quantenna/qtnfmac/debug.h | 17 ++--------------- drivers/net/wireless/quantenna/qtnfmac/event.c | 17 ++--------------- drivers/net/wireless/quantenna/qtnfmac/event.h | 17 ++--------------- drivers/net/wireless/quantenna/qtnfmac/qlink.h | 17 ++--------------- .../net/wireless/quantenna/qtnfmac/qlink_util.c | 16 ++-------------- .../net/wireless/quantenna/qtnfmac/qlink_util.h | 17 ++--------------- .../net/wireless/quantenna/qtnfmac/qtn_hw_ids.h | 17 ++--------------- .../net/wireless/quantenna/qtnfmac/shm_ipc.c | 17 ++--------------- .../net/wireless/quantenna/qtnfmac/shm_ipc.h | 17 ++--------------- .../wireless/quantenna/qtnfmac/shm_ipc_defs.h | 17 ++--------------- drivers/net/wireless/quantenna/qtnfmac/trans.c | 17 ++--------------- drivers/net/wireless/quantenna/qtnfmac/trans.h | 17 ++--------------- drivers/net/wireless/quantenna/qtnfmac/util.c | 17 ++--------------- drivers/net/wireless/quantenna/qtnfmac/util.h | 17 ++--------------- 23 files changed, 45 insertions(+), 327 deletions(-) diff --git a/drivers/net/wireless/quantenna/Makefile b/drivers/net/wireless/quantenna/Makefile index baebfbde119e..cea83d178d2e 100644 --- a/drivers/net/wireless/quantenna/Makefile +++ b/drivers/net/wireless/quantenna/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Copyright (c) 2015-2016 Quantenna Communications, Inc. # All rights reserved. diff --git a/drivers/net/wireless/quantenna/qtnfmac/bus.h b/drivers/net/wireless/quantenna/qtnfmac/bus.h index 528ca7f5e070..7bd906cc7023 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/bus.h +++ b/drivers/net/wireless/quantenna/qtnfmac/bus.h @@ -1,18 +1,5 @@ -/* - * Copyright (c) 2015 Quantenna Communications - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2015 Quantenna Communications. All rights reserved. */ #ifndef QTNFMAC_BUS_H #define QTNFMAC_BUS_H diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 1b02096934f8..45f4cef7de9c 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -1,18 +1,5 @@ -/* - * Copyright (c) 2012-2012 Quantenna Communications, Inc. - * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (c) 2015-2016 Quantenna Communications. All rights reserved. */ #include #include diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.h b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.h index b73425122a10..c374857283ac 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.h +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.h @@ -1,18 +1,5 @@ -/* - * Copyright (c) 2015-2016 Quantenna Communications, Inc. - * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2015-2016 Quantenna Communications. All rights reserved. */ #ifndef _QTN_FMAC_CFG80211_H_ #define _QTN_FMAC_CFG80211_H_ diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 0cb76acbec99..23c39735ae0c 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -1,17 +1,5 @@ -/* - * Copyright (c) 2015-2016 Quantenna Communications, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (c) 2015-2016 Quantenna Communications. All rights reserved. */ #include #include diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.h b/drivers/net/wireless/quantenna/qtnfmac/commands.h index 1c25e7905e9a..96dff643bbc4 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.h +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.h @@ -1,17 +1,5 @@ -/* - * Copyright (c) 2016 Quantenna Communications, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2016 Quantenna Communications. All rights reserved. */ #ifndef QLINK_COMMANDS_H_ #define QLINK_COMMANDS_H_ diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.c b/drivers/net/wireless/quantenna/qtnfmac/core.c index 29258acfa8dc..ee1b75fda1dd 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.c +++ b/drivers/net/wireless/quantenna/qtnfmac/core.c @@ -1,18 +1,5 @@ -/* - * Copyright (c) 2015-2016 Quantenna Communications, Inc. - * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (c) 2015-2016 Quantenna Communications. All rights reserved. */ #include #include diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.h b/drivers/net/wireless/quantenna/qtnfmac/core.h index 293055049caa..a31cff46e964 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.h +++ b/drivers/net/wireless/quantenna/qtnfmac/core.h @@ -1,18 +1,5 @@ -/* - * Copyright (c) 2015-2016 Quantenna Communications, Inc. - * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2015-2016 Quantenna Communications. All rights reserved. */ #ifndef _QTN_FMAC_CORE_H_ #define _QTN_FMAC_CORE_H_ diff --git a/drivers/net/wireless/quantenna/qtnfmac/debug.c b/drivers/net/wireless/quantenna/qtnfmac/debug.c index 9f826b9ef5d9..ad70cdb80060 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/debug.c +++ b/drivers/net/wireless/quantenna/qtnfmac/debug.c @@ -1,18 +1,5 @@ -/* - * Copyright (c) 2015-2016 Quantenna Communications, Inc. - * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (c) 2015-2016 Quantenna Communications. All rights reserved. */ #include "debug.h" diff --git a/drivers/net/wireless/quantenna/qtnfmac/debug.h b/drivers/net/wireless/quantenna/qtnfmac/debug.h index d6dd12b5d434..61b45536b83a 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/debug.h +++ b/drivers/net/wireless/quantenna/qtnfmac/debug.h @@ -1,18 +1,5 @@ -/* - * Copyright (c) 2015-2016 Quantenna Communications, Inc. - * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2015-2016 Quantenna Communications. All rights reserved. */ #ifndef _QTN_FMAC_DEBUG_H_ #define _QTN_FMAC_DEBUG_H_ diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index 8b542b431b75..3038a000c287 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -1,18 +1,5 @@ -/* - * Copyright (c) 2015-2016 Quantenna Communications, Inc. - * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (c) 2015-2016 Quantenna Communications. All rights reserved. */ #include #include diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.h b/drivers/net/wireless/quantenna/qtnfmac/event.h index ae759b602c2a..533ad99d045d 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.h +++ b/drivers/net/wireless/quantenna/qtnfmac/event.h @@ -1,18 +1,5 @@ -/* - * Copyright (c) 2015-2016 Quantenna Communications, Inc. - * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2015-2016 Quantenna Communications. All rights reserved. */ #ifndef _QTN_FMAC_EVENT_H_ #define _QTN_FMAC_EVENT_H_ diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index ace52e9d421d..d958b268de02 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -1,18 +1,5 @@ -/* - * Copyright (c) 2015-2016 Quantenna Communications, Inc. - * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2015-2016 Quantenna Communications. All rights reserved. */ #ifndef _QTN_QLINK_H_ #define _QTN_QLINK_H_ diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c index aeeda81b09ea..72bfd17cb687 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c @@ -1,17 +1,5 @@ -/* - * Copyright (c) 2015-2016 Quantenna Communications, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (c) 2015-2016 Quantenna Communications. All rights reserved. */ #include diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h index fc87827cb49c..781ea7fe79f2 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h @@ -1,18 +1,5 @@ -/* - * Copyright (c) 2015-2016 Quantenna Communications, Inc. - * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2015-2016 Quantenna Communications. All rights reserved. */ #ifndef _QTN_FMAC_QLINK_UTIL_H_ #define _QTN_FMAC_QLINK_UTIL_H_ diff --git a/drivers/net/wireless/quantenna/qtnfmac/qtn_hw_ids.h b/drivers/net/wireless/quantenna/qtnfmac/qtn_hw_ids.h index 40295a511224..82d879950b62 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qtn_hw_ids.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qtn_hw_ids.h @@ -1,18 +1,5 @@ -/* - * Copyright (c) 2015-2016 Quantenna Communications, Inc. - * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2015-2016 Quantenna Communications. All rights reserved. */ #ifndef _QTN_HW_IDS_H_ #define _QTN_HW_IDS_H_ diff --git a/drivers/net/wireless/quantenna/qtnfmac/shm_ipc.c b/drivers/net/wireless/quantenna/qtnfmac/shm_ipc.c index 2ec334199c2b..ff678951d3b2 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/shm_ipc.c +++ b/drivers/net/wireless/quantenna/qtnfmac/shm_ipc.c @@ -1,18 +1,5 @@ -/* - * Copyright (c) 2015-2016 Quantenna Communications, Inc. - * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (c) 2015-2016 Quantenna Communications. All rights reserved. */ #include #include diff --git a/drivers/net/wireless/quantenna/qtnfmac/shm_ipc.h b/drivers/net/wireless/quantenna/qtnfmac/shm_ipc.h index c2a3702a9ee7..52cac5439b03 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/shm_ipc.h +++ b/drivers/net/wireless/quantenna/qtnfmac/shm_ipc.h @@ -1,18 +1,5 @@ -/* - * Copyright (c) 2015-2016 Quantenna Communications, Inc. - * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2015-2016 Quantenna Communications. All rights reserved. */ #ifndef _QTN_FMAC_SHM_IPC_H_ #define _QTN_FMAC_SHM_IPC_H_ diff --git a/drivers/net/wireless/quantenna/qtnfmac/shm_ipc_defs.h b/drivers/net/wireless/quantenna/qtnfmac/shm_ipc_defs.h index 95a5f89a8b1a..78be70df1218 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/shm_ipc_defs.h +++ b/drivers/net/wireless/quantenna/qtnfmac/shm_ipc_defs.h @@ -1,18 +1,5 @@ -/* - * Copyright (c) 2015-2016 Quantenna Communications, Inc. - * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2015-2016 Quantenna Communications. All rights reserved. */ #ifndef _QTN_FMAC_SHM_IPC_DEFS_H_ #define _QTN_FMAC_SHM_IPC_DEFS_H_ diff --git a/drivers/net/wireless/quantenna/qtnfmac/trans.c b/drivers/net/wireless/quantenna/qtnfmac/trans.c index 345f34ec9750..95356e280e23 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/trans.c +++ b/drivers/net/wireless/quantenna/qtnfmac/trans.c @@ -1,18 +1,5 @@ -/* - * Copyright (c) 2015-2016 Quantenna Communications, Inc. - * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (c) 2015-2016 Quantenna Communications. All rights reserved. */ #include #include diff --git a/drivers/net/wireless/quantenna/qtnfmac/trans.h b/drivers/net/wireless/quantenna/qtnfmac/trans.h index 9a473e07af0f..c0b76f871b31 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/trans.h +++ b/drivers/net/wireless/quantenna/qtnfmac/trans.h @@ -1,18 +1,5 @@ -/* - * Copyright (c) 2015-2016 Quantenna Communications, Inc. - * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2015-2016 Quantenna Communications. All rights reserved. */ #ifndef _QTN_FMAC_TRANS_H_ #define _QTN_FMAC_TRANS_H_ diff --git a/drivers/net/wireless/quantenna/qtnfmac/util.c b/drivers/net/wireless/quantenna/qtnfmac/util.c index 3bc96b264769..cda6f5f3f38a 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/util.c +++ b/drivers/net/wireless/quantenna/qtnfmac/util.c @@ -1,18 +1,5 @@ -/* - * Copyright (c) 2015-2016 Quantenna Communications, Inc. - * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (c) 2015-2016 Quantenna Communications. All rights reserved. */ #include "util.h" #include "qtn_hw_ids.h" diff --git a/drivers/net/wireless/quantenna/qtnfmac/util.h b/drivers/net/wireless/quantenna/qtnfmac/util.h index b8744baac332..a14b7078a9c7 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/util.h +++ b/drivers/net/wireless/quantenna/qtnfmac/util.h @@ -1,18 +1,5 @@ -/* - * Copyright (c) 2015 Quantenna Communications - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2015 Quantenna Communications. All rights reserved. */ #ifndef QTNFMAC_UTIL_H #define QTNFMAC_UTIL_H From 23781af74152418055531fe7fec4e55b9502ddc2 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 14 Jan 2019 09:39:47 +0000 Subject: [PATCH 0448/1436] qtnfmac: add missing bss record to host scan cache Make sure that valid BSS entry exists in wireless core record even in the case of successful connect reported by firmware. Signed-off-by: Sergey Matyukevich Signed-off-by: Kalle Valo --- .../net/wireless/quantenna/qtnfmac/event.c | 79 ++++++++++++++++++- .../net/wireless/quantenna/qtnfmac/qlink.h | 4 +- 2 files changed, 78 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index 3038a000c287..3fd1a9217737 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -145,6 +145,12 @@ qtnf_event_handle_bss_join(struct qtnf_vif *vif, const struct qlink_event_bss_join *join_info, u16 len) { + struct wiphy *wiphy = priv_to_wiphy(vif->mac); + enum ieee80211_statuscode status = le16_to_cpu(join_info->status); + struct cfg80211_chan_def chandef; + struct cfg80211_bss *bss = NULL; + u8 *ie = NULL; + if (unlikely(len < sizeof(*join_info))) { pr_err("VIF%u.%u: payload is too short (%u < %zu)\n", vif->mac->macid, vif->vifid, len, @@ -158,15 +164,80 @@ qtnf_event_handle_bss_join(struct qtnf_vif *vif, return -EPROTO; } - pr_debug("VIF%u.%u: BSSID:%pM\n", vif->mac->macid, vif->vifid, - join_info->bssid); + pr_debug("VIF%u.%u: BSSID:%pM status:%u\n", + vif->mac->macid, vif->vifid, join_info->bssid, status); + if (status == WLAN_STATUS_SUCCESS) { + qlink_chandef_q2cfg(wiphy, &join_info->chan, &chandef); + if (!cfg80211_chandef_valid(&chandef)) { + pr_warn("MAC%u.%u: bad channel freq=%u cf1=%u cf2=%u bw=%u\n", + vif->mac->macid, vif->vifid, + chandef.chan->center_freq, + chandef.center_freq1, + chandef.center_freq2, + chandef.width); + status = WLAN_STATUS_UNSPECIFIED_FAILURE; + goto done; + } + + bss = cfg80211_get_bss(wiphy, chandef.chan, join_info->bssid, + NULL, 0, IEEE80211_BSS_TYPE_ESS, + IEEE80211_PRIVACY_ANY); + if (!bss) { + pr_warn("VIF%u.%u: add missing BSS:%pM chan:%u\n", + vif->mac->macid, vif->vifid, + join_info->bssid, chandef.chan->hw_value); + + if (!vif->wdev.ssid_len) { + pr_warn("VIF%u.%u: SSID unknown for BSS:%pM\n", + vif->mac->macid, vif->vifid, + join_info->bssid); + status = WLAN_STATUS_UNSPECIFIED_FAILURE; + goto done; + } + + ie = kzalloc(2 + vif->wdev.ssid_len, GFP_KERNEL); + if (!ie) { + pr_warn("VIF%u.%u: IE alloc failed for BSS:%pM\n", + vif->mac->macid, vif->vifid, + join_info->bssid); + status = WLAN_STATUS_UNSPECIFIED_FAILURE; + goto done; + } + + ie[0] = WLAN_EID_SSID; + ie[1] = vif->wdev.ssid_len; + memcpy(ie + 2, vif->wdev.ssid, vif->wdev.ssid_len); + + bss = cfg80211_inform_bss(wiphy, chandef.chan, + CFG80211_BSS_FTYPE_UNKNOWN, + join_info->bssid, 0, + WLAN_CAPABILITY_ESS, 100, + ie, 2 + vif->wdev.ssid_len, + 0, GFP_KERNEL); + if (!bss) { + pr_warn("VIF%u.%u: can't connect to unknown BSS: %pM\n", + vif->mac->macid, vif->vifid, + join_info->bssid); + status = WLAN_STATUS_UNSPECIFIED_FAILURE; + goto done; + } + } + } + +done: cfg80211_connect_result(vif->netdev, join_info->bssid, NULL, 0, NULL, - 0, le16_to_cpu(join_info->status), GFP_KERNEL); + 0, status, GFP_KERNEL); + if (bss) { + if (!ether_addr_equal(vif->bssid, join_info->bssid)) + ether_addr_copy(vif->bssid, join_info->bssid); + cfg80211_put_bss(wiphy, bss); + } - if (le16_to_cpu(join_info->status) == WLAN_STATUS_SUCCESS) + if (status == WLAN_STATUS_SUCCESS) netif_carrier_on(vif->netdev); + kfree(ie); return 0; } diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index d958b268de02..27fdb5b01ee3 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -6,7 +6,7 @@ #include -#define QLINK_PROTO_VER 12 +#define QLINK_PROTO_VER 13 #define QLINK_MACID_RSVD 0xFF #define QLINK_VIFID_RSVD 0xFF @@ -975,11 +975,13 @@ struct qlink_event_sta_deauth { /** * struct qlink_event_bss_join - data for QLINK_EVENT_BSS_JOIN event * + * @chan: new operating channel definition * @bssid: BSSID of a BSS which interface tried to joined. * @status: status of joining attempt, see &enum ieee80211_statuscode. */ struct qlink_event_bss_join { struct qlink_event ehdr; + struct qlink_chandef chan; u8 bssid[ETH_ALEN]; __le16 status; } __packed; From 324b8cad8cfc42fbf1dcffc7ec92967f961e9032 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 14 Jan 2019 09:39:49 +0000 Subject: [PATCH 0449/1436] qtnfmac: remove unused declarations Remove declarations for inexistent functions from bus.h header. Signed-off-by: Sergey Matyukevich Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/bus.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/bus.h b/drivers/net/wireless/quantenna/qtnfmac/bus.h index 7bd906cc7023..14b569b6d1b5 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/bus.h +++ b/drivers/net/wireless/quantenna/qtnfmac/bus.h @@ -122,7 +122,5 @@ static __always_inline void qtnf_bus_unlock(struct qtnf_bus *bus) int qtnf_core_attach(struct qtnf_bus *bus); void qtnf_core_detach(struct qtnf_bus *bus); -void qtnf_txflowblock(struct device *dev, bool state); -void qtnf_txcomplete(struct device *dev, struct sk_buff *txp, bool success); #endif /* QTNFMAC_BUS_H */ From 3844dec0f45df0737eec86444e280057fd042507 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 14 Jan 2019 09:39:51 +0000 Subject: [PATCH 0450/1436] qtnfmac: qtnf_cmd_send_with_reply cleanup Use existing variable with dereferenced cmd_id field. Signed-off-by: Sergey Matyukevich Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/commands.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 23c39735ae0c..0f48f541de41 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -87,14 +87,12 @@ static int qtnf_cmd_send_with_reply(struct qtnf_bus *bus, vif_id = cmd->vifid; cmd->mhdr.len = cpu_to_le16(cmd_skb->len); - pr_debug("VIF%u.%u cmd=0x%.4X\n", mac_id, vif_id, - le16_to_cpu(cmd->cmd_id)); + pr_debug("VIF%u.%u cmd=0x%.4X\n", mac_id, vif_id, cmd_id); if (bus->fw_state != QTNF_FW_STATE_ACTIVE && - le16_to_cpu(cmd->cmd_id) != QLINK_CMD_FW_INIT) { + cmd_id != QLINK_CMD_FW_INIT) { pr_warn("VIF%u.%u: drop cmd 0x%.4X in fw state %d\n", - mac_id, vif_id, le16_to_cpu(cmd->cmd_id), - bus->fw_state); + mac_id, vif_id, cmd_id, bus->fw_state); dev_kfree_skb(cmd_skb); return -ENODEV; } @@ -128,7 +126,7 @@ out: return qtnf_cmd_resp_result_decode(le16_to_cpu(resp->result)); pr_warn("VIF%u.%u: cmd 0x%.4X failed: %d\n", - mac_id, vif_id, le16_to_cpu(cmd->cmd_id), ret); + mac_id, vif_id, cmd_id, ret); return ret; } From a1a02062ad466052a34a8c4323143ccf9726eb52 Mon Sep 17 00:00:00 2001 From: Petr Vorel Date: Mon, 12 Nov 2018 11:59:12 +0100 Subject: [PATCH 0451/1436] apparmor: Fix warning about unused function apparmor_ipv6_postroute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit when compiled without CONFIG_IPV6: security/apparmor/lsm.c:1601:21: warning: ‘apparmor_ipv6_postroute’ defined but not used [-Wunused-function] static unsigned int apparmor_ipv6_postroute(void *priv, ^~~~~~~~~~~~~~~~~~~~~~~ Reported-by: Jordan Glover Tested-by: Jordan Glover Signed-off-by: Petr Vorel Signed-off-by: John Johansen --- security/apparmor/lsm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 2c010874329f..8db1731d046a 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1599,12 +1599,14 @@ static unsigned int apparmor_ipv4_postroute(void *priv, return apparmor_ip_postroute(priv, skb, state); } +#if IS_ENABLED(CONFIG_IPV6) static unsigned int apparmor_ipv6_postroute(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return apparmor_ip_postroute(priv, skb, state); } +#endif static const struct nf_hook_ops apparmor_nf_ops[] = { { From a4296994eb8061ee3455721a296c387c639bf635 Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Tue, 15 Jan 2019 14:01:29 +0000 Subject: [PATCH 0452/1436] rt2x00: Work around a firmware bug with shared keys Apparently the rt2x61 firmware fails temporarily to decode broadcast packets if the shared keys are not assigned in the "correct" sequence. At the same time unicast packets work fine, since they are encrypted with the pairwise key. At least with WPA2 CCMP mode the shared keys are set in the following sequence: keyidx=1, 2, 1, 2. After a while only keyidx 2 gets decrypted, and keyidx 1 is ignored, probably because there is never a keyidx 3. Symptoms are arping -b works for 10 minutes, since keyidx=2 is used for broadcast, and then it stops working for 10 minutes, because keyidx=1 is used. That failure mode repeats forever. Note, the firmware does not even know which keyidx corresponds to which hw_key_idx so the firmware is trying to be smarter than the driver, which is bound to fail. As workaround the function rt61pci_config_shared_key requests software decryption of the shared keys, by returning EOPNOTSUPP. However, pairwise keys are still handled by hardware which works just fine. Signed-off-by: Bernd Edlinger Acked-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/ralink/rt2x00/rt61pci.c | 93 +------------------- 1 file changed, 4 insertions(+), 89 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt61pci.c b/drivers/net/wireless/ralink/rt2x00/rt61pci.c index 4c5de8fc8f12..52b9fc480f8b 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt61pci.c +++ b/drivers/net/wireless/ralink/rt2x00/rt61pci.c @@ -321,97 +321,12 @@ static int rt61pci_config_shared_key(struct rt2x00_dev *rt2x00dev, struct rt2x00lib_crypto *crypto, struct ieee80211_key_conf *key) { - struct hw_key_entry key_entry; - struct rt2x00_field32 field; - u32 mask; - u32 reg; - - if (crypto->cmd == SET_KEY) { - /* - * rt2x00lib can't determine the correct free - * key_idx for shared keys. We have 1 register - * with key valid bits. The goal is simple, read - * the register, if that is full we have no slots - * left. - * Note that each BSS is allowed to have up to 4 - * shared keys, so put a mask over the allowed - * entries. - */ - mask = (0xf << crypto->bssidx); - - reg = rt2x00mmio_register_read(rt2x00dev, SEC_CSR0); - reg &= mask; - - if (reg && reg == mask) - return -ENOSPC; - - key->hw_key_idx += reg ? ffz(reg) : 0; - - /* - * Upload key to hardware - */ - memcpy(key_entry.key, crypto->key, - sizeof(key_entry.key)); - memcpy(key_entry.tx_mic, crypto->tx_mic, - sizeof(key_entry.tx_mic)); - memcpy(key_entry.rx_mic, crypto->rx_mic, - sizeof(key_entry.rx_mic)); - - reg = SHARED_KEY_ENTRY(key->hw_key_idx); - rt2x00mmio_register_multiwrite(rt2x00dev, reg, - &key_entry, sizeof(key_entry)); - - /* - * The cipher types are stored over 2 registers. - * bssidx 0 and 1 keys are stored in SEC_CSR1 and - * bssidx 1 and 2 keys are stored in SEC_CSR5. - * Using the correct defines correctly will cause overhead, - * so just calculate the correct offset. - */ - if (key->hw_key_idx < 8) { - field.bit_offset = (3 * key->hw_key_idx); - field.bit_mask = 0x7 << field.bit_offset; - - reg = rt2x00mmio_register_read(rt2x00dev, SEC_CSR1); - rt2x00_set_field32(®, field, crypto->cipher); - rt2x00mmio_register_write(rt2x00dev, SEC_CSR1, reg); - } else { - field.bit_offset = (3 * (key->hw_key_idx - 8)); - field.bit_mask = 0x7 << field.bit_offset; - - reg = rt2x00mmio_register_read(rt2x00dev, SEC_CSR5); - rt2x00_set_field32(®, field, crypto->cipher); - rt2x00mmio_register_write(rt2x00dev, SEC_CSR5, reg); - } - - /* - * The driver does not support the IV/EIV generation - * in hardware. However it doesn't support the IV/EIV - * inside the ieee80211 frame either, but requires it - * to be provided separately for the descriptor. - * rt2x00lib will cut the IV/EIV data out of all frames - * given to us by mac80211, but we must tell mac80211 - * to generate the IV/EIV data. - */ - key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; - } - /* - * SEC_CSR0 contains only single-bit fields to indicate - * a particular key is valid. Because using the FIELD32() - * defines directly will cause a lot of overhead, we use - * a calculation to determine the correct bit directly. + * Let the software handle the shared keys, + * since the hardware decryption does not work reliably, + * because the firmware does not know the key's keyidx. */ - mask = 1 << key->hw_key_idx; - - reg = rt2x00mmio_register_read(rt2x00dev, SEC_CSR0); - if (crypto->cmd == SET_KEY) - reg |= mask; - else if (crypto->cmd == DISABLE_KEY) - reg &= ~mask; - rt2x00mmio_register_write(rt2x00dev, SEC_CSR0, reg); - - return 0; + return -EOPNOTSUPP; } static int rt61pci_config_pairwise_key(struct rt2x00_dev *rt2x00dev, From 6dcbe4592e7adf281a0593bb01f818b20d572b37 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 16 Jan 2019 17:25:33 +0000 Subject: [PATCH 0453/1436] libertas: fix indentation issue There is a statement that is incorrectly indented, fix it. Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/libertas_tf/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/libertas_tf/main.c b/drivers/net/wireless/marvell/libertas_tf/main.c index 1d45da187b9b..a7cb7d06e5e6 100644 --- a/drivers/net/wireless/marvell/libertas_tf/main.c +++ b/drivers/net/wireless/marvell/libertas_tf/main.c @@ -676,7 +676,7 @@ int lbtf_remove_card(struct lbtf_private *priv) ieee80211_unregister_hw(hw); ieee80211_free_hw(hw); - lbtf_deb_leave(LBTF_DEB_MAIN); + lbtf_deb_leave(LBTF_DEB_MAIN); return 0; } EXPORT_SYMBOL_GPL(lbtf_remove_card); From b0535d502d1804197f3aaaaf5a3429c5779268e4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 17 Jan 2019 14:36:23 +0000 Subject: [PATCH 0454/1436] cw1200: fix indentation issues There are two lines that have indentation issues, fix these. Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/st/cw1200/fwio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/st/cw1200/fwio.c b/drivers/net/wireless/st/cw1200/fwio.c index 30e7646d04af..b7881232499c 100644 --- a/drivers/net/wireless/st/cw1200/fwio.c +++ b/drivers/net/wireless/st/cw1200/fwio.c @@ -465,8 +465,8 @@ int cw1200_load_firmware(struct cw1200_common *priv) if (!(val32 & ST90TDS_CONFIG_ACCESS_MODE_BIT)) { pr_err("Device is already in QUEUE mode!\n"); - ret = -EINVAL; - goto out; + ret = -EINVAL; + goto out; } switch (priv->hw_type) { From 2d76fff865d64974da79e64bb5e9ad63c20bbb9f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 17 Jan 2019 15:28:59 +0000 Subject: [PATCH 0455/1436] rtlwifi: rtl818x: fix indentation issue There is a statement that is indented too deeply. Fix this. Signed-off-by: Colin Ian King Acked-by: John W. Linville Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c index 225c1c8851cc..e2b1bfbcfbd4 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c +++ b/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c @@ -803,7 +803,7 @@ static void rtl8180_config_cardbus(struct ieee80211_hw *dev) rtl818x_iowrite16(priv, FEMR_SE, 0xffff); } else { reg16 = rtl818x_ioread16(priv, &priv->map->FEMR); - reg16 |= (1 << 15) | (1 << 14) | (1 << 4); + reg16 |= (1 << 15) | (1 << 14) | (1 << 4); rtl818x_iowrite16(priv, &priv->map->FEMR, reg16); } From 999eb686aa909c4609ae336979327bcf3d876462 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 18 Jan 2019 11:32:15 +0800 Subject: [PATCH 0456/1436] wireless: remove unneeded semicolon remove unneeded semicolon Signed-off-by: YueHaibing Acked-by: Ping-Ke Shih Acked-by: Steve deRosier Acked-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/wmi.h | 2 +- drivers/net/wireless/ath/ath6kl/init.c | 2 +- drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c | 2 +- drivers/net/wireless/ray_cs.c | 2 +- drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c | 6 +++--- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index 2034ccc7cc72..021eb30d1fb7 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -1986,7 +1986,7 @@ static inline const char *ath10k_wmi_phymode_str(enum wmi_phy_mode mode) /* no default handler to allow compiler to check that the * enum is fully handled */ - }; + } return ""; } diff --git a/drivers/net/wireless/ath/ath6kl/init.c b/drivers/net/wireless/ath/ath6kl/init.c index 54132af70094..aa1c71a76ef7 100644 --- a/drivers/net/wireless/ath/ath6kl/init.c +++ b/drivers/net/wireless/ath/ath6kl/init.c @@ -1140,7 +1140,7 @@ static int ath6kl_fetch_fw_apin(struct ath6kl *ar, const char *name) len -= ie_len; data += ie_len; - }; + } ret = 0; out: diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c index d64bf233b12c..ec129864cc9c 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c @@ -315,7 +315,7 @@ static int brcmf_sdiod_skbuff_read(struct brcmf_sdio_dev *sdiodev, /* bail out as things are really fishy here */ WARN(1, "invalid sdio function number: %d\n", func->num); err = -ENOMEDIUM; - }; + } if (err == -ENOMEDIUM) brcmf_sdiod_change_state(sdiodev, BRCMF_SDIOD_NOMEDIUM); diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 8b2741c8edf2..44a943d18b84 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -2211,7 +2211,7 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, untranslate(local, skb, total_len); } } else { /* sniffer mode, so just pass whole packet */ - }; + } /************************/ /* Now pick up the rest of the fragments if any */ diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c index 1263b12db5dc..11f94b1a436f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c @@ -332,7 +332,7 @@ static void _rtl8723be_phy_set_txpower_by_rate_base(struct ieee80211_hw *hw, "Invalid RateSection %d in Band 2.4G, Rf Path %d, %dTx in PHY_SetTxPowerByRateBase()\n", rate_section, path, txnum); break; - }; + } } else { RT_TRACE(rtlpriv, COMP_INIT, DBG_LOUD, "Invalid Band %d in PHY_SetTxPowerByRateBase()\n", @@ -374,7 +374,7 @@ static u8 _rtl8723be_phy_get_txpower_by_rate_base(struct ieee80211_hw *hw, "Invalid RateSection %d in Band 2.4G, Rf Path %d, %dTx in PHY_GetTxPowerByRateBase()\n", rate_section, path, txnum); break; - }; + } } else { RT_TRACE(rtlpriv, COMP_INIT, DBG_LOUD, "Invalid Band %d in PHY_GetTxPowerByRateBase()\n", @@ -694,7 +694,7 @@ static u8 _rtl8723be_get_rate_section_index(u32 regaddr) else if (regaddr >= 0xE20 && regaddr <= 0xE4C) index = (u8)((regaddr - 0xE20) / 4); break; - }; + } return index; } From 1e1b4161e85971c5644d627fc174a4502907d563 Mon Sep 17 00:00:00 2001 From: Siva Rebbagondla Date: Fri, 18 Jan 2019 14:48:07 +0530 Subject: [PATCH 0457/1436] rsi: Suppress sdhci warnings in mmc while inserting and removing sdio module multiple times, we are getting sdhci warnings. This is because, improper assignment of ocr_avail value. Fixed this by assigning proper value. This patch is enhancement for commit 78e450719c702 ("rsi: Fix 'invalid vdd' warning in mmc"). Signed-off-by: Siva Rebbagondla Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_sdio.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c index 5733e440ecaf..b412b65eb1f4 100644 --- a/drivers/net/wireless/rsi/rsi_91x_sdio.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c @@ -230,16 +230,19 @@ static void rsi_reset_card(struct sdio_func *pfunction) rsi_dbg(ERR_ZONE, "%s: CMD0 failed : %d\n", __func__, err); /* Issue CMD5, arg = 0 */ - err = rsi_issue_sdiocommand(pfunction, SD_IO_SEND_OP_COND, 0, - (MMC_RSP_R4 | MMC_CMD_BCR), &resp); - if (err) - rsi_dbg(ERR_ZONE, "%s: CMD5 failed : %d\n", __func__, err); - card->ocr = resp; + if (!host->ocr_avail) { + err = rsi_issue_sdiocommand(pfunction, SD_IO_SEND_OP_COND, 0, + (MMC_RSP_R4 | MMC_CMD_BCR), &resp); + if (err) + rsi_dbg(ERR_ZONE, "%s: CMD5 failed : %d\n", + __func__, err); + host->ocr_avail = resp; + } /* Issue CMD5, arg = ocr. Wait till card is ready */ for (i = 0; i < 100; i++) { err = rsi_issue_sdiocommand(pfunction, SD_IO_SEND_OP_COND, - card->ocr, + host->ocr_avail, (MMC_RSP_R4 | MMC_CMD_BCR), &resp); if (err) { rsi_dbg(ERR_ZONE, "%s: CMD5 failed : %d\n", From 112ec26fcdc55ee1ecfe98ee9e1d6852daa25649 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 18 Jan 2019 23:12:31 +0000 Subject: [PATCH 0458/1436] wlcore: clean up an indentation issue There is a goto statement that is missing a tab for indentation. Fix this. Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wlcore/cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ti/wlcore/cmd.c b/drivers/net/wireless/ti/wlcore/cmd.c index 903968735a74..348be0aed97e 100644 --- a/drivers/net/wireless/ti/wlcore/cmd.c +++ b/drivers/net/wireless/ti/wlcore/cmd.c @@ -1427,7 +1427,7 @@ int wl1271_cmd_set_sta_key(struct wl1271 *wl, struct wl12xx_vif *wlvif, ret = wl1271_cmd_send(wl, CMD_SET_KEYS, cmd, sizeof(*cmd), 0); if (ret < 0) { wl1271_warning("could not set keys"); - goto out; + goto out; } out: From 3bd1505fed71d834f45e87b32ff07157fdda47e0 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 22 Jan 2019 13:47:54 +0100 Subject: [PATCH 0459/1436] mt7601u: bump supported EEPROM version As reported by Michael eeprom 0d is supported and work with the driver. Dump of /sys/kernel/debug/ieee80211/phy1/mt7601u/eeprom_param with 0d EEPORM looks like this: RSSI offset: 0 0 Reference temp: f9 LNA gain: 8 Reg channels: 1-14 Per rate power: raw:05 bw20:05 bw40:05 raw:05 bw20:05 bw40:05 raw:03 bw20:03 bw40:03 raw:03 bw20:03 bw40:03 raw:04 bw20:04 bw40:04 raw:00 bw20:00 bw40:00 raw:00 bw20:00 bw40:00 raw:00 bw20:00 bw40:00 raw:02 bw20:02 bw40:02 raw:00 bw20:00 bw40:00 Per channel power: tx_power ch1:09 ch2:09 tx_power ch3:0a ch4:0a tx_power ch5:0a ch6:0a tx_power ch7:0b ch8:0b tx_power ch9:0b ch10:0b tx_power ch11:0b ch12:0b tx_power ch13:0b ch14:0b Reported-and-tested-by: Michael Signed-off-by: Stanislaw Gruszka Acked-by: Jakub Kicinski Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt7601u/eeprom.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt7601u/eeprom.h b/drivers/net/wireless/mediatek/mt7601u/eeprom.h index 662d12703b69..57b503ae63f1 100644 --- a/drivers/net/wireless/mediatek/mt7601u/eeprom.h +++ b/drivers/net/wireless/mediatek/mt7601u/eeprom.h @@ -17,7 +17,7 @@ struct mt7601u_dev; -#define MT7601U_EE_MAX_VER 0x0c +#define MT7601U_EE_MAX_VER 0x0d #define MT7601U_EEPROM_SIZE 256 #define MT7601U_DEFAULT_TX_POWER 6 From 71ee1284d6267904dc2ef0172a30a93d4a2e8234 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 Jan 2019 16:21:18 +0100 Subject: [PATCH 0460/1436] iwlegacy: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Stanislaw Gruszka Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Acked-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- .../net/wireless/intel/iwlegacy/3945-mac.c | 5 +-- .../net/wireless/intel/iwlegacy/4965-mac.c | 5 +-- drivers/net/wireless/intel/iwlegacy/common.h | 6 ++-- drivers/net/wireless/intel/iwlegacy/debug.c | 34 +++---------------- 4 files changed, 8 insertions(+), 42 deletions(-) diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c index 57e3b6cca234..271977f7fbb0 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c @@ -3756,10 +3756,7 @@ il3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto out_remove_sysfs; - err = il_dbgfs_register(il, DRV_NAME); - if (err) - IL_ERR("failed to create debugfs files. Ignoring error: %d\n", - err); + il_dbgfs_register(il, DRV_NAME); /* Start monitoring the killswitch */ queue_delayed_work(il->workqueue, &il->_3945.rfkill_poll, 2 * HZ); diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index 6b4488a178a7..94222ae464ae 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -4988,10 +4988,7 @@ il4965_ucode_callback(const struct firmware *ucode_raw, void *context) if (err) goto out_unbind; - err = il_dbgfs_register(il, DRV_NAME); - if (err) - IL_ERR("failed to create debugfs files. Ignoring error: %d\n", - err); + il_dbgfs_register(il, DRV_NAME); err = sysfs_create_group(&il->pci_dev->dev.kobj, &il_attribute_group); if (err) { diff --git a/drivers/net/wireless/intel/iwlegacy/common.h b/drivers/net/wireless/intel/iwlegacy/common.h index dc6a74a05983..b079c64ca014 100644 --- a/drivers/net/wireless/intel/iwlegacy/common.h +++ b/drivers/net/wireless/intel/iwlegacy/common.h @@ -2974,13 +2974,11 @@ il_print_hex_dump(struct il_priv *il, int level, const void *p, u32 len) #endif /* CONFIG_IWLEGACY_DEBUG */ #ifdef CONFIG_IWLEGACY_DEBUGFS -int il_dbgfs_register(struct il_priv *il, const char *name); +void il_dbgfs_register(struct il_priv *il, const char *name); void il_dbgfs_unregister(struct il_priv *il); #else -static inline int -il_dbgfs_register(struct il_priv *il, const char *name) +static inline void il_dbgfs_register(struct il_priv *il, const char *name) { - return 0; } static inline void diff --git a/drivers/net/wireless/intel/iwlegacy/debug.c b/drivers/net/wireless/intel/iwlegacy/debug.c index d76073def677..fa211412e0ac 100644 --- a/drivers/net/wireless/intel/iwlegacy/debug.c +++ b/drivers/net/wireless/intel/iwlegacy/debug.c @@ -128,23 +128,12 @@ EXPORT_SYMBOL(il_update_stats); /* create and remove of files */ #define DEBUGFS_ADD_FILE(name, parent, mode) do { \ - if (!debugfs_create_file(#name, mode, parent, il, \ - &il_dbgfs_##name##_ops)) \ - goto err; \ + debugfs_create_file(#name, mode, parent, il, \ + &il_dbgfs_##name##_ops); \ } while (0) #define DEBUGFS_ADD_BOOL(name, parent, ptr) do { \ - struct dentry *__tmp; \ - __tmp = debugfs_create_bool(#name, 0600, parent, ptr); \ - if (IS_ERR(__tmp) || !__tmp) \ - goto err; \ -} while (0) - -#define DEBUGFS_ADD_X32(name, parent, ptr) do { \ - struct dentry *__tmp; \ - __tmp = debugfs_create_x32(#name, 0600, parent, ptr); \ - if (IS_ERR(__tmp) || !__tmp) \ - goto err; \ + debugfs_create_bool(#name, 0600, parent, ptr); \ } while (0) /* file operation */ @@ -1341,27 +1330,18 @@ DEBUGFS_WRITE_FILE_OPS(wd_timeout); * Create the debugfs files and directories * */ -int +void il_dbgfs_register(struct il_priv *il, const char *name) { struct dentry *phyd = il->hw->wiphy->debugfsdir; struct dentry *dir_drv, *dir_data, *dir_rf, *dir_debug; dir_drv = debugfs_create_dir(name, phyd); - if (!dir_drv) - return -ENOMEM; - il->debugfs_dir = dir_drv; dir_data = debugfs_create_dir("data", dir_drv); - if (!dir_data) - goto err; dir_rf = debugfs_create_dir("rf", dir_drv); - if (!dir_rf) - goto err; dir_debug = debugfs_create_dir("debug", dir_drv); - if (!dir_debug) - goto err; DEBUGFS_ADD_FILE(nvm, dir_data, 0400); DEBUGFS_ADD_FILE(sram, dir_data, 0600); @@ -1399,12 +1379,6 @@ il_dbgfs_register(struct il_priv *il, const char *name) DEBUGFS_ADD_BOOL(disable_chain_noise, dir_rf, &il->disable_chain_noise_cal); DEBUGFS_ADD_BOOL(disable_tx_power, dir_rf, &il->disable_tx_power_cal); - return 0; - -err: - IL_ERR("Can't create the debugfs directory\n"); - il_dbgfs_unregister(il); - return -ENOMEM; } EXPORT_SYMBOL(il_dbgfs_register); From ad2106ca00dfce85ccae7dea28168c3d9e6cc800 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 Jan 2019 16:21:24 +0100 Subject: [PATCH 0461/1436] wlcore: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Kalle Valo Cc: Tony Lindgren Cc: linux-wireless@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Acked-by: Tony Lindgren Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wlcore/debugfs.c | 28 ++++-------------------- drivers/net/wireless/ti/wlcore/debugfs.h | 10 +++------ 2 files changed, 7 insertions(+), 31 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/debugfs.c b/drivers/net/wireless/ti/wlcore/debugfs.c index aeb74e74698e..68acd901d384 100644 --- a/drivers/net/wireless/ti/wlcore/debugfs.c +++ b/drivers/net/wireless/ti/wlcore/debugfs.c @@ -1301,11 +1301,10 @@ static const struct file_operations fw_logger_ops = { .llseek = default_llseek, }; -static int wl1271_debugfs_add_files(struct wl1271 *wl, - struct dentry *rootdir) +static void wl1271_debugfs_add_files(struct wl1271 *wl, + struct dentry *rootdir) { - int ret = 0; - struct dentry *entry, *streaming; + struct dentry *streaming; DEBUGFS_ADD(tx_queue_len, rootdir); DEBUGFS_ADD(retry_count, rootdir); @@ -1330,23 +1329,11 @@ static int wl1271_debugfs_add_files(struct wl1271 *wl, DEBUGFS_ADD(fw_logger, rootdir); streaming = debugfs_create_dir("rx_streaming", rootdir); - if (!streaming || IS_ERR(streaming)) - goto err; DEBUGFS_ADD_PREFIX(rx_streaming, interval, streaming); DEBUGFS_ADD_PREFIX(rx_streaming, always, streaming); DEBUGFS_ADD_PREFIX(dev, mem, rootdir); - - return 0; - -err: - if (IS_ERR(entry)) - ret = PTR_ERR(entry); - else - ret = -ENOMEM; - - return ret; } void wl1271_debugfs_reset(struct wl1271 *wl) @@ -1367,11 +1354,6 @@ int wl1271_debugfs_init(struct wl1271 *wl) rootdir = debugfs_create_dir(KBUILD_MODNAME, wl->hw->wiphy->debugfsdir); - if (IS_ERR(rootdir)) { - ret = PTR_ERR(rootdir); - goto out; - } - wl->stats.fw_stats = kzalloc(wl->stats.fw_stats_len, GFP_KERNEL); if (!wl->stats.fw_stats) { ret = -ENOMEM; @@ -1380,9 +1362,7 @@ int wl1271_debugfs_init(struct wl1271 *wl) wl->stats.fw_stats_update = jiffies; - ret = wl1271_debugfs_add_files(wl, rootdir); - if (ret < 0) - goto out_exit; + wl1271_debugfs_add_files(wl, rootdir); ret = wlcore_debugfs_init(wl, rootdir); if (ret < 0) diff --git a/drivers/net/wireless/ti/wlcore/debugfs.h b/drivers/net/wireless/ti/wlcore/debugfs.h index bf14676e6515..a4952c4f587e 100644 --- a/drivers/net/wireless/ti/wlcore/debugfs.h +++ b/drivers/net/wireless/ti/wlcore/debugfs.h @@ -53,19 +53,15 @@ static const struct file_operations name## _ops = { \ #define DEBUGFS_ADD(name, parent) \ do { \ - entry = debugfs_create_file(#name, 0400, parent, \ - wl, &name## _ops); \ - if (!entry || IS_ERR(entry)) \ - goto err; \ + debugfs_create_file(#name, 0400, parent, \ + wl, &name## _ops); \ } while (0) #define DEBUGFS_ADD_PREFIX(prefix, name, parent) \ do { \ - entry = debugfs_create_file(#name, 0400, parent, \ + debugfs_create_file(#name, 0400, parent, \ wl, &prefix## _## name## _ops); \ - if (!entry || IS_ERR(entry)) \ - goto err; \ } while (0) #define DEBUGFS_FWSTATS_FILE(sub, name, fmt, struct_type) \ From 72efec9b67ae9a153d83cd75b4e04c85bffa4683 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 Jan 2019 16:21:25 +0100 Subject: [PATCH 0462/1436] wl1251: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Kalle Valo Cc: Tony Lindgren Cc: linux-wireless@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Acked-by: Tony Lindgren Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wl1251/debugfs.c | 59 ++---------------------- 1 file changed, 5 insertions(+), 54 deletions(-) diff --git a/drivers/net/wireless/ti/wl1251/debugfs.c b/drivers/net/wireless/ti/wl1251/debugfs.c index 448da1f8c22f..c99b23aaa70e 100644 --- a/drivers/net/wireless/ti/wl1251/debugfs.c +++ b/drivers/net/wireless/ti/wl1251/debugfs.c @@ -54,11 +54,6 @@ static const struct file_operations name## _ops = { \ #define DEBUGFS_ADD(name, parent) \ wl->debugfs.name = debugfs_create_file(#name, 0400, parent, \ wl, &name## _ops); \ - if (IS_ERR(wl->debugfs.name)) { \ - ret = PTR_ERR(wl->debugfs.name); \ - wl->debugfs.name = NULL; \ - goto out; \ - } #define DEBUGFS_DEL(name) \ do { \ @@ -354,10 +349,8 @@ static void wl1251_debugfs_delete_files(struct wl1251 *wl) DEBUGFS_DEL(excessive_retries); } -static int wl1251_debugfs_add_files(struct wl1251 *wl) +static void wl1251_debugfs_add_files(struct wl1251 *wl) { - int ret = 0; - DEBUGFS_FWSTATS_ADD(tx, internal_desc_overflow); DEBUGFS_FWSTATS_ADD(rx, out_of_mem); @@ -453,12 +446,6 @@ static int wl1251_debugfs_add_files(struct wl1251 *wl) DEBUGFS_ADD(tx_queue_status, wl->debugfs.rootdir); DEBUGFS_ADD(retry_count, wl->debugfs.rootdir); DEBUGFS_ADD(excessive_retries, wl->debugfs.rootdir); - -out: - if (ret < 0) - wl1251_debugfs_delete_files(wl); - - return ret; } void wl1251_debugfs_reset(struct wl1251 *wl) @@ -471,56 +458,20 @@ void wl1251_debugfs_reset(struct wl1251 *wl) int wl1251_debugfs_init(struct wl1251 *wl) { - int ret; + wl->stats.fw_stats = kzalloc(sizeof(*wl->stats.fw_stats), GFP_KERNEL); + if (!wl->stats.fw_stats) + return -ENOMEM; wl->debugfs.rootdir = debugfs_create_dir(KBUILD_MODNAME, NULL); - if (IS_ERR(wl->debugfs.rootdir)) { - ret = PTR_ERR(wl->debugfs.rootdir); - wl->debugfs.rootdir = NULL; - goto err; - } - wl->debugfs.fw_statistics = debugfs_create_dir("fw-statistics", wl->debugfs.rootdir); - if (IS_ERR(wl->debugfs.fw_statistics)) { - ret = PTR_ERR(wl->debugfs.fw_statistics); - wl->debugfs.fw_statistics = NULL; - goto err_root; - } - - wl->stats.fw_stats = kzalloc(sizeof(*wl->stats.fw_stats), - GFP_KERNEL); - - if (!wl->stats.fw_stats) { - ret = -ENOMEM; - goto err_fw; - } - wl->stats.fw_stats_update = jiffies; - ret = wl1251_debugfs_add_files(wl); - - if (ret < 0) - goto err_file; + wl1251_debugfs_add_files(wl); return 0; - -err_file: - kfree(wl->stats.fw_stats); - wl->stats.fw_stats = NULL; - -err_fw: - debugfs_remove(wl->debugfs.fw_statistics); - wl->debugfs.fw_statistics = NULL; - -err_root: - debugfs_remove(wl->debugfs.rootdir); - wl->debugfs.rootdir = NULL; - -err: - return ret; } void wl1251_debugfs_exit(struct wl1251 *wl) From 23f389660301d117340228f1077374b1d6ad8ed2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 Jan 2019 16:21:26 +0100 Subject: [PATCH 0463/1436] wl12xx: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Kalle Valo Cc: Tony Lindgren Cc: linux-wireless@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Acked-by: Tony Lindgren Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wl12xx/debugfs.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/drivers/net/wireless/ti/wl12xx/debugfs.c b/drivers/net/wireless/ti/wl12xx/debugfs.c index 0521cbf858cf..6c3c04eca8fd 100644 --- a/drivers/net/wireless/ti/wl12xx/debugfs.c +++ b/drivers/net/wireless/ti/wl12xx/debugfs.c @@ -125,20 +125,10 @@ WL12XX_DEBUGFS_FWSTATS_FILE(rxpipe, tx_xfr_host_int_trig_rx_data, "%u"); int wl12xx_debugfs_add_files(struct wl1271 *wl, struct dentry *rootdir) { - int ret = 0; - struct dentry *entry, *stats, *moddir; + struct dentry *stats, *moddir; moddir = debugfs_create_dir(KBUILD_MODNAME, rootdir); - if (!moddir || IS_ERR(moddir)) { - entry = moddir; - goto err; - } - stats = debugfs_create_dir("fw_stats", moddir); - if (!stats || IS_ERR(stats)) { - entry = stats; - goto err; - } DEBUGFS_FWSTATS_ADD(tx, internal_desc_overflow); @@ -232,12 +222,4 @@ int wl12xx_debugfs_add_files(struct wl1271 *wl, DEBUGFS_FWSTATS_ADD(rxpipe, tx_xfr_host_int_trig_rx_data); return 0; - -err: - if (IS_ERR(entry)) - ret = PTR_ERR(entry); - else - ret = -ENOMEM; - - return ret; } From c922a3a02d2b41c0d1adc6853eface758f397472 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 Jan 2019 16:21:27 +0100 Subject: [PATCH 0464/1436] wl18xx: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Kalle Valo Cc: Tony Lindgren Cc: linux-wireless@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Acked-by: Tony Lindgren Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wl18xx/debugfs.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/drivers/net/wireless/ti/wl18xx/debugfs.c b/drivers/net/wireless/ti/wl18xx/debugfs.c index 597e934c4630..5f4ec997ca59 100644 --- a/drivers/net/wireless/ti/wl18xx/debugfs.c +++ b/drivers/net/wireless/ti/wl18xx/debugfs.c @@ -422,20 +422,10 @@ static const struct file_operations radar_debug_mode_ops = { int wl18xx_debugfs_add_files(struct wl1271 *wl, struct dentry *rootdir) { - int ret = 0; - struct dentry *entry, *stats, *moddir; + struct dentry *stats, *moddir; moddir = debugfs_create_dir(KBUILD_MODNAME, rootdir); - if (!moddir || IS_ERR(moddir)) { - entry = moddir; - goto err; - } - stats = debugfs_create_dir("fw_stats", moddir); - if (!stats || IS_ERR(stats)) { - entry = stats; - goto err; - } DEBUGFS_ADD(clear_fw_stats, stats); @@ -590,12 +580,4 @@ int wl18xx_debugfs_add_files(struct wl1271 *wl, DEBUGFS_ADD(dynamic_fw_traces, moddir); return 0; - -err: - if (IS_ERR(entry)) - ret = PTR_ERR(entry); - else - ret = -ENOMEM; - - return ret; } From 3059785bb76e8df7d6585f9b9e2869bd1bd489f6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 Jan 2019 16:21:33 +0100 Subject: [PATCH 0465/1436] rtlwifi: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Ping-Ke Shih Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Acked-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/debug.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/debug.c b/drivers/net/wireless/realtek/rtlwifi/debug.c index d70385be9976..8186650efc56 100644 --- a/drivers/net/wireless/realtek/rtlwifi/debug.c +++ b/drivers/net/wireless/realtek/rtlwifi/debug.c @@ -463,12 +463,9 @@ static const struct file_operations file_ops_common_write = { #define RTL_DEBUGFS_ADD_CORE(name, mode, fopname) \ do { \ rtl_debug_priv_ ##name.rtlpriv = rtlpriv; \ - if (!debugfs_create_file(#name, mode, \ - parent, &rtl_debug_priv_ ##name, \ - &file_ops_ ##fopname)) \ - pr_err("Unable to initialize debugfs:%s/%s\n", \ - rtlpriv->dbg.debugfs_name, \ - #name); \ + debugfs_create_file(#name, mode, parent, \ + &rtl_debug_priv_ ##name, \ + &file_ops_ ##fopname); \ } while (0) #define RTL_DEBUGFS_ADD(name) \ @@ -486,11 +483,6 @@ void rtl_debug_add_one(struct ieee80211_hw *hw) rtlpriv->dbg.debugfs_dir = debugfs_create_dir(rtlpriv->dbg.debugfs_name, debugfs_topdir); - if (!rtlpriv->dbg.debugfs_dir) { - pr_err("Unable to init debugfs:/%s/%s\n", rtlpriv->cfg->name, - rtlpriv->dbg.debugfs_name); - return; - } parent = rtlpriv->dbg.debugfs_dir; From b089e6944af34be0e3a0345903e82b4bdc01c4af Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 Jan 2019 16:21:35 +0100 Subject: [PATCH 0466/1436] qtnfmac: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Igor Mitsyanko Cc: Avinash Patil Cc: Sergey Matyukevich Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Reviewed-by: Sergey Matyukevich Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/debug.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/debug.c b/drivers/net/wireless/quantenna/qtnfmac/debug.c index ad70cdb80060..598ece753a4b 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/debug.c +++ b/drivers/net/wireless/quantenna/qtnfmac/debug.c @@ -3,17 +3,9 @@ #include "debug.h" -#undef pr_fmt -#define pr_fmt(fmt) "qtnfmac dbg: %s: " fmt, __func__ - void qtnf_debugfs_init(struct qtnf_bus *bus, const char *name) { bus->dbg_dir = debugfs_create_dir(name, NULL); - - if (IS_ERR_OR_NULL(bus->dbg_dir)) { - pr_warn("failed to create debugfs root dir\n"); - bus->dbg_dir = NULL; - } } void qtnf_debugfs_remove(struct qtnf_bus *bus) @@ -25,9 +17,5 @@ void qtnf_debugfs_remove(struct qtnf_bus *bus) void qtnf_debugfs_add_entry(struct qtnf_bus *bus, const char *name, int (*fn)(struct seq_file *seq, void *data)) { - struct dentry *entry; - - entry = debugfs_create_devm_seqfile(bus->dev, name, bus->dbg_dir, fn); - if (IS_ERR_OR_NULL(entry)) - pr_warn("failed to add entry (%s)\n", name); + debugfs_create_devm_seqfile(bus->dev, name, bus->dbg_dir, fn); } From 80f4c5e3662adad45d63dbf9bd9d524304c3eb1c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 Jan 2019 16:21:28 +0100 Subject: [PATCH 0467/1436] cw1200: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Solomon Peachy Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Kalle Valo --- drivers/net/wireless/st/cw1200/debug.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/drivers/net/wireless/st/cw1200/debug.c b/drivers/net/wireless/st/cw1200/debug.c index 2231ba08bc1f..d94266d9d0b8 100644 --- a/drivers/net/wireless/st/cw1200/debug.c +++ b/drivers/net/wireless/st/cw1200/debug.c @@ -371,28 +371,14 @@ int cw1200_debug_init(struct cw1200_common *priv) d->debugfs_phy = debugfs_create_dir("cw1200", priv->hw->wiphy->debugfsdir); - if (!d->debugfs_phy) - goto err; - - if (!debugfs_create_file("status", 0400, d->debugfs_phy, - priv, &cw1200_status_fops)) - goto err; - - if (!debugfs_create_file("counters", 0400, d->debugfs_phy, - priv, &cw1200_counters_fops)) - goto err; - - if (!debugfs_create_file("wsm_dumps", 0200, d->debugfs_phy, - priv, &fops_wsm_dumps)) - goto err; + debugfs_create_file("status", 0400, d->debugfs_phy, priv, + &cw1200_status_fops); + debugfs_create_file("counters", 0400, d->debugfs_phy, priv, + &cw1200_counters_fops); + debugfs_create_file("wsm_dumps", 0200, d->debugfs_phy, priv, + &fops_wsm_dumps); return 0; - -err: - priv->debug = NULL; - debugfs_remove_recursive(d->debugfs_phy); - kfree(d); - return ret; } void cw1200_debug_release(struct cw1200_common *priv) From e9bdcdc7cdf02819ea3f4205c7286ab97a9e1a1b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 Jan 2019 16:21:29 +0100 Subject: [PATCH 0468/1436] b43: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Cc: b43-dev@lists.infradead.org Signed-off-by: Greg Kroah-Hartman Acked-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/b43/debugfs.c | 36 +++++---------------- 1 file changed, 8 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/broadcom/b43/debugfs.c b/drivers/net/wireless/broadcom/b43/debugfs.c index 77046384dd80..976c8ec4e992 100644 --- a/drivers/net/wireless/broadcom/b43/debugfs.c +++ b/drivers/net/wireless/broadcom/b43/debugfs.c @@ -668,15 +668,13 @@ static void b43_remove_dynamic_debug(struct b43_wldev *dev) static void b43_add_dynamic_debug(struct b43_wldev *dev) { struct b43_dfsentry *e = dev->dfsentry; - struct dentry *d; -#define add_dyn_dbg(name, id, initstate) do { \ - e->dyn_debug[id] = (initstate); \ - d = debugfs_create_bool(name, 0600, e->subdir, \ - &(e->dyn_debug[id])); \ - if (!IS_ERR(d)) \ - e->dyn_debug_dentries[id] = d; \ - } while (0) +#define add_dyn_dbg(name, id, initstate) do { \ + e->dyn_debug[id] = (initstate); \ + e->dyn_debug_dentries[id] = \ + debugfs_create_bool(name, 0600, e->subdir, \ + &(e->dyn_debug[id])); \ + } while (0) add_dyn_dbg("debug_xmitpower", B43_DBG_XMITPOWER, false); add_dyn_dbg("debug_dmaoverflow", B43_DBG_DMAOVERFLOW, false); @@ -718,19 +716,6 @@ void b43_debugfs_add_device(struct b43_wldev *dev) snprintf(devdir, sizeof(devdir), "%s", wiphy_name(dev->wl->hw->wiphy)); e->subdir = debugfs_create_dir(devdir, rootdir); - if (!e->subdir || IS_ERR(e->subdir)) { - if (e->subdir == ERR_PTR(-ENODEV)) { - b43dbg(dev->wl, "DebugFS (CONFIG_DEBUG_FS) not " - "enabled in kernel config\n"); - } else { - b43err(dev->wl, "debugfs: cannot create %s directory\n", - devdir); - } - dev->dfsentry = NULL; - kfree(log->log); - kfree(e); - return; - } e->mmio16read_next = 0xFFFF; /* invalid address */ e->mmio32read_next = 0xFFFF; /* invalid address */ @@ -741,13 +726,10 @@ void b43_debugfs_add_device(struct b43_wldev *dev) #define ADD_FILE(name, mode) \ do { \ - struct dentry *d; \ - d = debugfs_create_file(__stringify(name), \ + e->file_##name.dentry = \ + debugfs_create_file(__stringify(name), \ mode, e->subdir, dev, \ &fops_##name.fops); \ - e->file_##name.dentry = NULL; \ - if (!IS_ERR(d)) \ - e->file_##name.dentry = d; \ } while (0) @@ -818,8 +800,6 @@ void b43_debugfs_log_txstat(struct b43_wldev *dev, void b43_debugfs_init(void) { rootdir = debugfs_create_dir(KBUILD_MODNAME, NULL); - if (IS_ERR(rootdir)) - rootdir = NULL; } void b43_debugfs_exit(void) From 32b4ebfe7f12c2481a2aea8372683142d1341a4e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 Jan 2019 16:21:30 +0100 Subject: [PATCH 0469/1436] b43legacy: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Larry Finger Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Cc: b43-dev@lists.infradead.org Signed-off-by: Greg Kroah-Hartman Acked-by: Larry Finger Signed-off-by: Kalle Valo --- .../net/wireless/broadcom/b43legacy/debugfs.c | 35 +++++-------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/drivers/net/wireless/broadcom/b43legacy/debugfs.c b/drivers/net/wireless/broadcom/b43legacy/debugfs.c index 82ef56ed7ca1..8150adee3e34 100644 --- a/drivers/net/wireless/broadcom/b43legacy/debugfs.c +++ b/drivers/net/wireless/broadcom/b43legacy/debugfs.c @@ -361,15 +361,13 @@ static void b43legacy_remove_dynamic_debug(struct b43legacy_wldev *dev) static void b43legacy_add_dynamic_debug(struct b43legacy_wldev *dev) { struct b43legacy_dfsentry *e = dev->dfsentry; - struct dentry *d; -#define add_dyn_dbg(name, id, initstate) do { \ - e->dyn_debug[id] = (initstate); \ - d = debugfs_create_bool(name, 0600, e->subdir, \ - &(e->dyn_debug[id])); \ - if (!IS_ERR(d)) \ - e->dyn_debug_dentries[id] = d; \ - } while (0) +#define add_dyn_dbg(name, id, initstate) do { \ + e->dyn_debug[id] = (initstate); \ + e->dyn_debug_dentries[id] = \ + debugfs_create_bool(name, 0600, e->subdir, \ + &(e->dyn_debug[id])); \ + } while (0) add_dyn_dbg("debug_xmitpower", B43legacy_DBG_XMITPOWER, false); add_dyn_dbg("debug_dmaoverflow", B43legacy_DBG_DMAOVERFLOW, false); @@ -408,29 +406,14 @@ void b43legacy_debugfs_add_device(struct b43legacy_wldev *dev) snprintf(devdir, sizeof(devdir), "%s", wiphy_name(dev->wl->hw->wiphy)); e->subdir = debugfs_create_dir(devdir, rootdir); - if (!e->subdir || IS_ERR(e->subdir)) { - if (e->subdir == ERR_PTR(-ENODEV)) { - b43legacydbg(dev->wl, "DebugFS (CONFIG_DEBUG_FS) not " - "enabled in kernel config\n"); - } else { - b43legacyerr(dev->wl, "debugfs: cannot create %s directory\n", - devdir); - } - dev->dfsentry = NULL; - kfree(log->log); - kfree(e); - return; - } #define ADD_FILE(name, mode) \ do { \ - struct dentry *d; \ - d = debugfs_create_file(__stringify(name), \ + e->file_##name.dentry = \ + debugfs_create_file(__stringify(name), \ mode, e->subdir, dev, \ &fops_##name.fops); \ e->file_##name.dentry = NULL; \ - if (!IS_ERR(d)) \ - e->file_##name.dentry = d; \ } while (0) @@ -492,8 +475,6 @@ void b43legacy_debugfs_log_txstat(struct b43legacy_wldev *dev, void b43legacy_debugfs_init(void) { rootdir = debugfs_create_dir(KBUILD_MODNAME, NULL); - if (IS_ERR(rootdir)) - rootdir = NULL; } void b43legacy_debugfs_exit(void) From 9ae49980bdca37052ffadc10d6b63d6f98cdaae0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 Jan 2019 16:21:31 +0100 Subject: [PATCH 0470/1436] brcmsmac: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Arend van Spriel Cc: Franky Lin Cc: Hante Meuleman Cc: Chi-Hsien Lin Cc: Wright Feng Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Cc: brcm80211-dev-list.pdl@broadcom.com Cc: brcm80211-dev-list@cypress.com Signed-off-by: Greg Kroah-Hartman Acked-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmsmac/debug.c | 26 +++---------------- .../broadcom/brcm80211/brcmsmac/debug.h | 2 +- 2 files changed, 5 insertions(+), 23 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/debug.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/debug.c index 3bd54f125776..6d776ef6ff54 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/debug.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/debug.c @@ -37,27 +37,18 @@ static struct dentry *root_folder; void brcms_debugfs_init(void) { root_folder = debugfs_create_dir(KBUILD_MODNAME, NULL); - if (IS_ERR(root_folder)) - root_folder = NULL; } void brcms_debugfs_exit(void) { - if (!root_folder) - return; - debugfs_remove_recursive(root_folder); root_folder = NULL; } -int brcms_debugfs_attach(struct brcms_pub *drvr) +void brcms_debugfs_attach(struct brcms_pub *drvr) { - if (!root_folder) - return -ENODEV; - drvr->dbgfs_dir = debugfs_create_dir( dev_name(&drvr->wlc->hw->d11core->dev), root_folder); - return PTR_ERR_OR_ZERO(drvr->dbgfs_dir); } void brcms_debugfs_detach(struct brcms_pub *drvr) @@ -195,7 +186,7 @@ static const struct file_operations brcms_debugfs_def_ops = { .llseek = seq_lseek }; -static int +static void brcms_debugfs_add_entry(struct brcms_pub *drvr, const char *fn, int (*read_fn)(struct seq_file *seq, void *data)) { @@ -203,27 +194,18 @@ brcms_debugfs_add_entry(struct brcms_pub *drvr, const char *fn, struct dentry *dentry = drvr->dbgfs_dir; struct brcms_debugfs_entry *entry; - if (IS_ERR_OR_NULL(dentry)) - return -ENOENT; - entry = devm_kzalloc(dev, sizeof(*entry), GFP_KERNEL); if (!entry) - return -ENOMEM; + return; entry->read = read_fn; entry->drvr = drvr; - dentry = debugfs_create_file(fn, 0444, dentry, entry, - &brcms_debugfs_def_ops); - - return PTR_ERR_OR_ZERO(dentry); + debugfs_create_file(fn, 0444, dentry, entry, &brcms_debugfs_def_ops); } void brcms_debugfs_create_files(struct brcms_pub *drvr) { - if (IS_ERR_OR_NULL(drvr->dbgfs_dir)) - return; - brcms_debugfs_add_entry(drvr, "hardware", brcms_debugfs_hardware_read); brcms_debugfs_add_entry(drvr, "macstat", brcms_debugfs_macstat_read); } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/debug.h b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/debug.h index 822781cf15d4..56898e6d789d 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/debug.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/debug.h @@ -68,7 +68,7 @@ void __brcms_dbg(struct device *dev, u32 level, const char *func, struct brcms_pub; void brcms_debugfs_init(void); void brcms_debugfs_exit(void); -int brcms_debugfs_attach(struct brcms_pub *drvr); +void brcms_debugfs_attach(struct brcms_pub *drvr); void brcms_debugfs_detach(struct brcms_pub *drvr); struct dentry *brcms_debugfs_get_devdir(struct brcms_pub *drvr); void brcms_debugfs_create_files(struct brcms_pub *drvr); From b8f1fe51898373d34f4f59f54376a952a16c5dc4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 Jan 2019 16:21:32 +0100 Subject: [PATCH 0471/1436] rsi: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_debugfs.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_debugfs.c b/drivers/net/wireless/rsi/rsi_91x_debugfs.c index 8c6ca8e689e4..d0c35f3e2012 100644 --- a/drivers/net/wireless/rsi/rsi_91x_debugfs.c +++ b/drivers/net/wireless/rsi/rsi_91x_debugfs.c @@ -297,11 +297,6 @@ int rsi_init_dbgfs(struct rsi_hw *adapter) dev_dbgfs->subdir = debugfs_create_dir(devdir, NULL); - if (!dev_dbgfs->subdir) { - kfree(dev_dbgfs); - return -ENOMEM; - } - for (ii = 0; ii < adapter->num_debugfs_entries; ii++) { files = &dev_debugfs_files[ii]; dev_dbgfs->rsi_files[ii] = From 2587791d57588562c21e5ef7e678f02ab2f3eb82 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 Jan 2019 16:21:34 +0100 Subject: [PATCH 0472/1436] rt2x00: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Stanislaw Gruszka Cc: Helmut Schaa Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Acked-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- .../net/wireless/ralink/rt2x00/rt2x00debug.c | 27 ------------------- 1 file changed, 27 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c index 61ba573e8bf1..05a2e8da412c 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c @@ -656,36 +656,24 @@ void rt2x00debug_register(struct rt2x00_dev *rt2x00dev) intf->driver_folder = debugfs_create_dir(intf->rt2x00dev->ops->name, rt2x00dev->hw->wiphy->debugfsdir); - if (IS_ERR(intf->driver_folder) || !intf->driver_folder) - goto exit; intf->driver_entry = rt2x00debug_create_file_driver("driver", intf, &intf->driver_blob); - if (IS_ERR(intf->driver_entry) || !intf->driver_entry) - goto exit; intf->chipset_entry = rt2x00debug_create_file_chipset("chipset", intf, &intf->chipset_blob); - if (IS_ERR(intf->chipset_entry) || !intf->chipset_entry) - goto exit; intf->dev_flags = debugfs_create_file("dev_flags", 0400, intf->driver_folder, intf, &rt2x00debug_fop_dev_flags); - if (IS_ERR(intf->dev_flags) || !intf->dev_flags) - goto exit; intf->cap_flags = debugfs_create_file("cap_flags", 0400, intf->driver_folder, intf, &rt2x00debug_fop_cap_flags); - if (IS_ERR(intf->cap_flags) || !intf->cap_flags) - goto exit; intf->register_folder = debugfs_create_dir("register", intf->driver_folder); - if (IS_ERR(intf->register_folder) || !intf->register_folder) - goto exit; #define RT2X00DEBUGFS_CREATE_REGISTER_ENTRY(__intf, __name) \ ({ \ @@ -695,9 +683,6 @@ void rt2x00debug_register(struct rt2x00_dev *rt2x00dev) 0600, \ (__intf)->register_folder, \ &(__intf)->offset_##__name); \ - if (IS_ERR((__intf)->__name##_off_entry) || \ - !(__intf)->__name##_off_entry) \ - goto exit; \ \ (__intf)->__name##_val_entry = \ debugfs_create_file(__stringify(__name) "_value", \ @@ -705,9 +690,6 @@ void rt2x00debug_register(struct rt2x00_dev *rt2x00dev) (__intf)->register_folder, \ (__intf), \ &rt2x00debug_fop_##__name); \ - if (IS_ERR((__intf)->__name##_val_entry) || \ - !(__intf)->__name##_val_entry) \ - goto exit; \ } \ }) @@ -721,15 +703,10 @@ void rt2x00debug_register(struct rt2x00_dev *rt2x00dev) intf->queue_folder = debugfs_create_dir("queue", intf->driver_folder); - if (IS_ERR(intf->queue_folder) || !intf->queue_folder) - goto exit; intf->queue_frame_dump_entry = debugfs_create_file("dump", 0400, intf->queue_folder, intf, &rt2x00debug_fop_queue_dump); - if (IS_ERR(intf->queue_frame_dump_entry) - || !intf->queue_frame_dump_entry) - goto exit; skb_queue_head_init(&intf->frame_dump_skbqueue); init_waitqueue_head(&intf->frame_dump_waitqueue); @@ -747,10 +724,6 @@ void rt2x00debug_register(struct rt2x00_dev *rt2x00dev) #endif return; - -exit: - rt2x00debug_deregister(rt2x00dev); - rt2x00_err(rt2x00dev, "Failed to register debug handler\n"); } void rt2x00debug_deregister(struct rt2x00_dev *rt2x00dev) From caac453a9be7f79a9ebf4d2efa08189fd68837e0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 Jan 2019 16:21:36 +0100 Subject: [PATCH 0473/1436] libertas: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Kalle Valo Cc: libertas-dev@lists.infradead.org Cc: linux-wireless@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/libertas/debugfs.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/debugfs.c b/drivers/net/wireless/marvell/libertas/debugfs.c index c83f44f9ddf1..fe14814af300 100644 --- a/drivers/net/wireless/marvell/libertas/debugfs.c +++ b/drivers/net/wireless/marvell/libertas/debugfs.c @@ -708,8 +708,6 @@ void lbs_debugfs_init_one(struct lbs_private *priv, struct net_device *dev) goto exit; priv->debugfs_dir = debugfs_create_dir(dev->name, lbs_dir); - if (!priv->debugfs_dir) - goto exit; for (i=0; ievents_dir = debugfs_create_dir("subscribed_events", priv->debugfs_dir); - if (!priv->events_dir) - goto exit; for (i=0; iregs_dir = debugfs_create_dir("registers", priv->debugfs_dir); - if (!priv->regs_dir) - goto exit; for (i=0; i Date: Tue, 22 Jan 2019 16:21:37 +0100 Subject: [PATCH 0474/1436] mwifiex: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Amitkumar Karwar Cc: Nishant Sarmukadam Cc: Ganapathi Bhat Cc: Xinming Hu Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/debugfs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/debugfs.c b/drivers/net/wireless/marvell/mwifiex/debugfs.c index cbe4493b3266..8ab114cf3467 100644 --- a/drivers/net/wireless/marvell/mwifiex/debugfs.c +++ b/drivers/net/wireless/marvell/mwifiex/debugfs.c @@ -922,9 +922,8 @@ mwifiex_reset_write(struct file *file, } #define MWIFIEX_DFS_ADD_FILE(name) do { \ - if (!debugfs_create_file(#name, 0644, priv->dfs_dev_dir, \ - priv, &mwifiex_dfs_##name##_fops)) \ - return; \ + debugfs_create_file(#name, 0644, priv->dfs_dev_dir, priv, \ + &mwifiex_dfs_##name##_fops); \ } while (0); #define MWIFIEX_DFS_FILE_OPS(name) \ From b6958ad021c1f46675aa6d3602ce0d241a45785d Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 22 Jan 2019 16:39:34 +0100 Subject: [PATCH 0475/1436] mt7601u: do not use WARN_ON in the datapath Substitute WARN_ON with WARN_ON_ONCE in mt7601u_rx_next_seg_len routine Signed-off-by: Lorenzo Bianconi Acked-by: Jakub Kicinski Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt7601u/dma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c b/drivers/net/wireless/mediatek/mt7601u/dma.c index 7f3e3983b781..f7edeffb2b19 100644 --- a/drivers/net/wireless/mediatek/mt7601u/dma.c +++ b/drivers/net/wireless/mediatek/mt7601u/dma.c @@ -124,9 +124,9 @@ static u16 mt7601u_rx_next_seg_len(u8 *data, u32 data_len) u16 dma_len = get_unaligned_le16(data); if (data_len < min_seg_len || - WARN_ON(!dma_len) || - WARN_ON(dma_len + MT_DMA_HDRS > data_len) || - WARN_ON(dma_len & 0x3)) + WARN_ON_ONCE(!dma_len) || + WARN_ON_ONCE(dma_len + MT_DMA_HDRS > data_len) || + WARN_ON_ONCE(dma_len & 0x3)) return 0; return MT_DMA_HDRS + dma_len; From 030b43671ae88f827a415ad35700705ffc10d10d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 26 Jan 2019 00:18:00 +0900 Subject: [PATCH 0476/1436] wireless: prefix header search paths with $(srctree)/ Currently, the Kbuild core manipulates header search paths in a crazy way [1]. To fix this mess, I want all Makefiles to add explicit $(srctree)/ to the search paths in the srctree. Some Makefiles are already written in that way, but not all. The goal of this work is to make the notation consistent, and finally get rid of the gross hacks. Having whitespaces after -I does not matter since commit 48f6e3cf5bc6 ("kbuild: do not drop -I without parameter"). I also removed one header search path in: drivers/net/wireless/broadcom/brcm80211/brcmutil/Makefile I was able to compile without it. [1]: https://patchwork.kernel.org/patch/9632347/ Signed-off-by: Masahiro Yamada Acked-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile | 4 ++-- drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile | 6 +++--- drivers/net/wireless/broadcom/brcm80211/brcmutil/Makefile | 4 +--- drivers/net/wireless/intel/iwlwifi/dvm/Makefile | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/Makefile | 2 +- drivers/net/wireless/realtek/rtl818x/rtl8180/Makefile | 2 +- drivers/net/wireless/realtek/rtl818x/rtl8187/Makefile | 2 +- 7 files changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile index 22fd95a736a8..f7cf3e5f4849 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile @@ -16,8 +16,8 @@ # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ccflags-y += \ - -Idrivers/net/wireless/broadcom/brcm80211/brcmfmac \ - -Idrivers/net/wireless/broadcom/brcm80211/include + -I $(srctree)/$(src) \ + -I $(srctree)/$(src)/../include obj-$(CONFIG_BRCMFMAC) += brcmfmac.o brcmfmac-objs += \ diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile index ed83f33aceb7..482d7737764d 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile @@ -16,9 +16,9 @@ # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ccflags-y := \ - -Idrivers/net/wireless/broadcom/brcm80211/brcmsmac \ - -Idrivers/net/wireless/broadcom/brcm80211/brcmsmac/phy \ - -Idrivers/net/wireless/broadcom/brcm80211/include + -I $(srctree)/$(src) \ + -I $(srctree)/$(src)/phy \ + -I $(srctree)/$(src)/../include brcmsmac-y := \ mac80211_if.o \ diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmutil/Makefile b/drivers/net/wireless/broadcom/brcm80211/brcmutil/Makefile index 256c91f9ac4b..bb02c6220a88 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmutil/Makefile +++ b/drivers/net/wireless/broadcom/brcm80211/brcmutil/Makefile @@ -15,9 +15,7 @@ # OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -ccflags-y := \ - -Idrivers/net/wireless/broadcom/brcm80211/brcmutil \ - -Idrivers/net/wireless/broadcom/brcm80211/include +ccflags-y := -I $(srctree)/$(src)/../include obj-$(CONFIG_BRCMUTIL) += brcmutil.o brcmutil-objs = utils.o d11.o diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/Makefile b/drivers/net/wireless/intel/iwlwifi/dvm/Makefile index 702d42b2d452..0486b17d7c41 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/Makefile +++ b/drivers/net/wireless/intel/iwlwifi/dvm/Makefile @@ -11,4 +11,4 @@ iwldvm-objs += rxon.o devices.o iwldvm-$(CONFIG_IWLWIFI_LEDS) += led.o iwldvm-$(CONFIG_IWLWIFI_DEBUGFS) += debugfs.o -ccflags-y += -I$(src)/../ +ccflags-y += -I $(srctree)/$(src)/../ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/Makefile b/drivers/net/wireless/intel/iwlwifi/mvm/Makefile index 87373b099fd9..30cbd981efbd 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/Makefile +++ b/drivers/net/wireless/intel/iwlwifi/mvm/Makefile @@ -9,4 +9,4 @@ iwlmvm-$(CONFIG_IWLWIFI_DEBUGFS) += debugfs.o debugfs-vif.o iwlmvm-$(CONFIG_IWLWIFI_LEDS) += led.o iwlmvm-$(CONFIG_PM) += d3.o -ccflags-y += -I$(src)/../ +ccflags-y += -I $(srctree)/$(src)/../ diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8180/Makefile b/drivers/net/wireless/realtek/rtl818x/rtl8180/Makefile index 2966681efaef..5d6b06d3c02c 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8180/Makefile +++ b/drivers/net/wireless/realtek/rtl818x/rtl8180/Makefile @@ -2,4 +2,4 @@ rtl818x_pci-objs := dev.o rtl8225.o sa2400.o max2820.o grf5101.o rtl8225se.o obj-$(CONFIG_RTL8180) += rtl818x_pci.o -ccflags-y += -Idrivers/net/wireless/realtek/rtl818x +ccflags-y += -I $(srctree)/$(src)/.. diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/Makefile b/drivers/net/wireless/realtek/rtl818x/rtl8187/Makefile index ff074912a095..95bac73ece7c 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8187/Makefile +++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/Makefile @@ -2,4 +2,4 @@ rtl8187-objs := dev.o rtl8225.o leds.o rfkill.o obj-$(CONFIG_RTL8187) += rtl8187.o -ccflags-y += -Idrivers/net/wireless/realtek/rtl818x +ccflags-y += -I $(srctree)/$(src)/.. From b14e945bda8ae227d1bf2b1837c0c4a61721cd1a Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Thu, 31 Jan 2019 14:25:50 +0100 Subject: [PATCH 0477/1436] drm/sun4i: tcon: Prepare and enable TCON channel 0 clock at init When initializing clocks, a reference to the TCON channel 0 clock is obtained. However, the clock is never prepared and enabled later. Switching from simplefb to DRM actually disables the clock (that was usually configured by U-Boot) because of that. On the V3s, this results in a hang when writing to some mixer registers when switching over to DRM from simplefb. Fix this by preparing and enabling the clock when initializing other clocks. Waiting for sun4i_tcon_channel_enable to enable the clock is apparently too late and results in the same mixer register access hang. Signed-off-by: Paul Kocialkowski Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190131132550.26355-1-paul.kocialkowski@bootlin.com --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 0420f5c978b9..cf45d0f940f9 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -761,6 +761,7 @@ static int sun4i_tcon_init_clocks(struct device *dev, return PTR_ERR(tcon->sclk0); } } + clk_prepare_enable(tcon->sclk0); if (tcon->quirks->has_channel_1) { tcon->sclk1 = devm_clk_get(dev, "tcon-ch1"); @@ -775,6 +776,7 @@ static int sun4i_tcon_init_clocks(struct device *dev, static void sun4i_tcon_free_clocks(struct sun4i_tcon *tcon) { + clk_disable_unprepare(tcon->sclk0); clk_disable_unprepare(tcon->clk); } From 132fdc379eb143932d209a20fd581e1ce7630960 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 24 Jan 2019 17:28:37 +0000 Subject: [PATCH 0478/1436] arm64: Do not issue IPIs for user executable ptes Commit 3b8c9f1cdfc5 ("arm64: IPI each CPU after invalidating the I-cache for kernel mappings") was aimed at fixing the I-cache invalidation for kernel mappings. However, it inadvertently caused all cache maintenance for user mappings via set_pte_at() -> __sync_icache_dcache() -> sync_icache_aliases() to call kick_all_cpus_sync(). Reported-by: Shijith Thotton Tested-by: Shijith Thotton Reported-by: Wandun Chen Fixes: 3b8c9f1cdfc5 ("arm64: IPI each CPU after invalidating the I-cache for kernel mappings") Cc: # 4.19.x- Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/mm/flush.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 30695a868107..5c9073bace83 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -33,7 +33,11 @@ void sync_icache_aliases(void *kaddr, unsigned long len) __clean_dcache_area_pou(kaddr, len); __flush_icache_all(); } else { - flush_icache_range(addr, addr + len); + /* + * Don't issue kick_all_cpus_sync() after I-cache invalidation + * for user mappings. + */ + __flush_icache_range(addr, addr + len); } } From 8ea235932314311f15ea6cf65c1393ed7e31af70 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 27 Jan 2019 09:29:42 +0100 Subject: [PATCH 0479/1436] arm64: kaslr: ensure randomized quantities are clean also when kaslr is off Commit 1598ecda7b23 ("arm64: kaslr: ensure randomized quantities are clean to the PoC") added cache maintenance to ensure that global variables set by the kaslr init routine are not wiped clean due to cache invalidation occurring during the second round of page table creation. However, if kaslr_early_init() exits early with no randomization being applied (either due to the lack of a seed, or because the user has disabled kaslr explicitly), no cache maintenance is performed, leading to the same issue we attempted to fix earlier, as far as the module_alloc_base variable is concerned. Note that module_alloc_base cannot be initialized statically, because that would cause it to be subject to a R_AARCH64_RELATIVE relocation, causing it to be overwritten by the second round of KASLR relocation processing. Fixes: f80fb3a3d508 ("arm64: add support for kernel ASLR") Cc: # v4.6+ Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/kaslr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index ba6b41790fcd..b09b6f75f759 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -88,6 +88,7 @@ u64 __init kaslr_early_init(u64 dt_phys) * we end up running with module randomization disabled. */ module_alloc_base = (u64)_etext - MODULES_VSIZE; + __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base)); /* * Try to map the FDT early. If this fails, we simply bail, From f2b3d8566d81deaca31f4e3163def0bea7746e11 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 24 Jan 2019 16:32:55 +0000 Subject: [PATCH 0480/1436] arm64: kprobe: Always blacklist the KVM world-switch code On systems with VHE the kernel and KVM's world-switch code run at the same exception level. Code that is only used on a VHE system does not need to be annotated as __hyp_text as it can reside anywhere in the kernel text. __hyp_text was also used to prevent kprobes from patching breakpoint instructions into this region, as this code runs at a different exception level. While this is no longer true with VHE, KVM still switches VBAR_EL1, meaning a kprobe's breakpoint executed in the world-switch code will cause a hyp-panic. Move the __hyp_text check in the kprobes blacklist so it applies on VHE systems too, to cover the common code and guest enter/exit assembly. Fixes: 888b3c8720e0 ("arm64: Treat all entry code as non-kprobe-able") Reviewed-by: Christoffer Dall Signed-off-by: James Morse Acked-by: Masami Hiramatsu Signed-off-by: Will Deacon --- arch/arm64/kernel/probes/kprobes.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 2a5b338b2542..f17afb99890c 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -478,13 +478,13 @@ bool arch_within_kprobe_blacklist(unsigned long addr) addr < (unsigned long)__entry_text_end) || (addr >= (unsigned long)__idmap_text_start && addr < (unsigned long)__idmap_text_end) || + (addr >= (unsigned long)__hyp_text_start && + addr < (unsigned long)__hyp_text_end) || !!search_exception_tables(addr)) return true; if (!is_kernel_in_hyp_mode()) { - if ((addr >= (unsigned long)__hyp_text_start && - addr < (unsigned long)__hyp_text_end) || - (addr >= (unsigned long)__hyp_idmap_text_start && + if ((addr >= (unsigned long)__hyp_idmap_text_start && addr < (unsigned long)__hyp_idmap_text_end)) return true; } From 8fac5cbdfe0f01254d9d265c6aa1a95f94f58595 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 24 Jan 2019 16:32:56 +0000 Subject: [PATCH 0481/1436] arm64: hyp-stub: Forbid kprobing of the hyp-stub The hyp-stub is loaded by the kernel's early startup code at EL2 during boot, before KVM takes ownership later. The hyp-stub's text is part of the regular kernel text, meaning it can be kprobed. A breakpoint in the hyp-stub causes the CPU to spin in el2_sync_invalid. Add it to the __hyp_text. Signed-off-by: James Morse Cc: stable@vger.kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/hyp-stub.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index e1261fbaa374..17f325ba831e 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -28,6 +28,8 @@ #include .text + .pushsection .hyp.text, "ax" + .align 11 ENTRY(__hyp_stub_vectors) From f7daa9c8fd191724b9ab9580a7be55cd1a67d799 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 24 Jan 2019 16:32:57 +0000 Subject: [PATCH 0482/1436] arm64: hibernate: Clean the __hyp_text to PoC after resume During resume hibernate restores all physical memory. Any memory that is accessed with the MMU disabled needs to be cleaned to the PoC. KVMs __hyp_text was previously ommitted as it runs with the MMU enabled, but now that the hyp-stub is located in this section, we must clean __hyp_text too. This ensures secondary CPUs that come online after hibernate has finished resuming, and load KVM via the freshly written hyp-stub see the correct instructions. Signed-off-by: James Morse Cc: stable@vger.kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/hibernate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 29cdc99688f3..9859e1178e6b 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -299,8 +299,10 @@ int swsusp_arch_suspend(void) dcache_clean_range(__idmap_text_start, __idmap_text_end); /* Clean kvm setup code to PoC? */ - if (el2_reset_needed()) + if (el2_reset_needed()) { dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end); + dcache_clean_range(__hyp_text_start, __hyp_text_end); + } /* make the crash dump kernel image protected again */ crash_post_resume(); From 5b9633af298bfd1de650f6774d3fada546543101 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 31 Jan 2019 01:25:02 +0100 Subject: [PATCH 0483/1436] binderfs: remove separate device_initcall() binderfs should not have a separate device_initcall(). When a kernel is compiled with CONFIG_ANDROID_BINDERFS register the filesystem alongside CONFIG_ANDROID_IPC. This use-case is especially sensible when users specify CONFIG_ANDROID_IPC=y, CONFIG_ANDROID_BINDERFS=y and ANDROID_BINDER_DEVICES="". When CONFIG_ANDROID_BINDERFS=n then this always succeeds so there's no regression potential for legacy workloads. Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 7 ++++++- drivers/android/binder_internal.h | 9 +++++++++ drivers/android/binderfs.c | 4 +--- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 57cf259de600..4d2b2ad1ee0e 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -5854,9 +5854,10 @@ static int __init init_binder_device(const char *name) static int __init binder_init(void) { int ret; - char *device_name, *device_names, *device_tmp; + char *device_name, *device_tmp; struct binder_device *device; struct hlist_node *tmp; + char *device_names = NULL; ret = binder_alloc_shrinker_init(); if (ret) @@ -5917,6 +5918,10 @@ static int __init binder_init(void) } } + ret = init_binderfs(); + if (ret) + goto err_init_binder_device_failed; + return ret; err_init_binder_device_failed: diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index 7fb97f503ef2..045b3e42d98b 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -46,4 +46,13 @@ static inline bool is_binderfs_device(const struct inode *inode) } #endif +#ifdef CONFIG_ANDROID_BINDERFS +extern int __init init_binderfs(void); +#else +static inline int __init init_binderfs(void) +{ + return 0; +} +#endif + #endif /* _LINUX_BINDER_INTERNAL_H */ diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 7a550104a722..e773f45d19d9 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -550,7 +550,7 @@ static struct file_system_type binder_fs_type = { .fs_flags = FS_USERNS_MOUNT, }; -static int __init init_binderfs(void) +int __init init_binderfs(void) { int ret; @@ -568,5 +568,3 @@ static int __init init_binderfs(void) return ret; } - -device_initcall(init_binderfs); From 70ed7148dadb812f2f7c9927e98ef3cf4869dfa9 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Fri, 1 Feb 2019 10:03:12 +0100 Subject: [PATCH 0484/1436] mic: vop: Fix use-after-free on remove KASAN detects a use-after-free when vop devices are removed. This problem was introduced by commit 0063e8bbd2b62d136 ("virtio_vop: don't kfree device on register failure"). That patch moved the freeing of the struct _vop_vdev to the release function, but failed to ensure that vop holds a reference to the device when it doesn't want it to go away. A kfree() was replaced with a put_device() in the unregistration path, but the last reference to the device is already dropped in unregister_virtio_device() so the struct is freed before vop is done with it. Fix it by holding a reference until cleanup is done. This is similar to the fix in virtio_pci in commit 2989be09a8a9d6 ("virtio_pci: fix use after free on release"). ================================================================== BUG: KASAN: use-after-free in vop_scan_devices+0xc6c/0xe50 [vop] Read of size 8 at addr ffff88800da18580 by task kworker/0:1/12 CPU: 0 PID: 12 Comm: kworker/0:1 Not tainted 5.0.0-rc4+ #53 Workqueue: events vop_hotplug_devices [vop] Call Trace: dump_stack+0x74/0xbb print_address_description+0x5d/0x2b0 ? vop_scan_devices+0xc6c/0xe50 [vop] kasan_report+0x152/0x1aa ? vop_scan_devices+0xc6c/0xe50 [vop] ? vop_scan_devices+0xc6c/0xe50 [vop] vop_scan_devices+0xc6c/0xe50 [vop] ? vop_loopback_free_irq+0x160/0x160 [vop_loopback] process_one_work+0x7c0/0x14b0 ? pwq_dec_nr_in_flight+0x2d0/0x2d0 ? do_raw_spin_lock+0x120/0x280 worker_thread+0x8f/0xbf0 ? __kthread_parkme+0x78/0xf0 ? process_one_work+0x14b0/0x14b0 kthread+0x2ae/0x3a0 ? kthread_park+0x120/0x120 ret_from_fork+0x3a/0x50 Allocated by task 12: kmem_cache_alloc_trace+0x13a/0x2a0 vop_scan_devices+0x473/0xe50 [vop] process_one_work+0x7c0/0x14b0 worker_thread+0x8f/0xbf0 kthread+0x2ae/0x3a0 ret_from_fork+0x3a/0x50 Freed by task 12: kfree+0x104/0x310 device_release+0x73/0x1d0 kobject_put+0x14f/0x420 unregister_virtio_device+0x32/0x50 vop_scan_devices+0x19d/0xe50 [vop] process_one_work+0x7c0/0x14b0 worker_thread+0x8f/0xbf0 kthread+0x2ae/0x3a0 ret_from_fork+0x3a/0x50 The buggy address belongs to the object at ffff88800da18008 which belongs to the cache kmalloc-2k of size 2048 The buggy address is located 1400 bytes inside of 2048-byte region [ffff88800da18008, ffff88800da18808) The buggy address belongs to the page: page:ffffea0000368600 count:1 mapcount:0 mapping:ffff88801440dbc0 index:0x0 compound_mapcount: 0 flags: 0x4000000000010200(slab|head) raw: 4000000000010200 ffffea0000378608 ffffea000037a008 ffff88801440dbc0 raw: 0000000000000000 00000000000d000d 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88800da18480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88800da18500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88800da18580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88800da18600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88800da18680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fixes: 0063e8bbd2b62d136 ("virtio_vop: don't kfree device on register failure") Signed-off-by: Vincent Whitchurch Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mic/vop/vop_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c index 2bd57c2ca02b..9ee3fff3446a 100644 --- a/drivers/misc/mic/vop/vop_main.c +++ b/drivers/misc/mic/vop/vop_main.c @@ -589,6 +589,8 @@ static int _vop_remove_device(struct mic_device_desc __iomem *d, int ret = -1; if (ioread8(&dc->config_change) == MIC_VIRTIO_PARAM_DEV_REMOVE) { + struct device *dev = get_device(&vdev->vdev.dev); + dev_dbg(&vpdev->dev, "%s %d config_change %d type %d vdev %p\n", __func__, __LINE__, @@ -600,7 +602,7 @@ static int _vop_remove_device(struct mic_device_desc __iomem *d, iowrite8(-1, &dc->h2c_vdev_db); if (status & VIRTIO_CONFIG_S_DRIVER_OK) wait_for_completion(&vdev->reset_done); - put_device(&vdev->vdev.dev); + put_device(dev); iowrite8(1, &dc->guest_ack); dev_dbg(&vpdev->dev, "%s %d guest_ack %d\n", __func__, __LINE__, ioread8(&dc->guest_ack)); From 4bf13fdbc3567e571c20b415e4df70f564d54067 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Fri, 1 Feb 2019 09:45:09 +0100 Subject: [PATCH 0485/1436] mic: vop: Fix crash on remove The remove path contains a hack which depends on internal structures in other source files, similar to the one which was recently removed from the registration path. Since commit 1ce9e6055fa0 ("virtio_ring: introduce packed ring support"), this leads to a crash when vop devices are removed. The structure in question is only examined to get the virtual address of the allocated used page. Store that pointer locally instead to fix the crash. Fixes: 1ce9e6055fa0 ("virtio_ring: introduce packed ring support") Signed-off-by: Vincent Whitchurch Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mic/vop/vop_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c index 9ee3fff3446a..744757f541be 100644 --- a/drivers/misc/mic/vop/vop_main.c +++ b/drivers/misc/mic/vop/vop_main.c @@ -47,7 +47,8 @@ * @dc: Virtio device control * @vpdev: VOP device which is the parent for this virtio device * @vr: Buffer for accessing the VRING - * @used: Buffer for used + * @used_virt: Virtual address of used ring + * @used: DMA address of used ring * @used_size: Size of the used buffer * @reset_done: Track whether VOP reset is complete * @virtio_cookie: Cookie returned upon requesting a interrupt @@ -61,6 +62,7 @@ struct _vop_vdev { struct mic_device_ctrl __iomem *dc; struct vop_device *vpdev; void __iomem *vr[VOP_MAX_VRINGS]; + void *used_virt[VOP_MAX_VRINGS]; dma_addr_t used[VOP_MAX_VRINGS]; int used_size[VOP_MAX_VRINGS]; struct completion reset_done; @@ -260,12 +262,12 @@ static bool vop_notify(struct virtqueue *vq) static void vop_del_vq(struct virtqueue *vq, int n) { struct _vop_vdev *vdev = to_vopvdev(vq->vdev); - struct vring *vr = (struct vring *)(vq + 1); struct vop_device *vpdev = vdev->vpdev; dma_unmap_single(&vpdev->dev, vdev->used[n], vdev->used_size[n], DMA_BIDIRECTIONAL); - free_pages((unsigned long)vr->used, get_order(vdev->used_size[n])); + free_pages((unsigned long)vdev->used_virt[n], + get_order(vdev->used_size[n])); vring_del_virtqueue(vq); vpdev->hw_ops->iounmap(vpdev, vdev->vr[n]); vdev->vr[n] = NULL; @@ -355,6 +357,7 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev, le16_to_cpu(config.num)); used = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(vdev->used_size[index])); + vdev->used_virt[index] = used; if (!used) { err = -ENOMEM; dev_err(_vop_dev(vdev), "%s %d err %d\n", From d6d478aee003e19ef90321176552a8ad2929a47f Mon Sep 17 00:00:00 2001 From: John Johansen Date: Thu, 24 Jan 2019 13:53:05 -0800 Subject: [PATCH 0486/1436] apparmor: Fix aa_label_build() error handling for failed merges aa_label_merge() can return NULL for memory allocations failures make sure to handle and set the correct error in this case. Reported-by: Peng Hao Signed-off-by: John Johansen --- security/apparmor/domain.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 08c88de0ffda..11975ec8d566 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -1444,7 +1444,10 @@ check: new = aa_label_merge(label, target, GFP_KERNEL); if (IS_ERR_OR_NULL(new)) { info = "failed to build target label"; - error = PTR_ERR(new); + if (!new) + error = -ENOMEM; + else + error = PTR_ERR(new); new = NULL; perms.allow = 0; goto audit; From c228d294f2040c3a5f5965ff04d4947d0bf6e7da Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 31 Jan 2019 11:10:20 -0800 Subject: [PATCH 0487/1436] x86: explicitly align IO accesses in memcpy_{to,from}io In commit 170d13ca3a2f ("x86: re-introduce non-generic memcpy_{to,from}io") I made our copy from IO space use a separate copy routine rather than rely on the generic memcpy. I did that because our generic memory copy isn't actually well-defined when it comes to internal access ordering or alignment, and will in fact depend on various CPUID flags. In particular, the default memcpy() for a modern Intel CPU will generally be just a "rep movsb", which works reasonably well for medium-sized memory copies of regular RAM, since the CPU will turn it into fairly optimized microcode. However, for non-cached memory and IO, "rep movs" ends up being horrendously slow and will just do the architectural "one byte at a time" accesses implied by the movsb. At the other end of the spectrum, if you _don't_ end up using the "rep movsb" code, you'd likely fall back to the software copy, which does overlapping accesses for the tail, and may copy things backwards. Again, for regular memory that's fine, for IO memory not so much. The thinking was that clearly nobody really cared (because things worked), but some people had seen horrible performance due to the byte accesses, so let's just revert back to our long ago version that dod "rep movsl" for the bulk of the copy, and then fixed up the potentially last few bytes of the tail with "movsw/b". Interestingly (and perhaps not entirely surprisingly), while that was our original memory copy implementation, and had been used before for IO, in the meantime many new users of memcpy_*io() had come about. And while the access patterns for the memory copy weren't well-defined (so arguably _any_ access pattern should work), in practice the "rep movsb" case had been very common for the last several years. In particular Jarkko Sakkinen reported that the memcpy_*io() change resuled in weird errors from his Geminilake NUC TPM module. And it turns out that the TPM TCG accesses according to spec require that the accesses be (a) done strictly sequentially (b) be naturally aligned otherwise the TPM chip will abort the PCI transaction. And, in fact, the tpm_crb.c driver did this: memcpy_fromio(buf, priv->rsp, 6); ... memcpy_fromio(&buf[6], &priv->rsp[6], expected - 6); which really should never have worked in the first place, but back before commit 170d13ca3a2f it *happened* to work, because the memcpy_fromio() would be expanded to a regular memcpy, and (a) gcc would expand the first memcpy in-line, and turn it into a 4-byte and a 2-byte read, and they happened to be in the right order, and the alignment was right. (b) gcc would call "memcpy()" for the second one, and the machines that had this TPM chip also apparently ended up always having ERMS ("Enhanced REP MOVSB/STOSB instructions"), so we'd use the "rep movbs" for that copy. In other words, basically by pure luck, the code happened to use the right access sizes in the (two different!) memcpy() implementations to make it all work. But after commit 170d13ca3a2f, both of the memcpy_fromio() calls resulted in a call to the routine with the consistent memory accesses, and in both cases it started out transferring with 4-byte accesses. Which worked for the first copy, but resulted in the second copy doing a 32-bit read at an address that was only 2-byte aligned. Jarkko is actually fixing the fragile code in the TPM driver, but since this is an excellent example of why we absolutely must not use a generic memcpy for IO accesses, _and_ an IO-specific one really should strive to align the IO accesses, let's do exactly that. Side note: Jarkko also noted that the driver had been used on ARM platforms, and had worked. That was because on 32-bit ARM, memcpy_*io() ends up always doing byte accesses, and on 64-bit ARM it first does byte accesses to align to 8-byte boundaries, and then does 8-byte accesses for the bulk. So ARM actually worked by design, and the x86 case worked by pure luck. We *might* want to make x86-64 do the 8-byte case too. That should be a pretty straightforward extension, but let's do one thing at a time. And generally MMIO accesses aren't really all that performance-critical, as shown by the fact that for a long time we just did them a byte at a time, and very few people ever noticed. Reported-and-tested-by: Jarkko Sakkinen Tested-by: Jerry Snitselaar Cc: David Laight Fixes: 170d13ca3a2f ("x86: re-introduce non-generic memcpy_{to,from}io") Signed-off-by: Linus Torvalds --- arch/x86/lib/iomem.c | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/arch/x86/lib/iomem.c b/arch/x86/lib/iomem.c index 66894675f3c8..df50451d94ef 100644 --- a/arch/x86/lib/iomem.c +++ b/arch/x86/lib/iomem.c @@ -2,8 +2,11 @@ #include #include +#define movs(type,to,from) \ + asm volatile("movs" type:"=&D" (to), "=&S" (from):"0" (to), "1" (from):"memory") + /* Originally from i386/string.h */ -static __always_inline void __iomem_memcpy(void *to, const void *from, size_t n) +static __always_inline void rep_movs(void *to, const void *from, size_t n) { unsigned long d0, d1, d2; asm volatile("rep ; movsl\n\t" @@ -21,13 +24,37 @@ static __always_inline void __iomem_memcpy(void *to, const void *from, size_t n) void memcpy_fromio(void *to, const volatile void __iomem *from, size_t n) { - __iomem_memcpy(to, (const void *)from, n); + if (unlikely(!n)) + return; + + /* Align any unaligned source IO */ + if (unlikely(1 & (unsigned long)from)) { + movs("b", to, from); + n--; + } + if (n > 1 && unlikely(2 & (unsigned long)from)) { + movs("w", to, from); + n-=2; + } + rep_movs(to, (const void *)from, n); } EXPORT_SYMBOL(memcpy_fromio); void memcpy_toio(volatile void __iomem *to, const void *from, size_t n) { - __iomem_memcpy((void *)to, (const void *) from, n); + if (unlikely(!n)) + return; + + /* Align any unaligned destination IO */ + if (unlikely(1 & (unsigned long)to)) { + movs("b", to, from); + n--; + } + if (n > 1 && unlikely(2 & (unsigned long)to)) { + movs("w", to, from); + n-=2; + } + rep_movs((void *)to, (const void *) from, n); } EXPORT_SYMBOL(memcpy_toio); From 2aa958c99c7fd3162b089a1a56a34a0cdb778de1 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Fri, 18 Jan 2019 19:13:08 +0800 Subject: [PATCH 0488/1436] x86/kexec: Don't setup EFI info if EFI runtime is not enabled Kexec-ing a kernel with "efi=noruntime" on the first kernel's command line causes the following null pointer dereference: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] Call Trace: efi_runtime_map_copy+0x28/0x30 bzImage64_load+0x688/0x872 arch_kexec_kernel_image_load+0x6d/0x70 kimage_file_alloc_init+0x13e/0x220 __x64_sys_kexec_file_load+0x144/0x290 do_syscall_64+0x55/0x1a0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Just skip the EFI info setup if EFI runtime services are not enabled. [ bp: Massage commit message. ] Suggested-by: Dave Young Signed-off-by: Kairui Song Signed-off-by: Borislav Petkov Acked-by: Dave Young Cc: AKASHI Takahiro Cc: Andrew Morton Cc: Ard Biesheuvel Cc: bhe@redhat.com Cc: David Howells Cc: erik.schmauss@intel.com Cc: fanc.fnst@cn.fujitsu.com Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: kexec@lists.infradead.org Cc: lenb@kernel.org Cc: linux-acpi@vger.kernel.org Cc: Philipp Rudo Cc: rafael.j.wysocki@intel.com Cc: robert.moore@intel.com Cc: Thomas Gleixner Cc: x86-ml Cc: Yannik Sembritzki Link: https://lkml.kernel.org/r/20190118111310.29589-2-kasong@redhat.com --- arch/x86/kernel/kexec-bzimage64.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 0d5efa34f359..53917a3ebf94 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -167,6 +167,9 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr, struct efi_info *current_ei = &boot_params.efi_info; struct efi_info *ei = ¶ms->efi_info; + if (!efi_enabled(EFI_RUNTIME_SERVICES)) + return 0; + if (!current_ei->efi_memmap_size) return 0; From 294c149a209c6196c2de85f512b52ef50f519949 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 1 Feb 2019 11:28:16 +0300 Subject: [PATCH 0489/1436] skge: potential memory corruption in skge_get_regs() The "p" buffer is 0x4000 bytes long. B3_RI_WTO_R1 is 0x190. The value of "regs->len" is in the 1-0x4000 range. The bug here is that "regs->len - B3_RI_WTO_R1" can be a negative value which would lead to memory corruption and an abrupt crash. Fixes: c3f8be961808 ("[PATCH] skge: expand ethtool debug register dump") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/skge.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 04fd1f135011..654ac534b10e 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -152,8 +152,10 @@ static void skge_get_regs(struct net_device *dev, struct ethtool_regs *regs, memset(p, 0, regs->len); memcpy_fromio(p, io, B3_RAM_ADDR); - memcpy_fromio(p + B3_RI_WTO_R1, io + B3_RI_WTO_R1, - regs->len - B3_RI_WTO_R1); + if (regs->len > B3_RI_WTO_R1) { + memcpy_fromio(p + B3_RI_WTO_R1, io + B3_RI_WTO_R1, + regs->len - B3_RI_WTO_R1); + } } /* Wake on Lan only supported on Yukon chips with rev 1 or above */ From ba59fb0273076637f0add4311faa990a5eec27c0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Feb 2019 15:15:22 +0100 Subject: [PATCH 0490/1436] sctp: walk the list of asoc safely In sctp_sendmesg(), when walking the list of endpoint associations, the association can be dropped from the list, making the list corrupt. Properly handle this by using list_for_each_entry_safe() Fixes: 4910280503f3 ("sctp: add support for snd flag SCTP_SENDALL process in sendmsg") Reported-by: Secunia Research Tested-by: Secunia Research Signed-off-by: Greg Kroah-Hartman Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f93c3cf9e567..65d6d04546ae 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2027,7 +2027,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_transport *transport = NULL; struct sctp_sndrcvinfo _sinfo, *sinfo; - struct sctp_association *asoc; + struct sctp_association *asoc, *tmp; struct sctp_cmsgs cmsgs; union sctp_addr *daddr; bool new = false; @@ -2053,7 +2053,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) /* SCTP_SENDALL process */ if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) { - list_for_each_entry(asoc, &ep->asocs, asocs) { + list_for_each_entry_safe(asoc, tmp, &ep->asocs, asocs) { err = sctp_sendmsg_check_sflags(asoc, sflags, msg, msg_len); if (err == 0) From 1b5ba350784242eb1f899bcffd95d2c7cff61e84 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Mon, 21 Jan 2019 14:42:42 +0100 Subject: [PATCH 0491/1436] ARM: 8824/1: fix a migrating irq bug when hotplug cpu Arm TC2 fails cpu hotplug stress test. This issue was tracked down to a missing copy of the new affinity cpumask for the vexpress-spc interrupt into struct irq_common_data.affinity when the interrupt is migrated in migrate_one_irq(). Fix it by replacing the arm specific hotplug cpu migration with the generic irq code. This is the counterpart implementation to commit 217d453d473c ("arm64: fix a migrating irq bug when hotplug cpu"). Tested with cpu hotplug stress test on Arm TC2 (multi_v7_defconfig plus CONFIG_ARM_BIG_LITTLE_CPUFREQ=y and CONFIG_ARM_VEXPRESS_SPC_CPUFREQ=y). The vexpress-spc interrupt (irq=22) on this board is affine to CPU0. Its affinity cpumask now changes correctly e.g. from 0 to 1-4 when CPU0 is hotplugged out. Suggested-by: Marc Zyngier Signed-off-by: Dietmar Eggemann Acked-by: Marc Zyngier Reviewed-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + arch/arm/include/asm/irq.h | 1 - arch/arm/kernel/irq.c | 62 -------------------------------------- arch/arm/kernel/smp.c | 2 +- 4 files changed, 2 insertions(+), 64 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 664e918e2624..26524b75970a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1400,6 +1400,7 @@ config NR_CPUS config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" depends on SMP + select GENERIC_IRQ_MIGRATION help Say Y here to experiment with turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h index c883fcbe93b6..46d41140df27 100644 --- a/arch/arm/include/asm/irq.h +++ b/arch/arm/include/asm/irq.h @@ -25,7 +25,6 @@ #ifndef __ASSEMBLY__ struct irqaction; struct pt_regs; -extern void migrate_irqs(void); extern void asm_do_IRQ(unsigned int, struct pt_regs *); void handle_IRQ(unsigned int, struct pt_regs *); diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 9908dacf9229..844861368cd5 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -109,64 +108,3 @@ int __init arch_probe_nr_irqs(void) return nr_irqs; } #endif - -#ifdef CONFIG_HOTPLUG_CPU -static bool migrate_one_irq(struct irq_desc *desc) -{ - struct irq_data *d = irq_desc_get_irq_data(desc); - const struct cpumask *affinity = irq_data_get_affinity_mask(d); - struct irq_chip *c; - bool ret = false; - - /* - * If this is a per-CPU interrupt, or the affinity does not - * include this CPU, then we have nothing to do. - */ - if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) - return false; - - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - affinity = cpu_online_mask; - ret = true; - } - - c = irq_data_get_irq_chip(d); - if (!c->irq_set_affinity) - pr_debug("IRQ%u: unable to set affinity\n", d->irq); - else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret) - cpumask_copy(irq_data_get_affinity_mask(d), affinity); - - return ret; -} - -/* - * The current CPU has been marked offline. Migrate IRQs off this CPU. - * If the affinity settings do not allow other CPUs, force them onto any - * available CPU. - * - * Note: we must iterate over all IRQs, whether they have an attached - * action structure or not, as we need to get chained interrupts too. - */ -void migrate_irqs(void) -{ - unsigned int i; - struct irq_desc *desc; - unsigned long flags; - - local_irq_save(flags); - - for_each_irq_desc(i, desc) { - bool affinity_broken; - - raw_spin_lock(&desc->lock); - affinity_broken = migrate_one_irq(desc); - raw_spin_unlock(&desc->lock); - - if (affinity_broken) - pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n", - i, smp_processor_id()); - } - - local_irq_restore(flags); -} -#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 3bf82232b1be..1d6f5ea522f4 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -254,7 +254,7 @@ int __cpu_disable(void) /* * OK - migrate IRQs away from this CPU */ - migrate_irqs(); + irq_migrate_all_off_this_cpu(); /* * Flush user cache and TLB mappings, and then remove this CPU From 7596175e99b3d4bce28022193efd954c201a782a Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Wed, 30 Jan 2019 06:59:00 -0800 Subject: [PATCH 0492/1436] enic: fix checksum validation for IPv6 In case of IPv6 pkts, ipv4_csum_ok is 0. Because of this, driver does not set skb->ip_summed. So IPv6 rx checksum is not offloaded. Signed-off-by: Govindarajulu Varadarajan Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 60641e202534..9a7f70db20c7 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1434,7 +1434,8 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, * csum is correct or is zero. */ if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc && - tcp_udp_csum_ok && ipv4_csum_ok && outer_csum_ok) { + tcp_udp_csum_ok && outer_csum_ok && + (ipv4_csum_ok || ipv6)) { skb->ip_summed = CHECKSUM_UNNECESSARY; skb->csum_level = encap; } From 14d22d4d61e40623a7c5816728bfe55c322e779a Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 30 Jan 2019 18:51:00 +0100 Subject: [PATCH 0493/1436] net/smc: fix another sizeof to int comparison Comparing an int to a size, which is unsigned, causes the int to become unsigned, giving the wrong result. kernel_sendmsg can return a negative error code. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_clc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 776e9dfc915d..d53fd588d1f5 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -378,7 +378,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) vec.iov_len = sizeof(struct smc_clc_msg_decline); len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(struct smc_clc_msg_decline)); - if (len < sizeof(struct smc_clc_msg_decline)) + if (len < 0 || len < sizeof(struct smc_clc_msg_decline)) len = -EPROTO; return len > 0 ? 0 : len; } From ca8dc1334a71d6081b09e18d55198a27e28fd44b Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Wed, 30 Jan 2019 18:51:01 +0100 Subject: [PATCH 0494/1436] net/smc: allow 16 byte pnetids in netlink policy Currently, users can only send pnetids with a maximum length of 15 bytes over the SMC netlink interface although the maximum pnetid length is 16 bytes. This patch changes the SMC netlink policy to accept 16 byte pnetids. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_pnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 7cb3e4f07c10..632c3109dee5 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -27,7 +27,7 @@ static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { [SMC_PNETID_NAME] = { .type = NLA_NUL_STRING, - .len = SMC_MAX_PNETID_LEN - 1 + .len = SMC_MAX_PNETID_LEN }, [SMC_PNETID_ETHNAME] = { .type = NLA_NUL_STRING, From 77f838ace755d2f466536c44dac6c856f62cd901 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 30 Jan 2019 18:51:02 +0100 Subject: [PATCH 0495/1436] net/smc: prevent races between smc_lgr_terminate() and smc_conn_free() To prevent races between smc_lgr_terminate() and smc_conn_free() add an extra check of the lgr field before accessing it, and cancel a delayed free_work when a new smc connection is created. This fixes the problem that free_work cleared the lgr variable but smc_lgr_terminate() or smc_conn_free() still access it in parallel. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 35c1cdc93e1c..097c798983ca 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -128,6 +128,8 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; + if (!lgr) + return; write_lock_bh(&lgr->conns_lock); if (conn->alert_token_local) { __smc_lgr_unregister_conn(conn); @@ -628,6 +630,8 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, local_contact = SMC_REUSE_CONTACT; conn->lgr = lgr; smc_lgr_register_conn(conn); /* add smc conn to lgr */ + if (delayed_work_pending(&lgr->free_work)) + cancel_delayed_work(&lgr->free_work); write_unlock_bh(&lgr->conns_lock); break; } From 6889b36da78a21a312d8b462c1fa25a03c2ff192 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 30 Jan 2019 18:51:03 +0100 Subject: [PATCH 0496/1436] net/smc: don't wait for send buffer space when data was already sent When there is no more send buffer space and at least 1 byte was already sent then return to user space. The wait is only done when no data was sent by the sendmsg() call. This fixes smc_tx_sendmsg() which tried to always send all user data and started to wait for free send buffer space when needed. During this wait the user space program was blocked in the sendmsg() call and hence not able to receive incoming data. When both sides were in such a situation then the connection stalled forever. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_tx.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index d8366ed51757..f99951f3f7fd 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -165,12 +165,11 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) conn->local_tx_ctrl.prod_flags.urg_data_pending = 1; if (!atomic_read(&conn->sndbuf_space) || conn->urg_tx_pend) { + if (send_done) + return send_done; rc = smc_tx_wait(smc, msg->msg_flags); - if (rc) { - if (send_done) - return send_done; + if (rc) goto out_err; - } continue; } From 51c5aba3b672c4285fca052817f34b22dc79dda7 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 30 Jan 2019 18:51:04 +0100 Subject: [PATCH 0497/1436] net/smc: recvmsg and splice_read should return 0 after shutdown When a socket was connected and is now shut down for read, return 0 to indicate end of data in recvmsg and splice_read (like TCP) and do not return ENOTCONN. This behavior is required by the socket api. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index c4e56602e0c6..b04a813fc865 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1505,6 +1505,11 @@ static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, smc = smc_sk(sk); lock_sock(sk); + if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { + /* socket was connected before, no more data to read */ + rc = 0; + goto out; + } if ((sk->sk_state == SMC_INIT) || (sk->sk_state == SMC_LISTEN) || (sk->sk_state == SMC_CLOSED)) @@ -1840,7 +1845,11 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos, smc = smc_sk(sk); lock_sock(sk); - + if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { + /* socket was connected before, no more data to read */ + rc = 0; + goto out; + } if (sk->sk_state == SMC_INIT || sk->sk_state == SMC_LISTEN || sk->sk_state == SMC_CLOSED) From 33f3fcc290671590821ff3c0c9396db1ec9b7d4c Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 30 Jan 2019 18:51:05 +0100 Subject: [PATCH 0498/1436] net/smc: do not wait under send_lock smc_cdc_get_free_slot() might wait for free transfer buffers when using SMC-R. This wait should not be done under the send_lock, which is a spin_lock. This fixes a cpu loop in parallel threads waiting for the send_lock. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_tx.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index f99951f3f7fd..36af3de731b9 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -488,25 +488,23 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) struct smc_wr_buf *wr_buf; int rc; - spin_lock_bh(&conn->send_lock); rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); if (rc < 0) { if (rc == -EBUSY) { struct smc_sock *smc = container_of(conn, struct smc_sock, conn); - if (smc->sk.sk_err == ECONNABORTED) { - rc = sock_error(&smc->sk); - goto out_unlock; - } + if (smc->sk.sk_err == ECONNABORTED) + return sock_error(&smc->sk); rc = 0; if (conn->alert_token_local) /* connection healthy */ mod_delayed_work(system_wq, &conn->tx_work, SMC_TX_WORK_DELAY); } - goto out_unlock; + return rc; } + spin_lock_bh(&conn->send_lock); if (!conn->local_tx_ctrl.prod_flags.urg_data_present) { rc = smc_tx_rdma_writes(conn); if (rc) { From 2dee25af42f99b476d5916aeac5c994e4dcc910b Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 30 Jan 2019 18:51:06 +0100 Subject: [PATCH 0499/1436] net/smc: call smc_cdc_msg_send() under send_lock Call smc_cdc_msg_send() under the connection send_lock to make sure all send operations for one connection are serialized. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index db83332ac1c8..1c5333d494e9 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -125,7 +125,10 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn) if (rc) return rc; - return smc_cdc_msg_send(conn, wr_buf, pend); + spin_lock_bh(&conn->send_lock); + rc = smc_cdc_msg_send(conn, wr_buf, pend); + spin_unlock_bh(&conn->send_lock); + return rc; } int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) From e5f3aa04dbfd92154f21edfeb7a1676a45918928 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 30 Jan 2019 18:51:07 +0100 Subject: [PATCH 0500/1436] net/smc: use device link provided in qp_context The device field of the IB event structure does not always point to the SMC IB device. Load the pointer from the qp_context which is always provided to smc_ib_qp_event_handler() in the priv field. And for qp events the affected port is given in the qp structure of the ibevent, derive it from there. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_ib.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index e519ef29c0ff..76487a16934e 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -289,8 +289,8 @@ int smc_ib_create_protection_domain(struct smc_link *lnk) static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) { - struct smc_ib_device *smcibdev = - (struct smc_ib_device *)ibevent->device; + struct smc_link *lnk = (struct smc_link *)priv; + struct smc_ib_device *smcibdev = lnk->smcibdev; u8 port_idx; switch (ibevent->event) { @@ -298,7 +298,7 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) case IB_EVENT_GID_CHANGE: case IB_EVENT_PORT_ERR: case IB_EVENT_QP_ACCESS_ERR: - port_idx = ibevent->element.port_num - 1; + port_idx = ibevent->element.qp->port - 1; set_bit(port_idx, &smcibdev->port_event_mask); schedule_work(&smcibdev->port_event_work); break; From 46ad02229d6b4ee276eb295ca08f039993f323c8 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 30 Jan 2019 18:51:08 +0100 Subject: [PATCH 0501/1436] net/smc: fix use of variable in cleared area Do not use pend->idx as index for the arrays because its value is located in the cleared area. Use the existing local variable instead. Without this fix the wrong area might be cleared. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_wr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index c2694750a6a8..1dc88c32d6bb 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -218,10 +218,10 @@ int smc_wr_tx_put_slot(struct smc_link *link, u32 idx = pend->idx; /* clear the full struct smc_wr_tx_pend including .priv */ - memset(&link->wr_tx_pends[pend->idx], 0, - sizeof(link->wr_tx_pends[pend->idx])); - memset(&link->wr_tx_bufs[pend->idx], 0, - sizeof(link->wr_tx_bufs[pend->idx])); + memset(&link->wr_tx_pends[idx], 0, + sizeof(link->wr_tx_pends[idx])); + memset(&link->wr_tx_bufs[idx], 0, + sizeof(link->wr_tx_bufs[idx])); test_and_clear_bit(idx, link->wr_tx_mask); return 1; } From 9b1f19d810e92d6cdc68455fbc22d9f961a58ce1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Jan 2019 11:39:41 -0800 Subject: [PATCH 0502/1436] dccp: fool proof ccid_hc_[rt]x_parse_options() Similarly to commit 276bdb82dedb ("dccp: check ccid before dereferencing") it is wise to test for a NULL ccid. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 16 Comm: ksoftirqd/1 Not tainted 5.0.0-rc3+ #37 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:ccid_hc_tx_parse_options net/dccp/ccid.h:205 [inline] RIP: 0010:dccp_parse_options+0x8d9/0x12b0 net/dccp/options.c:233 Code: c5 0f b6 75 b3 80 38 00 0f 85 d6 08 00 00 48 b9 00 00 00 00 00 fc ff df 48 8b 45 b8 4c 8b b8 f8 07 00 00 4c 89 f8 48 c1 e8 03 <80> 3c 08 00 0f 85 95 08 00 00 48 b8 00 00 00 00 00 fc ff df 4d 8b kobject: 'loop5' (0000000080f78fc1): kobject_uevent_env RSP: 0018:ffff8880a94df0b8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8880858ac723 RCX: dffffc0000000000 RDX: 0000000000000100 RSI: 0000000000000007 RDI: 0000000000000001 RBP: ffff8880a94df140 R08: 0000000000000001 R09: ffff888061b83a80 R10: ffffed100c370752 R11: ffff888061b83a97 R12: 0000000000000026 R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8880ae700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f0defa33518 CR3: 000000008db5e000 CR4: 00000000001406e0 kobject: 'loop5' (0000000080f78fc1): fill_kobj_path: path = '/devices/virtual/block/loop5' DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: dccp_rcv_state_process+0x2b6/0x1af6 net/dccp/input.c:654 dccp_v4_do_rcv+0x100/0x190 net/dccp/ipv4.c:688 sk_backlog_rcv include/net/sock.h:936 [inline] __sk_receive_skb+0x3a9/0xea0 net/core/sock.c:473 dccp_v4_rcv+0x10cb/0x1f80 net/dccp/ipv4.c:880 ip_protocol_deliver_rcu+0xb6/0xa20 net/ipv4/ip_input.c:208 ip_local_deliver_finish+0x23b/0x390 net/ipv4/ip_input.c:234 NF_HOOK include/linux/netfilter.h:289 [inline] NF_HOOK include/linux/netfilter.h:283 [inline] ip_local_deliver+0x1f0/0x740 net/ipv4/ip_input.c:255 dst_input include/net/dst.h:450 [inline] ip_rcv_finish+0x1f4/0x2f0 net/ipv4/ip_input.c:414 NF_HOOK include/linux/netfilter.h:289 [inline] NF_HOOK include/linux/netfilter.h:283 [inline] ip_rcv+0xed/0x620 net/ipv4/ip_input.c:524 __netif_receive_skb_one_core+0x160/0x210 net/core/dev.c:4973 __netif_receive_skb+0x2c/0x1c0 net/core/dev.c:5083 process_backlog+0x206/0x750 net/core/dev.c:5923 napi_poll net/core/dev.c:6346 [inline] net_rx_action+0x76d/0x1930 net/core/dev.c:6412 __do_softirq+0x30b/0xb11 kernel/softirq.c:292 run_ksoftirqd kernel/softirq.c:654 [inline] run_ksoftirqd+0x8e/0x110 kernel/softirq.c:646 smpboot_thread_fn+0x6ab/0xa10 kernel/smpboot.c:164 kthread+0x357/0x430 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 Modules linked in: ---[ end trace 58a0ba03bea2c376 ]--- RIP: 0010:ccid_hc_tx_parse_options net/dccp/ccid.h:205 [inline] RIP: 0010:dccp_parse_options+0x8d9/0x12b0 net/dccp/options.c:233 Code: c5 0f b6 75 b3 80 38 00 0f 85 d6 08 00 00 48 b9 00 00 00 00 00 fc ff df 48 8b 45 b8 4c 8b b8 f8 07 00 00 4c 89 f8 48 c1 e8 03 <80> 3c 08 00 0f 85 95 08 00 00 48 b8 00 00 00 00 00 fc ff df 4d 8b RSP: 0018:ffff8880a94df0b8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8880858ac723 RCX: dffffc0000000000 RDX: 0000000000000100 RSI: 0000000000000007 RDI: 0000000000000001 RBP: ffff8880a94df140 R08: 0000000000000001 R09: ffff888061b83a80 R10: ffffed100c370752 R11: ffff888061b83a97 R12: 0000000000000026 R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8880ae700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f0defa33518 CR3: 0000000009871000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Gerrit Renker Signed-off-by: David S. Miller --- net/dccp/ccid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 6eb837a47b5c..baaaeb2b2c42 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -202,7 +202,7 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk, static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len) { - if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL) + if (!ccid || !ccid->ccid_ops->ccid_hc_tx_parse_options) return 0; return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len); } @@ -214,7 +214,7 @@ static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len) { - if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL) + if (!ccid || !ccid->ccid_ops->ccid_hc_rx_parse_options) return 0; return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len); } From 42caa0edabd6a0a392ec36a5f0943924e4954311 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 30 Jan 2019 16:42:12 -0800 Subject: [PATCH 0503/1436] scsi: aic94xx: fix module loading The aic94xx driver is currently failing to load with errors like sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:03.0/0000:02:00.3/0000:07:02.0/revision' Because the PCI code had recently added a file named 'revision' to every PCI device. Fix this by renaming the aic94xx revision file to aic_revision. This is safe to do for us because as far as I can tell, there's nothing in userspace relying on the current aic94xx revision file so it can be renamed without breaking anything. Fixes: 702ed3be1b1b (PCI: Create revision file in sysfs) Cc: stable@vger.kernel.org Signed-off-by: James Bottomley Signed-off-by: Martin K. Petersen --- drivers/scsi/aic94xx/aic94xx_init.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c index f83f79b07b50..07efcb9b5b94 100644 --- a/drivers/scsi/aic94xx/aic94xx_init.c +++ b/drivers/scsi/aic94xx/aic94xx_init.c @@ -280,7 +280,7 @@ static ssize_t asd_show_dev_rev(struct device *dev, return snprintf(buf, PAGE_SIZE, "%s\n", asd_dev_rev[asd_ha->revision_id]); } -static DEVICE_ATTR(revision, S_IRUGO, asd_show_dev_rev, NULL); +static DEVICE_ATTR(aic_revision, S_IRUGO, asd_show_dev_rev, NULL); static ssize_t asd_show_dev_bios_build(struct device *dev, struct device_attribute *attr,char *buf) @@ -477,7 +477,7 @@ static int asd_create_dev_attrs(struct asd_ha_struct *asd_ha) { int err; - err = device_create_file(&asd_ha->pcidev->dev, &dev_attr_revision); + err = device_create_file(&asd_ha->pcidev->dev, &dev_attr_aic_revision); if (err) return err; @@ -499,13 +499,13 @@ err_update_bios: err_biosb: device_remove_file(&asd_ha->pcidev->dev, &dev_attr_bios_build); err_rev: - device_remove_file(&asd_ha->pcidev->dev, &dev_attr_revision); + device_remove_file(&asd_ha->pcidev->dev, &dev_attr_aic_revision); return err; } static void asd_remove_dev_attrs(struct asd_ha_struct *asd_ha) { - device_remove_file(&asd_ha->pcidev->dev, &dev_attr_revision); + device_remove_file(&asd_ha->pcidev->dev, &dev_attr_aic_revision); device_remove_file(&asd_ha->pcidev->dev, &dev_attr_bios_build); device_remove_file(&asd_ha->pcidev->dev, &dev_attr_pcba_sn); device_remove_file(&asd_ha->pcidev->dev, &dev_attr_update_bios); From 1723058eab19de741b80ac8ad25bfe2a00e02987 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Fri, 1 Feb 2019 14:19:57 -0800 Subject: [PATCH 0504/1436] mm, memory_hotplug: don't bail out in do_migrate_range() prematurely do_migrate_range() takes a memory range and tries to isolate the pages to put them into a list. This list will be later on used in migrate_pages() to know the pages we need to migrate. Currently, if we fail to isolate a single page, we put all already isolated pages back to their LRU and we bail out from the function. This is quite suboptimal, as this will force us to start over again because scan_movable_pages will give us the same range. If there is no chance that we can isolate that page, we will loop here forever. Issue debugged in [1] has proved that. During the debugging of that issue, it was noticed that if do_migrate_ranges() fails to isolate a single page, we will just discard the work we have done so far and bail out, which means that scan_movable_pages() will find again the same set of pages. Instead, we can just skip the error, keep isolating as much pages as possible and then proceed with the call to migrate_pages(). This will allow us to do as much work as possible at once. [1] https://lkml.org/lkml/2018/12/6/324 Michal said: : I still think that this doesn't give us a whole picture. Looping for : ever is a bug. Failing the isolation is quite possible and it should : be a ephemeral condition (e.g. a race with freeing the page or : somebody else isolating the page for whatever reason). And here comes : the disadvantage of the current implementation. We simply throw : everything on the floor just because of a ephemeral condition. The : racy page_count check is quite dubious to prevent from that. Link: http://lkml.kernel.org/r/20181211135312.27034-1-osalvador@suse.de Signed-off-by: Oscar Salvador Acked-by: Michal Hocko Cc: David Hildenbrand Cc: Dan Williams Cc: Jan Kara Cc: Kirill A. Shutemov Cc: William Kucharski Cc: Pavel Tatashin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b9a667d36c55..d7b7d221c284 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1344,7 +1344,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) { unsigned long pfn; struct page *page; - int not_managed = 0; int ret = 0; LIST_HEAD(source); @@ -1392,7 +1391,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) else ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE); if (!ret) { /* Success */ - put_page(page); list_add_tail(&page->lru, &source); if (!__PageMovable(page)) inc_node_page_state(page, NR_ISOLATED_ANON + @@ -1401,22 +1399,10 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) } else { pr_warn("failed to isolate pfn %lx\n", pfn); dump_page(page, "isolation failed"); - put_page(page); - /* Because we don't have big zone->lock. we should - check this again here. */ - if (page_count(page)) { - not_managed++; - ret = -EBUSY; - break; - } } + put_page(page); } if (!list_empty(&source)) { - if (not_managed) { - putback_movable_pages(&source); - goto out; - } - /* Allocate a new page from the nearest neighbor node */ ret = migrate_pages(&source, new_node_page, NULL, 0, MIGRATE_SYNC, MR_MEMORY_HOTPLUG); @@ -1429,7 +1415,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) putback_movable_pages(&source); } } -out: + return ret; } From 1fde6f21d90f8ba5da3cb9c54ca991ed72696c43 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 1 Feb 2019 14:20:01 -0800 Subject: [PATCH 0505/1436] proc: fix /proc/net/* after setns(2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit /proc entries under /proc/net/* can't be cached into dcache because setns(2) can change current net namespace. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: avoid vim miscolorization] [adobriyan@gmail.com: write test, add dummy ->d_revalidate hook: necessary if /proc/net/* is pinned at setns time] Link: http://lkml.kernel.org/r/20190108192350.GA12034@avx2 Link: http://lkml.kernel.org/r/20190107162336.GA9239@avx2 Fixes: 1da4d377f943fe4194ffb9fb9c26cc58fad4dd24 ("proc: revalidate misc dentries") Signed-off-by: Alexey Dobriyan Reported-by: Mateusz Stępień Reported-by: Ahmad Fatoum Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 4 +- fs/proc/internal.h | 1 + fs/proc/proc_net.c | 20 +++ tools/testing/selftests/proc/.gitignore | 1 + tools/testing/selftests/proc/Makefile | 1 + tools/testing/selftests/proc/setns-dcache.c | 129 ++++++++++++++++++++ 6 files changed, 155 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/proc/setns-dcache.c diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 8ae109429a88..e39bac94dead 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -256,7 +256,7 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry, inode = proc_get_inode(dir->i_sb, de); if (!inode) return ERR_PTR(-ENOMEM); - d_set_d_op(dentry, &proc_misc_dentry_ops); + d_set_d_op(dentry, de->proc_dops); return d_splice_alias(inode, dentry); } read_unlock(&proc_subdir_lock); @@ -429,6 +429,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, INIT_LIST_HEAD(&ent->pde_openers); proc_set_user(ent, (*parent)->uid, (*parent)->gid); + ent->proc_dops = &proc_misc_dentry_ops; + out: return ent; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 5185d7f6a51e..95b14196f284 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -44,6 +44,7 @@ struct proc_dir_entry { struct completion *pde_unload_completion; const struct inode_operations *proc_iops; const struct file_operations *proc_fops; + const struct dentry_operations *proc_dops; union { const struct seq_operations *seq_ops; int (*single_show)(struct seq_file *, void *); diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index d5e0fcb3439e..a7b12435519e 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -38,6 +38,22 @@ static struct net *get_proc_net(const struct inode *inode) return maybe_get_net(PDE_NET(PDE(inode))); } +static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + return 0; +} + +static const struct dentry_operations proc_net_dentry_ops = { + .d_revalidate = proc_net_d_revalidate, + .d_delete = always_delete_dentry, +}; + +static void pde_force_lookup(struct proc_dir_entry *pde) +{ + /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */ + pde->proc_dops = &proc_net_dentry_ops; +} + static int seq_open_net(struct inode *inode, struct file *file) { unsigned int state_size = PDE(inode)->state_size; @@ -90,6 +106,7 @@ struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode, p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; + pde_force_lookup(p); p->proc_fops = &proc_net_seq_fops; p->seq_ops = ops; p->state_size = state_size; @@ -133,6 +150,7 @@ struct proc_dir_entry *proc_create_net_data_write(const char *name, umode_t mode p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; + pde_force_lookup(p); p->proc_fops = &proc_net_seq_fops; p->seq_ops = ops; p->state_size = state_size; @@ -181,6 +199,7 @@ struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode, p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; + pde_force_lookup(p); p->proc_fops = &proc_net_single_fops; p->single_show = show; return proc_register(parent, p); @@ -223,6 +242,7 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; + pde_force_lookup(p); p->proc_fops = &proc_net_single_fops; p->single_show = show; p->write = write; diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index 82121a81681f..29bac5ef9a93 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -10,4 +10,5 @@ /proc-uptime-002 /read /self +/setns-dcache /thread-self diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index 1c12c34cf85d..434d033ee067 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -14,6 +14,7 @@ TEST_GEN_PROGS += proc-uptime-001 TEST_GEN_PROGS += proc-uptime-002 TEST_GEN_PROGS += read TEST_GEN_PROGS += self +TEST_GEN_PROGS += setns-dcache TEST_GEN_PROGS += thread-self include ../lib.mk diff --git a/tools/testing/selftests/proc/setns-dcache.c b/tools/testing/selftests/proc/setns-dcache.c new file mode 100644 index 000000000000..60ab197a73fc --- /dev/null +++ b/tools/testing/selftests/proc/setns-dcache.c @@ -0,0 +1,129 @@ +/* + * Copyright © 2019 Alexey Dobriyan + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Test that setns(CLONE_NEWNET) points to new /proc/net content even + * if old one is in dcache. + * + * FIXME /proc/net/unix is under CONFIG_UNIX which can be disabled. + */ +#undef NDEBUG +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static pid_t pid = -1; + +static void f(void) +{ + if (pid > 0) { + kill(pid, SIGTERM); + } +} + +int main(void) +{ + int fd[2]; + char _ = 0; + int nsfd; + + atexit(f); + + /* Check for priviledges and syscall availability straight away. */ + if (unshare(CLONE_NEWNET) == -1) { + if (errno == ENOSYS || errno == EPERM) { + return 4; + } + return 1; + } + /* Distinguisher between two otherwise empty net namespaces. */ + if (socket(AF_UNIX, SOCK_STREAM, 0) == -1) { + return 1; + } + + if (pipe(fd) == -1) { + return 1; + } + + pid = fork(); + if (pid == -1) { + return 1; + } + + if (pid == 0) { + if (unshare(CLONE_NEWNET) == -1) { + return 1; + } + + if (write(fd[1], &_, 1) != 1) { + return 1; + } + + pause(); + + return 0; + } + + if (read(fd[0], &_, 1) != 1) { + return 1; + } + + { + char buf[64]; + snprintf(buf, sizeof(buf), "/proc/%u/ns/net", pid); + nsfd = open(buf, O_RDONLY); + if (nsfd == -1) { + return 1; + } + } + + /* Reliably pin dentry into dcache. */ + (void)open("/proc/net/unix", O_RDONLY); + + if (setns(nsfd, CLONE_NEWNET) == -1) { + return 1; + } + + kill(pid, SIGTERM); + pid = 0; + + { + char buf[4096]; + ssize_t rv; + int fd; + + fd = open("/proc/net/unix", O_RDONLY); + if (fd == -1) { + return 1; + } + +#define S "Num RefCount Protocol Flags Type St Inode Path\n" + rv = read(fd, buf, sizeof(buf)); + + assert(rv == strlen(S)); + assert(memcmp(buf, S, strlen(S)) == 0); + } + + return 0; +} From 36c0f7f0f89984bb21e6d0f92d776faf7be73096 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 1 Feb 2019 14:20:12 -0800 Subject: [PATCH 0506/1436] arch: unexport asm/shmparam.h for all architectures Most architectures do not export shmparam.h to user-space. $ find arch -name shmparam.h | sort arch/alpha/include/asm/shmparam.h arch/arc/include/asm/shmparam.h arch/arm64/include/asm/shmparam.h arch/arm/include/asm/shmparam.h arch/csky/include/asm/shmparam.h arch/ia64/include/asm/shmparam.h arch/mips/include/asm/shmparam.h arch/nds32/include/asm/shmparam.h arch/nios2/include/asm/shmparam.h arch/parisc/include/asm/shmparam.h arch/powerpc/include/asm/shmparam.h arch/s390/include/asm/shmparam.h arch/sh/include/asm/shmparam.h arch/sparc/include/asm/shmparam.h arch/x86/include/asm/shmparam.h arch/xtensa/include/asm/shmparam.h Strangely, some users of the asm-generic wrapper export shmparam.h $ git grep 'generic-y += shmparam.h' arch/c6x/include/uapi/asm/Kbuild:generic-y += shmparam.h arch/h8300/include/uapi/asm/Kbuild:generic-y += shmparam.h arch/hexagon/include/uapi/asm/Kbuild:generic-y += shmparam.h arch/m68k/include/uapi/asm/Kbuild:generic-y += shmparam.h arch/microblaze/include/uapi/asm/Kbuild:generic-y += shmparam.h arch/openrisc/include/uapi/asm/Kbuild:generic-y += shmparam.h arch/riscv/include/asm/Kbuild:generic-y += shmparam.h arch/unicore32/include/uapi/asm/Kbuild:generic-y += shmparam.h The newly added riscv correctly creates the asm-generic wrapper in the kernel space, but the others (c6x, h8300, hexagon, m68k, microblaze, openrisc, unicore32) create the one in the uapi directory. Digging into the git history, now I guess fcc8487d477a ("uapi: export all headers under uapi directories") was the misconversion. Prior to that commit, no architecture exported to shmparam.h As its commit description said, that commit exported shmparam.h for c6x, h8300, hexagon, m68k, openrisc, unicore32. 83f0124ad81e ("microblaze: remove asm-generic wrapper headers") accidentally exported shmparam.h for microblaze. This commit unexports shmparam.h for those architectures. There is no more reason to export include/uapi/asm-generic/shmparam.h, so it has been moved to include/asm-generic/shmparam.h Link: http://lkml.kernel.org/r/1546904307-11124-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Acked-by: Stafford Horne Cc: Geert Uytterhoeven Cc: Michal Simek Cc: Yoshinori Sato Cc: Richard Kuo Cc: Guan Xuetao Cc: Nicolas Dichtel Cc: Arnd Bergmann Cc: Aurelien Jacquiot Cc: Greentime Hu Cc: Guo Ren Cc: Palmer Dabbelt Cc: Stefan Kristiansson Cc: Mark Salter Cc: Albert Ou Cc: Jonas Bonn Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/c6x/include/asm/Kbuild | 1 + arch/c6x/include/uapi/asm/Kbuild | 1 - arch/h8300/include/asm/Kbuild | 1 + arch/h8300/include/uapi/asm/Kbuild | 1 - arch/hexagon/include/asm/Kbuild | 1 + arch/hexagon/include/uapi/asm/Kbuild | 1 - arch/m68k/include/asm/Kbuild | 1 + arch/m68k/include/uapi/asm/Kbuild | 1 - arch/microblaze/include/asm/Kbuild | 1 + arch/microblaze/include/uapi/asm/Kbuild | 1 - arch/openrisc/include/asm/Kbuild | 1 + arch/openrisc/include/uapi/asm/Kbuild | 1 - arch/unicore32/include/asm/Kbuild | 1 + arch/unicore32/include/uapi/asm/Kbuild | 1 - 14 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 33a2c94fed0d..63b4a1705182 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -30,6 +30,7 @@ generic-y += pgalloc.h generic-y += preempt.h generic-y += segment.h generic-y += serial.h +generic-y += shmparam.h generic-y += tlbflush.h generic-y += topology.h generic-y += trace_clock.h diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild index 6c6f6301012e..0febf1a07c30 100644 --- a/arch/c6x/include/uapi/asm/Kbuild +++ b/arch/c6x/include/uapi/asm/Kbuild @@ -1,5 +1,4 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += kvm_para.h -generic-y += shmparam.h generic-y += ucontext.h diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index cd400d353d18..961c1dc064e1 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -40,6 +40,7 @@ generic-y += preempt.h generic-y += scatterlist.h generic-y += sections.h generic-y += serial.h +generic-y += shmparam.h generic-y += sizes.h generic-y += spinlock.h generic-y += timex.h diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild index 6c6f6301012e..0febf1a07c30 100644 --- a/arch/h8300/include/uapi/asm/Kbuild +++ b/arch/h8300/include/uapi/asm/Kbuild @@ -1,5 +1,4 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += kvm_para.h -generic-y += shmparam.h generic-y += ucontext.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 47c4da3d64a4..b25fd42aa0f4 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -30,6 +30,7 @@ generic-y += rwsem.h generic-y += sections.h generic-y += segment.h generic-y += serial.h +generic-y += shmparam.h generic-y += sizes.h generic-y += topology.h generic-y += trace_clock.h diff --git a/arch/hexagon/include/uapi/asm/Kbuild b/arch/hexagon/include/uapi/asm/Kbuild index 61d955c1747a..c1b06dcf6cf8 100644 --- a/arch/hexagon/include/uapi/asm/Kbuild +++ b/arch/hexagon/include/uapi/asm/Kbuild @@ -1,4 +1,3 @@ include include/uapi/asm-generic/Kbuild.asm -generic-y += shmparam.h generic-y += ucontext.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 9f1dd26903e3..95f8f631c4df 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -20,6 +20,7 @@ generic-y += mm-arch-hooks.h generic-y += percpu.h generic-y += preempt.h generic-y += sections.h +generic-y += shmparam.h generic-y += spinlock.h generic-y += topology.h generic-y += trace_clock.h diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild index b8b3525271fa..960bf1e4be53 100644 --- a/arch/m68k/include/uapi/asm/Kbuild +++ b/arch/m68k/include/uapi/asm/Kbuild @@ -2,4 +2,3 @@ include include/uapi/asm-generic/Kbuild.asm generated-y += unistd_32.h generic-y += kvm_para.h -generic-y += shmparam.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 9c7d1d25bf3d..791cc8d54d0a 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -26,6 +26,7 @@ generic-y += parport.h generic-y += percpu.h generic-y += preempt.h generic-y += serial.h +generic-y += shmparam.h generic-y += syscalls.h generic-y += topology.h generic-y += trace_clock.h diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild index 28823e3db825..97823ec46e97 100644 --- a/arch/microblaze/include/uapi/asm/Kbuild +++ b/arch/microblaze/include/uapi/asm/Kbuild @@ -2,5 +2,4 @@ include include/uapi/asm-generic/Kbuild.asm generated-y += unistd_32.h generic-y += kvm_para.h -generic-y += shmparam.h generic-y += ucontext.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index eb87cd8327c8..1f04844b6b82 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -34,6 +34,7 @@ generic-y += qrwlock_types.h generic-y += qrwlock.h generic-y += sections.h generic-y += segment.h +generic-y += shmparam.h generic-y += string.h generic-y += switch_to.h generic-y += topology.h diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild index 6c6f6301012e..0febf1a07c30 100644 --- a/arch/openrisc/include/uapi/asm/Kbuild +++ b/arch/openrisc/include/uapi/asm/Kbuild @@ -1,5 +1,4 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += kvm_para.h -generic-y += shmparam.h generic-y += ucontext.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index 1372553dc0a9..1d1544b6ca74 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -28,6 +28,7 @@ generic-y += preempt.h generic-y += sections.h generic-y += segment.h generic-y += serial.h +generic-y += shmparam.h generic-y += sizes.h generic-y += syscalls.h generic-y += topology.h diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild index 6c6f6301012e..0febf1a07c30 100644 --- a/arch/unicore32/include/uapi/asm/Kbuild +++ b/arch/unicore32/include/uapi/asm/Kbuild @@ -1,5 +1,4 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += kvm_para.h -generic-y += shmparam.h generic-y += ucontext.h From 1ac25013fb9e4ed595cd608a406191e93520881e Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 1 Feb 2019 14:20:16 -0800 Subject: [PATCH 0507/1436] mm/hugetlb.c: teach follow_hugetlb_page() to handle FOLL_NOWAIT hugetlb needs the same fix as faultin_nopage (which was applied in commit 96312e61282a ("mm/gup.c: teach get_user_pages_unlocked to handle FOLL_NOWAIT")) or KVM hangs because it thinks the mmap_sem was already released by hugetlb_fault() if it returned VM_FAULT_RETRY, but it wasn't in the FOLL_NOWAIT case. Link: http://lkml.kernel.org/r/20190109020203.26669-2-aarcange@redhat.com Fixes: ce53053ce378 ("kvm: switch get_user_page_nowait() to get_user_pages_unlocked()") Signed-off-by: Andrea Arcangeli Tested-by: "Dr. David Alan Gilbert" Reported-by: "Dr. David Alan Gilbert" Reviewed-by: Mike Kravetz Reviewed-by: Peter Xu Cc: Mike Rapoport Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index df2e7dd5ff17..afef61656c1e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4268,7 +4268,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, break; } if (ret & VM_FAULT_RETRY) { - if (nonblocking) + if (nonblocking && + !(fault_flags & FAULT_FLAG_RETRY_NOWAIT)) *nonblocking = 0; *nr_pages = 0; /* From a8e911d13540487942d53137c156bd7707f66e5d Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 1 Feb 2019 14:20:20 -0800 Subject: [PATCH 0508/1436] x86_64: increase stack size for KASAN_EXTRA If the kernel is configured with KASAN_EXTRA, the stack size is increasted significantly because this option sets "-fstack-reuse" to "none" in GCC [1]. As a result, it triggers stack overrun quite often with 32k stack size compiled using GCC 8. For example, this reproducer https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/madvise/madvise06.c triggers a "corrupted stack end detected inside scheduler" very reliably with CONFIG_SCHED_STACK_END_CHECK enabled. There are just too many functions that could have a large stack with KASAN_EXTRA due to large local variables that have been called over and over again without being able to reuse the stacks. Some noticiable ones are size 7648 shrink_page_list 3584 xfs_rmap_convert 3312 migrate_page_move_mapping 3312 dev_ethtool 3200 migrate_misplaced_transhuge_page 3168 copy_process There are other 49 functions are over 2k in size while compiling kernel with "-Wframe-larger-than=" even with a related minimal config on this machine. Hence, it is too much work to change Makefiles for each object to compile without "-fsanitize-address-use-after-scope" individually. [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715#c23 Although there is a patch in GCC 9 to help the situation, GCC 9 probably won't be released in a few months and then it probably take another 6-month to 1-year for all major distros to include it as a default. Hence, the stack usage with KASAN_EXTRA can be revisited again in 2020 when GCC 9 is everywhere. Until then, this patch will help users avoid stack overrun. This has already been fixed for arm64 for the same reason via 6e8830674ea ("arm64: kasan: Increase stack size for KASAN_EXTRA"). Link: http://lkml.kernel.org/r/20190109215209.2903-1-cai@lca.pw Signed-off-by: Qian Cai Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/page_64_types.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 8f657286d599..0ce558a8150d 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -7,7 +7,11 @@ #endif #ifdef CONFIG_KASAN +#ifdef CONFIG_KASAN_EXTRA +#define KASAN_STACK_ORDER 2 +#else #define KASAN_STACK_ORDER 1 +#endif #else #define KASAN_STACK_ORDER 0 #endif From 8fb335e078378c8426fabeed1ebee1fbf915690c Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 1 Feb 2019 14:20:24 -0800 Subject: [PATCH 0509/1436] kernel/exit.c: release ptraced tasks before zap_pid_ns_processes Currently, exit_ptrace() adds all ptraced tasks in a dead list, then zap_pid_ns_processes() waits on all tasks in a current pidns, and only then are tasks from the dead list released. zap_pid_ns_processes() can get stuck on waiting tasks from the dead list. In this case, we will have one unkillable process with one or more dead children. Thanks to Oleg for the advice to release tasks in find_child_reaper(). Link: http://lkml.kernel.org/r/20190110175200.12442-1-avagin@gmail.com Fixes: 7c8bd2322c7f ("exit: ptrace: shift "reap dead" code from exit_ptrace() to forget_original_parent()") Signed-off-by: Andrei Vagin Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index 3fb7be001964..2639a30a8aa5 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -558,12 +558,14 @@ static struct task_struct *find_alive_thread(struct task_struct *p) return NULL; } -static struct task_struct *find_child_reaper(struct task_struct *father) +static struct task_struct *find_child_reaper(struct task_struct *father, + struct list_head *dead) __releases(&tasklist_lock) __acquires(&tasklist_lock) { struct pid_namespace *pid_ns = task_active_pid_ns(father); struct task_struct *reaper = pid_ns->child_reaper; + struct task_struct *p, *n; if (likely(reaper != father)) return reaper; @@ -579,6 +581,12 @@ static struct task_struct *find_child_reaper(struct task_struct *father) panic("Attempted to kill init! exitcode=0x%08x\n", father->signal->group_exit_code ?: father->exit_code); } + + list_for_each_entry_safe(p, n, dead, ptrace_entry) { + list_del_init(&p->ptrace_entry); + release_task(p); + } + zap_pid_ns_processes(pid_ns); write_lock_irq(&tasklist_lock); @@ -668,7 +676,7 @@ static void forget_original_parent(struct task_struct *father, exit_ptrace(father, dead); /* Can drop and reacquire tasklist_lock */ - reaper = find_child_reaper(father); + reaper = find_child_reaper(father, dead); if (list_empty(&father->children)) return; From 80409c65e2c6cd1540045ee01fc55e50d95e0983 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 1 Feb 2019 14:20:27 -0800 Subject: [PATCH 0510/1436] mm: migrate: make buffer_migrate_page_norefs() actually succeed Currently, buffer_migrate_page_norefs() was constantly failing because buffer_migrate_lock_buffers() grabbed reference on each buffer. In fact, there's no reason for buffer_migrate_lock_buffers() to grab any buffer references as the page is locked during all our operation and thus nobody can reclaim buffers from the page. So remove grabbing of buffer references which also makes buffer_migrate_page_norefs() succeed. Link: http://lkml.kernel.org/r/20190116131217.7226-1-jack@suse.cz Fixes: 89cb0888ca14 "mm: migrate: provide buffer_migrate_page_norefs()" Signed-off-by: Jan Kara Cc: Sergey Senozhatsky Cc: Pavel Machek Cc: Mel Gorman Cc: Vlastimil Babka Cc: Andrea Arcangeli Cc: David Rientjes Cc: Michal Hocko Cc: Zi Yan Cc: Johannes Weiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index a16b15090df3..712b231a7376 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -709,7 +709,6 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head, /* Simple case, sync compaction */ if (mode != MIGRATE_ASYNC) { do { - get_bh(bh); lock_buffer(bh); bh = bh->b_this_page; @@ -720,18 +719,15 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head, /* async case, we cannot block on lock_buffer so use trylock_buffer */ do { - get_bh(bh); if (!trylock_buffer(bh)) { /* * We failed to lock the buffer and cannot stall in * async migration. Release the taken locks */ struct buffer_head *failed_bh = bh; - put_bh(failed_bh); bh = head; while (bh != failed_bh) { unlock_buffer(bh); - put_bh(bh); bh = bh->b_this_page; } return false; @@ -818,7 +814,6 @@ unlock_buffers: bh = head; do { unlock_buffer(bh); - put_bh(bh); bh = bh->b_this_page; } while (bh != head); From 9bcdeb51bd7d2ae9fe65ea4d60643d2aeef5bfe3 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 1 Feb 2019 14:20:31 -0800 Subject: [PATCH 0511/1436] oom, oom_reaper: do not enqueue same task twice Arkadiusz reported that enabling memcg's group oom killing causes strange memcg statistics where there is no task in a memcg despite the number of tasks in that memcg is not 0. It turned out that there is a bug in wake_oom_reaper() which allows enqueuing same task twice which makes impossible to decrease the number of tasks in that memcg due to a refcount leak. This bug existed since the OOM reaper became invokable from task_will_free_mem(current) path in out_of_memory() in Linux 4.7, T1@P1 |T2@P1 |T3@P1 |OOM reaper ----------+----------+----------+------------ # Processing an OOM victim in a different memcg domain. try_charge() mem_cgroup_out_of_memory() mutex_lock(&oom_lock) try_charge() mem_cgroup_out_of_memory() mutex_lock(&oom_lock) try_charge() mem_cgroup_out_of_memory() mutex_lock(&oom_lock) out_of_memory() oom_kill_process(P1) do_send_sig_info(SIGKILL, @P1) mark_oom_victim(T1@P1) wake_oom_reaper(T1@P1) # T1@P1 is enqueued. mutex_unlock(&oom_lock) out_of_memory() mark_oom_victim(T2@P1) wake_oom_reaper(T2@P1) # T2@P1 is enqueued. mutex_unlock(&oom_lock) out_of_memory() mark_oom_victim(T1@P1) wake_oom_reaper(T1@P1) # T1@P1 is enqueued again due to oom_reaper_list == T2@P1 && T1@P1->oom_reaper_list == NULL. mutex_unlock(&oom_lock) # Completed processing an OOM victim in a different memcg domain. spin_lock(&oom_reaper_lock) # T1P1 is dequeued. spin_unlock(&oom_reaper_lock) but memcg's group oom killing made it easier to trigger this bug by calling wake_oom_reaper() on the same task from one out_of_memory() request. Fix this bug using an approach used by commit 855b018325737f76 ("oom, oom_reaper: disable oom_reaper for oom_kill_allocating_task"). As a side effect of this patch, this patch also avoids enqueuing multiple threads sharing memory via task_will_free_mem(current) path. Link: http://lkml.kernel.org/r/e865a044-2c10-9858-f4ef-254bc71d6cc2@i-love.sakura.ne.jp Link: http://lkml.kernel.org/r/5ee34fc6-1485-34f8-8790-903ddabaa809@i-love.sakura.ne.jp Fixes: af8e15cc85a25315 ("oom, oom_reaper: do not enqueue task if it is on the oom_reaper_list head") Signed-off-by: Tetsuo Handa Reported-by: Arkadiusz Miskiewicz Tested-by: Arkadiusz Miskiewicz Acked-by: Michal Hocko Acked-by: Roman Gushchin Cc: Tejun Heo Cc: Aleksa Sarai Cc: Jay Kamat Cc: Johannes Weiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/coredump.h | 1 + mm/oom_kill.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index ec912d01126f..ecdc6542070f 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -71,6 +71,7 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ #define MMF_OOM_VICTIM 25 /* mm is the oom victim */ +#define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ diff --git a/mm/oom_kill.c b/mm/oom_kill.c index f0e8cd9edb1a..059e617a1847 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -647,8 +647,8 @@ static int oom_reaper(void *unused) static void wake_oom_reaper(struct task_struct *tsk) { - /* tsk is already queued? */ - if (tsk == oom_reaper_list || tsk->oom_reaper_list) + /* mm is already queued? */ + if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags)) return; get_task_struct(tsk); From efad4e475c312456edb3c789d0996d12ed744c13 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 1 Feb 2019 14:20:34 -0800 Subject: [PATCH 0512/1436] mm, memory_hotplug: is_mem_section_removable do not pass the end of a zone Patch series "mm, memory_hotplug: fix uninitialized pages fallouts", v2. Mikhail Zaslonko has posted fixes for the two bugs quite some time ago [1]. I have pushed back on those fixes because I believed that it is much better to plug the problem at the initialization time rather than play whack-a-mole all over the hotplug code and find all the places which expect the full memory section to be initialized. We have ended up with commit 2830bf6f05fb ("mm, memory_hotplug: initialize struct pages for the full memory section") merged and cause a regression [2][3]. The reason is that there might be memory layouts when two NUMA nodes share the same memory section so the merged fix is simply incorrect. In order to plug this hole we really have to be zone range aware in those handlers. I have split up the original patch into two. One is unchanged (patch 2) and I took a different approach for `removable' crash. [1] http://lkml.kernel.org/r/20181105150401.97287-2-zaslonko@linux.ibm.com [2] https://bugzilla.redhat.com/show_bug.cgi?id=1666948 [3] http://lkml.kernel.org/r/20190125163938.GA20411@dhcp22.suse.cz This patch (of 2): Mikhail has reported the following VM_BUG_ON triggered when reading sysfs removable state of a memory block: page:000003d08300c000 is uninitialized and poisoned page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) Call Trace: is_mem_section_removable+0xb4/0x190 show_mem_removable+0x9a/0xd8 dev_attr_show+0x34/0x70 sysfs_kf_seq_show+0xc8/0x148 seq_read+0x204/0x480 __vfs_read+0x32/0x178 vfs_read+0x82/0x138 ksys_read+0x5a/0xb0 system_call+0xdc/0x2d8 Last Breaking-Event-Address: is_mem_section_removable+0xb4/0x190 Kernel panic - not syncing: Fatal exception: panic_on_oops The reason is that the memory block spans the zone boundary and we are stumbling over an unitialized struct page. Fix this by enforcing zone range in is_mem_section_removable so that we never run away from a zone. Link: http://lkml.kernel.org/r/20190128144506.15603-2-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Mikhail Zaslonko Debugged-by: Mikhail Zaslonko Tested-by: Gerald Schaefer Tested-by: Mikhail Gavrilov Reviewed-by: Oscar Salvador Cc: Pavel Tatashin Cc: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index d7b7d221c284..91e6fef4cf9f 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1233,7 +1233,8 @@ static bool is_pageblock_removable_nolock(struct page *page) bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) { struct page *page = pfn_to_page(start_pfn); - struct page *end_page = page + nr_pages; + unsigned long end_pfn = min(start_pfn + nr_pages, zone_end_pfn(page_zone(page))); + struct page *end_page = pfn_to_page(end_pfn); /* Check the starting page of each pageblock within the range */ for (; page < end_page; page = next_active_pageblock(page)) { From 24feb47c5fa5b825efb0151f28906dfdad027e61 Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Fri, 1 Feb 2019 14:20:38 -0800 Subject: [PATCH 0513/1436] mm, memory_hotplug: test_pages_in_a_zone do not pass the end of zone If memory end is not aligned with the sparse memory section boundary, the mapping of such a section is only partly initialized. This may lead to VM_BUG_ON due to uninitialized struct pages access from test_pages_in_a_zone() function triggered by memory_hotplug sysfs handlers. Here are the the panic examples: CONFIG_DEBUG_VM_PGFLAGS=y kernel parameter mem=2050M -------------------------- page:000003d082008000 is uninitialized and poisoned page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) Call Trace: test_pages_in_a_zone+0xde/0x160 show_valid_zones+0x5c/0x190 dev_attr_show+0x34/0x70 sysfs_kf_seq_show+0xc8/0x148 seq_read+0x204/0x480 __vfs_read+0x32/0x178 vfs_read+0x82/0x138 ksys_read+0x5a/0xb0 system_call+0xdc/0x2d8 Last Breaking-Event-Address: test_pages_in_a_zone+0xde/0x160 Kernel panic - not syncing: Fatal exception: panic_on_oops Fix this by checking whether the pfn to check is within the zone. [mhocko@suse.com: separated this change from http://lkml.kernel.org/r/20181105150401.97287-2-zaslonko@linux.ibm.com] Link: http://lkml.kernel.org/r/20190128144506.15603-3-mhocko@kernel.org [mhocko@suse.com: separated this change from http://lkml.kernel.org/r/20181105150401.97287-2-zaslonko@linux.ibm.com] Signed-off-by: Michal Hocko Signed-off-by: Mikhail Zaslonko Tested-by: Mikhail Gavrilov Reviewed-by: Oscar Salvador Tested-by: Gerald Schaefer Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Mikhail Gavrilov Cc: Pavel Tatashin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 91e6fef4cf9f..ecc5ee04e301 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1274,6 +1274,9 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, i++; if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn) continue; + /* Check if we got outside of the zone */ + if (zone && !zone_spans_pfn(zone, pfn + i)) + return 0; page = pfn_to_page(pfn + i); if (zone && page_zone(page) != zone) return 0; From 1b69ac6b40ebd85eed73e4dbccde2a36961ab990 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 1 Feb 2019 14:20:42 -0800 Subject: [PATCH 0514/1436] psi: fix aggregation idle shut-off psi has provisions to shut off the periodic aggregation worker when there is a period of no task activity - and thus no data that needs aggregating. However, while developing psi monitoring, Suren noticed that the aggregation clock currently won't stay shut off for good. Debugging this revealed a flaw in the idle design: an aggregation run will see no task activity and decide to go to sleep; shortly thereafter, the kworker thread that executed the aggregation will go idle and cause a scheduling change, during which the psi callback will kick the !pending worker again. This will ping-pong forever, and is equivalent to having no shut-off logic at all (but with more code!) Fix this by exempting aggregation workers from psi's clock waking logic when the state change is them going to sleep. To do this, tag workers with the last work function they executed, and if in psi we see a worker going to sleep after aggregating psi data, we will not reschedule the aggregation work item. What if the worker is also executing other items before or after? Any psi state times that were incurred by work items preceding the aggregation work will have been collected from the per-cpu buckets during the aggregation itself. If there are work items following the aggregation work, the worker's last_func tag will be overwritten and the aggregator will be kept alive to process this genuine new activity. If the aggregation work is the last thing the worker does, and we decide to go idle, the brief period of non-idle time incurred between the aggregation run and the kworker's dequeue will be stranded in the per-cpu buckets until the clock is woken by later activity. But that should not be a problem. The buckets can hold 4s worth of time, and future activity will wake the clock with a 2s delay, giving us 2s worth of data we can leave behind when disabling aggregation. If it takes a worker more than two seconds to go idle after it finishes its last work item, we likely have bigger problems in the system, and won't notice one sample that was averaged with a bogus per-CPU weight. Link: http://lkml.kernel.org/r/20190116193501.1910-1-hannes@cmpxchg.org Fixes: eb414681d5a0 ("psi: pressure stall information for CPU, memory, and IO") Signed-off-by: Johannes Weiner Reported-by: Suren Baghdasaryan Acked-by: Tejun Heo Cc: Peter Zijlstra Cc: Lai Jiangshan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched/psi.c | 21 +++++++++++++++++---- kernel/workqueue.c | 23 +++++++++++++++++++++++ kernel/workqueue_internal.h | 6 +++++- 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index fe24de3fbc93..c3484785b179 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -124,6 +124,7 @@ * sampling of the aggregate task states would be. */ +#include "../workqueue_internal.h" #include #include #include @@ -480,9 +481,6 @@ static void psi_group_change(struct psi_group *group, int cpu, groupc->tasks[t]++; write_seqcount_end(&groupc->seq); - - if (!delayed_work_pending(&group->clock_work)) - schedule_delayed_work(&group->clock_work, PSI_FREQ); } static struct psi_group *iterate_groups(struct task_struct *task, void **iter) @@ -513,6 +511,7 @@ void psi_task_change(struct task_struct *task, int clear, int set) { int cpu = task_cpu(task); struct psi_group *group; + bool wake_clock = true; void *iter = NULL; if (!task->pid) @@ -530,8 +529,22 @@ void psi_task_change(struct task_struct *task, int clear, int set) task->psi_flags &= ~clear; task->psi_flags |= set; - while ((group = iterate_groups(task, &iter))) + /* + * Periodic aggregation shuts off if there is a period of no + * task changes, so we wake it back up if necessary. However, + * don't do this if the task change is the aggregation worker + * itself going to sleep, or we'll ping-pong forever. + */ + if (unlikely((clear & TSK_RUNNING) && + (task->flags & PF_WQ_WORKER) && + wq_worker_last_func(task) == psi_update_work)) + wake_clock = false; + + while ((group = iterate_groups(task, &iter))) { psi_group_change(group, cpu, clear, set); + if (wake_clock && !delayed_work_pending(&group->clock_work)) + schedule_delayed_work(&group->clock_work, PSI_FREQ); + } } void psi_memstall_tick(struct task_struct *task, int cpu) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 392be4b252f6..fc5d23d752a5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -909,6 +909,26 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task) return to_wakeup ? to_wakeup->task : NULL; } +/** + * wq_worker_last_func - retrieve worker's last work function + * + * Determine the last function a worker executed. This is called from + * the scheduler to get a worker's last known identity. + * + * CONTEXT: + * spin_lock_irq(rq->lock) + * + * Return: + * The last work function %current executed as a worker, NULL if it + * hasn't executed any work yet. + */ +work_func_t wq_worker_last_func(struct task_struct *task) +{ + struct worker *worker = kthread_data(task); + + return worker->last_func; +} + /** * worker_set_flags - set worker flags and adjust nr_running accordingly * @worker: self @@ -2184,6 +2204,9 @@ __acquires(&pool->lock) if (unlikely(cpu_intensive)) worker_clr_flags(worker, WORKER_CPU_INTENSIVE); + /* tag the worker for identification in schedule() */ + worker->last_func = worker->current_func; + /* we're done with it, release */ hash_del(&worker->hentry); worker->current_work = NULL; diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index 66fbb5a9e633..cb68b03ca89a 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -53,6 +53,9 @@ struct worker { /* used only by rescuers to point to the target workqueue */ struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */ + + /* used by the scheduler to determine a worker's last known identity */ + work_func_t last_func; }; /** @@ -67,9 +70,10 @@ static inline struct worker *current_wq_worker(void) /* * Scheduler hooks for concurrency managed workqueue. Only to be used from - * sched/core.c and workqueue.c. + * sched/ and workqueue.c. */ void wq_worker_waking_up(struct task_struct *task, int cpu); struct task_struct *wq_worker_sleeping(struct task_struct *task); +work_func_t wq_worker_last_func(struct task_struct *task); #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */ From eeb0efd071d821a88da3fbd35f2d478f40d3b2ea Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Fri, 1 Feb 2019 14:20:47 -0800 Subject: [PATCH 0515/1436] mm,memory_hotplug: fix scan_movable_pages() for gigantic hugepages This is the same sort of error we saw in commit 17e2e7d7e1b8 ("mm, page_alloc: fix has_unmovable_pages for HugePages"). Gigantic hugepages cross several memblocks, so it can be that the page we get in scan_movable_pages() is a page-tail belonging to a 1G-hugepage. If that happens, page_hstate()->size_to_hstate() will return NULL, and we will blow up in hugepage_migration_supported(). The splat is as follows: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 #PF error: [normal kernel read fault] PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 1 PID: 1350 Comm: bash Tainted: G E 5.0.0-rc1-mm1-1-default+ #27 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 RIP: 0010:__offline_pages+0x6ae/0x900 Call Trace: memory_subsys_offline+0x42/0x60 device_offline+0x80/0xa0 state_store+0xab/0xc0 kernfs_fop_write+0x102/0x180 __vfs_write+0x26/0x190 vfs_write+0xad/0x1b0 ksys_write+0x42/0x90 do_syscall_64+0x5b/0x180 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Modules linked in: af_packet(E) xt_tcpudp(E) ipt_REJECT(E) xt_conntrack(E) nf_conntrack(E) nf_defrag_ipv4(E) ip_set(E) nfnetlink(E) ebtable_nat(E) ebtable_broute(E) bridge(E) stp(E) llc(E) iptable_mangle(E) iptable_raw(E) iptable_security(E) ebtable_filter(E) ebtables(E) iptable_filter(E) ip_tables(E) x_tables(E) kvm_intel(E) kvm(E) irqbypass(E) crct10dif_pclmul(E) crc32_pclmul(E) ghash_clmulni_intel(E) bochs_drm(E) ttm(E) aesni_intel(E) drm_kms_helper(E) aes_x86_64(E) crypto_simd(E) cryptd(E) glue_helper(E) drm(E) virtio_net(E) syscopyarea(E) sysfillrect(E) net_failover(E) sysimgblt(E) pcspkr(E) failover(E) i2c_piix4(E) fb_sys_fops(E) parport_pc(E) parport(E) button(E) btrfs(E) libcrc32c(E) xor(E) zstd_decompress(E) zstd_compress(E) xxhash(E) raid6_pq(E) sd_mod(E) ata_generic(E) ata_piix(E) ahci(E) libahci(E) libata(E) crc32c_intel(E) serio_raw(E) virtio_pci(E) virtio_ring(E) virtio(E) sg(E) scsi_mod(E) autofs4(E) [akpm@linux-foundation.org: fix brace layout, per David. Reduce indentation] Link: http://lkml.kernel.org/r/20190122154407.18417-1-osalvador@suse.de Signed-off-by: Oscar Salvador Reviewed-by: Anthony Yznaga Acked-by: Michal Hocko Reviewed-by: David Hildenbrand Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index ecc5ee04e301..a0130633a6e1 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1305,23 +1305,27 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, static unsigned long scan_movable_pages(unsigned long start, unsigned long end) { unsigned long pfn; - struct page *page; + for (pfn = start; pfn < end; pfn++) { - if (pfn_valid(pfn)) { - page = pfn_to_page(pfn); - if (PageLRU(page)) - return pfn; - if (__PageMovable(page)) - return pfn; - if (PageHuge(page)) { - if (hugepage_migration_supported(page_hstate(page)) && - page_huge_active(page)) - return pfn; - else - pfn = round_up(pfn + 1, - 1 << compound_order(page)) - 1; - } - } + struct page *page, *head; + unsigned long skip; + + if (!pfn_valid(pfn)) + continue; + page = pfn_to_page(pfn); + if (PageLRU(page)) + return pfn; + if (__PageMovable(page)) + return pfn; + + if (!PageHuge(page)) + continue; + head = compound_head(page); + if (hugepage_migration_supported(page_hstate(head)) && + page_huge_active(head)) + return pfn; + skip = (1 << compound_order(head)) - (page - head); + pfn += skip - 1; } return 0; } From b13bc35193d9e7a8c050a24928ca5c9e7c9a009b Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 1 Feb 2019 14:20:51 -0800 Subject: [PATCH 0516/1436] mm/hotplug: invalid PFNs from pfn_to_online_page() On an arm64 ThunderX2 server, the first kmemleak scan would crash [1] with CONFIG_DEBUG_VM_PGFLAGS=y due to page_to_nid() found a pfn that is not directly mapped (MEMBLOCK_NOMAP). Hence, the page->flags is uninitialized. This is due to the commit 9f1eb38e0e11 ("mm, kmemleak: little optimization while scanning") starts to use pfn_to_online_page() instead of pfn_valid(). However, in the CONFIG_MEMORY_HOTPLUG=y case, pfn_to_online_page() does not call memblock_is_map_memory() while pfn_valid() does. Historically, the commit 68709f45385a ("arm64: only consider memblocks with NOMAP cleared for linear mapping") causes pages marked as nomap being no long reassigned to the new zone in memmap_init_zone() by calling __init_single_page(). Since the commit 2d070eab2e82 ("mm: consider zone which is not fully populated to have holes") introduced pfn_to_online_page() and was designed to return a valid pfn only, but it is clearly broken on arm64. Therefore, let pfn_to_online_page() call pfn_valid_within(), so it can handle nomap thanks to the commit f52bb98f5ade ("arm64: mm: always enable CONFIG_HOLES_IN_ZONE"), while it will be optimized away on architectures where have no HOLES_IN_ZONE. [1] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000006 Mem abort info: ESR = 0x96000005 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000005 CM = 0, WnR = 0 Internal error: Oops: 96000005 [#1] SMP CPU: 60 PID: 1408 Comm: kmemleak Not tainted 5.0.0-rc2+ #8 pstate: 60400009 (nZCv daif +PAN -UAO) pc : page_mapping+0x24/0x144 lr : __dump_page+0x34/0x3dc sp : ffff00003a5cfd10 x29: ffff00003a5cfd10 x28: 000000000000802f x27: 0000000000000000 x26: 0000000000277d00 x25: ffff000010791f56 x24: ffff7fe000000000 x23: ffff000010772f8b x22: ffff00001125f670 x21: ffff000011311000 x20: ffff000010772f8b x19: fffffffffffffffe x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: ffff802698b19600 x13: ffff802698b1a200 x12: ffff802698b16f00 x11: ffff802698b1a400 x10: 0000000000001400 x9 : 0000000000000001 x8 : ffff00001121a000 x7 : 0000000000000000 x6 : ffff0000102c53b8 x5 : 0000000000000000 x4 : 0000000000000003 x3 : 0000000000000100 x2 : 0000000000000000 x1 : ffff000010772f8b x0 : ffffffffffffffff Process kmemleak (pid: 1408, stack limit = 0x(____ptrval____)) Call trace: page_mapping+0x24/0x144 __dump_page+0x34/0x3dc dump_page+0x28/0x4c kmemleak_scan+0x4ac/0x680 kmemleak_scan_thread+0xb4/0xdc kthread+0x12c/0x13c ret_from_fork+0x10/0x18 Code: d503201f f9400660 36000040 d1000413 (f9400661) ---[ end trace 4d4bd7f573490c8e ]--- Kernel panic - not syncing: Fatal exception SMP: stopping secondary CPUs Kernel Offset: disabled CPU features: 0x002,20000c38 Memory Limit: none ---[ end Kernel panic - not syncing: Fatal exception ]--- Link: http://lkml.kernel.org/r/20190122132916.28360-1-cai@lca.pw Fixes: 9f1eb38e0e11 ("mm, kmemleak: little optimization while scanning") Signed-off-by: Qian Cai Acked-by: Michal Hocko Cc: Oscar Salvador Cc: Catalin Marinas Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 07da5c6c5ba0..368267c1b71b 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -21,14 +21,16 @@ struct vmem_altmap; * walkers which rely on the fully initialized page->flags and others * should use this rather than pfn_valid && pfn_to_page */ -#define pfn_to_online_page(pfn) \ -({ \ - struct page *___page = NULL; \ - unsigned long ___nr = pfn_to_section_nr(pfn); \ - \ - if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr))\ - ___page = pfn_to_page(pfn); \ - ___page; \ +#define pfn_to_online_page(pfn) \ +({ \ + struct page *___page = NULL; \ + unsigned long ___pfn = pfn; \ + unsigned long ___nr = pfn_to_section_nr(___pfn); \ + \ + if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr) && \ + pfn_valid_within(___pfn)) \ + ___page = pfn_to_page(___pfn); \ + ___page; \ }) /* From cefc7ef3c87d02fc9307835868ff721ea12cc597 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Fri, 1 Feb 2019 14:20:54 -0800 Subject: [PATCH 0517/1436] mm, oom: fix use-after-free in oom_kill_process Syzbot instance running on upstream kernel found a use-after-free bug in oom_kill_process. On further inspection it seems like the process selected to be oom-killed has exited even before reaching read_lock(&tasklist_lock) in oom_kill_process(). More specifically the tsk->usage is 1 which is due to get_task_struct() in oom_evaluate_task() and the put_task_struct within for_each_thread() frees the tsk and for_each_thread() tries to access the tsk. The easiest fix is to do get/put across the for_each_thread() on the selected task. Now the next question is should we continue with the oom-kill as the previously selected task has exited? However before adding more complexity and heuristics, let's answer why we even look at the children of oom-kill selected task? The select_bad_process() has already selected the worst process in the system/memcg. Due to race, the selected process might not be the worst at the kill time but does that matter? The userspace can use the oom_score_adj interface to prefer children to be killed before the parent. I looked at the history but it seems like this is there before git history. Link: http://lkml.kernel.org/r/20190121215850.221745-1-shakeelb@google.com Reported-by: syzbot+7fbbfa368521945f0e3d@syzkaller.appspotmail.com Fixes: 6b0c81b3be11 ("mm, oom: reduce dependency on tasklist_lock") Signed-off-by: Shakeel Butt Reviewed-by: Roman Gushchin Acked-by: Michal Hocko Cc: David Rientjes Cc: Johannes Weiner Cc: Tetsuo Handa Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 059e617a1847..26ea8636758f 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -975,6 +975,13 @@ static void oom_kill_process(struct oom_control *oc, const char *message) * still freeing memory. */ read_lock(&tasklist_lock); + + /* + * The task 'p' might have already exited before reaching here. The + * put_task_struct() will free task_struct 'p' while the loop still try + * to access the field of 'p', so, get an extra reference. + */ + get_task_struct(p); for_each_thread(p, t) { list_for_each_entry(child, &t->children, sibling) { unsigned int child_points; @@ -994,6 +1001,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message) } } } + put_task_struct(p); read_unlock(&tasklist_lock); /* From db7ddeab3ce5d64c9696e70d61f45ea9909cd196 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 1 Feb 2019 14:20:58 -0800 Subject: [PATCH 0518/1436] lib/test_kmod.c: potential double free in error handling There is a copy and paste bug so we set "config->test_driver" to NULL twice instead of setting "config->test_fs". Smatch complains that it leads to a double free: lib/test_kmod.c:840 __kmod_config_init() warn: 'config->test_fs' double freed Link: http://lkml.kernel.org/r/20190121140011.GA14283@kadam Fixes: d9c6a72d6fa2 ("kmod: add test driver to stress test the module loader") Signed-off-by: Dan Carpenter Acked-by: Luis Chamberlain Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_kmod.c b/lib/test_kmod.c index d82d022111e0..9cf77628fc91 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -632,7 +632,7 @@ static void __kmod_config_free(struct test_config *config) config->test_driver = NULL; kfree_const(config->test_fs); - config->test_driver = NULL; + config->test_fs = NULL; } static void kmod_config_free(struct kmod_test_device *test_dev) From 980768338488921b15c2f01d67f0324a48ef8625 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 1 Feb 2019 14:21:01 -0800 Subject: [PATCH 0519/1436] init/Kconfig: fix grammar by moving a closing parenthesis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: http://lkml.kernel.org/r/20190129150813.15785-1-j.neuschaefer@gmx.net Signed-off-by: Jonathan Neuschäfer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 513fa544a134..354146666d97 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -825,7 +825,7 @@ config CGROUP_PIDS PIDs controller is designed to stop this from happening. It should be noted that organisational operations (such as attaching - to a cgroup hierarchy will *not* be blocked by the PIDs controller), + to a cgroup hierarchy) will *not* be blocked by the PIDs controller, since the PIDs limit only affects a process's ability to fork, not to attach to a cgroup. From 0d0c8de8788b6c441ea01365612de7efc20cc682 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 1 Feb 2019 14:21:05 -0800 Subject: [PATCH 0520/1436] kasan: mark file common so ftrace doesn't trace it When option CONFIG_KASAN is enabled toghether with ftrace, function ftrace_graph_caller() gets in to a recursion, via functions kasan_check_read() and kasan_check_write(). Breakpoint 2, ftrace_graph_caller () at ../arch/arm64/kernel/entry-ftrace.S:179 179 mcount_get_pc x0 // function's pc (gdb) bt #0 ftrace_graph_caller () at ../arch/arm64/kernel/entry-ftrace.S:179 #1 0xffffff90101406c8 in ftrace_caller () at ../arch/arm64/kernel/entry-ftrace.S:151 #2 0xffffff90106fd084 in kasan_check_write (p=0xffffffc06c170878, size=4) at ../mm/kasan/common.c:105 #3 0xffffff90104a2464 in atomic_add_return (v=, i=) at ./include/generated/atomic-instrumented.h:71 #4 atomic_inc_return (v=) at ./include/generated/atomic-fallback.h:284 #5 trace_graph_entry (trace=0xffffffc03f5ff380) at ../kernel/trace/trace_functions_graph.c:441 #6 0xffffff9010481774 in trace_graph_entry_watchdog (trace=) at ../kernel/trace/trace_selftest.c:741 #7 0xffffff90104a185c in function_graph_enter (ret=, func=, frame_pointer=18446743799894897728, retp=) at ../kernel/trace/trace_functions_graph.c:196 #8 0xffffff9010140628 in prepare_ftrace_return (self_addr=18446743592948977792, parent=0xffffffc03f5ff418, frame_pointer=18446743799894897728) at ../arch/arm64/kernel/ftrace.c:231 #9 0xffffff90101406f4 in ftrace_graph_caller () at ../arch/arm64/kernel/entry-ftrace.S:182 Backtrace stopped: previous frame identical to this frame (corrupt stack?) (gdb) Rework so that the kasan implementation isn't traced. Link: http://lkml.kernel.org/r/20181212183447.15890-1-anders.roxell@linaro.org Signed-off-by: Anders Roxell Acked-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Acked-by: Steven Rostedt (VMware) Cc: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile index 0a14fcff70ed..e2bb06c1b45e 100644 --- a/mm/kasan/Makefile +++ b/mm/kasan/Makefile @@ -5,6 +5,7 @@ UBSAN_SANITIZE_generic.o := n UBSAN_SANITIZE_tags.o := n KCOV_INSTRUMENT := n +CFLAGS_REMOVE_common.o = -pg CFLAGS_REMOVE_generic.o = -pg # Function splitter causes unnecessary splits in __asan_load1/__asan_store1 # see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533 From 6376360ecbe525a9c17b3d081dfd88ba3e4ed65b Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 1 Feb 2019 14:21:08 -0800 Subject: [PATCH 0521/1436] mm: hwpoison: use do_send_sig_info() instead of force_sig() Currently memory_failure() is racy against process's exiting, which results in kernel crash by null pointer dereference. The root cause is that memory_failure() uses force_sig() to forcibly kill asynchronous (meaning not in the current context) processes. As discussed in thread https://lkml.org/lkml/2010/6/8/236 years ago for OOM fixes, this is not a right thing to do. OOM solves this issue by using do_send_sig_info() as done in commit d2d393099de2 ("signal: oom_kill_task: use SEND_SIG_FORCED instead of force_sig()"), so this patch is suggesting to do the same for hwpoison. do_send_sig_info() properly accesses to siglock with lock_task_sighand(), so is free from the reported race. I confirmed that the reported bug reproduces with inserting some delay in kill_procs(), and it never reproduces with this patch. Note that memory_failure() can send another type of signal using force_sig_mceerr(), and the reported race shouldn't happen on it because force_sig_mceerr() is called only for synchronous processes (i.e. BUS_MCEERR_AR happens only when some process accesses to the corrupted memory.) Link: http://lkml.kernel.org/r/20190116093046.GA29835@hori1.linux.bs1.fc.nec.co.jp Signed-off-by: Naoya Horiguchi Reported-by: Jane Chu Reviewed-by: Dan Williams Reviewed-by: William Kucharski Cc: Oleg Nesterov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 7c72f2a95785..831be5ff5f4d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -372,7 +372,8 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail, if (fail || tk->addr_valid == 0) { pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n", pfn, tk->tsk->comm, tk->tsk->pid); - force_sig(SIGKILL, tk->tsk); + do_send_sig_info(SIGKILL, SEND_SIG_PRIV, + tk->tsk, PIDTYPE_PID); } /* From e3df4c6e4836ce93cd5cf92d9cbdeaf4439a0241 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 1 Feb 2019 14:21:12 -0800 Subject: [PATCH 0522/1436] mm, memory_hotplug: __offline_pages fix wrong locking Jan has noticed that we do double unlock on some failure paths when offlining a page range. This is indeed the case when test_pages_in_a_zone respp. start_isolate_page_range fail. This was an omission when forward porting the debugging patch from an older kernel. Fix the issue by dropping mem_hotplug_done from the failure condition and keeping the single unlock in the catch all failure path. Link: http://lkml.kernel.org/r/20190115120307.22768-1-mhocko@kernel.org Fixes: 7960509329c2 ("mm, memory_hotplug: print reason for the offlining failure") Signed-off-by: Michal Hocko Reported-by: Jan Kara Reviewed-by: Jan Kara Tested-by: Jan Kara Reviewed-by: Oscar Salvador Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a0130633a6e1..124e794867c5 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1570,7 +1570,6 @@ static int __ref __offline_pages(unsigned long start_pfn, we assume this for now. .*/ if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end)) { - mem_hotplug_done(); ret = -EINVAL; reason = "multizone range"; goto failed_removal; @@ -1585,7 +1584,6 @@ static int __ref __offline_pages(unsigned long start_pfn, MIGRATE_MOVABLE, SKIP_HWPOISON | REPORT_FAILURE); if (ret) { - mem_hotplug_done(); reason = "failure to isolate range"; goto failed_removal; } From 7b2489d37e1e355228f7c55724f77580e1dec22a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 1 Feb 2019 14:21:15 -0800 Subject: [PATCH 0523/1436] psi: clarify the Kconfig text for the default-disable option The current help text caused some confusion in online forums about whether or not to default-enable or default-disable psi in vendor kernels. This is because it doesn't communicate the reason for why we made this setting configurable in the first place: that the overhead is non-zero in an artificial scheduler stress test. Since this isn't representative of real workloads, and the effect was not measurable in scheduler-heavy real world applications such as the webservers and memcache installations at Facebook, it's fair to point out that this is a pretty cautious option to select. Link: http://lkml.kernel.org/r/20190129233617.16767-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reviewed-by: Andrew Morton Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/Kconfig | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/init/Kconfig b/init/Kconfig index 354146666d97..c9386a365eea 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -512,6 +512,17 @@ config PSI_DEFAULT_DISABLED per default but can be enabled through passing psi=1 on the kernel commandline during boot. + This feature adds some code to the task wakeup and sleep + paths of the scheduler. The overhead is too low to affect + common scheduling-intense workloads in practice (such as + webservers, memcache), but it does show up in artificial + scheduler stress tests, such as hackbench. + + If you are paranoid and not sure what the kernel will be + used for, say Y. + + Say N if unsure. + endmenu # "CPU/Task time and stats accounting" config CPU_ISOLATION From e0a352fabce61f730341d119fbedf71ffdb8663f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 1 Feb 2019 14:21:19 -0800 Subject: [PATCH 0524/1436] mm: migrate: don't rely on __PageMovable() of newpage after unlocking it We had a race in the old balloon compaction code before b1123ea6d3b3 ("mm: balloon: use general non-lru movable page feature") refactored it that became visible after backporting 195a8c43e93d ("virtio-balloon: deflate via a page list") without the refactoring. The bug existed from commit d6d86c0a7f8d ("mm/balloon_compaction: redesign ballooned pages management") till b1123ea6d3b3 ("mm: balloon: use general non-lru movable page feature"). d6d86c0a7f8d ("mm/balloon_compaction: redesign ballooned pages management") was backported to 3.12, so the broken kernels are stable kernels [3.12 - 4.7]. There was a subtle race between dropping the page lock of the newpage in __unmap_and_move() and checking for __is_movable_balloon_page(newpage). Just after dropping this page lock, virtio-balloon could go ahead and deflate the newpage, effectively dequeueing it and clearing PageBalloon, in turn making __is_movable_balloon_page(newpage) fail. This resulted in dropping the reference of the newpage via putback_lru_page(newpage) instead of put_page(newpage), leading to page->lru getting modified and a !LRU page ending up in the LRU lists. With 195a8c43e93d ("virtio-balloon: deflate via a page list") backported, one would suddenly get corrupted lists in release_pages_balloon(): - WARNING: CPU: 13 PID: 6586 at lib/list_debug.c:59 __list_del_entry+0xa1/0xd0 - list_del corruption. prev->next should be ffffe253961090a0, but was dead000000000100 Nowadays this race is no longer possible, but it is hidden behind very ugly handling of __ClearPageMovable() and __PageMovable(). __ClearPageMovable() will not make __PageMovable() fail, only PageMovable(). So the new check (__PageMovable(newpage)) will still hold even after newpage was dequeued by virtio-balloon. If anybody would ever change that special handling, the BUG would be introduced again. So instead, make it explicit and use the information of the original isolated page before migration. This patch can be backported fairly easy to stable kernels (in contrast to the refactoring). Link: http://lkml.kernel.org/r/20190129233217.10747-1-david@redhat.com Fixes: d6d86c0a7f8d ("mm/balloon_compaction: redesign ballooned pages management") Signed-off-by: David Hildenbrand Reported-by: Vratislav Bendel Acked-by: Michal Hocko Acked-by: Rafael Aquini Cc: Mel Gorman Cc: "Kirill A. Shutemov" Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Jan Kara Cc: Andrea Arcangeli Cc: Dominik Brodowski Cc: Matthew Wilcox Cc: Vratislav Bendel Cc: Rafael Aquini Cc: Konstantin Khlebnikov Cc: Minchan Kim Cc: [3.12 - 4.7] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 712b231a7376..d4fd680be3b0 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1130,10 +1130,13 @@ out: * If migration is successful, decrease refcount of the newpage * which will not free the page because new page owner increased * refcounter. As well, if it is LRU page, add the page to LRU - * list in here. + * list in here. Use the old state of the isolated source page to + * determine if we migrated a LRU page. newpage was already unlocked + * and possibly modified by its owner - don't rely on the page + * state. */ if (rc == MIGRATEPAGE_SUCCESS) { - if (unlikely(__PageMovable(newpage))) + if (unlikely(!is_lru)) put_page(newpage); else putback_lru_page(newpage); From c27d82f52f75fc9d8d9d40d120d2a96fdeeada5e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 1 Feb 2019 14:21:23 -0800 Subject: [PATCH 0525/1436] fs/drop_caches.c: avoid softlockups in drop_pagecache_sb() When superblock has lots of inodes without any pagecache (like is the case for /proc), drop_pagecache_sb() will iterate through all of them without dropping sb->s_inode_list_lock which can lead to softlockups (one of our customers hit this). Fix the problem by going to the slow path and doing cond_resched() in case the process needs rescheduling. Link: http://lkml.kernel.org/r/20190114085343.15011-1-jack@suse.cz Signed-off-by: Jan Kara Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/drop_caches.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 82377017130f..d31b6c72b476 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -21,8 +21,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { spin_lock(&inode->i_lock); + /* + * We must skip inodes in unusual state. We may also skip + * inodes without pages but we deliberately won't in case + * we need to reschedule to avoid softlockups. + */ if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || - (inode->i_mapping->nrpages == 0)) { + (inode->i_mapping->nrpages == 0 && !need_resched())) { spin_unlock(&inode->i_lock); continue; } @@ -30,6 +35,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); + cond_resched(); invalidate_mapping_pages(inode->i_mapping, 0, -1); iput(toput_inode); toput_inode = inode; From 63ce5f552beb9bdb41546b3a26c4374758b21815 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Fri, 1 Feb 2019 14:21:26 -0800 Subject: [PATCH 0526/1436] autofs: drop dentry reference only when it is never used autofs_expire_run() calls dput(dentry) to drop the reference count of dentry. However, dentry is read via autofs_dentry_ino(dentry) after that. This may result in a use-free-bug. The patch drops the reference count of dentry only when it is never used. Link: http://lkml.kernel.org/r/154725122396.11260.16053424107144453867.stgit@pluto-themaw-net Signed-off-by: Pan Bian Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs/expire.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c index d441244b79df..28d9c2b1b3bb 100644 --- a/fs/autofs/expire.c +++ b/fs/autofs/expire.c @@ -596,7 +596,6 @@ int autofs_expire_run(struct super_block *sb, pkt.len = dentry->d_name.len; memcpy(pkt.name, dentry->d_name.name, pkt.len); pkt.name[pkt.len] = '\0'; - dput(dentry); if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire))) ret = -EFAULT; @@ -609,6 +608,8 @@ int autofs_expire_run(struct super_block *sb, complete_all(&ino->expire_complete); spin_unlock(&sbi->fs_lock); + dput(dentry); + return ret; } From f585b283e3f025754c45bbe7533fc6e5c4643700 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Fri, 1 Feb 2019 14:21:29 -0800 Subject: [PATCH 0527/1436] autofs: fix error return in autofs_fill_super() In autofs_fill_super() on error of get inode/make root dentry the return should be ENOMEM as this is the only failure case of the called functions. Link: http://lkml.kernel.org/r/154725123240.11260.796773942606871359.stgit@pluto-themaw-net Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 0e8ea2d9a2bb..078992eee299 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -266,8 +266,10 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) } root_inode = autofs_get_inode(s, S_IFDIR | 0755); root = d_make_root(root_inode); - if (!root) + if (!root) { + ret = -ENOMEM; goto fail_ino; + } pipe = NULL; root->d_fsdata = ino; From e6d429313ea5c776d2e76b4494df69102e6b7115 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 29 Jan 2019 17:44:36 -0500 Subject: [PATCH 0528/1436] x86/resctrl: Avoid confusion over the new X86_RESCTRL config "Resource Control" is a very broad term for this CPU feature, and a term that is also associated with containers, cgroups etc. This can easily cause confusion. Make the user prompt more specific. Match the config symbol name. [ bp: In the future, the corresponding ARM arch-specific code will be under ARM_CPU_RESCTRL and the arch-agnostic bits will be carved out under the CPU_RESCTRL umbrella symbol. ] Signed-off-by: Johannes Weiner Signed-off-by: Borislav Petkov Cc: Babu Moger Cc: Fenghua Yu Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: James Morse Cc: Jonathan Corbet Cc: "Kirill A. Shutemov" Cc: linux-doc@vger.kernel.org Cc: Peter Zijlstra Cc: Pu Wen Cc: Reinette Chatre Cc: Thomas Gleixner Cc: Tony Luck Cc: x86-ml Link: https://lkml.kernel.org/r/20190130195621.GA30653@cmpxchg.org --- Documentation/x86/resctrl_ui.txt | 2 +- arch/x86/Kconfig | 6 +++--- arch/x86/include/asm/resctrl_sched.h | 4 ++-- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/resctrl/Makefile | 4 ++-- include/linux/sched.h | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Documentation/x86/resctrl_ui.txt b/Documentation/x86/resctrl_ui.txt index e8e8d14d3c4e..c1f95b59e14d 100644 --- a/Documentation/x86/resctrl_ui.txt +++ b/Documentation/x86/resctrl_ui.txt @@ -9,7 +9,7 @@ Fenghua Yu Tony Luck Vikas Shivappa -This feature is enabled by the CONFIG_X86_RESCTRL and the x86 /proc/cpuinfo +This feature is enabled by the CONFIG_X86_CPU_RESCTRL and the x86 /proc/cpuinfo flag bits: RDT (Resource Director Technology) Allocation - "rdt_a" CAT (Cache Allocation Technology) - "cat_l3", "cat_l2" diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 26387c7bf305..68261430fe6e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -446,12 +446,12 @@ config RETPOLINE branches. Requires a compiler with -mindirect-branch=thunk-extern support for full protection. The kernel may run slower. -config X86_RESCTRL - bool "Resource Control support" +config X86_CPU_RESCTRL + bool "x86 CPU resource control support" depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) select KERNFS help - Enable Resource Control support. + Enable x86 CPU resource control support. Provide support for the allocation and monitoring of system resources usage by the CPU. diff --git a/arch/x86/include/asm/resctrl_sched.h b/arch/x86/include/asm/resctrl_sched.h index 40ebddde6ac2..f6b7fe2833cc 100644 --- a/arch/x86/include/asm/resctrl_sched.h +++ b/arch/x86/include/asm/resctrl_sched.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_RESCTRL_SCHED_H #define _ASM_X86_RESCTRL_SCHED_H -#ifdef CONFIG_X86_RESCTRL +#ifdef CONFIG_X86_CPU_RESCTRL #include #include @@ -88,6 +88,6 @@ static inline void resctrl_sched_in(void) static inline void resctrl_sched_in(void) {} -#endif /* CONFIG_X86_RESCTRL */ +#endif /* CONFIG_X86_CPU_RESCTRL */ #endif /* _ASM_X86_RESCTRL_SCHED_H */ diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index b6fa0869f7aa..cfd24f9f7614 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -39,7 +39,7 @@ obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o obj-$(CONFIG_X86_MCE) += mce/ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_MICROCODE) += microcode/ -obj-$(CONFIG_X86_RESCTRL) += resctrl/ +obj-$(CONFIG_X86_CPU_RESCTRL) += resctrl/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile index 1cabe6fd8e11..4a06c37b9cf1 100644 --- a/arch/x86/kernel/cpu/resctrl/Makefile +++ b/arch/x86/kernel/cpu/resctrl/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_X86_RESCTRL) += core.o rdtgroup.o monitor.o -obj-$(CONFIG_X86_RESCTRL) += ctrlmondata.o pseudo_lock.o +obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o +obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o pseudo_lock.o CFLAGS_pseudo_lock.o = -I$(src) diff --git a/include/linux/sched.h b/include/linux/sched.h index 224666226e87..8c328b14c424 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -995,7 +995,7 @@ struct task_struct { /* cg_list protected by css_set_lock and tsk->alloc_lock: */ struct list_head cg_list; #endif -#ifdef CONFIG_X86_RESCTRL +#ifdef CONFIG_X86_CPU_RESCTRL u32 closid; u32 rmid; #endif From dc3f595b6617ebc0307e0ce151e8f2f2b2489b95 Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Wed, 23 Jan 2019 16:33:47 +0000 Subject: [PATCH 0529/1436] dmaengine: at_xdmac: Fix wrongfull report of a channel as in use atchan->status variable is used to store two different information: - pass channel interrupts status from interrupt handler to tasklet; - channel information like whether it is cyclic or paused; This causes a bug when device_terminate_all() is called, (AT_XDMAC_CHAN_IS_CYCLIC cleared on atchan->status) and then a late End of Block interrupt arrives (AT_XDMAC_CIS_BIS), which sets bit 0 of atchan->status. Bit 0 is also used for AT_XDMAC_CHAN_IS_CYCLIC, so when a new descriptor for a cyclic transfer is created, the driver reports the channel as in use: if (test_and_set_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status)) { dev_err(chan2dev(chan), "channel currently used\n"); return NULL; } This patch fixes the bug by adding a different struct member to keep the interrupts status separated from the channel status bits. Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Signed-off-by: Codrin Ciubotariu Acked-by: Ludovic Desroches Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 4e557684f792..fe69dccfa0c0 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -203,6 +203,7 @@ struct at_xdmac_chan { u32 save_cim; u32 save_cnda; u32 save_cndc; + u32 irq_status; unsigned long status; struct tasklet_struct tasklet; struct dma_slave_config sconfig; @@ -1580,8 +1581,8 @@ static void at_xdmac_tasklet(unsigned long data) struct at_xdmac_desc *desc; u32 error_mask; - dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08lx\n", - __func__, atchan->status); + dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n", + __func__, atchan->irq_status); error_mask = AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS @@ -1589,15 +1590,15 @@ static void at_xdmac_tasklet(unsigned long data) if (at_xdmac_chan_is_cyclic(atchan)) { at_xdmac_handle_cyclic(atchan); - } else if ((atchan->status & AT_XDMAC_CIS_LIS) - || (atchan->status & error_mask)) { + } else if ((atchan->irq_status & AT_XDMAC_CIS_LIS) + || (atchan->irq_status & error_mask)) { struct dma_async_tx_descriptor *txd; - if (atchan->status & AT_XDMAC_CIS_RBEIS) + if (atchan->irq_status & AT_XDMAC_CIS_RBEIS) dev_err(chan2dev(&atchan->chan), "read bus error!!!"); - if (atchan->status & AT_XDMAC_CIS_WBEIS) + if (atchan->irq_status & AT_XDMAC_CIS_WBEIS) dev_err(chan2dev(&atchan->chan), "write bus error!!!"); - if (atchan->status & AT_XDMAC_CIS_ROIS) + if (atchan->irq_status & AT_XDMAC_CIS_ROIS) dev_err(chan2dev(&atchan->chan), "request overflow error!!!"); spin_lock(&atchan->lock); @@ -1652,7 +1653,7 @@ static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id) atchan = &atxdmac->chan[i]; chan_imr = at_xdmac_chan_read(atchan, AT_XDMAC_CIM); chan_status = at_xdmac_chan_read(atchan, AT_XDMAC_CIS); - atchan->status = chan_status & chan_imr; + atchan->irq_status = chan_status & chan_imr; dev_vdbg(atxdmac->dma.dev, "%s: chan%d: imr=0x%x, status=0x%x\n", __func__, i, chan_imr, chan_status); @@ -1666,7 +1667,7 @@ static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id) at_xdmac_chan_read(atchan, AT_XDMAC_CDA), at_xdmac_chan_read(atchan, AT_XDMAC_CUBC)); - if (atchan->status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS)) + if (atchan->irq_status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS)) at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask); tasklet_schedule(&atchan->tasklet); From 74c953ca5f6b4d5f1daa1ef34f4317e15c1a2987 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Sat, 2 Feb 2019 10:50:17 +0100 Subject: [PATCH 0530/1436] efi/arm64: Fix debugfs crash by adding a terminator for ptdump marker When reading 'efi_page_tables' debugfs triggers an out-of-bounds access here: arch/arm64/mm/dump.c: 282 if (addr >= st->marker[1].start_address) { called from: arch/arm64/mm/dump.c: 331 note_page(st, addr, 2, pud_val(pud)); because st->marker++ is is called after "UEFI runtime end" which is the last element in addr_marker[]. Therefore, add a terminator like the one for kernel_page_tables, so it can be skipped to print out non-existent markers. Here's the KASAN bug report: # cat /sys/kernel/debug/efi_page_tables ---[ UEFI runtime start ]--- 0x0000000020000000-0x0000000020010000 64K PTE RW NX SHD AF ... 0x0000000020200000-0x0000000021340000 17664K PTE RW NX SHD AF ... ... 0x0000000021920000-0x0000000021950000 192K PTE RW x SHD AF ... 0x0000000021950000-0x00000000219a0000 320K PTE RW NX SHD AF ... ---[ UEFI runtime end ]--- ---[ (null) ]--- ---[ (null) ]--- BUG: KASAN: global-out-of-bounds in note_page+0x1f0/0xac0 Read of size 8 at addr ffff2000123f2ac0 by task read_all/42464 Call trace: dump_backtrace+0x0/0x298 show_stack+0x24/0x30 dump_stack+0xb0/0xdc print_address_description+0x64/0x2b0 kasan_report+0x150/0x1a4 __asan_report_load8_noabort+0x30/0x3c note_page+0x1f0/0xac0 walk_pgd+0xb4/0x244 ptdump_walk_pgd+0xec/0x140 ptdump_show+0x40/0x50 seq_read+0x3f8/0xad0 full_proxy_read+0x9c/0xc0 __vfs_read+0xfc/0x4c8 vfs_read+0xec/0x208 ksys_read+0xd0/0x15c __arm64_sys_read+0x84/0x94 el0_svc_handler+0x258/0x304 el0_svc+0x8/0xc The buggy address belongs to the variable: __compound_literal.0+0x20/0x800 Memory state around the buggy address: ffff2000123f2980: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff2000123f2a00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 fa >ffff2000123f2a80: fa fa fa fa 00 00 00 00 fa fa fa fa 00 00 00 00 ^ ffff2000123f2b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff2000123f2b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0 [ ardb: fix up whitespace ] [ mingo: fix up some moar ] Signed-off-by: Qian Cai Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 9d80448ac92b ("efi/arm64: Add debugfs node to dump UEFI runtime page tables") Link: http://lkml.kernel.org/r/20190202095017.13799-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/arm-runtime.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 23ea1ed409d1..352bd2473162 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -37,8 +37,9 @@ extern u64 efi_system_table; static struct ptdump_info efi_ptdump_info = { .mm = &efi_mm, .markers = (struct addr_marker[]){ - { 0, "UEFI runtime start" }, - { DEFAULT_MAP_WINDOW_64, "UEFI runtime end" } + { 0, "UEFI runtime start" }, + { DEFAULT_MAP_WINDOW_64, "UEFI runtime end" }, + { -1, NULL } }, .base_addr = 0, }; From 7aea8a9d71d54f449f49e20324df06341cc18395 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 1 Feb 2019 16:49:30 +0900 Subject: [PATCH 0531/1436] ASoC: rsnd: fixup MIX kctrl registration Renesas sound device has many IPs and many situations. If platform/board uses MIXer, situation will be more complex. To avoid duplicate DVC kctrl registration when MIXer was used, it had original flags. But it was issue when sound card was re-binded, because no one can't cleanup this flags then. To solve this issue, commit 9c698e8481a15237a ("ASoC: rsnd: tidyup registering method for rsnd_kctrl_new()") checks registered card->controls, because if card was re-binded, these were cleanuped automatically. This patch could solve re-binding issue. But, it start to avoid MIX kctrl. To solve these issues, we need below. To avoid card re-binding issue: check registered card->controls To avoid duplicate DVC registration: check registered rsnd_kctrl_cfg To allow multiple MIX registration: check registered rsnd_kctrl_cfg This patch do it. Fixes: 9c698e8481a15237a ("ASoC: rsnd: tidyup registering method for rsnd_kctrl_new()") Reported-by: Jiada Wang Signed-off-by: Kuninori Morimoto Tested-By: Jiada Wang Signed-off-by: Mark Brown --- sound/soc/sh/rcar/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 59e250cc2e9d..e819e965e1db 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -1526,14 +1526,14 @@ int rsnd_kctrl_new(struct rsnd_mod *mod, int ret; /* - * 1) Avoid duplicate register (ex. MIXer case) - * 2) re-register if card was rebinded + * 1) Avoid duplicate register for DVC with MIX case + * 2) Allow duplicate register for MIX + * 3) re-register if card was rebinded */ list_for_each_entry(kctrl, &card->controls, list) { struct rsnd_kctrl_cfg *c = kctrl->private_data; - if (strcmp(kctrl->id.name, name) == 0 && - c->mod == mod) + if (c == cfg) return 0; } From 52abe6cc1866ac3d54612f5d80563e6608c0ddfc Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 1 Feb 2019 11:05:13 -0600 Subject: [PATCH 0532/1436] ASoC: topology: fix oops/use-after-free case with dai driver rmmod/modprobe tests expose a kernel oops when accessing the dai driver pointer. This comes from the topology design which operates in multiple passes. Each object removal happens at a specific iteration, and the code checks for the iteration (order) number after the memory containing the order was freed. Fix this be clearing a reference to the dai driver and check its validity to avoid dereferences. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 2 +- sound/soc/soc-topology.c | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index ea16c2b199ce..50617db05c46 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -956,7 +956,7 @@ static void soc_remove_dai(struct snd_soc_dai *dai, int order) { int err; - if (!dai || !dai->probed || + if (!dai || !dai->probed || !dai->driver || dai->driver->remove_order != order) return; diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 045ef136903d..fc79ec6927e3 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -502,6 +502,7 @@ static void remove_dai(struct snd_soc_component *comp, { struct snd_soc_dai_driver *dai_drv = container_of(dobj, struct snd_soc_dai_driver, dobj); + struct snd_soc_dai *dai; if (pass != SOC_TPLG_PASS_PCM_DAI) return; @@ -509,6 +510,10 @@ static void remove_dai(struct snd_soc_component *comp, if (dobj->ops && dobj->ops->dai_unload) dobj->ops->dai_unload(comp, dobj); + list_for_each_entry(dai, &comp->dai_list, list) + if (dai->driver == dai_drv) + dai->driver = NULL; + kfree(dai_drv->name); list_del(&dobj->list); kfree(dai_drv); From c14f07c6211cc01d52ed92cce1fade5071b8d197 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 31 Jan 2019 16:59:46 +0000 Subject: [PATCH 0533/1436] Revert "net: phy: marvell: avoid pause mode on SGMII-to-Copper for 88e151x" This reverts commit 6623c0fba10ef45b64ca213ad5dec926f37fa9a0. The original diagnosis was incorrect: it appears that the NIC had PHY polling mode enabled, which meant that it overwrote the PHYs advertisement register during negotiation. Signed-off-by: Russell King Tested-by: Yonglong Liu Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 2e12f982534f..abb7876a8776 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -847,7 +847,6 @@ static int m88e1510_config_init(struct phy_device *phydev) /* SGMII-to-Copper mode initialization */ if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { - /* Select page 18 */ err = marvell_set_page(phydev, 18); if (err < 0) @@ -870,21 +869,6 @@ static int m88e1510_config_init(struct phy_device *phydev) err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE); if (err < 0) return err; - - /* There appears to be a bug in the 88e1512 when used in - * SGMII to copper mode, where the AN advertisement register - * clears the pause bits each time a negotiation occurs. - * This means we can never be truely sure what was advertised, - * so disable Pause support. - */ - linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydev->supported); - linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phydev->supported); - linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydev->advertising); - linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phydev->advertising); } return m88e1318_config_init(phydev); From dda7a817f2873a0e0b1c7fde1265758f3623daa4 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 22 Jan 2019 08:48:49 +0200 Subject: [PATCH 0534/1436] net/mlx5: Add XRC transport to ODP device capabilities layout The device capabilities for ODP structure was missing the field for XRC transport so add it here. Signed-off-by: Moni Shoua Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 35fe5217b244..5407db8ba8e1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -831,7 +831,9 @@ struct mlx5_ifc_odp_cap_bits { struct mlx5_ifc_odp_per_transport_service_cap_bits ud_odp_caps; - u8 reserved_at_e0[0x720]; + struct mlx5_ifc_odp_per_transport_service_cap_bits xrc_odp_caps; + + u8 reserved_at_100[0x700]; }; struct mlx5_ifc_calc_op { From 46861e3e88be18846971792b763eaf520a91a802 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 22 Jan 2019 08:48:51 +0200 Subject: [PATCH 0535/1436] net/mlx5: Set ODP SRQ support in firmware To avoid compatibility issue with older kernels the firmware doesn't allow SRQ to work with ODP unless kernel asks for it. Signed-off-by: Moni Shoua Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky --- .../net/ethernet/mellanox/mlx5/core/main.c | 53 +++++++++++++++++++ include/linux/mlx5/device.h | 3 ++ include/linux/mlx5/mlx5_ifc.h | 1 + 3 files changed, 57 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 085e1133b8d5..e38aa206ab6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -459,6 +459,53 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) return err; } +static int handle_hca_cap_odp(struct mlx5_core_dev *dev) +{ + void *set_ctx; + void *set_hca_cap; + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + int err; + + if (!MLX5_CAP_GEN(dev, pg)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP); + if (err) + return err; + + /** + * If all bits are cleared we shouldn't try to set it + * or we might fail while trying to access a reserved bit. + */ + if (!(MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive) || + MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive) || + MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive))) + return 0; + + set_ctx = kzalloc(set_sz, GFP_KERNEL); + if (!set_ctx) + return -ENOMEM; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP], + MLX5_ST_SZ_BYTES(odp_cap)); + + /* set ODP SRQ support for RC/UD and XRC transports */ + MLX5_SET(odp_cap, set_hca_cap, ud_odp_caps.srq_receive, + (MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive))); + + MLX5_SET(odp_cap, set_hca_cap, rc_odp_caps.srq_receive, + (MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive))); + + MLX5_SET(odp_cap, set_hca_cap, xrc_odp_caps.srq_receive, + (MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive))); + + err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ODP); + + kfree(set_ctx); + return err; +} + static int handle_hca_cap(struct mlx5_core_dev *dev) { void *set_ctx = NULL; @@ -931,6 +978,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto reclaim_boot_pages; } + err = handle_hca_cap_odp(dev); + if (err) { + dev_err(&pdev->dev, "handle_hca_cap_odp failed\n"); + goto reclaim_boot_pages; + } + err = mlx5_satisfy_startup_pages(dev, 0); if (err) { dev_err(&pdev->dev, "failed to allocate init pages\n"); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8c4a820bd4c1..0845a227a7b2 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1201,6 +1201,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca_cur[MLX5_CAP_ODP], cap) +#define MLX5_CAP_ODP_MAX(mdev, cap)\ + MLX5_GET(odp_cap, mdev->caps.hca_max[MLX5_CAP_ODP], cap) + #define MLX5_CAP_VECTOR_CALC(mdev, cap) \ MLX5_GET(vector_calc_cap, \ mdev->caps.hca_cur[MLX5_CAP_VECTOR_CALC], cap) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5407db8ba8e1..c5c679390fbd 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -72,6 +72,7 @@ enum { enum { MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0, + MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, }; From d28af26faa0b1daf3c692603d46bc4687c16f19e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 31 Jan 2019 16:33:41 -0800 Subject: [PATCH 0536/1436] x86/MCE: Initialize mce.bank in the case of a fatal error in mce_no_way_out() Internal injection testing crashed with a console log that said: mce: [Hardware Error]: CPU 7: Machine Check Exception: f Bank 0: bd80000000100134 This caused a lot of head scratching because the MCACOD (bits 15:0) of that status is a signature from an L1 data cache error. But Linux says that it found it in "Bank 0", which on this model CPU only reports L1 instruction cache errors. The answer was that Linux doesn't initialize "m->bank" in the case that it finds a fatal error in the mce_no_way_out() pre-scan of banks. If this was a local machine check, then this partially initialized struct mce is being passed to mce_panic(). Fix is simple: just initialize m->bank in the case of a fatal error. Fixes: 40c36e2741d7 ("x86/mce: Fix incorrect "Machine check from unknown source" message") Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Vishal Verma Cc: x86-ml Cc: stable@vger.kernel.org # v4.18 Note pre-v5.0 arch/x86/kernel/cpu/mce/core.c was called arch/x86/kernel/cpu/mcheck/mce.c Link: https://lkml.kernel.org/r/20190201003341.10638-1-tony.luck@intel.com --- arch/x86/kernel/cpu/mce/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 672c7225cb1b..6ce290c506d9 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -784,6 +784,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, quirk_no_way_out(i, m, regs); if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { + m->bank = i; mce_read_aux(m, i); *msg = tmp; return 1; From 22b5c0b63f32568e130fa2df4ba23efce3eb495b Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 1 Feb 2019 12:42:06 +0100 Subject: [PATCH 0537/1436] vsock/virtio: fix kernel panic after device hot-unplug virtio_vsock_remove() invokes the vsock_core_exit() also if there are opened sockets for the AF_VSOCK protocol family. In this way the vsock "transport" pointer is set to NULL, triggering the kernel panic at the first socket activity. This patch move the vsock_core_init()/vsock_core_exit() in the virtio_vsock respectively in module_init and module_exit functions, that cannot be invoked until there are open sockets. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1609699 Reported-by: Yan Fu Signed-off-by: Stefano Garzarella Acked-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 5d3cce9e8744..9dae54698737 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -75,6 +75,9 @@ static u32 virtio_transport_get_local_cid(void) { struct virtio_vsock *vsock = virtio_vsock_get(); + if (!vsock) + return VMADDR_CID_ANY; + return vsock->guest_cid; } @@ -584,10 +587,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) virtio_vsock_update_guest_cid(vsock); - ret = vsock_core_init(&virtio_transport.transport); - if (ret < 0) - goto out_vqs; - vsock->rx_buf_nr = 0; vsock->rx_buf_max_nr = 0; atomic_set(&vsock->queued_replies, 0); @@ -618,8 +617,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) mutex_unlock(&the_virtio_vsock_mutex); return 0; -out_vqs: - vsock->vdev->config->del_vqs(vsock->vdev); out: kfree(vsock); mutex_unlock(&the_virtio_vsock_mutex); @@ -669,7 +666,6 @@ static void virtio_vsock_remove(struct virtio_device *vdev) mutex_lock(&the_virtio_vsock_mutex); the_virtio_vsock = NULL; - vsock_core_exit(); mutex_unlock(&the_virtio_vsock_mutex); vdev->config->del_vqs(vdev); @@ -702,14 +698,28 @@ static int __init virtio_vsock_init(void) virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); if (!virtio_vsock_workqueue) return -ENOMEM; + ret = register_virtio_driver(&virtio_vsock_driver); if (ret) - destroy_workqueue(virtio_vsock_workqueue); + goto out_wq; + + ret = vsock_core_init(&virtio_transport.transport); + if (ret) + goto out_vdr; + + return 0; + +out_vdr: + unregister_virtio_driver(&virtio_vsock_driver); +out_wq: + destroy_workqueue(virtio_vsock_workqueue); return ret; + } static void __exit virtio_vsock_exit(void) { + vsock_core_exit(); unregister_virtio_driver(&virtio_vsock_driver); destroy_workqueue(virtio_vsock_workqueue); } From 85965487abc540368393a15491e6e7fcd230039d Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 1 Feb 2019 12:42:07 +0100 Subject: [PATCH 0538/1436] vsock/virtio: reset connected sockets on device removal When the virtio transport device disappear, we should reset all connected sockets in order to inform the users. Signed-off-by: Stefano Garzarella Reviewed-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 9dae54698737..15eb5d3d4750 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -634,6 +634,9 @@ static void virtio_vsock_remove(struct virtio_device *vdev) flush_work(&vsock->event_work); flush_work(&vsock->send_pkt_work); + /* Reset all connected sockets when the device disappear */ + vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vdev->config->reset(vdev); mutex_lock(&vsock->rx_lock); From 8dfb8d2cceb76b74ad5b58cc65c75994329b4d5e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 1 Feb 2019 13:23:38 -0800 Subject: [PATCH 0539/1436] net: systemport: Fix WoL with password after deep sleep Broadcom STB chips support a deep sleep mode where all register contents are lost. Because we were stashing the MagicPacket password into some of these registers a suspend into that deep sleep then a resumption would not lead to being able to wake-up from MagicPacket with password again. Fix this by keeping a software copy of the password and program it during suspend. Fixes: 83e82f4c706b ("net: systemport: add Wake-on-LAN support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 25 +++++++++------------- drivers/net/ethernet/broadcom/bcmsysport.h | 2 ++ 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index f9521d0274b7..28c9b0bdf2f6 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -520,7 +520,6 @@ static void bcm_sysport_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct bcm_sysport_priv *priv = netdev_priv(dev); - u32 reg; wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; wol->wolopts = priv->wolopts; @@ -528,11 +527,7 @@ static void bcm_sysport_get_wol(struct net_device *dev, if (!(priv->wolopts & WAKE_MAGICSECURE)) return; - /* Return the programmed SecureOn password */ - reg = umac_readl(priv, UMAC_PSW_MS); - put_unaligned_be16(reg, &wol->sopass[0]); - reg = umac_readl(priv, UMAC_PSW_LS); - put_unaligned_be32(reg, &wol->sopass[2]); + memcpy(wol->sopass, priv->sopass, sizeof(priv->sopass)); } static int bcm_sysport_set_wol(struct net_device *dev, @@ -548,13 +543,8 @@ static int bcm_sysport_set_wol(struct net_device *dev, if (wol->wolopts & ~supported) return -EINVAL; - /* Program the SecureOn password */ - if (wol->wolopts & WAKE_MAGICSECURE) { - umac_writel(priv, get_unaligned_be16(&wol->sopass[0]), - UMAC_PSW_MS); - umac_writel(priv, get_unaligned_be32(&wol->sopass[2]), - UMAC_PSW_LS); - } + if (wol->wolopts & WAKE_MAGICSECURE) + memcpy(priv->sopass, wol->sopass, sizeof(priv->sopass)); /* Flag the device and relevant IRQ as wakeup capable */ if (wol->wolopts) { @@ -2649,13 +2639,18 @@ static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv) unsigned int index, i = 0; u32 reg; - /* Password has already been programmed */ reg = umac_readl(priv, UMAC_MPD_CTRL); if (priv->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE)) reg |= MPD_EN; reg &= ~PSW_EN; - if (priv->wolopts & WAKE_MAGICSECURE) + if (priv->wolopts & WAKE_MAGICSECURE) { + /* Program the SecureOn password */ + umac_writel(priv, get_unaligned_be16(&priv->sopass[0]), + UMAC_PSW_MS); + umac_writel(priv, get_unaligned_be32(&priv->sopass[2]), + UMAC_PSW_LS); reg |= PSW_EN; + } umac_writel(priv, reg, UMAC_MPD_CTRL); if (priv->wolopts & WAKE_FILTER) { diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 0887e6356649..0b192fea9c5d 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -12,6 +12,7 @@ #define __BCM_SYSPORT_H #include +#include #include #include @@ -778,6 +779,7 @@ struct bcm_sysport_priv { unsigned int crc_fwd:1; u16 rev; u32 wolopts; + u8 sopass[SOPASS_MAX]; unsigned int wol_irq_disabled:1; /* MIB related fields */ From 8c22d81d55353209f8976074ffa9fb1085da0830 Mon Sep 17 00:00:00 2001 From: Siva Rebbagondla Date: Fri, 1 Feb 2019 16:35:20 +0530 Subject: [PATCH 0540/1436] MAINTAINERS: add entry for redpine wireless driver Create an entry for Redpine wireless driver and add Amit and myself as maintainers. Signed-off-by: Siva Rebbagondla Signed-off-by: Kalle Valo --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 32d444476a90..1043125a52b6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12864,6 +12864,13 @@ F: Documentation/devicetree/bindings/net/dsa/realtek-smi.txt F: drivers/net/dsa/realtek-smi* F: drivers/net/dsa/rtl83* +REDPINE WIRELESS DRIVER +M: Amitkumar Karwar +M: Siva Rebbagondla +L: linux-wireless@vger.kernel.org +S: Maintained +F: drivers/net/wireless/rsi/ + REGISTER MAP ABSTRACTION M: Mark Brown L: linux-kernel@vger.kernel.org From 8834f5600cf3c8db365e18a3d5cac2c2780c81e5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 3 Feb 2019 13:48:04 -0800 Subject: [PATCH 0541/1436] Linux 5.0-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 141653226f3c..3142e67d03f1 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Shy Crocodile # *DOCUMENTATION* From aa6ee4ab69293969867ab09b57546d226ace3d7a Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 1 Feb 2019 09:36:36 -0800 Subject: [PATCH 0542/1436] xfs: eof trim writeback mapping as soon as it is cached The cached writeback mapping is EOF trimmed to try and avoid races between post-eof block management and writeback that result in sending cached data to a stale location. The cached mapping is currently trimmed on the validation check, which leaves a race window between the time the mapping is cached and when it is trimmed against the current inode size. For example, if a new mapping is cached by delalloc conversion on a blocksize == page size fs, we could cycle various locks, perform memory allocations, etc. in the writeback codepath before the associated mapping is eventually trimmed to i_size. This leaves enough time for a post-eof truncate and file append before the cached mapping is trimmed. The former event essentially invalidates a range of the cached mapping and the latter bumps the inode size such the trim on the next writepage event won't trim all of the invalid blocks. fstest generic/464 reproduces this scenario occasionally and causes a lost writeback and stale delalloc blocks warning on inode inactivation. To work around this problem, trim the cached writeback mapping as soon as it is cached in addition to on subsequent validation checks. This is a minor tweak to tighten the race window as much as possible until a proper invalidation mechanism is available. Fixes: 40214d128e07 ("xfs: trim writepage mapping to within eof") Cc: # v4.14+ Signed-off-by: Brian Foster Reviewed-by: Allison Henderson Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_aops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 338b9d9984e0..d9048bcea49c 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -449,6 +449,7 @@ xfs_map_blocks( } wpc->imap = imap; + xfs_trim_extent_eof(&wpc->imap, ip); trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap); return 0; allocate_blocks: @@ -459,6 +460,7 @@ allocate_blocks: ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF || imap.br_startoff + imap.br_blockcount <= cow_fsb); wpc->imap = imap; + xfs_trim_extent_eof(&wpc->imap, ip); trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap); return 0; } From 465fa17f4a303d9fdff9eac4d45f91ece92e96ca Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 3 Feb 2019 14:03:06 -0800 Subject: [PATCH 0543/1436] xfs: end sync buffer I/O properly on shutdown error As of commit e339dd8d8b ("xfs: use sync buffer I/O for sync delwri queue submission"), the delwri submission code uses sync buffer I/O for sync delwri I/O. Instead of waiting on async I/O to unlock the buffer, it uses the underlying sync I/O completion mechanism. If delwri buffer submission fails due to a shutdown scenario, an error is set on the buffer and buffer completion never occurs. This can cause xfs_buf_delwri_submit() to deadlock waiting on a completion event. We could check the error state before waiting on such buffers, but that doesn't serialize against the case of an error set via a racing I/O completion. Instead, invoke I/O completion in the shutdown case regardless of buffer I/O type. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_buf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index eedc5e0156ff..1f9857e3630a 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1536,8 +1536,7 @@ __xfs_buf_submit( xfs_buf_ioerror(bp, -EIO); bp->b_flags &= ~XBF_DONE; xfs_buf_stale(bp); - if (bp->b_flags & XBF_ASYNC) - xfs_buf_ioend(bp); + xfs_buf_ioend(bp); return -EIO; } From add46b3b021263c02d5a7080c58e5b459479fafd Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 3 Feb 2019 14:03:59 -0800 Subject: [PATCH 0544/1436] xfs: set buffer ops when repair probes for btree type In xrep_findroot_block, we work out the btree type and correctness of a given block by calling different btree verifiers on root block candidates. However, we leave the NULL b_ops while ->verify_read validates the block, which means that if the verifier calls xfs_buf_verifier_error it'll crash on the null b_ops. Fix it to set b_ops before calling the verifier and unsetting it if the verifier fails. Furthermore, improve the documentation around xfs_buf_ensure_ops, which is the function that is responsible for cleaning up the b_ops state of buffers that go through xrep_findroot_block but don't match anything. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/scrub/repair.c | 11 ++++++++--- fs/xfs/xfs_buf.c | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 1c8eecfe52b8..6acf1bfa0bfe 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -768,18 +768,23 @@ xrep_findroot_block( if (!uuid_equal(&btblock->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) goto out; + /* + * Read verifiers can reference b_ops, so we set the pointer + * here. If the verifier fails we'll reset the buffer state + * to what it was before we touched the buffer. + */ + bp->b_ops = fab->buf_ops; fab->buf_ops->verify_read(bp); if (bp->b_error) { + bp->b_ops = NULL; bp->b_error = 0; goto out; } /* * Some read verifiers will (re)set b_ops, so we must be - * careful not to blow away any such assignment. + * careful not to change b_ops after running the verifier. */ - if (!bp->b_ops) - bp->b_ops = fab->buf_ops; } /* diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 1f9857e3630a..4f5f2ff3f70f 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -776,10 +776,26 @@ _xfs_buf_read( } /* + * Set buffer ops on an unchecked buffer and validate it, if possible. + * * If the caller passed in an ops structure and the buffer doesn't have ops * assigned, set the ops and use them to verify the contents. If the contents * cannot be verified, we'll clear XBF_DONE. We assume the buffer has no * recorded errors and is already in XBF_DONE state. + * + * Under normal operations, every in-core buffer must have buffer ops assigned + * to them when the buffer is read in from disk so that we can validate the + * metadata. + * + * However, there are two scenarios where one can encounter in-core buffers + * that don't have buffer ops. The first is during log recovery of buffers on + * a V4 filesystem, though these buffers are purged at the end of recovery. + * + * The other is online repair, which tries to match arbitrary metadata blocks + * with btree types in order to find the root. If online repair doesn't match + * the buffer with /any/ btree type, the buffer remains in memory in DONE state + * with no ops, and a subsequent read_buf call from elsewhere will not set the + * ops. This function helps us fix this situation. */ int xfs_buf_ensure_ops( From cfe4bd7a257f6d6f81d3458d8c9d9ec4957539e6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 4 Feb 2019 03:27:58 +0800 Subject: [PATCH 0545/1436] sctp: check and update stream->out_curr when allocating stream_out Now when using stream reconfig to add out streams, stream->out will get re-allocated, and all old streams' information will be copied to the new ones and the old ones will be freed. So without stream->out_curr updated, next time when trying to send from stream->out_curr stream, a panic would be caused. This patch is to check and update stream->out_curr when allocating stream_out. v1->v2: - define fa_index() to get elem index from stream->out_curr. v2->v3: - repost with no change. Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") Reported-by: Ying Xu Reported-by: syzbot+e33a3a138267ca119c7d@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/stream.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 80e0ae5534ec..f24633114dfd 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -84,6 +84,19 @@ static void fa_zero(struct flex_array *fa, size_t index, size_t count) } } +static size_t fa_index(struct flex_array *fa, void *elem, size_t count) +{ + size_t index = 0; + + while (count--) { + if (elem == flex_array_get(fa, index)) + break; + index++; + } + + return index; +} + /* Migrates chunks from stream queues to new stream queues if needed, * but not across associations. Also, removes those chunks to streams * higher than the new max. @@ -147,6 +160,13 @@ static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, if (stream->out) { fa_copy(out, stream->out, 0, min(outcnt, stream->outcnt)); + if (stream->out_curr) { + size_t index = fa_index(stream->out, stream->out_curr, + stream->outcnt); + + BUG_ON(index == stream->outcnt); + stream->out_curr = flex_array_get(out, index); + } fa_free(stream->out); } From 546f28974d771b124fb0bf7b551b343888cf0419 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Thu, 31 Jan 2019 20:40:30 +0900 Subject: [PATCH 0546/1436] virtio_net: Account for tx bytes and packets on sending xdp_frames Previously virtnet_xdp_xmit() did not account for device tx counters, which caused confusions. To be consistent with SKBs, account them on freeing xdp_frames. Reported-by: David Ahern Signed-off-by: Toshiaki Makita Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 259448182272..4cfceb789eea 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -503,6 +503,8 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct bpf_prog *xdp_prog; struct send_queue *sq; unsigned int len; + int packets = 0; + int bytes = 0; int drops = 0; int kicks = 0; int ret, err; @@ -526,10 +528,18 @@ static int virtnet_xdp_xmit(struct net_device *dev, /* Free up any pending old buffers before queueing new ones. */ while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { - if (likely(is_xdp_frame(ptr))) - xdp_return_frame(ptr_to_xdp(ptr)); - else - napi_consume_skb(ptr, false); + if (likely(is_xdp_frame(ptr))) { + struct xdp_frame *frame = ptr_to_xdp(ptr); + + bytes += frame->len; + xdp_return_frame(frame); + } else { + struct sk_buff *skb = ptr; + + bytes += skb->len; + napi_consume_skb(skb, false); + } + packets++; } for (i = 0; i < n; i++) { @@ -549,6 +559,8 @@ static int virtnet_xdp_xmit(struct net_device *dev, } out: u64_stats_update_begin(&sq->stats.syncp); + sq->stats.bytes += bytes; + sq->stats.packets += packets; sq->stats.xdp_tx += n; sq->stats.xdp_tx_drops += drops; sq->stats.kicks += kicks; From 341198eda723c8c1cddbb006a89ad9e362502ea2 Mon Sep 17 00:00:00 2001 From: Leonid Iziumtsev Date: Tue, 15 Jan 2019 17:15:23 +0000 Subject: [PATCH 0547/1436] dmaengine: imx-dma: fix wrong callback invoke Once the "ld_queue" list is not empty, next descriptor will migrate into "ld_active" list. The "desc" variable will be overwritten during that transition. And later the dmaengine_desc_get_callback_invoke() will use it as an argument. As result we invoke wrong callback. That behaviour was in place since: commit fcaaba6c7136 ("dmaengine: imx-dma: fix callback path in tasklet"). But after commit 4cd13c21b207 ("softirq: Let ksoftirqd do its job") things got worse, since possible delay between tasklet_schedule() from DMA irq handler and actual tasklet function execution got bigger. And that gave more time for new DMA request to be submitted and to be put into "ld_queue" list. It has been noticed that DMA issue is causing problems for "mxc-mmc" driver. While stressing the system with heavy network traffic and writing/reading to/from sd card simultaneously the timeout may happen: 10013000.sdhci: mxcmci_watchdog: read time out (status = 0x30004900) That often lead to file system corruption. Signed-off-by: Leonid Iziumtsev Signed-off-by: Vinod Koul Cc: stable@vger.kernel.org --- drivers/dma/imx-dma.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index c2fff3f6c9ca..4a09af3cd546 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -618,7 +618,7 @@ static void imxdma_tasklet(unsigned long data) { struct imxdma_channel *imxdmac = (void *)data; struct imxdma_engine *imxdma = imxdmac->imxdma; - struct imxdma_desc *desc; + struct imxdma_desc *desc, *next_desc; unsigned long flags; spin_lock_irqsave(&imxdma->lock, flags); @@ -648,10 +648,10 @@ static void imxdma_tasklet(unsigned long data) list_move_tail(imxdmac->ld_active.next, &imxdmac->ld_free); if (!list_empty(&imxdmac->ld_queue)) { - desc = list_first_entry(&imxdmac->ld_queue, struct imxdma_desc, - node); + next_desc = list_first_entry(&imxdmac->ld_queue, + struct imxdma_desc, node); list_move_tail(imxdmac->ld_queue.next, &imxdmac->ld_active); - if (imxdma_xfer_desc(desc) < 0) + if (imxdma_xfer_desc(next_desc) < 0) dev_warn(imxdma->dev, "%s: channel: %d couldn't xfer desc\n", __func__, imxdmac->channel); } From f7da7782aba92593f7b82f03d2409a1c5f4db91b Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 23 Jan 2019 09:26:00 +0100 Subject: [PATCH 0548/1436] dmaengine: bcm2835: Fix interrupt race on RT If IRQ handlers are threaded (either because CONFIG_PREEMPT_RT_BASE is enabled or "threadirqs" was passed on the command line) and if system load is sufficiently high that wakeup latency of IRQ threads degrades, SPI DMA transactions on the BCM2835 occasionally break like this: ks8851 spi0.0: SPI transfer timed out bcm2835-dma 3f007000.dma: DMA transfer could not be terminated ks8851 spi0.0 eth2: ks8851_rdfifo: spi_sync() failed The root cause is an assumption made by the DMA driver which is documented in a code comment in bcm2835_dma_terminate_all(): /* * Stop DMA activity: we assume the callback will not be called * after bcm_dma_abort() returns (even if it does, it will see * c->desc is NULL and exit.) */ That assumption falls apart if the IRQ handler bcm2835_dma_callback() is threaded: A client may terminate a descriptor and issue a new one before the IRQ handler had a chance to run. In fact the IRQ handler may miss an *arbitrary* number of descriptors. The result is the following race condition: 1. A descriptor finishes, its interrupt is deferred to the IRQ thread. 2. A client calls dma_terminate_async() which sets channel->desc = NULL. 3. The client issues a new descriptor. Because channel->desc is NULL, bcm2835_dma_issue_pending() immediately starts the descriptor. 4. Finally the IRQ thread runs and writes BCM2835_DMA_INT to the CS register to acknowledge the interrupt. This clears the ACTIVE flag, so the newly issued descriptor is paused in the middle of the transaction. Because channel->desc is not NULL, the IRQ thread finalizes the descriptor and tries to start the next one. I see two possible solutions: The first is to call synchronize_irq() in bcm2835_dma_issue_pending() to wait until the IRQ thread has finished before issuing a new descriptor. The downside of this approach is unnecessary latency if clients desire rapidly terminating and re-issuing descriptors and don't have any use for an IRQ callback. (The SPI TX DMA channel is a case in point.) A better alternative is to make the IRQ thread recognize that it has missed descriptors and avoid finalizing the newly issued descriptor. So first of all, set the ACTIVE flag when acknowledging the interrupt. This keeps a newly issued descriptor running. If the descriptor was finished, the channel remains idle despite the ACTIVE flag being set. However the ACTIVE flag can then no longer be used to check whether the channel is idle, so instead check whether the register containing the current control block address is zero and finalize the current descriptor only if so. That way, there is no impact on latency and throughput if the client doesn't care for the interrupt: Only minimal additional overhead is introduced for non-cyclic descriptors as one further MMIO read is necessary per interrupt to check for idleness of the channel. Cyclic descriptors are sped up slightly by removing one MMIO write per interrupt. Fixes: 96286b576690 ("dmaengine: Add support for BCM2835") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v3.14+ Cc: Frank Pavlic Cc: Martin Sperl Cc: Florian Meier Cc: Clive Messer Cc: Matthias Reichl Tested-by: Stefan Wahren Acked-by: Florian Kauer Signed-off-by: Vinod Koul --- drivers/dma/bcm2835-dma.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index 1a44c8086d77..0c3f5c71bb48 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -412,7 +412,12 @@ static int bcm2835_dma_abort(void __iomem *chan_base) long int timeout = 10000; cs = readl(chan_base + BCM2835_DMA_CS); - if (!(cs & BCM2835_DMA_ACTIVE)) + + /* + * A zero control block address means the channel is idle. + * (The ACTIVE flag in the CS register is not a reliable indicator.) + */ + if (!readl(chan_base + BCM2835_DMA_ADDR)) return 0; /* Write 0 to the active bit - Pause the DMA */ @@ -476,8 +481,15 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data) spin_lock_irqsave(&c->vc.lock, flags); - /* Acknowledge interrupt */ - writel(BCM2835_DMA_INT, c->chan_base + BCM2835_DMA_CS); + /* + * Clear the INT flag to receive further interrupts. Keep the channel + * active in case the descriptor is cyclic or in case the client has + * already terminated the descriptor and issued a new one. (May happen + * if this IRQ handler is threaded.) If the channel is finished, it + * will remain idle despite the ACTIVE flag being set. + */ + writel(BCM2835_DMA_INT | BCM2835_DMA_ACTIVE, + c->chan_base + BCM2835_DMA_CS); d = c->desc; @@ -485,11 +497,7 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data) if (d->cyclic) { /* call the cyclic callback */ vchan_cyclic_callback(&d->vd); - - /* Keep the DMA engine running */ - writel(BCM2835_DMA_ACTIVE, - c->chan_base + BCM2835_DMA_CS); - } else { + } else if (!readl(c->chan_base + BCM2835_DMA_ADDR)) { vchan_cookie_complete(&c->desc->vd); bcm2835_dma_start_desc(c); } @@ -789,11 +797,7 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan) list_del_init(&c->node); spin_unlock(&d->lock); - /* - * Stop DMA activity: we assume the callback will not be called - * after bcm_dma_abort() returns (even if it does, it will see - * c->desc is NULL and exit.) - */ + /* stop DMA activity */ if (c->desc) { vchan_terminate_vdesc(&c->desc->vd); c->desc = NULL; @@ -801,8 +805,7 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan) /* Wait for stopping */ while (--timeout) { - if (!(readl(c->chan_base + BCM2835_DMA_CS) & - BCM2835_DMA_ACTIVE)) + if (!readl(c->chan_base + BCM2835_DMA_ADDR)) break; cpu_relax(); From 9e528c799d17a4ac37d788c81440b50377dd592d Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 23 Jan 2019 09:26:00 +0100 Subject: [PATCH 0549/1436] dmaengine: bcm2835: Fix abort of transactions There are multiple issues with bcm2835_dma_abort() (which is called on termination of a transaction): * The algorithm to abort the transaction first pauses the channel by clearing the ACTIVE flag in the CS register, then waits for the PAUSED flag to clear. Page 49 of the spec documents the latter as follows: "Indicates if the DMA is currently paused and not transferring data. This will occur if the active bit has been cleared [...]" https://www.raspberrypi.org/app/uploads/2012/02/BCM2835-ARM-Peripherals.pdf So the function is entering an infinite loop because it is waiting for PAUSED to clear which is always set due to the function having cleared the ACTIVE flag. The only thing that's saving it from itself is the upper bound of 10000 loop iterations. The code comment says that the intention is to "wait for any current AXI transfer to complete", so the author probably wanted to check the WAITING_FOR_OUTSTANDING_WRITES flag instead. Amend the function accordingly. * The CS register is only read at the beginning of the function. It needs to be read again after pausing the channel and before checking for outstanding writes, otherwise writes which were issued between the register read at the beginning of the function and pausing the channel may not be waited for. * The function seeks to abort the transfer by writing 0 to the NEXTCONBK register and setting the ABORT and ACTIVE flags. Thereby, the 0 in NEXTCONBK is sought to be loaded into the CONBLK_AD register. However experimentation has shown this approach to not work: The CONBLK_AD register remains the same as before and the CS register contains 0x00000030 (PAUSED | DREQ_STOPS_DMA). In other words, the control block is not aborted but merely paused and it will be resumed once the next DMA transaction is started. That is absolutely not the desired behavior. A simpler approach is to set the channel's RESET flag instead. This reliably zeroes the NEXTCONBK as well as the CS register. It requires less code and only a single MMIO write. This is also what popular user space DMA drivers do, e.g.: https://github.com/metachris/RPIO/blob/master/source/c_pwm/pwm.c Note that the spec is contradictory whether the NEXTCONBK register is writeable at all. On the one hand, page 41 claims: "The value loaded into the NEXTCONBK register can be overwritten so that the linked list of Control Block data structures can be dynamically altered. However it is only safe to do this when the DMA is paused." On the other hand, page 40 specifies: "Only three registers in each channel's register set are directly writeable (CS, CONBLK_AD and DEBUG). The other registers (TI, SOURCE_AD, DEST_AD, TXFR_LEN, STRIDE & NEXTCONBK), are automatically loaded from a Control Block data structure held in external memory." Fixes: 96286b576690 ("dmaengine: Add support for BCM2835") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v3.14+ Cc: Frank Pavlic Cc: Martin Sperl Cc: Florian Meier Cc: Clive Messer Cc: Matthias Reichl Tested-by: Stefan Wahren Acked-by: Florian Kauer Signed-off-by: Vinod Koul --- drivers/dma/bcm2835-dma.c | 41 +++++++++------------------------------ 1 file changed, 9 insertions(+), 32 deletions(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index 0c3f5c71bb48..ae10f5614f95 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -406,13 +406,11 @@ static void bcm2835_dma_fill_cb_chain_with_sg( } } -static int bcm2835_dma_abort(void __iomem *chan_base) +static int bcm2835_dma_abort(struct bcm2835_chan *c) { - unsigned long cs; + void __iomem *chan_base = c->chan_base; long int timeout = 10000; - cs = readl(chan_base + BCM2835_DMA_CS); - /* * A zero control block address means the channel is idle. * (The ACTIVE flag in the CS register is not a reliable indicator.) @@ -424,25 +422,16 @@ static int bcm2835_dma_abort(void __iomem *chan_base) writel(0, chan_base + BCM2835_DMA_CS); /* Wait for any current AXI transfer to complete */ - while ((cs & BCM2835_DMA_ISPAUSED) && --timeout) { + while ((readl(chan_base + BCM2835_DMA_CS) & + BCM2835_DMA_WAITING_FOR_WRITES) && --timeout) cpu_relax(); - cs = readl(chan_base + BCM2835_DMA_CS); - } - /* We'll un-pause when we set of our next DMA */ + /* Peripheral might be stuck and fail to signal AXI write responses */ if (!timeout) - return -ETIMEDOUT; - - if (!(cs & BCM2835_DMA_ACTIVE)) - return 0; - - /* Terminate the control block chain */ - writel(0, chan_base + BCM2835_DMA_NEXTCB); - - /* Abort the whole DMA */ - writel(BCM2835_DMA_ABORT | BCM2835_DMA_ACTIVE, - chan_base + BCM2835_DMA_CS); + dev_err(c->vc.chan.device->dev, + "failed to complete outstanding writes\n"); + writel(BCM2835_DMA_RESET, chan_base + BCM2835_DMA_CS); return 0; } @@ -787,7 +776,6 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan) struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); struct bcm2835_dmadev *d = to_bcm2835_dma_dev(c->vc.chan.device); unsigned long flags; - int timeout = 10000; LIST_HEAD(head); spin_lock_irqsave(&c->vc.lock, flags); @@ -801,18 +789,7 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan) if (c->desc) { vchan_terminate_vdesc(&c->desc->vd); c->desc = NULL; - bcm2835_dma_abort(c->chan_base); - - /* Wait for stopping */ - while (--timeout) { - if (!readl(c->chan_base + BCM2835_DMA_ADDR)) - break; - - cpu_relax(); - } - - if (!timeout) - dev_err(d->ddev.dev, "DMA transfer could not be terminated\n"); + bcm2835_dma_abort(c); } vchan_get_all_descriptors(&c->vc, &head); From 9e63a7894fd302082cf3627fe90844421a6cbe7f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Sun, 27 Jan 2019 06:53:14 -0800 Subject: [PATCH 0550/1436] perf/x86/intel/uncore: Add Node ID mask Some PCI uncore PMUs cannot be registered on an 8-socket system (HPE Superdome Flex). To understand which Socket the PCI uncore PMUs belongs to, perf retrieves the local Node ID of the uncore device from CPUNODEID(0xC0) of the PCI configuration space, and the mapping between Socket ID and Node ID from GIDNIDMAP(0xD4). The Socket ID can be calculated accordingly. The local Node ID is only available at bit 2:0, but current code doesn't mask it. If a BIOS doesn't clear the rest of the bits, an incorrect Node ID will be fetched. Filter the Node ID by adding a mask. Reported-by: Song Liu Tested-by: Song Liu Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: # v3.7+ Fixes: 7c94ee2e0917 ("perf/x86: Add Intel Nehalem and Sandy Bridge-EP uncore support") Link: https://lkml.kernel.org/r/1548600794-33162-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snbep.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index c07bee31abe8..b10e04387f38 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1222,6 +1222,8 @@ static struct pci_driver snbep_uncore_pci_driver = { .id_table = snbep_uncore_pci_ids, }; +#define NODE_ID_MASK 0x7 + /* * build pci bus to socket mapping */ @@ -1243,7 +1245,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool err = pci_read_config_dword(ubox_dev, nodeid_loc, &config); if (err) break; - nodeid = config; + nodeid = config & NODE_ID_MASK; /* get the Node ID mapping */ err = pci_read_config_dword(ubox_dev, idmap_loc, &config); if (err) From 602cae04c4864bb3487dfe4c2126c8d9e7e1614a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 19 Dec 2018 17:53:50 +0100 Subject: [PATCH 0551/1436] perf/x86/intel: Delay memory deallocation until x86_pmu_dead_cpu() intel_pmu_cpu_prepare() allocated memory for ->shared_regs among other members of struct cpu_hw_events. This memory is released in intel_pmu_cpu_dying() which is wrong. The counterpart of the intel_pmu_cpu_prepare() callback is x86_pmu_dead_cpu(). Otherwise if the CPU fails on the UP path between CPUHP_PERF_X86_PREPARE and CPUHP_AP_PERF_X86_STARTING then it won't release the memory but allocate new memory on the next attempt to online the CPU (leaking the old memory). Also, if the CPU down path fails between CPUHP_AP_PERF_X86_STARTING and CPUHP_PERF_X86_PREPARE then the CPU will go back online but never allocate the memory that was released in x86_pmu_dying_cpu(). Make the memory allocation/free symmetrical in regard to the CPU hotplug notifier by moving the deallocation to intel_pmu_cpu_dead(). This started in commit: a7e3ed1e47011 ("perf: Add support for supplementary event registers"). In principle the bug was introduced in v2.6.39 (!), but it will almost certainly not backport cleanly across the big CPU hotplug rewrite between v4.7-v4.15... [ bigeasy: Added patch description. ] [ mingo: Added backporting guidance. ] Reported-by: He Zhe Signed-off-by: Peter Zijlstra (Intel) # With developer hat on Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) # With maintainer hat on Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@kernel.org Cc: bp@alien8.de Cc: hpa@zytor.com Cc: jolsa@kernel.org Cc: kan.liang@linux.intel.com Cc: namhyung@kernel.org Cc: Fixes: a7e3ed1e47011 ("perf: Add support for supplementary event registers"). Link: https://lkml.kernel.org/r/20181219165350.6s3jvyxbibpvlhtq@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 40e12cfc87f6..daafb893449b 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3558,6 +3558,14 @@ static void free_excl_cntrs(int cpu) } static void intel_pmu_cpu_dying(int cpu) +{ + fini_debug_store_on_cpu(cpu); + + if (x86_pmu.counter_freezing) + disable_counter_freeze(); +} + +static void intel_pmu_cpu_dead(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); struct intel_shared_regs *pc; @@ -3570,11 +3578,6 @@ static void intel_pmu_cpu_dying(int cpu) } free_excl_cntrs(cpu); - - fini_debug_store_on_cpu(cpu); - - if (x86_pmu.counter_freezing) - disable_counter_freeze(); } static void intel_pmu_sched_task(struct perf_event_context *ctx, @@ -3663,6 +3666,7 @@ static __initconst const struct x86_pmu core_pmu = { .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, + .cpu_dead = intel_pmu_cpu_dead, }; static struct attribute *intel_pmu_attrs[]; @@ -3703,6 +3707,8 @@ static __initconst const struct x86_pmu intel_pmu = { .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, + .cpu_dead = intel_pmu_cpu_dead, + .guest_get_msrs = intel_guest_get_msrs, .sched_task = intel_pmu_sched_task, }; From 9dff0aa95a324e262ffb03f425d00e4751f3294e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 10 Jan 2019 14:27:45 +0000 Subject: [PATCH 0552/1436] perf/core: Don't WARN() for impossible ring-buffer sizes The perf tool uses /proc/sys/kernel/perf_event_mlock_kb to determine how large its ringbuffer mmap should be. This can be configured to arbitrary values, which can be larger than the maximum possible allocation from kmalloc. When this is configured to a suitably large value (e.g. thanks to the perf fuzzer), attempting to use perf record triggers a WARN_ON_ONCE() in __alloc_pages_nodemask(): WARNING: CPU: 2 PID: 5666 at mm/page_alloc.c:4511 __alloc_pages_nodemask+0x3f8/0xbc8 Let's avoid this by checking that the requested allocation is possible before calling kzalloc. Reported-by: Julien Thierry Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Julien Thierry Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Link: https://lkml.kernel.org/r/20190110142745.25495-1-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- kernel/events/ring_buffer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 4a9937076331..309ef5a64af5 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -734,6 +734,9 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) size = sizeof(struct ring_buffer); size += nr_pages * sizeof(void *); + if (order_base_2(size) >= MAX_ORDER) + goto fail; + rb = kzalloc(size, GFP_KERNEL); if (!rb) goto fail; From 6454368a804c4955ccd116236037536f81e5b1f1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 30 Jan 2019 21:48:44 +0200 Subject: [PATCH 0553/1436] dmaengine: dmatest: Abort test in case of mapping error In case of mapping error the DMA addresses are invalid and continuing will screw system memory or potentially something else. [ 222.480310] dmatest: dma0chan7-copy0: summary 1 tests, 3 failures 6 iops 349 KB/s (0) ... [ 240.912725] check: Corrupted low memory at 00000000c7c75ac9 (2940 phys) = 5656000000000000 [ 240.921998] check: Corrupted low memory at 000000005715a1cd (2948 phys) = 279f2aca5595ab2b [ 240.931280] check: Corrupted low memory at 000000002f4024c0 (2950 phys) = 5e5624f349e793cf ... Abort any test if mapping failed. Fixes: 4076e755dbec ("dmatest: convert to dmaengine_unmap_data") Cc: Dan Williams Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul --- drivers/dma/dmatest.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 2eea4ef72915..6511928b4cdf 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -711,11 +711,9 @@ static int dmatest_func(void *data) srcs[i] = um->addr[i] + src_off; ret = dma_mapping_error(dev->dev, um->addr[i]); if (ret) { - dmaengine_unmap_put(um); result("src mapping error", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; } um->to_cnt++; } @@ -730,11 +728,9 @@ static int dmatest_func(void *data) DMA_BIDIRECTIONAL); ret = dma_mapping_error(dev->dev, dsts[i]); if (ret) { - dmaengine_unmap_put(um); result("dst mapping error", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; } um->bidi_cnt++; } @@ -762,12 +758,10 @@ static int dmatest_func(void *data) } if (!tx) { - dmaengine_unmap_put(um); result("prep error", total_tests, src_off, dst_off, len, ret); msleep(100); - failed_tests++; - continue; + goto error_unmap_continue; } done->done = false; @@ -776,12 +770,10 @@ static int dmatest_func(void *data) cookie = tx->tx_submit(tx); if (dma_submit_error(cookie)) { - dmaengine_unmap_put(um); result("submit error", total_tests, src_off, dst_off, len, ret); msleep(100); - failed_tests++; - continue; + goto error_unmap_continue; } dma_async_issue_pending(chan); @@ -790,22 +782,20 @@ static int dmatest_func(void *data) status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); - dmaengine_unmap_put(um); - if (!done->done) { result("test timed out", total_tests, src_off, dst_off, len, 0); - failed_tests++; - continue; + goto error_unmap_continue; } else if (status != DMA_COMPLETE) { result(status == DMA_ERROR ? "completion error status" : "completion busy status", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; } + dmaengine_unmap_put(um); + if (params->noverify) { verbose_result("test passed", total_tests, src_off, dst_off, len, 0); @@ -846,6 +836,12 @@ static int dmatest_func(void *data) verbose_result("test passed", total_tests, src_off, dst_off, len, 0); } + + continue; + +error_unmap_continue: + dmaengine_unmap_put(um); + failed_tests++; } ktime = ktime_sub(ktime_get(), ktime); ktime = ktime_sub(ktime, comparetime); From 952b72f89ae23b316da8c1021b18d0c388ad6cc4 Mon Sep 17 00:00:00 2001 From: Naresh Kamboju Date: Tue, 29 Jan 2019 06:28:35 +0000 Subject: [PATCH 0554/1436] selftests: netfilter: fix config fragment CONFIG_NF_TABLES_INET In selftests the config fragment for netfilter was added as NF_TABLES_INET=y and this patch correct it as CONFIG_NF_TABLES_INET=y Signed-off-by: Naresh Kamboju Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config index 1017313e41a8..59caa8f71cd8 100644 --- a/tools/testing/selftests/netfilter/config +++ b/tools/testing/selftests/netfilter/config @@ -1,2 +1,2 @@ CONFIG_NET_NS=y -NF_TABLES_INET=y +CONFIG_NF_TABLES_INET=y From 98bfc3414bda335dbd7fec58bde6266f991801d7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 29 Jan 2019 15:16:23 +0100 Subject: [PATCH 0555/1436] selftests: netfilter: add simple masq/redirect test cases Check basic nat/redirect/masquerade for ipv4 and ipv6. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/Makefile | 2 +- tools/testing/selftests/netfilter/nft_nat.sh | 762 +++++++++++++++++++ 2 files changed, 763 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/nft_nat.sh diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 47ed6cef93fb..c9ff2b47bd1c 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for netfilter selftests -TEST_PROGS := nft_trans_stress.sh +TEST_PROGS := nft_trans_stress.sh nft_nat.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh new file mode 100755 index 000000000000..8ec76681605c --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -0,0 +1,762 @@ +#!/bin/bash +# +# This test is for basic NAT functionality: snat, dnat, redirect, masquerade. +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add ns0 +ip netns add ns1 +ip netns add ns2 + +ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 +ip link add veth1 netns ns0 type veth peer name eth0 netns ns2 + +ip -net ns0 link set lo up +ip -net ns0 link set veth0 up +ip -net ns0 addr add 10.0.1.1/24 dev veth0 +ip -net ns0 addr add dead:1::1/64 dev veth0 + +ip -net ns0 link set veth1 up +ip -net ns0 addr add 10.0.2.1/24 dev veth1 +ip -net ns0 addr add dead:2::1/64 dev veth1 + +for i in 1 2; do + ip -net ns$i link set lo up + ip -net ns$i link set eth0 up + ip -net ns$i addr add 10.0.$i.99/24 dev eth0 + ip -net ns$i route add default via 10.0.$i.1 + ip -net ns$i addr add dead:$i::99/64 dev eth0 + ip -net ns$i route add default via dead:$i::1 +done + +bad_counter() +{ + local ns=$1 + local counter=$2 + local expect=$3 + + echo "ERROR: $counter counter in $ns has unexpected value (expected $expect)" 1>&2 + ip netns exec $ns nft list counter inet filter $counter 1>&2 +} + +check_counters() +{ + ns=$1 + local lret=0 + + cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84") + if [ $? -ne 0 ]; then + bad_counter $ns ns0in "packets 1 bytes 84" + lret=1 + fi + cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84") + if [ $? -ne 0 ]; then + bad_counter $ns ns0out "packets 1 bytes 84" + lret=1 + fi + + expect="packets 1 bytes 104" + cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter $ns ns0in6 "$expect" + lret=1 + fi + cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter $ns ns0out6 "$expect" + lret=1 + fi + + return $lret +} + +check_ns0_counters() +{ + local ns=$1 + local lret=0 + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0in | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0in "packets 0 bytes 0" + lret=1 + fi + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0in6 "packets 0 bytes 0" + lret=1 + fi + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0out | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0out "packets 0 bytes 0" + lret=1 + fi + cnt=$(ip netns exec ns0 nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0out6 "packets 0 bytes 0" + lret=1 + fi + + for dir in "in" "out" ; do + expect="packets 1 bytes 84" + cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 $ns$dir "$expect" + lret=1 + fi + + expect="packets 1 bytes 104" + cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir}6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 $ns$dir6 "$expect" + lret=1 + fi + done + + return $lret +} + +reset_counters() +{ + for i in 0 1 2;do + ip netns exec ns$i nft reset counters inet > /dev/null + done +} + +test_local_dnat6() +{ + local lret=0 +ip netns exec ns0 nft -f - < /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping6 failed" + return $lret + fi + + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns1$dir "$expect" + lret=1 + fi + done + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 0 count in ns1 + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # expect 1 packet in ns2 + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns0$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was NATted to ns2" + ip netns exec ns0 nft flush chain ip6 nat output + + return $lret +} + +test_local_dnat() +{ + local lret=0 +ip netns exec ns0 nft -f - < /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping failed" + return $lret + fi + + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns1$dir "$expect" + lret=1 + fi + done + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 0 count in ns1 + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # expect 1 packet in ns2 + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns0$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ping to ns1 was NATted to ns2" + + ip netns exec ns0 nft flush chain ip nat output + + reset_counters + ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping failed" + return $lret + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns1$dir "$expect" + lret=1 + fi + done + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 1 count in ns1 + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0$dir "$expect" + lret=1 + fi + done + + # expect 0 packet in ns2 + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns2$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ping to ns1 OK after nat output chain flush" + + return $lret +} + + +test_masquerade6() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 via ipv6" + return 1 + lret=1 + fi + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add masquerading rule +ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading" + lret=1 + fi + + # ns1 should have seen packets from ns0, due to masquerade + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft flush chain ip6 nat postrouting + if [ $? -ne 0 ]; then + echo "ERROR: Could not flush ip6 nat postrouting" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2" + + return $lret +} + +test_masquerade() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: canot ping ns1 from ns2" + lret=1 + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add masquerading rule +ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading" + lret=1 + fi + + # ns1 should have seen packets from ns0, due to masquerade + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft flush chain ip nat postrouting + if [ $? -ne 0 ]; then + echo "ERROR: Could not flush nat postrouting" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP masquerade for ns2" + + return $lret +} + +test_redirect6() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannnot ping ns1 from ns2 via ipv6" + lret=1 + fi + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add redirect rule +ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip6 redirect" + lret=1 + fi + + # ns1 should have seen no packets from ns2, due to redirection + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # ns0 should have seen packets from ns2, due to masquerade + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft delete table ip6 nat + if [ $? -ne 0 ]; then + echo "ERROR: Could not delete ip6 nat table" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IPv6 redirection for ns2" + + return $lret +} + +test_redirect() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2" + lret=1 + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add redirect rule +ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip redirect" + lret=1 + fi + + # ns1 should have seen no packets from ns2, due to redirection + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # ns0 should have seen packets from ns2, due to masquerade + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft delete table ip nat + if [ $? -ne 0 ]; then + echo "ERROR: Could not delete nat table" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP redirection for ns2" + + return $lret +} + + +# ip netns exec ns0 ping -c 1 -q 10.0.$i.99 +for i in 0 1 2; do +ip netns exec ns$i nft -f - < /dev/null + if [ $? -ne 0 ];then + echo "ERROR: Could not reach other namespace(s)" 1>&2 + ret=1 + fi + + ip netns exec ns0 ping -c 1 -q dead:$i::99 > /dev/null + if [ $? -ne 0 ];then + echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2 + ret=1 + fi + check_counters ns$i + if [ $? -ne 0 ]; then + ret=1 + fi + + check_ns0_counters ns$i + if [ $? -ne 0 ]; then + ret=1 + fi + reset_counters +done + +if [ $ret -eq 0 ];then + echo "PASS: netns routing/connectivity: ns0 can reach ns1 and ns2" +fi + +reset_counters +test_local_dnat +test_local_dnat6 + +reset_counters +test_masquerade +test_masquerade6 + +reset_counters +test_redirect +test_redirect6 + +for i in 0 1 2; do ip netns del ns$i;done + +exit $ret From 4e35c1cb9460240e983a01745b5f29fe3a4d8e39 Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Tue, 29 Jan 2019 15:51:42 +0100 Subject: [PATCH 0556/1436] netfilter: nf_nat: skip nat clash resolution for same-origin entries It is possible that two concurrent packets originating from the same socket of a connection-less protocol (e.g. UDP) can end up having different IP_CT_DIR_REPLY tuples which results in one of the packets being dropped. To illustrate this, consider the following simplified scenario: 1. Packet A and B are sent at the same time from two different threads by same UDP socket. No matching conntrack entry exists yet. Both packets cause allocation of a new conntrack entry. 2. get_unique_tuple gets called for A. No clashing entry found. conntrack entry for A is added to main conntrack table. 3. get_unique_tuple is called for B and will find that the reply tuple of B is already taken by A. It will allocate a new UDP source port for B to resolve the clash. 4. conntrack entry for B cannot be added to main conntrack table because its ORIGINAL direction is clashing with A and the REPLY directions of A and B are not the same anymore due to UDP source port reallocation done in step 3. This patch modifies nf_conntrack_tuple_taken so it doesn't consider colliding reply tuples if the IP_CT_DIR_ORIGINAL tuples are equal. [ Florian: simplify patch to not use .allow_clash setting and always ignore identical flows ] Signed-off-by: Martynas Pumputis Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 741b533148ba..db4d46332e86 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1007,6 +1007,22 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, } if (nf_ct_key_equal(h, tuple, zone, net)) { + /* Tuple is taken already, so caller will need to find + * a new source port to use. + * + * Only exception: + * If the *original tuples* are identical, then both + * conntracks refer to the same flow. + * This is a rare situation, it can occur e.g. when + * more than one UDP packet is sent from same socket + * in different threads. + * + * Let nf_ct_resolve_clash() deal with this later. + */ + if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) + continue; + NF_CT_STAT_INC_ATOMIC(net, found); rcu_read_unlock(); return 1; From a46c52d9f2659498f0c0871f7f2333a692c243fe Mon Sep 17 00:00:00 2001 From: wenxu Date: Tue, 29 Jan 2019 15:51:17 +0800 Subject: [PATCH 0557/1436] netfilter: nft_tunnel: Add NFTA_TUNNEL_MODE options nft "tunnel" expr match both the tun_info of RX and TX. This patch provide the NFTA_TUNNEL_MODE to individually match the tun_info of RX or TX. Signed-off-by: wenxu Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 9 +++++++ net/netfilter/nft_tunnel.c | 34 ++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 030302893d96..a66c8de006cc 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1727,10 +1727,19 @@ enum nft_tunnel_keys { }; #define NFT_TUNNEL_MAX (__NFT_TUNNEL_MAX - 1) +enum nft_tunnel_mode { + NFT_TUNNEL_MODE_NONE, + NFT_TUNNEL_MODE_RX, + NFT_TUNNEL_MODE_TX, + __NFT_TUNNEL_MODE_MAX +}; +#define NFT_TUNNEL_MODE_MAX (__NFT_TUNNEL_MODE_MAX - 1) + enum nft_tunnel_attributes { NFTA_TUNNEL_UNSPEC, NFTA_TUNNEL_KEY, NFTA_TUNNEL_DREG, + NFTA_TUNNEL_MODE, __NFTA_TUNNEL_MAX }; #define NFTA_TUNNEL_MAX (__NFTA_TUNNEL_MAX - 1) diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 3a15f219e4e7..ea28588c5eed 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -15,6 +15,7 @@ struct nft_tunnel { enum nft_tunnel_keys key:8; enum nft_registers dreg:8; + enum nft_tunnel_mode mode:8; }; static void nft_tunnel_get_eval(const struct nft_expr *expr, @@ -29,14 +30,32 @@ static void nft_tunnel_get_eval(const struct nft_expr *expr, switch (priv->key) { case NFT_TUNNEL_PATH: - nft_reg_store8(dest, !!tun_info); + if (!tun_info) { + nft_reg_store8(dest, false); + return; + } + if (priv->mode == NFT_TUNNEL_MODE_NONE || + (priv->mode == NFT_TUNNEL_MODE_RX && + !(tun_info->mode & IP_TUNNEL_INFO_TX)) || + (priv->mode == NFT_TUNNEL_MODE_TX && + (tun_info->mode & IP_TUNNEL_INFO_TX))) + nft_reg_store8(dest, true); + else + nft_reg_store8(dest, false); break; case NFT_TUNNEL_ID: if (!tun_info) { regs->verdict.code = NFT_BREAK; return; } - *dest = ntohl(tunnel_id_to_key32(tun_info->key.tun_id)); + if (priv->mode == NFT_TUNNEL_MODE_NONE || + (priv->mode == NFT_TUNNEL_MODE_RX && + !(tun_info->mode & IP_TUNNEL_INFO_TX)) || + (priv->mode == NFT_TUNNEL_MODE_TX && + (tun_info->mode & IP_TUNNEL_INFO_TX))) + *dest = ntohl(tunnel_id_to_key32(tun_info->key.tun_id)); + else + regs->verdict.code = NFT_BREAK; break; default: WARN_ON(1); @@ -47,6 +66,7 @@ static void nft_tunnel_get_eval(const struct nft_expr *expr, static const struct nla_policy nft_tunnel_policy[NFTA_TUNNEL_MAX + 1] = { [NFTA_TUNNEL_KEY] = { .type = NLA_U32 }, [NFTA_TUNNEL_DREG] = { .type = NLA_U32 }, + [NFTA_TUNNEL_MODE] = { .type = NLA_U32 }, }; static int nft_tunnel_get_init(const struct nft_ctx *ctx, @@ -74,6 +94,14 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx, priv->dreg = nft_parse_register(tb[NFTA_TUNNEL_DREG]); + if (tb[NFTA_TUNNEL_MODE]) { + priv->mode = ntohl(nla_get_be32(tb[NFTA_TUNNEL_MODE])); + if (priv->mode > NFT_TUNNEL_MODE_MAX) + return -EOPNOTSUPP; + } else { + priv->mode = NFT_TUNNEL_MODE_NONE; + } + return nft_validate_register_store(ctx, priv->dreg, NULL, NFT_DATA_VALUE, len); } @@ -87,6 +115,8 @@ static int nft_tunnel_get_dump(struct sk_buff *skb, goto nla_put_failure; if (nft_dump_register(skb, NFTA_TUNNEL_DREG, priv->dreg)) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_TUNNEL_MODE, htonl(priv->mode))) + goto nla_put_failure; return 0; nla_put_failure: From 489338a717a0dfbbd5a3fabccf172b78f0ac9015 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 22 Jan 2019 17:34:39 -0600 Subject: [PATCH 0558/1436] perf tests evsel-tp-sched: Fix bitwise operator Notice that the use of the bitwise OR operator '|' always leads to true in this particular case, which seems a bit suspicious due to the context in which this expression is being used. Fix this by using bitwise AND operator '&' instead. This bug was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: stable@vger.kernel.org Fixes: 6a6cd11d4e57 ("perf test: Add test for the sched tracepoint format fields") Link: http://lkml.kernel.org/r/20190122233439.GA5868@embeddedor Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/evsel-tp-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index 5f8501c68da4..5cbba70bcdd0 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -17,7 +17,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name, return -1; } - is_signed = !!(field->flags | TEP_FIELD_IS_SIGNED); + is_signed = !!(field->flags & TEP_FIELD_IS_SIGNED); if (should_be_signed && !is_signed) { pr_debug("%s: \"%s\" signedness(%d) is wrong, should be %d\n", evsel->name, name, is_signed, should_be_signed); From f0fabf9c897327abd39018aefb5029aff8c7e133 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 29 Jan 2019 18:54:12 +0530 Subject: [PATCH 0559/1436] perf mem/c2c: Fix perf_mem_events to support powerpc PowerPC hardware does not have a builtin latency filter (--ldlat) for the "mem-load" event and perf_mem_events by default includes "/ldlat=30/" which is causing a failure on PowerPC. Refactor the code to support "perf mem/c2c" on PowerPC. This patch depends on kernel side changes done my Madhavan: https://lists.ozlabs.org/pipermail/linuxppc-dev/2018-December/182596.html Signed-off-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Dick Fowles Cc: Don Zickus Cc: Joe Mario Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20190129132412.771-1-ravi.bangoria@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-c2c.txt | 16 ++++++++++++---- tools/perf/Documentation/perf-mem.txt | 2 +- tools/perf/arch/powerpc/util/Build | 1 + tools/perf/arch/powerpc/util/mem-events.c | 11 +++++++++++ tools/perf/util/mem-events.c | 2 +- 5 files changed, 26 insertions(+), 6 deletions(-) create mode 100644 tools/perf/arch/powerpc/util/mem-events.c diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index 095aebdc5bb7..e6150f21267d 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -19,8 +19,11 @@ C2C stands for Cache To Cache. The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows you to track down the cacheline contentions. -The tool is based on x86's load latency and precise store facility events -provided by Intel CPUs. These events provide: +On x86, the tool is based on load latency and precise store facility events +provided by Intel CPUs. On PowerPC, the tool uses random instruction sampling +with thresholding feature. + +These events provide: - memory address of the access - type of the access (load and store details) - latency (in cycles) of the load access @@ -46,7 +49,7 @@ RECORD OPTIONS -l:: --ldlat:: - Configure mem-loads latency. + Configure mem-loads latency. (x86 only) -k:: --all-kernel:: @@ -119,11 +122,16 @@ Following perf record options are configured by default: -W,-d,--phys-data,--sample-cpu Unless specified otherwise with '-e' option, following events are monitored by -default: +default on x86: cpu/mem-loads,ldlat=30/P cpu/mem-stores/P +and following on PowerPC: + + cpu/mem-loads/ + cpu/mem-stores/ + User can pass any 'perf record' option behind '--' mark, like (to enable callchains and system wide monitoring): diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index f8d2167cf3e7..199ea0f0a6c0 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -82,7 +82,7 @@ RECORD OPTIONS Be more verbose (show counter open errors, etc) --ldlat :: - Specify desired latency for loads event. + Specify desired latency for loads event. (x86 only) In addition, for report all perf report options are valid, and for record all perf record options. diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 2e6595310420..ba98bd006488 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -2,6 +2,7 @@ libperf-y += header.o libperf-y += sym-handling.o libperf-y += kvm-stat.o libperf-y += perf_regs.o +libperf-y += mem-events.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_DWARF) += skip-callchain-idx.o diff --git a/tools/perf/arch/powerpc/util/mem-events.c b/tools/perf/arch/powerpc/util/mem-events.c new file mode 100644 index 000000000000..d08311f04e95 --- /dev/null +++ b/tools/perf/arch/powerpc/util/mem-events.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "mem-events.h" + +/* PowerPC does not support 'ldlat' parameter. */ +char *perf_mem_events__name(int i) +{ + if (i == PERF_MEM_EVENTS__LOAD) + return (char *) "cpu/mem-loads/"; + + return (char *) "cpu/mem-stores/"; +} diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 93f74d8d3cdd..42c3e5a229d2 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -28,7 +28,7 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { static char mem_loads_name[100]; static bool mem_loads_name__init; -char *perf_mem_events__name(int i) +char * __weak perf_mem_events__name(int i) { if (i == PERF_MEM_EVENTS__LOAD) { if (!mem_loads_name__init) { From d34cecfb6b2bdc35713180ba4fcfd912a2f3e9bf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 4 Feb 2019 11:04:20 -0300 Subject: [PATCH 0560/1436] perf clang: Do not use 'return std::move(something)' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It prevents copy elision, generating this warning when building with fedora:rawhide's clang: clang version 7.0.1 (Fedora 7.0.1-2.fc30) Target: x86_64-unknown-linux-gnu Thread model: posix InstalledDir: /usr/bin Found candidate GCC installation: /usr/bin/../lib/gcc/x86_64-redhat-linux/9 Found candidate GCC installation: /usr/lib/gcc/x86_64-redhat-linux/9 Selected GCC installation: /usr/bin/../lib/gcc/x86_64-redhat-linux/9 Candidate multilib: .;@m64 Candidate multilib: 32;@m32 Selected multilib: .;@m64 $ make -C tools/perf CC=clang LIBCLANGLLVM=1 util/c++/clang.cpp: In function 'std::unique_ptr > perf::getBPFObjectFromModule(llvm::Module*)': util/c++/clang.cpp:163:18: error: moving a local object in a return statement prevents copy elision [-Werror=pessimizing-move] 163 | return std::move(Buffer); | ~~~~~~~~~^~~~~~~~ util/c++/clang.cpp:163:18: note: remove 'std::move' call cc1plus: all warnings being treated as errors References: http://www.cplusplus.com/forum/general/186411/#msg908572 https://en.cppreference.com/w/cpp/language/return#Notes https://en.cppreference.com/w/cpp/language/copy_elision Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-lehqf5x5q96l0o8myhb6blz6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/c++/clang.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp index 89512504551b..39c0004f2886 100644 --- a/tools/perf/util/c++/clang.cpp +++ b/tools/perf/util/c++/clang.cpp @@ -160,7 +160,7 @@ getBPFObjectFromModule(llvm::Module *Module) } PM.run(*Module); - return std::move(Buffer); + return Buffer; } } From 27b8e90eaea6a6fe8c1ab457443601dabff500d0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 4 Feb 2019 11:34:20 -0300 Subject: [PATCH 0561/1436] tools headers uapi: Sync linux/in.h copy from the kernel sources To get the changes in this cset: f275ee0fa3a0 ("IN_BADCLASS: fix macro to actually work") The macros changed in this cset are not used in tools/, so this is just to silence this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/in.h' differs from latest version at 'include/uapi/linux/in.h' diff -u tools/include/uapi/linux/in.h include/uapi/linux/in.h Cc: Adrian Hunter Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-xbk34kwamn8bw8ywpuxetct9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/in.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index f6052e70bf40..a55cb8b10165 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -268,7 +268,7 @@ struct sockaddr_in { #define IN_MULTICAST(a) IN_CLASSD(a) #define IN_MULTICAST_NET 0xe0000000 -#define IN_BADCLASS(a) ((((long int) (a) ) == 0xffffffff) +#define IN_BADCLASS(a) (((long int) (a) ) == (long int)0xffffffff) #define IN_EXPERIMENTAL(a) IN_BADCLASS((a)) #define IN_CLASSE(a) ((((long int) (a)) & 0xf0000000) == 0xf0000000) From d23c808c6fc6132e812690648e14c0d6b0cbe273 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 4 Feb 2019 14:37:38 +0000 Subject: [PATCH 0562/1436] arm64: ptdump: Don't iterate kernel page tables using PTRS_PER_PXX When 52-bit virtual addressing is enabled for userspace (CONFIG_ARM64_USER_VA_BITS_52=y), the kernel continues to utilise 48-bit virtual addressing in TTBR1. Consequently, PTRS_PER_PGD reflects the larger page table size for userspace and the pgd pointer for kernel page tables is offset before being written to TTBR1. This means that we can't use PTRS_PER_PGD to iterate over kernel page tables unless we apply the same offset, which is fiddly to get right and leads to some non-idiomatic walking code. Instead, just follow the usual pattern when walking page tables by using a while loop driven by pXd_offset() and pXd_addr_end(). Reported-by: Qian Cai Tested-by: Qian Cai Acked-by: Steve Capper Tested-by: Steve Capper Signed-off-by: Will Deacon --- arch/arm64/mm/dump.c | 59 ++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index fcb1f2a6d7c6..99bb8facb5cb 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -286,74 +286,73 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, } -static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start) +static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start, + unsigned long end) { - pte_t *ptep = pte_offset_kernel(pmdp, 0UL); - unsigned long addr; - unsigned i; + unsigned long addr = start; + pte_t *ptep = pte_offset_kernel(pmdp, start); - for (i = 0; i < PTRS_PER_PTE; i++, ptep++) { - addr = start + i * PAGE_SIZE; + do { note_page(st, addr, 4, READ_ONCE(pte_val(*ptep))); - } + } while (ptep++, addr += PAGE_SIZE, addr != end); } -static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start) +static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start, + unsigned long end) { - pmd_t *pmdp = pmd_offset(pudp, 0UL); - unsigned long addr; - unsigned i; + unsigned long next, addr = start; + pmd_t *pmdp = pmd_offset(pudp, start); - for (i = 0; i < PTRS_PER_PMD; i++, pmdp++) { + do { pmd_t pmd = READ_ONCE(*pmdp); + next = pmd_addr_end(addr, end); - addr = start + i * PMD_SIZE; if (pmd_none(pmd) || pmd_sect(pmd)) { note_page(st, addr, 3, pmd_val(pmd)); } else { BUG_ON(pmd_bad(pmd)); - walk_pte(st, pmdp, addr); + walk_pte(st, pmdp, addr, next); } - } + } while (pmdp++, addr = next, addr != end); } -static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start) +static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start, + unsigned long end) { - pud_t *pudp = pud_offset(pgdp, 0UL); - unsigned long addr; - unsigned i; + unsigned long next, addr = start; + pud_t *pudp = pud_offset(pgdp, start); - for (i = 0; i < PTRS_PER_PUD; i++, pudp++) { + do { pud_t pud = READ_ONCE(*pudp); + next = pud_addr_end(addr, end); - addr = start + i * PUD_SIZE; if (pud_none(pud) || pud_sect(pud)) { note_page(st, addr, 2, pud_val(pud)); } else { BUG_ON(pud_bad(pud)); - walk_pmd(st, pudp, addr); + walk_pmd(st, pudp, addr, next); } - } + } while (pudp++, addr = next, addr != end); } static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start) { - pgd_t *pgdp = pgd_offset(mm, 0UL); - unsigned i; - unsigned long addr; + unsigned long end = (start < TASK_SIZE_64) ? TASK_SIZE_64 : 0; + unsigned long next, addr = start; + pgd_t *pgdp = pgd_offset(mm, start); - for (i = 0; i < PTRS_PER_PGD; i++, pgdp++) { + do { pgd_t pgd = READ_ONCE(*pgdp); + next = pgd_addr_end(addr, end); - addr = start + i * PGDIR_SIZE; if (pgd_none(pgd)) { note_page(st, addr, 1, pgd_val(pgd)); } else { BUG_ON(pgd_bad(pgd)); - walk_pud(st, pgdp, addr); + walk_pud(st, pgdp, addr, next); } - } + } while (pgdp++, addr = next, addr != end); } void ptdump_walk_pgd(struct seq_file *m, struct ptdump_info *info) From f6ac8585897684374a19863fff21186a05805286 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 2 Feb 2019 10:49:13 +0100 Subject: [PATCH 0563/1436] netfilter: nf_tables: unbind set in rule from commit path Anonymous sets that are bound to rules from the same transaction trigger a kernel splat from the abort path due to double set list removal and double free. This patch updates the logic to search for the transaction that is responsible for creating the set and disable the set list removal and release, given the rule is now responsible for this. Lookup is reverse since the transaction that adds the set is likely to be at the tail of the list. Moreover, this patch adds the unbind step to deliver the event from the commit path. This should not be done from the worker thread, since we have no guarantees of in-order delivery to the listener. This patch removes the assumption that both activate and deactivate callbacks need to be provided. Fixes: cd5125d8f518 ("netfilter: nf_tables: split set destruction in deactivate and destroy phase") Reported-by: Mikhail Morfikov Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 17 +++++-- net/netfilter/nf_tables_api.c | 85 +++++++++++++++---------------- net/netfilter/nft_compat.c | 6 ++- net/netfilter/nft_dynset.c | 18 +++---- net/netfilter/nft_immediate.c | 6 ++- net/netfilter/nft_lookup.c | 18 +++---- net/netfilter/nft_objref.c | 18 +++---- 7 files changed, 85 insertions(+), 83 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 841835a387e1..b4984bbbe157 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -469,9 +469,7 @@ struct nft_set_binding { int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding); -void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding); + struct nft_set_binding *binding, bool commit); void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set); /** @@ -721,6 +719,13 @@ struct nft_expr_type { #define NFT_EXPR_STATEFUL 0x1 #define NFT_EXPR_GC 0x2 +enum nft_trans_phase { + NFT_TRANS_PREPARE, + NFT_TRANS_ABORT, + NFT_TRANS_COMMIT, + NFT_TRANS_RELEASE +}; + /** * struct nft_expr_ops - nf_tables expression operations * @@ -750,7 +755,8 @@ struct nft_expr_ops { void (*activate)(const struct nft_ctx *ctx, const struct nft_expr *expr); void (*deactivate)(const struct nft_ctx *ctx, - const struct nft_expr *expr); + const struct nft_expr *expr, + enum nft_trans_phase phase); void (*destroy)(const struct nft_ctx *ctx, const struct nft_expr *expr); void (*destroy_clone)(const struct nft_ctx *ctx, @@ -1323,12 +1329,15 @@ struct nft_trans_rule { struct nft_trans_set { struct nft_set *set; u32 set_id; + bool bound; }; #define nft_trans_set(trans) \ (((struct nft_trans_set *)trans->data)->set) #define nft_trans_set_id(trans) \ (((struct nft_trans_set *)trans->data)->set_id) +#define nft_trans_set_bound(trans) \ + (((struct nft_trans_set *)trans->data)->bound) struct nft_trans_chain { bool update; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fb07f6cfc719..5a92f23f179f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -116,6 +116,23 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } +static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct net *net = ctx->net; + struct nft_trans *trans; + + if (!nft_set_is_anonymous(set)) + return; + + list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { + if (trans->msg_type == NFT_MSG_NEWSET && + nft_trans_set(trans) == set) { + nft_trans_set_bound(trans) = true; + break; + } + } +} + static int nf_tables_register_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain) @@ -211,18 +228,6 @@ static int nft_delchain(struct nft_ctx *ctx) return err; } -/* either expr ops provide both activate/deactivate, or neither */ -static bool nft_expr_check_ops(const struct nft_expr_ops *ops) -{ - if (!ops) - return true; - - if (WARN_ON_ONCE((!ops->activate ^ !ops->deactivate))) - return false; - - return true; -} - static void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule) { @@ -238,14 +243,15 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx, } static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, - struct nft_rule *rule) + struct nft_rule *rule, + enum nft_trans_phase phase) { struct nft_expr *expr; expr = nft_expr_first(rule); while (expr != nft_expr_last(rule) && expr->ops) { if (expr->ops->deactivate) - expr->ops->deactivate(ctx, expr); + expr->ops->deactivate(ctx, expr, phase); expr = nft_expr_next(expr); } @@ -296,7 +302,7 @@ static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule) nft_trans_destroy(trans); return err; } - nft_rule_expr_deactivate(ctx, rule); + nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_PREPARE); return 0; } @@ -1929,9 +1935,6 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, */ int nft_register_expr(struct nft_expr_type *type) { - if (!nft_expr_check_ops(type->ops)) - return -EINVAL; - nfnl_lock(NFNL_SUBSYS_NFTABLES); if (type->family == NFPROTO_UNSPEC) list_add_tail_rcu(&type->list, &nf_tables_expressions); @@ -2079,10 +2082,6 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, err = PTR_ERR(ops); goto err1; } - if (!nft_expr_check_ops(ops)) { - err = -EINVAL; - goto err1; - } } else ops = type->ops; @@ -2511,7 +2510,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) { - nft_rule_expr_deactivate(ctx, rule); + nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE); nf_tables_rule_destroy(ctx, rule); } @@ -3708,39 +3707,30 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, bind: binding->chain = ctx->chain; list_add_tail_rcu(&binding->list, &set->bindings); + nft_set_trans_bind(ctx, set); + return 0; } EXPORT_SYMBOL_GPL(nf_tables_bind_set); -void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding) -{ - if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && - nft_is_active(ctx->net, set)) - list_add_tail_rcu(&set->list, &ctx->table->sets); - - list_add_tail_rcu(&binding->list, &set->bindings); -} -EXPORT_SYMBOL_GPL(nf_tables_rebind_set); - void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding) + struct nft_set_binding *binding, bool event) { list_del_rcu(&binding->list); - if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && - nft_is_active(ctx->net, set)) + if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); + if (event) + nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, + GFP_KERNEL); + } } EXPORT_SYMBOL_GPL(nf_tables_unbind_set); void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set) { - if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && - nft_is_active(ctx->net, set)) { - nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC); + if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) nft_set_destroy(set); - } } EXPORT_SYMBOL_GPL(nf_tables_destroy_set); @@ -6535,6 +6525,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_rule_notify(&trans->ctx, nft_trans_rule(trans), NFT_MSG_DELRULE); + nft_rule_expr_deactivate(&trans->ctx, + nft_trans_rule(trans), + NFT_TRANS_COMMIT); break; case NFT_MSG_NEWSET: nft_clear(net, nft_trans_set(trans)); @@ -6621,7 +6614,8 @@ static void nf_tables_abort_release(struct nft_trans *trans) nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_NEWSET: - nft_set_destroy(nft_trans_set(trans)); + if (!nft_trans_set_bound(trans)) + nft_set_destroy(nft_trans_set(trans)); break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), @@ -6682,7 +6676,9 @@ static int __nf_tables_abort(struct net *net) case NFT_MSG_NEWRULE: trans->ctx.chain->use--; list_del_rcu(&nft_trans_rule(trans)->list); - nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans)); + nft_rule_expr_deactivate(&trans->ctx, + nft_trans_rule(trans), + NFT_TRANS_ABORT); break; case NFT_MSG_DELRULE: trans->ctx.chain->use++; @@ -6692,7 +6688,8 @@ static int __nf_tables_abort(struct net *net) break; case NFT_MSG_NEWSET: trans->ctx.table->use--; - list_del_rcu(&nft_trans_set(trans)->list); + if (!nft_trans_set_bound(trans)) + list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: trans->ctx.table->use++; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 5eb269428832..0732a2fc697c 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -587,10 +587,14 @@ static void nft_compat_activate_tg(const struct nft_ctx *ctx, } static void nft_compat_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); + if (phase == NFT_TRANS_COMMIT) + return; + if (--xt->listcnt == 0) list_del_init(&xt->head); } diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 07d4efd3d851..f1172f99752b 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -235,20 +235,17 @@ err1: return err; } -static void nft_dynset_activate(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_dynset *priv = nft_expr_priv(expr); - - nf_tables_rebind_set(ctx, priv->set, &priv->binding); -} - static void nft_dynset_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_dynset *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding); + if (phase == NFT_TRANS_PREPARE) + return; + + nf_tables_unbind_set(ctx, priv->set, &priv->binding, + phase == NFT_TRANS_COMMIT); } static void nft_dynset_destroy(const struct nft_ctx *ctx, @@ -296,7 +293,6 @@ static const struct nft_expr_ops nft_dynset_ops = { .eval = nft_dynset_eval, .init = nft_dynset_init, .destroy = nft_dynset_destroy, - .activate = nft_dynset_activate, .deactivate = nft_dynset_deactivate, .dump = nft_dynset_dump, }; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 0777a93211e2..3f6d1d2a6281 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -72,10 +72,14 @@ static void nft_immediate_activate(const struct nft_ctx *ctx, } static void nft_immediate_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); + if (phase == NFT_TRANS_COMMIT) + return; + return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg)); } diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 227b2b15a19c..14496da5141d 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -121,20 +121,17 @@ static int nft_lookup_init(const struct nft_ctx *ctx, return 0; } -static void nft_lookup_activate(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_lookup *priv = nft_expr_priv(expr); - - nf_tables_rebind_set(ctx, priv->set, &priv->binding); -} - static void nft_lookup_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_lookup *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding); + if (phase == NFT_TRANS_PREPARE) + return; + + nf_tables_unbind_set(ctx, priv->set, &priv->binding, + phase == NFT_TRANS_COMMIT); } static void nft_lookup_destroy(const struct nft_ctx *ctx, @@ -225,7 +222,6 @@ static const struct nft_expr_ops nft_lookup_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), .eval = nft_lookup_eval, .init = nft_lookup_init, - .activate = nft_lookup_activate, .deactivate = nft_lookup_deactivate, .destroy = nft_lookup_destroy, .dump = nft_lookup_dump, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index a3185ca2a3a9..ae178e914486 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -155,20 +155,17 @@ nla_put_failure: return -1; } -static void nft_objref_map_activate(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_objref_map *priv = nft_expr_priv(expr); - - nf_tables_rebind_set(ctx, priv->set, &priv->binding); -} - static void nft_objref_map_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_objref_map *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding); + if (phase == NFT_TRANS_PREPARE) + return; + + nf_tables_unbind_set(ctx, priv->set, &priv->binding, + phase == NFT_TRANS_COMMIT); } static void nft_objref_map_destroy(const struct nft_ctx *ctx, @@ -185,7 +182,6 @@ static const struct nft_expr_ops nft_objref_map_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)), .eval = nft_objref_map_eval, .init = nft_objref_map_init, - .activate = nft_objref_map_activate, .deactivate = nft_objref_map_deactivate, .destroy = nft_objref_map_destroy, .dump = nft_objref_map_dump, From 53bc8d2af08654659abfadfd3e98eb9922ff787c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 4 Feb 2019 11:20:29 +0100 Subject: [PATCH 0564/1436] net: dp83640: expire old TX-skb During sendmsg() a cloned skb is saved via dp83640_txtstamp() in ->tx_queue. After the NIC sends this packet, the PHY will reply with a timestamp for that TX packet. If the cable is pulled at the right time I don't see that packet. It might gets flushed as part of queue shutdown on NIC's side. Once the link is up again then after the next sendmsg() we enqueue another skb in dp83640_txtstamp() and have two on the list. Then the PHY will send a reply and decode_txts() attaches it to the first skb on the list. No crash occurs since refcounting works but we are one packet behind. linuxptp/ptp4l usually closes the socket and opens a new one (in such a timeout case) so those "stale" replies never get there. However it does not resume normal operation anymore. Purge old skbs in decode_txts(). Fixes: cb646e2b02b2 ("ptp: Added a clock driver for the National Semiconductor PHYTER.") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Kurt Kanzenbach Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/phy/dp83640.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 18b41bc345ab..6e8807212aa3 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -898,14 +898,14 @@ static void decode_txts(struct dp83640_private *dp83640, struct phy_txts *phy_txts) { struct skb_shared_hwtstamps shhwtstamps; + struct dp83640_skb_info *skb_info; struct sk_buff *skb; - u64 ns; u8 overflow; + u64 ns; /* We must already have the skb that triggered this. */ - +again: skb = skb_dequeue(&dp83640->tx_queue); - if (!skb) { pr_debug("have timestamp but tx_queue empty\n"); return; @@ -920,6 +920,11 @@ static void decode_txts(struct dp83640_private *dp83640, } return; } + skb_info = (struct dp83640_skb_info *)skb->cb; + if (time_after(jiffies, skb_info->tmo)) { + kfree_skb(skb); + goto again; + } ns = phy2txts(phy_txts); memset(&shhwtstamps, 0, sizeof(shhwtstamps)); @@ -1472,6 +1477,7 @@ static bool dp83640_rxtstamp(struct phy_device *phydev, static void dp83640_txtstamp(struct phy_device *phydev, struct sk_buff *skb, int type) { + struct dp83640_skb_info *skb_info = (struct dp83640_skb_info *)skb->cb; struct dp83640_private *dp83640 = phydev->priv; switch (dp83640->hwts_tx_en) { @@ -1484,6 +1490,7 @@ static void dp83640_txtstamp(struct phy_device *phydev, /* fall through */ case HWTSTAMP_TX_ON: skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + skb_info->tmo = jiffies + SKB_TIMESTAMP_TIMEOUT; skb_queue_tail(&dp83640->tx_queue, skb); break; From ad6f317f720f4a3121756c23831a43dda9b095e5 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 4 Feb 2019 13:44:44 +0100 Subject: [PATCH 0565/1436] net/smc: preallocated memory for rdma work requests The work requests for rdma writes are built in local variables within function smc_tx_rdma_write(). This violates the rule that the work request storage has to stay till the work request is confirmed by a completion queue response. This patch introduces preallocated memory for these work requests. The storage is allocated, once a link (and thus a queue pair) is established. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 11 +++-------- net/smc/smc_cdc.h | 8 +++++++- net/smc/smc_core.h | 20 ++++++++++++++++++++ net/smc/smc_llc.c | 3 ++- net/smc/smc_tx.c | 44 ++++++++++++++++++++++---------------------- net/smc/smc_wr.c | 38 +++++++++++++++++++++++++++++++++++++- net/smc/smc_wr.h | 1 + 7 files changed, 92 insertions(+), 33 deletions(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 1c5333d494e9..b80ef104ab4e 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -21,13 +21,6 @@ /********************************** send *************************************/ -struct smc_cdc_tx_pend { - struct smc_connection *conn; /* socket connection */ - union smc_host_cursor cursor; /* tx sndbuf cursor sent */ - union smc_host_cursor p_cursor; /* rx RMBE cursor produced */ - u16 ctrl_seq; /* conn. tx sequence # */ -}; - /* handler for send/transmission completion of a CDC msg */ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, struct smc_link *link, @@ -61,12 +54,14 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend) { struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; int rc; rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, + wr_rdma_buf, (struct smc_wr_tx_pend_priv **)pend); if (!conn->alert_token_local) /* abnormal termination */ @@ -121,7 +116,7 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn) struct smc_wr_buf *wr_buf; int rc; - rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); + rc = smc_cdc_get_free_slot(conn, &wr_buf, NULL, &pend); if (rc) return rc; diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index b5bfe38c7f9b..2148da7a26b1 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -270,10 +270,16 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local, smcr_cdc_msg_to_host(local, peer, conn); } -struct smc_cdc_tx_pend; +struct smc_cdc_tx_pend { + struct smc_connection *conn; /* socket connection */ + union smc_host_cursor cursor; /* tx sndbuf cursor sent */ + union smc_host_cursor p_cursor; /* rx RMBE cursor produced */ + u16 ctrl_seq; /* conn. tx sequence # */ +}; int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend); void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index b00287989a3d..8806d2afa6ed 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -52,6 +52,24 @@ enum smc_wr_reg_state { FAILED /* ib_wr_reg_mr response: failure */ }; +struct smc_rdma_sge { /* sges for RDMA writes */ + struct ib_sge wr_tx_rdma_sge[SMC_IB_MAX_SEND_SGE]; +}; + +#define SMC_MAX_RDMA_WRITES 2 /* max. # of RDMA writes per + * message send + */ + +struct smc_rdma_sges { /* sges per message send */ + struct smc_rdma_sge tx_rdma_sge[SMC_MAX_RDMA_WRITES]; +}; + +struct smc_rdma_wr { /* work requests per message + * send + */ + struct ib_rdma_wr wr_tx_rdma[SMC_MAX_RDMA_WRITES]; +}; + struct smc_link { struct smc_ib_device *smcibdev; /* ib-device */ u8 ibport; /* port - values 1 | 2 */ @@ -64,6 +82,8 @@ struct smc_link { struct smc_wr_buf *wr_tx_bufs; /* WR send payload buffers */ struct ib_send_wr *wr_tx_ibs; /* WR send meta data */ struct ib_sge *wr_tx_sges; /* WR send gather meta data */ + struct smc_rdma_sges *wr_tx_rdma_sges;/*RDMA WRITE gather meta data*/ + struct smc_rdma_wr *wr_tx_rdmas; /* WR RDMA WRITE */ struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */ /* above four vectors have wr_tx_cnt elements and use the same index */ dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */ diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index a6d3623d06f4..4fd60c522802 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -166,7 +166,8 @@ static int smc_llc_add_pending_send(struct smc_link *link, { int rc; - rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, pend); + rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, NULL, + pend); if (rc < 0) return rc; BUILD_BUG_ON_MSG( diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 36af3de731b9..2fdfaff60cf9 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -266,27 +266,23 @@ int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len, /* sndbuf consumer: actual data transfer of one target chunk with RDMA write */ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, - int num_sges, struct ib_sge sges[]) + int num_sges, struct ib_rdma_wr *rdma_wr) { struct smc_link_group *lgr = conn->lgr; - struct ib_rdma_wr rdma_wr; struct smc_link *link; int rc; - memset(&rdma_wr, 0, sizeof(rdma_wr)); link = &lgr->lnk[SMC_SINGLE_LINK]; - rdma_wr.wr.wr_id = smc_wr_tx_get_next_wr_id(link); - rdma_wr.wr.sg_list = sges; - rdma_wr.wr.num_sge = num_sges; - rdma_wr.wr.opcode = IB_WR_RDMA_WRITE; - rdma_wr.remote_addr = + rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link); + rdma_wr->wr.num_sge = num_sges; + rdma_wr->remote_addr = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr + /* RMBE within RMB */ conn->tx_off + /* offset within RMBE */ peer_rmbe_offset; - rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; - rc = ib_post_send(link->roce_qp, &rdma_wr.wr, NULL); + rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; + rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL); if (rc) { conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; smc_lgr_terminate(lgr); @@ -313,24 +309,25 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn, /* SMC-R helper for smc_tx_rdma_writes() */ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, size_t src_off, size_t src_len, - size_t dst_off, size_t dst_len) + size_t dst_off, size_t dst_len, + struct smc_rdma_wr *wr_rdma_buf) { dma_addr_t dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl); - struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; int src_len_sum = src_len, dst_len_sum = dst_len; - struct ib_sge sges[SMC_IB_MAX_SEND_SGE]; int sent_count = src_off; int srcchunk, dstchunk; int num_sges; int rc; for (dstchunk = 0; dstchunk < 2; dstchunk++) { + struct ib_sge *sge = + wr_rdma_buf->wr_tx_rdma[dstchunk].wr.sg_list; + num_sges = 0; for (srcchunk = 0; srcchunk < 2; srcchunk++) { - sges[srcchunk].addr = dma_addr + src_off; - sges[srcchunk].length = src_len; - sges[srcchunk].lkey = link->roce_pd->local_dma_lkey; + sge[srcchunk].addr = dma_addr + src_off; + sge[srcchunk].length = src_len; num_sges++; src_off += src_len; @@ -343,7 +340,8 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, src_len = dst_len - src_len; /* remainder */ src_len_sum += src_len; } - rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges); + rc = smc_tx_rdma_write(conn, dst_off, num_sges, + &wr_rdma_buf->wr_tx_rdma[dstchunk]); if (rc) return rc; if (dst_len_sum == len) @@ -402,7 +400,8 @@ static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len, /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit; * usable snd_wnd as max transmit */ -static int smc_tx_rdma_writes(struct smc_connection *conn) +static int smc_tx_rdma_writes(struct smc_connection *conn, + struct smc_rdma_wr *wr_rdma_buf) { size_t len, src_len, dst_off, dst_len; /* current chunk values */ union smc_host_cursor sent, prep, prod, cons; @@ -463,7 +462,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) dst_off, dst_len); else rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len, - dst_off, dst_len); + dst_off, dst_len, wr_rdma_buf); if (rc) return rc; @@ -484,11 +483,12 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) { struct smc_cdc_producer_flags *pflags; + struct smc_rdma_wr *wr_rdma_buf; struct smc_cdc_tx_pend *pend; struct smc_wr_buf *wr_buf; int rc; - rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); + rc = smc_cdc_get_free_slot(conn, &wr_buf, &wr_rdma_buf, &pend); if (rc < 0) { if (rc == -EBUSY) { struct smc_sock *smc = @@ -506,7 +506,7 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) spin_lock_bh(&conn->send_lock); if (!conn->local_tx_ctrl.prod_flags.urg_data_present) { - rc = smc_tx_rdma_writes(conn); + rc = smc_tx_rdma_writes(conn, wr_rdma_buf); if (rc) { smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], (struct smc_wr_tx_pend_priv *)pend); @@ -533,7 +533,7 @@ static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn) spin_lock_bh(&conn->send_lock); if (!pflags->urg_data_present) - rc = smc_tx_rdma_writes(conn); + rc = smc_tx_rdma_writes(conn, NULL); if (!rc) rc = smcd_cdc_msg_send(conn); diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 1dc88c32d6bb..253aa75dc2b6 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -160,6 +160,7 @@ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) * @link: Pointer to smc_link used to later send the message. * @handler: Send completion handler function pointer. * @wr_buf: Out value returns pointer to message buffer. + * @wr_rdma_buf: Out value returns pointer to rdma work request. * @wr_pend_priv: Out value returns pointer serving as handler context. * * Return: 0 on success, or -errno on error. @@ -167,6 +168,7 @@ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler, struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wr_rdma_buf, struct smc_wr_tx_pend_priv **wr_pend_priv) { struct smc_wr_tx_pend *wr_pend; @@ -204,6 +206,8 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, wr_ib = &link->wr_tx_ibs[idx]; wr_ib->wr_id = wr_id; *wr_buf = &link->wr_tx_bufs[idx]; + if (wr_rdma_buf) + *wr_rdma_buf = &link->wr_tx_rdmas[idx]; *wr_pend_priv = &wr_pend->priv; return 0; } @@ -465,12 +469,26 @@ static void smc_wr_init_sge(struct smc_link *lnk) lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE; lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE; lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey; + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[0].lkey = + lnk->roce_pd->local_dma_lkey; + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[1].lkey = + lnk->roce_pd->local_dma_lkey; + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[0].lkey = + lnk->roce_pd->local_dma_lkey; + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[1].lkey = + lnk->roce_pd->local_dma_lkey; lnk->wr_tx_ibs[i].next = NULL; lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i]; lnk->wr_tx_ibs[i].num_sge = 1; lnk->wr_tx_ibs[i].opcode = IB_WR_SEND; lnk->wr_tx_ibs[i].send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED; + lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE; + lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE; + lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list = + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge; + lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list = + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge; } for (i = 0; i < lnk->wr_rx_cnt; i++) { lnk->wr_rx_sges[i].addr = @@ -521,8 +539,12 @@ void smc_wr_free_link_mem(struct smc_link *lnk) lnk->wr_tx_mask = NULL; kfree(lnk->wr_tx_sges); lnk->wr_tx_sges = NULL; + kfree(lnk->wr_tx_rdma_sges); + lnk->wr_tx_rdma_sges = NULL; kfree(lnk->wr_rx_sges); lnk->wr_rx_sges = NULL; + kfree(lnk->wr_tx_rdmas); + lnk->wr_tx_rdmas = NULL; kfree(lnk->wr_rx_ibs); lnk->wr_rx_ibs = NULL; kfree(lnk->wr_tx_ibs); @@ -552,10 +574,20 @@ int smc_wr_alloc_link_mem(struct smc_link *link) GFP_KERNEL); if (!link->wr_rx_ibs) goto no_mem_wr_tx_ibs; + link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT, + sizeof(link->wr_tx_rdmas[0]), + GFP_KERNEL); + if (!link->wr_tx_rdmas) + goto no_mem_wr_rx_ibs; + link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT, + sizeof(link->wr_tx_rdma_sges[0]), + GFP_KERNEL); + if (!link->wr_tx_rdma_sges) + goto no_mem_wr_tx_rdmas; link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]), GFP_KERNEL); if (!link->wr_tx_sges) - goto no_mem_wr_rx_ibs; + goto no_mem_wr_tx_rdma_sges; link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3, sizeof(link->wr_rx_sges[0]), GFP_KERNEL); @@ -579,6 +611,10 @@ no_mem_wr_rx_sges: kfree(link->wr_rx_sges); no_mem_wr_tx_sges: kfree(link->wr_tx_sges); +no_mem_wr_tx_rdma_sges: + kfree(link->wr_tx_rdma_sges); +no_mem_wr_tx_rdmas: + kfree(link->wr_tx_rdmas); no_mem_wr_rx_ibs: kfree(link->wr_rx_ibs); no_mem_wr_tx_ibs: diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 1d85bb14fd6f..09bf32fd3959 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -85,6 +85,7 @@ void smc_wr_add_dev(struct smc_ib_device *smcibdev); int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler, struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wrs, struct smc_wr_tx_pend_priv **wr_pend_priv); int smc_wr_tx_put_slot(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv); From b8649efad879c69c7ab1f19ce8814fcabef1f72b Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 4 Feb 2019 13:44:45 +0100 Subject: [PATCH 0566/1436] net/smc: fix sender_free computation In some scenarios a separate consumer cursor update is necessary. The decision is made in smc_tx_consumer_cursor_update(). The sender_free computation could be wrong: The rx confirmed cursor is always smaller than or equal to the rx producer cursor. The parameters in the smc_curs_diff() call have to be exchanged, otherwise sender_free might even be negative. And if more data arrives local_rx_ctrl.prod might be updated, enabling a cursor difference between local_rx_ctrl.prod and rx confirmed cursor larger than the RMB size. This case is not covered by smc_curs_diff(). Thus function smc_curs_diff_large() is introduced here. If a recvmsg() is processed in parallel, local_tx_ctrl.cons might change during smc_cdc_msg_send. Make sure rx_curs_confirmed is updated with the actually sent local_tx_ctrl.cons value. Fixes: e82f2e31f559 ("net/smc: optimize consumer cursor updates") Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 5 +++-- net/smc/smc_cdc.h | 26 +++++++++++++++++++++++++- net/smc/smc_tx.c | 3 ++- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index b80ef104ab4e..a712c9f8699b 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -91,6 +91,7 @@ int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend) { + union smc_host_cursor cfed; struct smc_link *link; int rc; @@ -102,10 +103,10 @@ int smc_cdc_msg_send(struct smc_connection *conn, conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, &conn->local_tx_ctrl, conn); + smc_curs_copy(&cfed, &((struct smc_host_cdc_msg *)wr_buf)->cons, conn); rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); if (!rc) - smc_curs_copy(&conn->rx_curs_confirmed, - &conn->local_tx_ctrl.cons, conn); + smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn); return rc; } diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 2148da7a26b1..271e2524dc8f 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -160,7 +160,9 @@ static inline void smcd_curs_copy(union smcd_cdc_cursor *tgt, #endif } -/* calculate cursor difference between old and new, where old <= new */ +/* calculate cursor difference between old and new, where old <= new and + * difference cannot exceed size + */ static inline int smc_curs_diff(unsigned int size, union smc_host_cursor *old, union smc_host_cursor *new) @@ -185,6 +187,28 @@ static inline int smc_curs_comp(unsigned int size, return smc_curs_diff(size, old, new); } +/* calculate cursor difference between old and new, where old <= new and + * difference may exceed size + */ +static inline int smc_curs_diff_large(unsigned int size, + union smc_host_cursor *old, + union smc_host_cursor *new) +{ + if (old->wrap < new->wrap) + return min_t(int, + (size - old->count) + new->count + + (new->wrap - old->wrap - 1) * size, + size); + + if (old->wrap > new->wrap) /* wrap has switched from 0xffff to 0x0000 */ + return min_t(int, + (size - old->count) + new->count + + (new->wrap + 0xffff - old->wrap) * size, + size); + + return max_t(int, 0, (new->count - old->count)); +} + static inline void smc_host_cursor_to_cdc(union smc_cdc_cursor *peer, union smc_host_cursor *local, struct smc_connection *conn) diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 2fdfaff60cf9..f93f3580c100 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -595,7 +595,8 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force) if (to_confirm > conn->rmbe_update_limit) { smc_curs_copy(&prod, &conn->local_rx_ctrl.prod, conn); sender_free = conn->rmb_desc->len - - smc_curs_diff(conn->rmb_desc->len, &prod, &cfed); + smc_curs_diff_large(conn->rmb_desc->len, + &cfed, &prod); } if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || From a5e04318c83a31925300af1ce358dbc1a708b732 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 4 Feb 2019 13:44:46 +0100 Subject: [PATCH 0567/1436] net/smc: delete rkey first before switching to unused Once RMBs are flagged as unused they are candidates for reuse. Thus the LLC DELETE RKEY operaton should be made before flagging the RMB as unused. Fixes: c7674c001b11 ("net/smc: unregister rkeys of unused buffer") Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 097c798983ca..aa1c551cee81 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -302,13 +302,13 @@ static void smc_buf_unuse(struct smc_connection *conn, conn->sndbuf_desc->used = 0; if (conn->rmb_desc) { if (!conn->rmb_desc->regerr) { - conn->rmb_desc->used = 0; if (!lgr->is_smcd) { /* unregister rmb with peer */ smc_llc_do_delete_rkey( &lgr->lnk[SMC_SINGLE_LINK], conn->rmb_desc); } + conn->rmb_desc->used = 0; } else { /* buf registration failed, reuse not possible */ write_lock_bh(&lgr->rmbs_lock); From 84b799a292ebc03de388f8573a169de6eb12c340 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 4 Feb 2019 13:44:47 +0100 Subject: [PATCH 0568/1436] net/smc: correct state change for peer closing If some kind of closing is received from the peer while still in state SMC_INIT, it means the peer has had an active connection and closed the socket quickly before listen_work finished. This should not result in a shortcut from state SMC_INIT to state SMC_CLOSED. This patch adds the socket to the accept queue in state SMC_APPCLOSEWAIT1. The socket reaches state SMC_CLOSED once being accepted and closed with smc_release(). Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_close.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index ea2b87f29469..e39cadda1bf5 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -345,14 +345,7 @@ static void smc_close_passive_work(struct work_struct *work) switch (sk->sk_state) { case SMC_INIT: - if (atomic_read(&conn->bytes_to_rcv) || - (rxflags->peer_done_writing && - !smc_cdc_rxed_any_close(conn))) { - sk->sk_state = SMC_APPCLOSEWAIT1; - } else { - sk->sk_state = SMC_CLOSED; - sock_put(sk); /* passive closing */ - } + sk->sk_state = SMC_APPCLOSEWAIT1; break; case SMC_ACTIVE: sk->sk_state = SMC_APPCLOSEWAIT1; From c1f7e02979edd7a3a3e69fe04be60b1d650dc8a7 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 4 Feb 2019 14:50:38 +0000 Subject: [PATCH 0569/1436] net: cls_flower: Remove filter from mask before freeing it In fl_change(), when adding a new rule (i.e. fold == NULL), a driver may reject the new rule, for example due to resource exhaustion. By that point, the new rule was already assigned a mask, and it was added to that mask's hash table. The clean-up path that's invoked as a result of the rejection however neglects to undo the hash table addition, and proceeds to free the new rule, thus leaving a dangling pointer in the hash table. Fix by removing fnew from the mask's hash table before it is freed. Fixes: 35cc3cefc4de ("net/sched: cls_flower: Reject duplicated rules also under skip_sw") Signed-off-by: Petr Machata Acked-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index f6aa57fbbbaf..12ca9d13db83 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1371,7 +1371,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (!tc_skip_hw(fnew->flags)) { err = fl_hw_replace_filter(tp, fnew, extack); if (err) - goto errout_mask; + goto errout_mask_ht; } if (!tc_in_hw(fnew->flags)) @@ -1401,6 +1401,10 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, kfree(mask); return 0; +errout_mask_ht: + rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node, + fnew->mask->filter_ht_params); + errout_mask: fl_mask_put(head, fnew->mask, false); From 960587285a56ec3cafb4d1e6b25c19eced4d0bce Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 2 Feb 2019 10:16:59 +0100 Subject: [PATCH 0570/1436] netfilter: nat: remove module dependency on ipv6 core nf_nat_ipv6 calls two ipv6 core functions, so add those to v6ops to avoid the module dependency. This is a prerequisite for merging ipv4 and ipv6 nat implementations. Add wrappers to avoid the indirection if ipv6 is builtin. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 6 ++++++ net/ipv6/netfilter.c | 4 ++++ net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 17 ++++++++++++++++- net/ipv6/netfilter/nf_nat_masquerade_ipv6.c | 21 +++++++++++++++++++-- 4 files changed, 45 insertions(+), 3 deletions(-) diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index c0dc4dd78887..ad4223c10488 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -33,6 +33,12 @@ struct nf_ipv6_ops { int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict); int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry); +#if IS_MODULE(CONFIG_IPV6) + int (*route_me_harder)(struct net *net, struct sk_buff *skb); + int (*dev_get_saddr)(struct net *net, const struct net_device *dev, + const struct in6_addr *daddr, unsigned int srcprefs, + struct in6_addr *saddr); +#endif }; #ifdef CONFIG_NETFILTER diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 8b075f0bc351..0a5caf263889 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -112,6 +112,10 @@ static const struct nf_ipv6_ops ipv6ops = { .fragment = ip6_fragment, .route = nf_ip6_route, .reroute = nf_ip6_reroute, +#if IS_MODULE(CONFIG_IPV6) + .route_me_harder = ip6_route_me_harder, + .dev_get_saddr = ipv6_dev_get_saddr, +#endif }; int __init ipv6_netfilter_init(void) diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 9c914db44bec..b52026adb3e7 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -317,6 +318,20 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb, return ret; } +static int nat_route_me_harder(struct net *net, struct sk_buff *skb) +{ +#ifdef CONFIG_IPV6_MODULE + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (!v6_ops) + return -EHOSTUNREACH; + + return v6_ops->route_me_harder(net, skb); +#else + return ip6_route_me_harder(net, skb); +#endif +} + static unsigned int nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -333,7 +348,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &ct->tuplehash[!dir].tuple.src.u3)) { - err = ip6_route_me_harder(state->net, skb); + err = nat_route_me_harder(state->net, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c index 0ad0da5a2600..fd313b726263 100644 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c @@ -24,6 +24,23 @@ static atomic_t v6_worker_count; +static int +nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, + const struct in6_addr *daddr, unsigned int srcprefs, + struct in6_addr *saddr) +{ +#ifdef CONFIG_IPV6_MODULE + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (!v6_ops) + return -EHOSTUNREACH; + + return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr); +#else + return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr); +#endif +} + unsigned int nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, const struct net_device *out) @@ -38,8 +55,8 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY))); - if (ipv6_dev_get_saddr(nf_ct_net(ct), out, - &ipv6_hdr(skb)->daddr, 0, &src) < 0) + if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out, + &ipv6_hdr(skb)->daddr, 0, &src) < 0) return NF_DROP; nat = nf_ct_nat_ext_add(ct); From ac02bcf9cc1e4aefb0a7156a2ae26e8396b15f24 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 2 Feb 2019 10:17:00 +0100 Subject: [PATCH 0571/1436] netfilter: ipv6: avoid indirect calls for IPV6=y case indirect calls are only needed if ipv6 is a module. Add helpers to abstract the v6ops indirections and use them instead. fragment, reroute and route_input are kept as indirect calls. The first two are not not used in hot path and route_input is only used by bridge netfilter. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 64 ++++++++++++++++++++++++------- net/ipv6/netfilter.c | 15 ++++---- net/ipv6/netfilter/nft_fib_ipv6.c | 9 +---- net/netfilter/utils.c | 6 +-- net/netfilter/xt_addrtype.c | 16 +++----- 5 files changed, 68 insertions(+), 42 deletions(-) diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index ad4223c10488..471e9467105b 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -25,29 +25,24 @@ struct nf_queue_entry; * if IPv6 is a module. */ struct nf_ipv6_ops { +#if IS_MODULE(CONFIG_IPV6) int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); - void (*route_input)(struct sk_buff *skb); - int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, - int (*output)(struct net *, struct sock *, struct sk_buff *)); - int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl, - bool strict); - int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry); -#if IS_MODULE(CONFIG_IPV6) int (*route_me_harder)(struct net *net, struct sk_buff *skb); int (*dev_get_saddr)(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr); + int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl, + bool strict); #endif + void (*route_input)(struct sk_buff *skb); + int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct net *, struct sock *, struct sk_buff *)); + int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry); }; #ifdef CONFIG_NETFILTER -int ip6_route_me_harder(struct net *net, struct sk_buff *skb); -__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, - unsigned int dataoff, u_int8_t protocol); - -int ipv6_netfilter_init(void); -void ipv6_netfilter_fini(void); +#include extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops; static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void) @@ -55,6 +50,49 @@ static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void) return rcu_dereference(nf_ipv6_ops); } +static inline int nf_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, + const struct net_device *dev, int strict) +{ +#if IS_MODULE(CONFIG_IPV6) + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (!v6_ops) + return 1; + + return v6_ops->chk_addr(net, addr, dev, strict); +#else + return ipv6_chk_addr(net, addr, dev, strict); +#endif +} + +int __nf_ip6_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict); + +static inline int nf_ip6_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict) +{ +#if IS_MODULE(CONFIG_IPV6) + const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); + + if (v6ops) + return v6ops->route(net, dst, fl, strict); + + return -EHOSTUNREACH; +#endif +#if IS_BUILTIN(CONFIG_IPV6) + return __nf_ip6_route(net, dst, fl, strict); +#else + return -EHOSTUNREACH; +#endif +} + +int ip6_route_me_harder(struct net *net, struct sk_buff *skb); +__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, + unsigned int dataoff, u_int8_t protocol); + +int ipv6_netfilter_init(void); +void ipv6_netfilter_fini(void); + #else /* CONFIG_NETFILTER */ static inline int ipv6_netfilter_init(void) { return 0; } static inline void ipv6_netfilter_fini(void) { return; } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 0a5caf263889..a8263031f3a6 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -84,8 +84,8 @@ static int nf_ip6_reroute(struct sk_buff *skb, return 0; } -static int nf_ip6_route(struct net *net, struct dst_entry **dst, - struct flowi *fl, bool strict) +int __nf_ip6_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict) { static const struct ipv6_pinfo fake_pinfo; static const struct inet_sock fake_sk = { @@ -105,17 +105,18 @@ static int nf_ip6_route(struct net *net, struct dst_entry **dst, *dst = result; return err; } +EXPORT_SYMBOL_GPL(__nf_ip6_route); static const struct nf_ipv6_ops ipv6ops = { - .chk_addr = ipv6_chk_addr, - .route_input = ip6_route_input, - .fragment = ip6_fragment, - .route = nf_ip6_route, - .reroute = nf_ip6_reroute, #if IS_MODULE(CONFIG_IPV6) + .chk_addr = ipv6_chk_addr, .route_me_harder = ip6_route_me_harder, .dev_get_saddr = ipv6_dev_get_saddr, + .route = __nf_ip6_route, #endif + .route_input = ip6_route_input, + .fragment = ip6_fragment, + .reroute = nf_ip6_reroute, }; int __init ipv6_netfilter_init(void) diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 36be3cf0adef..73cdc0bc63f7 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -59,7 +59,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, struct ipv6hdr *iph) { const struct net_device *dev = NULL; - const struct nf_ipv6_ops *v6ops; int route_err, addrtype; struct rt6_info *rt; struct flowi6 fl6 = { @@ -68,10 +67,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, }; u32 ret = 0; - v6ops = nf_get_ipv6_ops(); - if (!v6ops) - return RTN_UNREACHABLE; - if (priv->flags & NFTA_FIB_F_IIF) dev = nft_in(pkt); else if (priv->flags & NFTA_FIB_F_OIF) @@ -79,10 +74,10 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); - if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) + if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) ret = RTN_LOCAL; - route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt, + route_err = nf_ip6_route(nft_net(pkt), (struct dst_entry **)&rt, flowi6_to_flowi(&fl6), false); if (route_err) goto err; diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index 55af9f247993..06dc55590441 100644 --- a/net/netfilter/utils.c +++ b/net/netfilter/utils.c @@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(nf_checksum_partial); int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict, unsigned short family) { - const struct nf_ipv6_ops *v6ops; + const struct nf_ipv6_ops *v6ops __maybe_unused; int ret = 0; switch (family) { @@ -170,9 +170,7 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, ret = nf_ip_route(net, dst, fl, strict); break; case AF_INET6: - v6ops = rcu_dereference(nf_ipv6_ops); - if (v6ops) - ret = v6ops->route(net, dst, fl, strict); + ret = nf_ip6_route(net, dst, fl, strict); break; } diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 89e281b3bfc2..29987ff03621 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -36,7 +36,6 @@ MODULE_ALIAS("ip6t_addrtype"); static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, const struct in6_addr *addr, u16 mask) { - const struct nf_ipv6_ops *v6ops; struct flowi6 flow; struct rt6_info *rt; u32 ret = 0; @@ -47,18 +46,13 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, if (dev) flow.flowi6_oif = dev->ifindex; - v6ops = nf_get_ipv6_ops(); - if (v6ops) { - if (dev && (mask & XT_ADDRTYPE_LOCAL)) { - if (v6ops->chk_addr(net, addr, dev, true)) - ret = XT_ADDRTYPE_LOCAL; - } - route_err = v6ops->route(net, (struct dst_entry **)&rt, - flowi6_to_flowi(&flow), false); - } else { - route_err = 1; + if (dev && (mask & XT_ADDRTYPE_LOCAL)) { + if (nf_ipv6_chk_addr(net, addr, dev, true)) + ret = XT_ADDRTYPE_LOCAL; } + route_err = nf_ip6_route(net, (struct dst_entry **)&rt, + flowi6_to_flowi(&flow), false); if (route_err) return XT_ADDRTYPE_UNREACHABLE; From 8461ef8b7ef286212ca954d8b82dac3ceecb219d Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 1 Feb 2019 16:14:14 -0800 Subject: [PATCH 0572/1436] tools/bpf: move libbpf pr_* debug print functions to headers A global function libbpf_print, which is invisible outside the shared library, is defined to print based on levels. The pr_warning, pr_info and pr_debug macros are moved into the newly created header common.h. So any .c file including common.h can use these macros directly. Currently btf__new and btf_ext__new API has an argument getting __pr_debug function pointer into btf.c so the debugging information can be printed there. This patch removed this parameter from btf__new and btf_ext__new and directly using pr_debug in btf.c. Another global function libbpf_print_level_available, also invisible outside the shared library, can test whether a particular level debug printing is available or not. It is used in btf.c to test whether DEBUG level debug printing is availabl or not, based on which the log buffer will be allocated when loading btf to the kernel. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 97 +++++++++++++++++------------------ tools/lib/bpf/btf.h | 7 +-- tools/lib/bpf/libbpf.c | 46 ++++++++++++----- tools/lib/bpf/libbpf.h | 6 +++ tools/lib/bpf/libbpf_util.h | 32 ++++++++++++ tools/lib/bpf/test_libbpf.cpp | 2 +- 6 files changed, 120 insertions(+), 70 deletions(-) create mode 100644 tools/lib/bpf/libbpf_util.h diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index d682d3b8f7b9..93e792b82242 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -9,8 +9,9 @@ #include #include "btf.h" #include "bpf.h" +#include "libbpf.h" +#include "libbpf_util.h" -#define elog(fmt, ...) { if (err_log) err_log(fmt, ##__VA_ARGS__); } #define max(a, b) ((a) > (b) ? (a) : (b)) #define min(a, b) ((a) < (b) ? (a) : (b)) @@ -107,54 +108,54 @@ static int btf_add_type(struct btf *btf, struct btf_type *t) return 0; } -static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log) +static int btf_parse_hdr(struct btf *btf) { const struct btf_header *hdr = btf->hdr; __u32 meta_left; if (btf->data_size < sizeof(struct btf_header)) { - elog("BTF header not found\n"); + pr_debug("BTF header not found\n"); return -EINVAL; } if (hdr->magic != BTF_MAGIC) { - elog("Invalid BTF magic:%x\n", hdr->magic); + pr_debug("Invalid BTF magic:%x\n", hdr->magic); return -EINVAL; } if (hdr->version != BTF_VERSION) { - elog("Unsupported BTF version:%u\n", hdr->version); + pr_debug("Unsupported BTF version:%u\n", hdr->version); return -ENOTSUP; } if (hdr->flags) { - elog("Unsupported BTF flags:%x\n", hdr->flags); + pr_debug("Unsupported BTF flags:%x\n", hdr->flags); return -ENOTSUP; } meta_left = btf->data_size - sizeof(*hdr); if (!meta_left) { - elog("BTF has no data\n"); + pr_debug("BTF has no data\n"); return -EINVAL; } if (meta_left < hdr->type_off) { - elog("Invalid BTF type section offset:%u\n", hdr->type_off); + pr_debug("Invalid BTF type section offset:%u\n", hdr->type_off); return -EINVAL; } if (meta_left < hdr->str_off) { - elog("Invalid BTF string section offset:%u\n", hdr->str_off); + pr_debug("Invalid BTF string section offset:%u\n", hdr->str_off); return -EINVAL; } if (hdr->type_off >= hdr->str_off) { - elog("BTF type section offset >= string section offset. No type?\n"); + pr_debug("BTF type section offset >= string section offset. No type?\n"); return -EINVAL; } if (hdr->type_off & 0x02) { - elog("BTF type section is not aligned to 4 bytes\n"); + pr_debug("BTF type section is not aligned to 4 bytes\n"); return -EINVAL; } @@ -163,7 +164,7 @@ static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log) return 0; } -static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log) +static int btf_parse_str_sec(struct btf *btf) { const struct btf_header *hdr = btf->hdr; const char *start = btf->nohdr_data + hdr->str_off; @@ -171,7 +172,7 @@ static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log) if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || start[0] || end[-1]) { - elog("Invalid BTF string section\n"); + pr_debug("Invalid BTF string section\n"); return -EINVAL; } @@ -180,7 +181,7 @@ static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log) return 0; } -static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) +static int btf_parse_type_sec(struct btf *btf) { struct btf_header *hdr = btf->hdr; void *nohdr_data = btf->nohdr_data; @@ -219,7 +220,7 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) case BTF_KIND_RESTRICT: break; default: - elog("Unsupported BTF_KIND:%u\n", + pr_debug("Unsupported BTF_KIND:%u\n", BTF_INFO_KIND(t->info)); return -EINVAL; } @@ -363,7 +364,7 @@ void btf__free(struct btf *btf) free(btf); } -struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log) +struct btf *btf__new(__u8 *data, __u32 size) { __u32 log_buf_size = 0; char *log_buf = NULL; @@ -376,7 +377,7 @@ struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log) btf->fd = -1; - if (err_log) { + if (libbpf_print_level_available(LIBBPF_DEBUG)) { log_buf = malloc(BPF_LOG_BUF_SIZE); if (!log_buf) { err = -ENOMEM; @@ -400,21 +401,21 @@ struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log) if (btf->fd == -1) { err = -errno; - elog("Error loading BTF: %s(%d)\n", strerror(errno), errno); + pr_debug("Error loading BTF: %s(%d)\n", strerror(errno), errno); if (log_buf && *log_buf) - elog("%s\n", log_buf); + pr_debug("%s\n", log_buf); goto done; } - err = btf_parse_hdr(btf, err_log); + err = btf_parse_hdr(btf); if (err) goto done; - err = btf_parse_str_sec(btf, err_log); + err = btf_parse_str_sec(btf); if (err) goto done; - err = btf_parse_type_sec(btf, err_log); + err = btf_parse_type_sec(btf); done: free(log_buf); @@ -491,7 +492,7 @@ int btf__get_from_id(__u32 id, struct btf **btf) goto exit_free; } - *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size, NULL); + *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size); if (IS_ERR(*btf)) { err = PTR_ERR(*btf); *btf = NULL; @@ -514,8 +515,7 @@ struct btf_ext_sec_copy_param { static int btf_ext_copy_info(struct btf_ext *btf_ext, __u8 *data, __u32 data_size, - struct btf_ext_sec_copy_param *ext_sec, - btf_print_fn_t err_log) + struct btf_ext_sec_copy_param *ext_sec) { const struct btf_ext_header *hdr = (struct btf_ext_header *)data; const struct btf_ext_info_sec *sinfo; @@ -529,14 +529,14 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, data_size -= hdr->hdr_len; if (ext_sec->off & 0x03) { - elog(".BTF.ext %s section is not aligned to 4 bytes\n", + pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n", ext_sec->desc); return -EINVAL; } if (data_size < ext_sec->off || ext_sec->len > data_size - ext_sec->off) { - elog("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n", + pr_debug("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n", ext_sec->desc, ext_sec->off, ext_sec->len); return -EINVAL; } @@ -546,7 +546,7 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, /* At least a record size */ if (info_left < sizeof(__u32)) { - elog(".BTF.ext %s record size not found\n", ext_sec->desc); + pr_debug(".BTF.ext %s record size not found\n", ext_sec->desc); return -EINVAL; } @@ -554,7 +554,7 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, record_size = *(__u32 *)info; if (record_size < ext_sec->min_rec_size || record_size & 0x03) { - elog("%s section in .BTF.ext has invalid record size %u\n", + pr_debug("%s section in .BTF.ext has invalid record size %u\n", ext_sec->desc, record_size); return -EINVAL; } @@ -564,7 +564,7 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, /* If no records, return failure now so .BTF.ext won't be used. */ if (!info_left) { - elog("%s section in .BTF.ext has no records", ext_sec->desc); + pr_debug("%s section in .BTF.ext has no records", ext_sec->desc); return -EINVAL; } @@ -574,14 +574,14 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, __u32 num_records; if (info_left < sec_hdrlen) { - elog("%s section header is not found in .BTF.ext\n", + pr_debug("%s section header is not found in .BTF.ext\n", ext_sec->desc); return -EINVAL; } num_records = sinfo->num_info; if (num_records == 0) { - elog("%s section has incorrect num_records in .BTF.ext\n", + pr_debug("%s section has incorrect num_records in .BTF.ext\n", ext_sec->desc); return -EINVAL; } @@ -589,7 +589,7 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, total_record_size = sec_hdrlen + (__u64)num_records * record_size; if (info_left < total_record_size) { - elog("%s section has incorrect num_records in .BTF.ext\n", + pr_debug("%s section has incorrect num_records in .BTF.ext\n", ext_sec->desc); return -EINVAL; } @@ -610,8 +610,7 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, } static int btf_ext_copy_func_info(struct btf_ext *btf_ext, - __u8 *data, __u32 data_size, - btf_print_fn_t err_log) + __u8 *data, __u32 data_size) { const struct btf_ext_header *hdr = (struct btf_ext_header *)data; struct btf_ext_sec_copy_param param = { @@ -622,12 +621,11 @@ static int btf_ext_copy_func_info(struct btf_ext *btf_ext, .desc = "func_info" }; - return btf_ext_copy_info(btf_ext, data, data_size, ¶m, err_log); + return btf_ext_copy_info(btf_ext, data, data_size, ¶m); } static int btf_ext_copy_line_info(struct btf_ext *btf_ext, - __u8 *data, __u32 data_size, - btf_print_fn_t err_log) + __u8 *data, __u32 data_size) { const struct btf_ext_header *hdr = (struct btf_ext_header *)data; struct btf_ext_sec_copy_param param = { @@ -638,37 +636,36 @@ static int btf_ext_copy_line_info(struct btf_ext *btf_ext, .desc = "line_info", }; - return btf_ext_copy_info(btf_ext, data, data_size, ¶m, err_log); + return btf_ext_copy_info(btf_ext, data, data_size, ¶m); } -static int btf_ext_parse_hdr(__u8 *data, __u32 data_size, - btf_print_fn_t err_log) +static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) { const struct btf_ext_header *hdr = (struct btf_ext_header *)data; if (data_size < offsetof(struct btf_ext_header, func_info_off) || data_size < hdr->hdr_len) { - elog("BTF.ext header not found"); + pr_debug("BTF.ext header not found"); return -EINVAL; } if (hdr->magic != BTF_MAGIC) { - elog("Invalid BTF.ext magic:%x\n", hdr->magic); + pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic); return -EINVAL; } if (hdr->version != BTF_VERSION) { - elog("Unsupported BTF.ext version:%u\n", hdr->version); + pr_debug("Unsupported BTF.ext version:%u\n", hdr->version); return -ENOTSUP; } if (hdr->flags) { - elog("Unsupported BTF.ext flags:%x\n", hdr->flags); + pr_debug("Unsupported BTF.ext flags:%x\n", hdr->flags); return -ENOTSUP; } if (data_size == hdr->hdr_len) { - elog("BTF.ext has no data\n"); + pr_debug("BTF.ext has no data\n"); return -EINVAL; } @@ -685,12 +682,12 @@ void btf_ext__free(struct btf_ext *btf_ext) free(btf_ext); } -struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log) +struct btf_ext *btf_ext__new(__u8 *data, __u32 size) { struct btf_ext *btf_ext; int err; - err = btf_ext_parse_hdr(data, size, err_log); + err = btf_ext_parse_hdr(data, size); if (err) return ERR_PTR(err); @@ -698,13 +695,13 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log) if (!btf_ext) return ERR_PTR(-ENOMEM); - err = btf_ext_copy_func_info(btf_ext, data, size, err_log); + err = btf_ext_copy_func_info(btf_ext, data, size); if (err) { btf_ext__free(btf_ext); return ERR_PTR(err); } - err = btf_ext_copy_line_info(btf_ext, data, size, err_log); + err = btf_ext_copy_line_info(btf_ext, data, size); if (err) { btf_ext__free(btf_ext); return ERR_PTR(err); diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index b0610dcdae6b..b1e8e54cc21d 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -55,11 +55,8 @@ struct btf_ext_header { __u32 line_info_len; }; -typedef int (*btf_print_fn_t)(const char *, ...) - __attribute__((format(printf, 1, 2))); - LIBBPF_API void btf__free(struct btf *btf); -LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log); +LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, @@ -70,7 +67,7 @@ LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); -struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log); +struct btf_ext *btf_ext__new(__u8 *data, __u32 size); void btf_ext__free(struct btf_ext *btf_ext); int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ext, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 03bc01ca2577..eeba77b695ad 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -42,6 +42,7 @@ #include "bpf.h" #include "btf.h" #include "str_error.h" +#include "libbpf_util.h" #ifndef EM_BPF #define EM_BPF 247 @@ -69,16 +70,6 @@ static __printf(1, 2) libbpf_print_fn_t __pr_warning = __base_pr; static __printf(1, 2) libbpf_print_fn_t __pr_info = __base_pr; static __printf(1, 2) libbpf_print_fn_t __pr_debug; -#define __pr(func, fmt, ...) \ -do { \ - if ((func)) \ - (func)("libbpf: " fmt, ##__VA_ARGS__); \ -} while (0) - -#define pr_warning(fmt, ...) __pr(__pr_warning, fmt, ##__VA_ARGS__) -#define pr_info(fmt, ...) __pr(__pr_info, fmt, ##__VA_ARGS__) -#define pr_debug(fmt, ...) __pr(__pr_debug, fmt, ##__VA_ARGS__) - void libbpf_set_print(libbpf_print_fn_t warn, libbpf_print_fn_t info, libbpf_print_fn_t debug) @@ -88,6 +79,35 @@ void libbpf_set_print(libbpf_print_fn_t warn, __pr_debug = debug; } +__printf(2, 3) +void libbpf_print(enum libbpf_print_level level, const char *format, ...) +{ + va_list args; + + va_start(args, format); + if (level == LIBBPF_WARN) { + if (__pr_warning) + __pr_warning(format, args); + } else if (level == LIBBPF_INFO) { + if (__pr_info) + __pr_info(format, args); + } else { + if (__pr_debug) + __pr_debug(format, args); + } + va_end(args); +} + +bool libbpf_print_level_available(enum libbpf_print_level level) +{ + if (level == LIBBPF_WARN) + return !!__pr_warning; + else if (level == LIBBPF_INFO) + return !!__pr_info; + else + return !!__pr_debug; +} + #define STRERR_BUFSIZE 128 #define CHECK_ERR(action, err, out) do { \ @@ -839,8 +859,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) else if (strcmp(name, "maps") == 0) obj->efile.maps_shndx = idx; else if (strcmp(name, BTF_ELF_SEC) == 0) { - obj->btf = btf__new(data->d_buf, data->d_size, - __pr_debug); + obj->btf = btf__new(data->d_buf, data->d_size); if (IS_ERR(obj->btf)) { pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", BTF_ELF_SEC, PTR_ERR(obj->btf)); @@ -915,8 +934,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) BTF_EXT_ELF_SEC, BTF_ELF_SEC); } else { obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, - btf_ext_data->d_size, - __pr_debug); + btf_ext_data->d_size); if (IS_ERR(obj->btf_ext)) { pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", BTF_EXT_ELF_SEC, diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 43c77e98df6f..0fb32cc04633 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -47,6 +47,12 @@ enum libbpf_errno { LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size); +enum libbpf_print_level { + LIBBPF_WARN, + LIBBPF_INFO, + LIBBPF_DEBUG, +}; + /* * __printf is defined in include/linux/compiler-gcc.h. However, * it would be better if libbpf.h didn't depend on Linux header files. diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h new file mode 100644 index 000000000000..0fdc3b1d0e33 --- /dev/null +++ b/tools/lib/bpf/libbpf_util.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2019 Facebook */ + +#ifndef __LIBBPF_LIBBPF_UTIL_H +#define __LIBBPF_LIBBPF_UTIL_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +extern void libbpf_print(enum libbpf_print_level level, + const char *format, ...) + __attribute__((format(printf, 2, 3))); + +extern bool libbpf_print_level_available(enum libbpf_print_level level); + +#define __pr(level, fmt, ...) \ +do { \ + libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__); \ +} while (0) + +#define pr_warning(fmt, ...) __pr(LIBBPF_WARN, fmt, ##__VA_ARGS__) +#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) +#define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/tools/lib/bpf/test_libbpf.cpp b/tools/lib/bpf/test_libbpf.cpp index abf3fc25c9fa..be67f5ea2c19 100644 --- a/tools/lib/bpf/test_libbpf.cpp +++ b/tools/lib/bpf/test_libbpf.cpp @@ -14,5 +14,5 @@ int main(int argc, char *argv[]) bpf_prog_get_fd_by_id(0); /* btf.h */ - btf__new(NULL, 0, NULL); + btf__new(NULL, 0); } From 9d100a19ffa519b17a0e998918337da5386e47fb Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 1 Feb 2019 16:14:15 -0800 Subject: [PATCH 0573/1436] tools/bpf: print out btf log at LIBBPF_WARN level Currently, the btf log is allocated and printed out in case of error at LIBBPF_DEBUG level. Such logs from kernel are very important for debugging. For example, bpf syscall BPF_PROG_LOAD command can get verifier logs back to user space. In function load_program() of libbpf.c, the log buffer is allocated unconditionally and printed out at pr_warning() level. Let us do the similar thing here for btf. Allocate buffer unconditionally and print out error logs at pr_warning() level. This can reduce one global function and optimize for common situations where pr_warning() is activated either by default or by user supplied debug output function. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 19 +++++++++---------- tools/lib/bpf/libbpf.c | 10 ---------- tools/lib/bpf/libbpf_util.h | 2 -- 3 files changed, 9 insertions(+), 22 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 93e792b82242..51a0db05bf80 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -377,16 +377,15 @@ struct btf *btf__new(__u8 *data, __u32 size) btf->fd = -1; - if (libbpf_print_level_available(LIBBPF_DEBUG)) { - log_buf = malloc(BPF_LOG_BUF_SIZE); - if (!log_buf) { - err = -ENOMEM; - goto done; - } - *log_buf = 0; - log_buf_size = BPF_LOG_BUF_SIZE; + log_buf = malloc(BPF_LOG_BUF_SIZE); + if (!log_buf) { + err = -ENOMEM; + goto done; } + *log_buf = 0; + log_buf_size = BPF_LOG_BUF_SIZE; + btf->data = malloc(size); if (!btf->data) { err = -ENOMEM; @@ -401,9 +400,9 @@ struct btf *btf__new(__u8 *data, __u32 size) if (btf->fd == -1) { err = -errno; - pr_debug("Error loading BTF: %s(%d)\n", strerror(errno), errno); + pr_warning("Error loading BTF: %s(%d)\n", strerror(errno), errno); if (log_buf && *log_buf) - pr_debug("%s\n", log_buf); + pr_warning("%s\n", log_buf); goto done; } diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index eeba77b695ad..0354af03b038 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -98,16 +98,6 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...) va_end(args); } -bool libbpf_print_level_available(enum libbpf_print_level level) -{ - if (level == LIBBPF_WARN) - return !!__pr_warning; - else if (level == LIBBPF_INFO) - return !!__pr_info; - else - return !!__pr_debug; -} - #define STRERR_BUFSIZE 128 #define CHECK_ERR(action, err, out) do { \ diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h index 0fdc3b1d0e33..81ecda0cb9c9 100644 --- a/tools/lib/bpf/libbpf_util.h +++ b/tools/lib/bpf/libbpf_util.h @@ -14,8 +14,6 @@ extern void libbpf_print(enum libbpf_print_level level, const char *format, ...) __attribute__((format(printf, 2, 3))); -extern bool libbpf_print_level_available(enum libbpf_print_level level); - #define __pr(level, fmt, ...) \ do { \ libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__); \ From 6f1ae8b6628b9e054d3a8c959cf472234944a578 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 1 Feb 2019 16:14:17 -0800 Subject: [PATCH 0574/1436] tools/bpf: simplify libbpf API function libbpf_set_print() Currently, the libbpf API function libbpf_set_print() takes three function pointer parameters for warning, info and debug printout respectively. This patch changes the API to have just one function pointer parameter and the function pointer has one additional parameter "debugging level". So if in the future, if the debug level is increased, the function signature won't change. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 33 +++++++---------- tools/lib/bpf/libbpf.h | 14 +++----- tools/lib/bpf/test_libbpf.cpp | 2 +- tools/perf/util/bpf-loader.c | 32 +++++++---------- tools/testing/selftests/bpf/test_btf.c | 7 ++-- .../testing/selftests/bpf/test_libbpf_open.c | 36 +++++++++---------- tools/testing/selftests/bpf/test_progs.c | 20 +++++++++-- 7 files changed, 68 insertions(+), 76 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 0354af03b038..ce209ab9a1a2 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -54,29 +54,26 @@ #define __printf(a, b) __attribute__((format(printf, a, b))) -__printf(1, 2) -static int __base_pr(const char *format, ...) +__printf(2, 3) +static int __base_pr(enum libbpf_print_level level, const char *format, ...) { va_list args; int err; + if (level == LIBBPF_DEBUG) + return 0; + va_start(args, format); err = vfprintf(stderr, format, args); va_end(args); return err; } -static __printf(1, 2) libbpf_print_fn_t __pr_warning = __base_pr; -static __printf(1, 2) libbpf_print_fn_t __pr_info = __base_pr; -static __printf(1, 2) libbpf_print_fn_t __pr_debug; +static __printf(2, 3) libbpf_print_fn_t __libbpf_pr = __base_pr; -void libbpf_set_print(libbpf_print_fn_t warn, - libbpf_print_fn_t info, - libbpf_print_fn_t debug) +void libbpf_set_print(libbpf_print_fn_t fn) { - __pr_warning = warn; - __pr_info = info; - __pr_debug = debug; + __libbpf_pr = fn; } __printf(2, 3) @@ -84,17 +81,11 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...) { va_list args; + if (!__libbpf_pr) + return; + va_start(args, format); - if (level == LIBBPF_WARN) { - if (__pr_warning) - __pr_warning(format, args); - } else if (level == LIBBPF_INFO) { - if (__pr_info) - __pr_info(format, args); - } else { - if (__pr_debug) - __pr_debug(format, args); - } + __libbpf_pr(level, format, args); va_end(args); } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 0fb32cc04633..19dbc1bed960 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -53,17 +53,11 @@ enum libbpf_print_level { LIBBPF_DEBUG, }; -/* - * __printf is defined in include/linux/compiler-gcc.h. However, - * it would be better if libbpf.h didn't depend on Linux header files. - * So instead of __printf, here we use gcc attribute directly. - */ -typedef int (*libbpf_print_fn_t)(const char *, ...) - __attribute__((format(printf, 1, 2))); +typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level, + const char *, ...) + __attribute__((format(printf, 2, 3))); -LIBBPF_API void libbpf_set_print(libbpf_print_fn_t warn, - libbpf_print_fn_t info, - libbpf_print_fn_t debug); +LIBBPF_API void libbpf_set_print(libbpf_print_fn_t fn); /* Hide internal to user */ struct bpf_object; diff --git a/tools/lib/bpf/test_libbpf.cpp b/tools/lib/bpf/test_libbpf.cpp index be67f5ea2c19..fc134873bb6d 100644 --- a/tools/lib/bpf/test_libbpf.cpp +++ b/tools/lib/bpf/test_libbpf.cpp @@ -8,7 +8,7 @@ int main(int argc, char *argv[]) { /* libbpf.h */ - libbpf_set_print(NULL, NULL, NULL); + libbpf_set_print(NULL); /* bpf.h */ bpf_prog_get_fd_by_id(0); diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 2f3eb6d293ee..38afdbe6a9e0 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -24,21 +24,17 @@ #include "llvm-utils.h" #include "c++/clang-c.h" -#define DEFINE_PRINT_FN(name, level) \ -static int libbpf_##name(const char *fmt, ...) \ -{ \ - va_list args; \ - int ret; \ - \ - va_start(args, fmt); \ - ret = veprintf(level, verbose, pr_fmt(fmt), args);\ - va_end(args); \ - return ret; \ -} +static int libbpf_perf_print(enum libbpf_print_level level __attribute__((unused)), + const char *fmt, ...) +{ + va_list args; + int ret; -DEFINE_PRINT_FN(warning, 1) -DEFINE_PRINT_FN(info, 1) -DEFINE_PRINT_FN(debug, 1) + va_start(args, fmt); + ret = veprintf(1, verbose, pr_fmt(fmt), args); + va_end(args); + return ret; +} struct bpf_prog_priv { bool is_tp; @@ -59,9 +55,7 @@ bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name) struct bpf_object *obj; if (!libbpf_initialized) { - libbpf_set_print(libbpf_warning, - libbpf_info, - libbpf_debug); + libbpf_set_print(libbpf_perf_print); libbpf_initialized = true; } @@ -79,9 +73,7 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source) struct bpf_object *obj; if (!libbpf_initialized) { - libbpf_set_print(libbpf_warning, - libbpf_info, - libbpf_debug); + libbpf_set_print(libbpf_perf_print); libbpf_initialized = true; } diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 179f1d8ec5bf..aebaeff5a5a0 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -54,8 +54,9 @@ static int count_result(int err) #define __printf(a, b) __attribute__((format(printf, a, b))) -__printf(1, 2) -static int __base_pr(const char *format, ...) +__printf(2, 3) +static int __base_pr(enum libbpf_print_level level __attribute__((unused)), + const char *format, ...) { va_list args; int err; @@ -5650,7 +5651,7 @@ int main(int argc, char **argv) return err; if (args.always_log) - libbpf_set_print(__base_pr, __base_pr, __base_pr); + libbpf_set_print(__base_pr); if (args.raw_test) err |= test_raw(); diff --git a/tools/testing/selftests/bpf/test_libbpf_open.c b/tools/testing/selftests/bpf/test_libbpf_open.c index 8fcd1c076add..b9ff3bf76544 100644 --- a/tools/testing/selftests/bpf/test_libbpf_open.c +++ b/tools/testing/selftests/bpf/test_libbpf_open.c @@ -34,23 +34,22 @@ static void usage(char *argv[]) printf("\n"); } -#define DEFINE_PRINT_FN(name, enabled) \ -static int libbpf_##name(const char *fmt, ...) \ -{ \ - va_list args; \ - int ret; \ - \ - va_start(args, fmt); \ - if (enabled) { \ - fprintf(stderr, "[" #name "] "); \ - ret = vfprintf(stderr, fmt, args); \ - } \ - va_end(args); \ - return ret; \ +static bool debug = 0; +static int libbpf_debug_print(enum libbpf_print_level level, + const char *fmt, ...) +{ + va_list args; + int ret; + + if (level == LIBBPF_DEBUG && !debug) + return 0; + + va_start(args, fmt); + fprintf(stderr, "[%d] ", level); + ret = vfprintf(stderr, fmt, args); + va_end(args); + return ret; } -DEFINE_PRINT_FN(warning, 1) -DEFINE_PRINT_FN(info, 1) -DEFINE_PRINT_FN(debug, 1) #define EXIT_FAIL_LIBBPF EXIT_FAILURE #define EXIT_FAIL_OPTION 2 @@ -120,15 +119,14 @@ int main(int argc, char **argv) int longindex = 0; int opt; - libbpf_set_print(libbpf_warning, libbpf_info, NULL); + libbpf_set_print(libbpf_debug_print); /* Parse commands line args */ while ((opt = getopt_long(argc, argv, "hDq", long_options, &longindex)) != -1) { switch (opt) { case 'D': - libbpf_set_print(libbpf_warning, libbpf_info, - libbpf_debug); + debug = 1; break; case 'q': /* Use in scripting mode */ verbose = 0; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index a08d026ac396..55d05102e7bf 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -1783,6 +1784,21 @@ static void test_task_fd_query_tp(void) "sys_enter_read"); } +static int libbpf_debug_print(enum libbpf_print_level level, + const char *format, ...) +{ + va_list args; + int ret; + + if (level == LIBBPF_DEBUG) + return 0; + + va_start(args, format); + ret = vfprintf(stderr, format, args); + va_end(args); + return ret; +} + static void test_reference_tracking() { const char *file = "./test_sk_lookup_kern.o"; @@ -1809,9 +1825,9 @@ static void test_reference_tracking() /* Expect verifier failure if test name has 'fail' */ if (strstr(title, "fail") != NULL) { - libbpf_set_print(NULL, NULL, NULL); + libbpf_set_print(NULL); err = !bpf_program__load(prog, "GPL", 0); - libbpf_set_print(printf, printf, NULL); + libbpf_set_print(libbpf_debug_print); } else { err = bpf_program__load(prog, "GPL", 0); } From 5065b2dd3e5f9247a6c9d67974bc0472bf561b9d Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 4 Feb 2019 17:40:06 +0100 Subject: [PATCH 0575/1436] s390/qeth: release cmd buffer in error paths Whenever we fail before/while starting an IO, make sure to release the IO buffer. Usually qeth_irq() would do this for us, but if the IO doesn't even start we obviously won't get an interrupt for it either. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index e63e03143ca7..ca62e7d3ee93 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -566,6 +566,7 @@ static int __qeth_issue_next_read(struct qeth_card *card) QETH_DBF_MESSAGE(2, "error %i on device %x when starting next read ccw!\n", rc, CARD_DEVID(card)); atomic_set(&channel->irq_pending, 0); + qeth_release_buffer(channel, iob); card->read_or_write_problem = 1; qeth_schedule_recovery(card); wake_up(&card->wait_q); @@ -1127,6 +1128,8 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, rc = qeth_get_problem(card, cdev, irb); if (rc) { card->read_or_write_problem = 1; + if (iob) + qeth_release_buffer(iob->channel, iob); qeth_clear_ipacmd_list(card); qeth_schedule_recovery(card); goto out; @@ -1809,6 +1812,7 @@ static int qeth_idx_activate_get_answer(struct qeth_card *card, QETH_DBF_MESSAGE(2, "Error2 in activating channel rc=%d\n", rc); QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc); atomic_set(&channel->irq_pending, 0); + qeth_release_buffer(channel, iob); wake_up(&card->wait_q); return rc; } @@ -1878,6 +1882,7 @@ static int qeth_idx_activate_channel(struct qeth_card *card, rc); QETH_DBF_TEXT_(SETUP, 2, "1err%d", rc); atomic_set(&channel->irq_pending, 0); + qeth_release_buffer(channel, iob); wake_up(&card->wait_q); return rc; } @@ -2058,6 +2063,7 @@ int qeth_send_control_data(struct qeth_card *card, int len, } reply = qeth_alloc_reply(card); if (!reply) { + qeth_release_buffer(channel, iob); return -ENOMEM; } reply->callback = reply_cb; From afa0c5904ba16d59b0454f7ee4c807dae350f432 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 4 Feb 2019 17:40:07 +0100 Subject: [PATCH 0576/1436] s390/qeth: fix use-after-free in error path The error path in qeth_alloc_qdio_buffers() that takes care of cleaning up the Output Queues is buggy. It first frees the queue, but then calls qeth_clear_outq_buffers() with that very queue struct. Make the call to qeth_clear_outq_buffers() part of the free action (in the correct order), and while at it fix the naming of the helper. Fixes: 0da9581ddb0f ("qeth: exploit asynchronous delivery of storage blocks") Signed-off-by: Julian Wiedmann Reviewed-by: Alexandra Winter Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index ca62e7d3ee93..30679bfc8a1b 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -2395,11 +2395,12 @@ static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx) return 0; } -static void qeth_free_qdio_out_buf(struct qeth_qdio_out_q *q) +static void qeth_free_output_queue(struct qeth_qdio_out_q *q) { if (!q) return; + qeth_clear_outq_buffers(q, 1); qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q); kfree(q); } @@ -2473,10 +2474,8 @@ out_freeoutqbufs: card->qdio.out_qs[i]->bufs[j] = NULL; } out_freeoutq: - while (i > 0) { - qeth_free_qdio_out_buf(card->qdio.out_qs[--i]); - qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); - } + while (i > 0) + qeth_free_output_queue(card->qdio.out_qs[--i]); kfree(card->qdio.out_qs); card->qdio.out_qs = NULL; out_freepool: @@ -2509,10 +2508,8 @@ static void qeth_free_qdio_buffers(struct qeth_card *card) qeth_free_buffer_pool(card); /* free outbound qdio_qs */ if (card->qdio.out_qs) { - for (i = 0; i < card->qdio.no_out_queues; ++i) { - qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); - qeth_free_qdio_out_buf(card->qdio.out_qs[i]); - } + for (i = 0; i < card->qdio.no_out_queues; i++) + qeth_free_output_queue(card->qdio.out_qs[i]); kfree(card->qdio.out_qs); card->qdio.out_qs = NULL; } From c2780c1a3fb724560b1d44f7976e0de17bf153c7 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 4 Feb 2019 17:40:08 +0100 Subject: [PATCH 0577/1436] s390/qeth: cancel close_dev work before removing a card A card's close_dev work is scheduled on a driver-wide workqueue. If the card is removed and freed while the work is still active, this causes a use-after-free. So make sure that the work is completed before freeing the card. Fixes: 0f54761d167f ("qeth: Support VEPA mode") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 1 + drivers/s390/net/qeth_l2_main.c | 2 ++ drivers/s390/net/qeth_l3_main.c | 1 + 3 files changed, 4 insertions(+) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 0ee026947f20..1cf45ace0dd0 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index f108d4b44605..9fec0117fc00 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -801,6 +801,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev) if (cgdev->state == CCWGROUP_ONLINE) qeth_l2_set_offline(cgdev); + + cancel_work_sync(&card->close_dev_work); if (qeth_netdev_is_registered(card->dev)) unregister_netdev(card->dev); } diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 42a7cdc59b76..5e810561cb12 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2338,6 +2338,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) if (cgdev->state == CCWGROUP_ONLINE) qeth_l3_set_offline(cgdev); + cancel_work_sync(&card->close_dev_work); if (qeth_netdev_is_registered(card->dev)) unregister_netdev(card->dev); qeth_l3_clear_ip_htable(card, 0); From c0a2e4d10d9366ada133a8ae4ff2f32397f8b15b Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 4 Feb 2019 17:40:09 +0100 Subject: [PATCH 0578/1436] s390/qeth: conclude all event processing before offlining a card Work for Bridgeport events is currently placed on a driver-wide workqueue. If the card is removed and freed while any such work is still active, this causes a use-after-free. So put the events on a per-card queue, where we can control their lifetime. As we also don't want stale events to last beyond an offline & online cycle, flush this queue when setting the card offline. Fixes: b4d72c08b358 ("qeth: bridgeport support - basic control") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 2 +- drivers/s390/net/qeth_core_main.c | 10 ++++++++-- drivers/s390/net/qeth_l2_main.c | 6 ++++-- drivers/s390/net/qeth_l3_main.c | 2 ++ 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 1cf45ace0dd0..122059ecad84 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -790,6 +790,7 @@ struct qeth_card { struct qeth_seqno seqno; struct qeth_card_options options; + struct workqueue_struct *event_wq; wait_queue_head_t wait_q; spinlock_t mclock; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; @@ -963,7 +964,6 @@ extern const struct attribute_group *qeth_osn_attr_groups[]; extern const struct attribute_group qeth_device_attr_group; extern const struct attribute_group qeth_device_blkt_group; extern const struct device_type qeth_generic_devtype; -extern struct workqueue_struct *qeth_wq; int qeth_card_hw_is_reachable(struct qeth_card *); const char *qeth_get_cardname_short(struct qeth_card *); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 30679bfc8a1b..89f912213e62 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -74,8 +74,7 @@ static void qeth_notify_skbs(struct qeth_qdio_out_q *queue, static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf); static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *, int); -struct workqueue_struct *qeth_wq; -EXPORT_SYMBOL_GPL(qeth_wq); +static struct workqueue_struct *qeth_wq; int qeth_card_hw_is_reachable(struct qeth_card *card) { @@ -1469,6 +1468,10 @@ static struct qeth_card *qeth_alloc_card(struct ccwgroup_device *gdev) CARD_RDEV(card) = gdev->cdev[0]; CARD_WDEV(card) = gdev->cdev[1]; CARD_DDEV(card) = gdev->cdev[2]; + + card->event_wq = alloc_ordered_workqueue("%s", 0, dev_name(&gdev->dev)); + if (!card->event_wq) + goto out_wq; if (qeth_setup_channel(&card->read, true)) goto out_ip; if (qeth_setup_channel(&card->write, true)) @@ -1484,6 +1487,8 @@ out_data: out_channel: qeth_clean_channel(&card->read); out_ip: + destroy_workqueue(card->event_wq); +out_wq: dev_set_drvdata(&gdev->dev, NULL); kfree(card); out: @@ -5031,6 +5036,7 @@ static void qeth_core_free_card(struct qeth_card *card) qeth_clean_channel(&card->read); qeth_clean_channel(&card->write); qeth_clean_channel(&card->data); + destroy_workqueue(card->event_wq); qeth_free_qdio_buffers(card); unregister_service_level(&card->qeth_service_level); dev_set_drvdata(&card->gdev->dev, NULL); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 9fec0117fc00..a43de2f9bcac 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -369,6 +369,8 @@ static void qeth_l2_stop_card(struct qeth_card *card, int recovery_mode) qeth_clear_cmd_buffers(&card->read); qeth_clear_cmd_buffers(&card->write); } + + flush_workqueue(card->event_wq); } static int qeth_l2_process_inbound_buffer(struct qeth_card *card, @@ -1436,7 +1438,7 @@ static void qeth_bridge_state_change(struct qeth_card *card, data->card = card; memcpy(&data->qports, qports, sizeof(struct qeth_sbp_state_change) + extrasize); - queue_work(qeth_wq, &data->worker); + queue_work(card->event_wq, &data->worker); } struct qeth_bridge_host_data { @@ -1508,7 +1510,7 @@ static void qeth_bridge_host_event(struct qeth_card *card, data->card = card; memcpy(&data->hostevs, hostevs, sizeof(struct qeth_ipacmd_addr_change) + extrasize); - queue_work(qeth_wq, &data->worker); + queue_work(card->event_wq, &data->worker); } /* SETBRIDGEPORT support; sending commands */ diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 5e810561cb12..df34bff4ac31 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1433,6 +1433,8 @@ static void qeth_l3_stop_card(struct qeth_card *card, int recovery_mode) qeth_clear_cmd_buffers(&card->read); qeth_clear_cmd_buffers(&card->write); } + + flush_workqueue(card->event_wq); } /* From ecc15f113c8e8748cc304ed6d8beb825a432b34c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 4 Feb 2019 16:44:55 +0100 Subject: [PATCH 0579/1436] s390: bpf: fix JMP32 code-gen Commit 626a5f66da0d19 ("s390: bpf: implement jitting of JMP32") added JMP32 code-gen support for s390. However it triggers the warning below due to some unusual gotos in the original s390 bpf jit code. Add a couple of additional "is_jmp32" initializations to fix this. Also fix the wrong opcode for the "llilf" instruction that was introduced with the same commit. arch/s390/net/bpf_jit_comp.c: In function 'bpf_jit_insn': arch/s390/net/bpf_jit_comp.c:248:55: warning: 'is_jmp32' may be used uninitialized in this function [-Wmaybe-uninitialized] _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask); \ ^ arch/s390/net/bpf_jit_comp.c:1211:8: note: 'is_jmp32' was declared here bool is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; Fixes: 626a5f66da0d19 ("s390: bpf: implement jitting of JMP32") Cc: Jiong Wang Cc: Martin Schwidefsky Signed-off-by: Heiko Carstens Acked-by: Jiong Wang Acked-by: David S. Miller Signed-off-by: Alexei Starovoitov --- arch/s390/net/bpf_jit_comp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index ce9defdff62a..51dd0267d014 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1154,7 +1154,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i mask = 0x7000; /* jnz */ if (BPF_CLASS(insn->code) == BPF_JMP32) { /* llilf %w1,imm (load zero extend imm) */ - EMIT6_IMM(0xc0010000, REG_W1, imm); + EMIT6_IMM(0xc00f0000, REG_W1, imm); /* nr %w1,%dst */ EMIT2(0x1400, REG_W1, dst_reg); } else { @@ -1216,6 +1216,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i REG_W1, dst_reg, src_reg); goto branch_oc; branch_ks: + is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; /* lgfi %w1,imm (load sign extend imm) */ EMIT6_IMM(0xc0010000, REG_W1, imm); /* crj or cgrj %dst,%w1,mask,off */ @@ -1223,6 +1224,7 @@ branch_ks: dst_reg, REG_W1, i, off, mask); break; branch_ku: + is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; /* lgfi %w1,imm (load sign extend imm) */ EMIT6_IMM(0xc0010000, REG_W1, imm); /* clrj or clgrj %dst,%w1,mask,off */ @@ -1230,11 +1232,13 @@ branch_ku: dst_reg, REG_W1, i, off, mask); break; branch_xs: + is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; /* crj or cgrj %dst,%src,mask,off */ EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064), dst_reg, src_reg, i, off, mask); break; branch_xu: + is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; /* clrj or clgrj %dst,%src,mask,off */ EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065), dst_reg, src_reg, i, off, mask); From 843cf70ed29a7fb51f1e796c1d6e1ba3620250ac Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 4 Feb 2019 15:48:03 -0300 Subject: [PATCH 0580/1436] perf symbols: Add fallback definitions for GELF_ST_VISIBILITY() Those aren't present in Alpine Linux 3.4 to edge, so provide fallback defines to get the next patch building there keeping the build bisectable. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Michael Petlan Cc: Namhyung Kim Cc: Nick Clifton Cc: Peter Zijlstra Link: https://lkml.kernel.org/n/tip-03cg3gya2ju4ba2x6ibb9fuz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 66a84d5846c8..695a73940329 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -19,6 +19,20 @@ #define EM_AARCH64 183 /* ARM 64 bit */ #endif +#ifndef ELF32_ST_VISIBILITY +#define ELF32_ST_VISIBILITY(o) ((o) & 0x03) +#endif + +/* For ELF64 the definitions are the same. */ +#ifndef ELF64_ST_VISIBILITY +#define ELF64_ST_VISIBILITY(o) ELF32_ST_VISIBILITY (o) +#endif + +/* How to extract information held in the st_other field. */ +#ifndef GELF_ST_VISIBILITY +#define GELF_ST_VISIBILITY(val) ELF64_ST_VISIBILITY (val) +#endif + typedef Elf64_Nhdr GElf_Nhdr; #ifdef HAVE_CPLUS_DEMANGLE_SUPPORT From 59a17706915fe5ea6f711e1f92d4fb706bce07fe Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 28 Jan 2019 14:35:26 +0100 Subject: [PATCH 0581/1436] perf symbols: Filter out hidden symbols from labels When perf is built with the annobin plugin (RHEL8 build) extra symbols are added to its binary: # nm perf | grep annobin | head -10 0000000000241100 t .annobin_annotate.c 0000000000326490 t .annobin_annotate.c 0000000000249255 t .annobin_annotate.c_end 00000000003283a8 t .annobin_annotate.c_end 00000000001bce18 t .annobin_annotate.c_end.hot 00000000001bce18 t .annobin_annotate.c_end.hot 00000000001bc3e2 t .annobin_annotate.c_end.unlikely 00000000001bc400 t .annobin_annotate.c_end.unlikely 00000000001bce18 t .annobin_annotate.c.hot 00000000001bce18 t .annobin_annotate.c.hot ... Those symbols have no use for report or annotation and should be skipped. Moreover they interfere with the DWARF unwind test on the PPC arch, where they are mixed with checked symbols and then the test fails: # perf test dwarf -v 59: Test dwarf unwind : --- start --- test child forked, pid 8515 unwind: .annobin_dwarf_unwind.c:ip = 0x10dba40dc (0x2740dc) ... got: .annobin_dwarf_unwind.c 0x10dba40dc, expecting test__arch_unwind_sample unwind: failed with 'no error' The annobin symbols are defined as NOTYPE/LOCAL/HIDDEN: # readelf -s ./perf | grep annobin | head -1 40: 00000000001bce4f 0 NOTYPE LOCAL HIDDEN 13 .annobin_init.c They can still pass the check for the label symbol. Adding check for HIDDEN and INTERNAL (as suggested by Nick below) visibility and filter out such symbols. > Just to be awkward, if you are going to ignore STV_HIDDEN > symbols then you should probably also ignore STV_INTERNAL ones > as well... Annobin does not generate them, but you never know, > one day some other tool might create some. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Masami Hiramatsu Cc: Michael Petlan Cc: Namhyung Kim Cc: Nick Clifton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190128133526.GD15461@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 695a73940329..dca7dfae69ad 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -101,6 +101,11 @@ static inline uint8_t elf_sym__type(const GElf_Sym *sym) return GELF_ST_TYPE(sym->st_info); } +static inline uint8_t elf_sym__visibility(const GElf_Sym *sym) +{ + return GELF_ST_VISIBILITY(sym->st_other); +} + #ifndef STT_GNU_IFUNC #define STT_GNU_IFUNC 10 #endif @@ -125,7 +130,9 @@ static inline int elf_sym__is_label(const GElf_Sym *sym) return elf_sym__type(sym) == STT_NOTYPE && sym->st_name != 0 && sym->st_shndx != SHN_UNDEF && - sym->st_shndx != SHN_ABS; + sym->st_shndx != SHN_ABS && + elf_sym__visibility(sym) != STV_HIDDEN && + elf_sym__visibility(sym) != STV_INTERNAL; } static bool elf_sym__filter(GElf_Sym *sym) From 6ab3bc240ade47a0f52bc16d97edd9accbe0024e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 29 Jan 2019 15:12:34 +0100 Subject: [PATCH 0582/1436] perf trace: Support multiple "vfs_getname" probes With a suitably defined "probe:vfs_getname" probe, 'perf trace' can "beautify" its output, so syscalls like open() or openat() can print the "filename" argument instead of just its hex address, like: $ perf trace -e open -- touch /dev/null [...] 0.590 ( 0.014 ms): touch/18063 open(filename: /dev/null, flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 3 [...] The output without such beautifier looks like: 0.529 ( 0.011 ms): touch/18075 open(filename: 0xc78cf288, flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 3 However, when the vfs_getname probe expands to multiple probes and it is not the first one that is hit, the beautifier fails, as following: 0.326 ( 0.010 ms): touch/18072 open(filename: , flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 3 Fix it by hooking into all the expanded probes (inlines), now, for instance: [root@quaco ~]# perf probe -l probe:vfs_getname (on getname_flags:73@fs/namei.c with pathname) probe:vfs_getname_1 (on getname_flags:73@fs/namei.c with pathname) [root@quaco ~]# perf trace -e open* sleep 1 0.010 ( 0.005 ms): sleep/5588 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: RDONLY|CLOEXEC) = 3 0.029 ( 0.006 ms): sleep/5588 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: RDONLY|CLOEXEC) = 3 0.194 ( 0.008 ms): sleep/5588 openat(dfd: CWD, filename: /usr/lib/locale/locale-archive, flags: RDONLY|CLOEXEC) = 3 [root@quaco ~]# Works, further verified with: [root@quaco ~]# perf test vfs 65: Use vfs_getname probe to get syscall args filenames : Ok 66: Add vfs_getname probe to get syscall args filenames : Ok 67: Check open filename arg using perf trace + vfs_getname: Ok [root@quaco ~]# Reported-by: Michael Petlan Tested-by: Michael Petlan Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-mv8kolk17xla1smvmp3qabv1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ed4583128b9c..b36061cd1ab8 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2514,19 +2514,30 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist) { - struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); + bool found = false; + struct perf_evsel *evsel, *tmp; + struct parse_events_error err = { .idx = 0, }; + int ret = parse_events(evlist, "probe:vfs_getname*", &err); - if (IS_ERR(evsel)) + if (ret) return false; - if (perf_evsel__field(evsel, "pathname") == NULL) { + evlist__for_each_entry_safe(evlist, evsel, tmp) { + if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname")) + continue; + + if (perf_evsel__field(evsel, "pathname")) { + evsel->handler = trace__vfs_getname; + found = true; + continue; + } + + list_del_init(&evsel->node); + evsel->evlist = NULL; perf_evsel__delete(evsel); - return false; } - evsel->handler = trace__vfs_getname; - perf_evlist__add(evlist, evsel); - return true; + return found; } static struct perf_evsel *perf_evsel__new_pgfault(u64 config) From e02e07e3127d8aec1f4bcdfb2fc52a2d99b4859e Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 15 Jan 2019 16:04:54 +0800 Subject: [PATCH 0583/1436] MIPS: Loongson: Introduce and use loongson_llsc_mb() On the Loongson-2G/2H/3A/3B there is a hardware flaw that ll/sc and lld/scd is very weak ordering. We should add sync instructions "before each ll/lld" and "at the branch-target between ll/sc" to workaround. Otherwise, this flaw will cause deadlock occasionally (e.g. when doing heavy load test with LTP). Below is the explaination of CPU designer: "For Loongson 3 family, when a memory access instruction (load, store, or prefetch)'s executing occurs between the execution of LL and SC, the success or failure of SC is not predictable. Although programmer would not insert memory access instructions between LL and SC, the memory instructions before LL in program-order, may dynamically executed between the execution of LL/SC, so a memory fence (SYNC) is needed before LL/LLD to avoid this situation. Since Loongson-3A R2 (3A2000), we have improved our hardware design to handle this case. But we later deduce a rarely circumstance that some speculatively executed memory instructions due to branch misprediction between LL/SC still fall into the above case, so a memory fence (SYNC) at branch-target (if its target is not between LL/SC) is needed for Loongson 3A1000, 3B1500, 3A2000 and 3A3000. Our processor is continually evolving and we aim to to remove all these workaround-SYNCs around LL/SC for new-come processor." Here is an example: Both cpu1 and cpu2 simutaneously run atomic_add by 1 on same atomic var, this bug cause both 'sc' run by two cpus (in atomic_add) succeed at same time('sc' return 1), and the variable is only *added by 1*, sometimes, which is wrong and unacceptable(it should be added by 2). Why disable fix-loongson3-llsc in compiler? Because compiler fix will cause problems in kernel's __ex_table section. This patch fix all the cases in kernel, but: +. the fix at the end of futex_atomic_cmpxchg_inatomic is for branch-target of 'bne', there other cases which smp_mb__before_llsc() and smp_llsc_mb() fix the ll and branch-target coincidently such as atomic_sub_if_positive/ cmpxchg/xchg, just like this one. +. Loongson 3 does support CONFIG_EDAC_ATOMIC_SCRUB, so no need to touch edac.h +. local_ops and cmpxchg_local should not be affected by this bug since only the owner can write. +. mips_atomic_set for syscall.c is deprecated and rarely used, just let it go Signed-off-by: Huacai Chen Signed-off-by: Huang Pei [paul.burton@mips.com: - Simplify the addition of -mno-fix-loongson3-llsc to cflags, and add a comment describing why it's there. - Make loongson_llsc_mb() a no-op when CONFIG_CPU_LOONGSON3_WORKAROUNDS=n, rather than a compiler memory barrier. - Add a comment describing the bug & how loongson_llsc_mb() helps in asm/barrier.h.] Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: ambrosehua@gmail.com Cc: Steven J . Hill Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Li Xuefeng Cc: Xu Chenghua --- arch/mips/Kconfig | 15 ++++++++++++++ arch/mips/include/asm/atomic.h | 6 ++++++ arch/mips/include/asm/barrier.h | 36 +++++++++++++++++++++++++++++++++ arch/mips/include/asm/bitops.h | 5 +++++ arch/mips/include/asm/futex.h | 3 +++ arch/mips/include/asm/pgtable.h | 2 ++ arch/mips/loongson64/Platform | 23 +++++++++++++++++++++ arch/mips/mm/tlbex.c | 10 +++++++++ 8 files changed, 100 insertions(+) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 0d14f51d0002..a84c24d894aa 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1403,6 +1403,21 @@ config LOONGSON3_ENHANCEMENT please say 'N' here. If you want a high-performance kernel to run on new Loongson 3 machines only, please say 'Y' here. +config CPU_LOONGSON3_WORKAROUNDS + bool "Old Loongson 3 LLSC Workarounds" + default y if SMP + depends on CPU_LOONGSON3 + help + Loongson 3 processors have the llsc issues which require workarounds. + Without workarounds the system may hang unexpectedly. + + Newer Loongson 3 will fix these issues and no workarounds are needed. + The workarounds have no significant side effect on them but may + decrease the performance of the system so this option should be + disabled unless the kernel is intended to be run on old systems. + + If unsure, please say Y. + config CPU_LOONGSON2E bool "Loongson 2E" depends on SYS_HAS_CPU_LOONGSON2E diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 43fcd35e2957..94096299fc56 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -58,6 +58,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ if (kernel_uses_llsc) { \ int temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set "MIPS_ISA_LEVEL" \n" \ @@ -85,6 +86,7 @@ static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ if (kernel_uses_llsc) { \ int temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set "MIPS_ISA_LEVEL" \n" \ @@ -118,6 +120,7 @@ static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ if (kernel_uses_llsc) { \ int temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set "MIPS_ISA_LEVEL" \n" \ @@ -256,6 +259,7 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ if (kernel_uses_llsc) { \ long temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set "MIPS_ISA_LEVEL" \n" \ @@ -283,6 +287,7 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ if (kernel_uses_llsc) { \ long temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set "MIPS_ISA_LEVEL" \n" \ @@ -316,6 +321,7 @@ static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ if (kernel_uses_llsc) { \ long temp; \ \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set "MIPS_ISA_LEVEL" \n" \ diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index a5eb1bb199a7..b7f6ac5e513c 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -222,6 +222,42 @@ #define __smp_mb__before_atomic() __smp_mb__before_llsc() #define __smp_mb__after_atomic() smp_llsc_mb() +/* + * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load, + * store or pref) in between an ll & sc can cause the sc instruction to + * erroneously succeed, breaking atomicity. Whilst it's unusual to write code + * containing such sequences, this bug bites harder than we might otherwise + * expect due to reordering & speculation: + * + * 1) A memory access appearing prior to the ll in program order may actually + * be executed after the ll - this is the reordering case. + * + * In order to avoid this we need to place a memory barrier (ie. a sync + * instruction) prior to every ll instruction, in between it & any earlier + * memory access instructions. Many of these cases are already covered by + * smp_mb__before_llsc() but for the remaining cases, typically ones in + * which multiple CPUs may operate on a memory location but ordering is not + * usually guaranteed, we use loongson_llsc_mb() below. + * + * This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later. + * + * 2) If a conditional branch exists between an ll & sc with a target outside + * of the ll-sc loop, for example an exit upon value mismatch in cmpxchg() + * or similar, then misprediction of the branch may allow speculative + * execution of memory accesses from outside of the ll-sc loop. + * + * In order to avoid this we need a memory barrier (ie. a sync instruction) + * at each affected branch target, for which we also use loongson_llsc_mb() + * defined below. + * + * This case affects all current Loongson 3 CPUs. + */ +#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */ +#define loongson_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") +#else +#define loongson_llsc_mb() do { } while (0) +#endif + #include #endif /* __ASM_BARRIER_H */ diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index c4675957b21b..830c93a010c3 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -69,6 +69,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) : "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m)); #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) } else if (kernel_uses_llsc && __builtin_constant_p(bit)) { + loongson_llsc_mb(); do { __asm__ __volatile__( " " __LL "%0, %1 # set_bit \n" @@ -79,6 +80,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) } while (unlikely(!temp)); #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */ } else if (kernel_uses_llsc) { + loongson_llsc_mb(); do { __asm__ __volatile__( " .set push \n" @@ -123,6 +125,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) : "ir" (~(1UL << bit))); #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) } else if (kernel_uses_llsc && __builtin_constant_p(bit)) { + loongson_llsc_mb(); do { __asm__ __volatile__( " " __LL "%0, %1 # clear_bit \n" @@ -133,6 +136,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) } while (unlikely(!temp)); #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */ } else if (kernel_uses_llsc) { + loongson_llsc_mb(); do { __asm__ __volatile__( " .set push \n" @@ -193,6 +197,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG); unsigned long temp; + loongson_llsc_mb(); do { __asm__ __volatile__( " .set push \n" diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h index c14d798f3888..b83b0397462d 100644 --- a/arch/mips/include/asm/futex.h +++ b/arch/mips/include/asm/futex.h @@ -50,6 +50,7 @@ "i" (-EFAULT) \ : "memory"); \ } else if (cpu_has_llsc) { \ + loongson_llsc_mb(); \ __asm__ __volatile__( \ " .set push \n" \ " .set noat \n" \ @@ -163,6 +164,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, "i" (-EFAULT) : "memory"); } else if (cpu_has_llsc) { + loongson_llsc_mb(); __asm__ __volatile__( "# futex_atomic_cmpxchg_inatomic \n" " .set push \n" @@ -192,6 +194,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) : "memory"); + loongson_llsc_mb(); } else return -ENOSYS; diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 57933fc8fd98..910851c62db3 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -228,6 +228,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp) : [global] "r" (page_global)); } else if (kernel_uses_llsc) { + loongson_llsc_mb(); __asm__ __volatile__ ( " .set push \n" " .set "MIPS_ISA_ARCH_LEVEL" \n" @@ -242,6 +243,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) " .set pop \n" : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp) : [global] "r" (page_global)); + loongson_llsc_mb(); } #else /* !CONFIG_SMP */ if (pte_none(*buddy)) diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform index 0fce4608aa88..c1a4d4dc4665 100644 --- a/arch/mips/loongson64/Platform +++ b/arch/mips/loongson64/Platform @@ -23,6 +23,29 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS endif cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap + +# +# Some versions of binutils, not currently mainline as of 2019/02/04, support +# an -mfix-loongson3-llsc flag which emits a sync prior to each ll instruction +# to work around a CPU bug (see loongson_llsc_mb() in asm/barrier.h for a +# description). +# +# We disable this in order to prevent the assembler meddling with the +# instruction that labels refer to, ie. if we label an ll instruction: +# +# 1: ll v0, 0(a0) +# +# ...then with the assembler fix applied the label may actually point at a sync +# instruction inserted by the assembler, and if we were using the label in an +# exception table the table would no longer contain the address of the ll +# instruction. +# +# Avoid this by explicitly disabling that assembler behaviour. If upstream +# binutils does not merge support for the flag then we can revisit & remove +# this later - for now it ensures vendor toolchains don't cause problems. +# +cflags-$(CONFIG_CPU_LOONGSON3) += $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,) + # # binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a # as MIPS64 R2; older versions as just R1. This leaves the possibility open diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 37b1cb246332..65b6e85447b1 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -932,6 +932,8 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, * to mimic that here by taking a load/istream page * fault. */ + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(p, 0); UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0); uasm_i_jr(p, ptr); @@ -1646,6 +1648,8 @@ static void iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr) { #ifdef CONFIG_SMP + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(p, 0); # ifdef CONFIG_PHYS_ADDR_T_64BIT if (cpu_has_64bits) uasm_i_lld(p, pte, 0, ptr); @@ -2259,6 +2263,8 @@ static void build_r4000_tlb_load_handler(void) #endif uasm_l_nopage_tlbl(&l, p); + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(&p, 0); build_restore_work_registers(&p); #ifdef CONFIG_CPU_MICROMIPS if ((unsigned long)tlb_do_page_fault_0 & 1) { @@ -2313,6 +2319,8 @@ static void build_r4000_tlb_store_handler(void) #endif uasm_l_nopage_tlbs(&l, p); + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(&p, 0); build_restore_work_registers(&p); #ifdef CONFIG_CPU_MICROMIPS if ((unsigned long)tlb_do_page_fault_1 & 1) { @@ -2368,6 +2376,8 @@ static void build_r4000_tlb_modify_handler(void) #endif uasm_l_nopage_tlbm(&l, p); + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(&p, 0); build_restore_work_registers(&p); #ifdef CONFIG_CPU_MICROMIPS if ((unsigned long)tlb_do_page_fault_1 & 1) { From 047f2d941b8b24cadd6a4a09e606b7f41188ba3e Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 4 Feb 2019 19:53:53 +0000 Subject: [PATCH 0584/1436] MIPS: Use lower case for addresses in nexys4ddr.dts DTC introduced an i2c_bus_reg check in v1.4.7, used since Linux v4.20, which complains about upper case addresses used in the unit name. nexys4ddr.dts names an I2C device node "ad7420@4B", leading to: arch/mips/boot/dts/xilfpga/nexys4ddr.dts:109.16-112.8: Warning (i2c_bus_reg): /i2c@10A00000/ad7420@4B: I2C bus unit address format error, expected "4b" Fix this by switching to lower case addresses throughout the file, as is *mostly* the case in the file already & fairly standard throughout the tree. Signed-off-by: Paul Burton Cc: stable@vger.kernel.org # v4.20+ Cc: linux-mips@vger.kernel.org --- arch/mips/boot/dts/xilfpga/nexys4ddr.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/boot/dts/xilfpga/nexys4ddr.dts b/arch/mips/boot/dts/xilfpga/nexys4ddr.dts index 2152b7ba65fb..cc8dbea0911f 100644 --- a/arch/mips/boot/dts/xilfpga/nexys4ddr.dts +++ b/arch/mips/boot/dts/xilfpga/nexys4ddr.dts @@ -90,11 +90,11 @@ interrupts = <0>; }; - axi_i2c: i2c@10A00000 { + axi_i2c: i2c@10a00000 { compatible = "xlnx,xps-iic-2.00.a"; interrupt-parent = <&axi_intc>; interrupts = <4>; - reg = < 0x10A00000 0x10000 >; + reg = < 0x10a00000 0x10000 >; clocks = <&ext>; xlnx,clk-freq = <0x5f5e100>; xlnx,family = "Artix7"; @@ -106,9 +106,9 @@ #address-cells = <1>; #size-cells = <0>; - ad7420@4B { + ad7420@4b { compatible = "adi,adt7420"; - reg = <0x4B>; + reg = <0x4b>; }; } ; }; From 7e8a5903774bc5ce76efc1ed2782fa906e58f8a8 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 4 Feb 2019 10:43:19 -0800 Subject: [PATCH 0585/1436] selftests/bpf: use localhost in tcp_{server,client}.py Bind and connect to localhost. There is no reason for this test to use non-localhost interface. This lets us run this test in a network namespace. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/tcp_client.py | 3 +-- tools/testing/selftests/bpf/tcp_server.py | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py index 7f8200a8702b..a53ed58528d6 100755 --- a/tools/testing/selftests/bpf/tcp_client.py +++ b/tools/testing/selftests/bpf/tcp_client.py @@ -30,12 +30,11 @@ def send(sock, s): serverPort = int(sys.argv[1]) -HostName = socket.gethostname() # create active socket sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) try: - sock.connect((HostName, serverPort)) + sock.connect(('localhost', serverPort)) except socket.error as e: sys.exit(1) diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py index b39903fca4c8..0ca60d193bed 100755 --- a/tools/testing/selftests/bpf/tcp_server.py +++ b/tools/testing/selftests/bpf/tcp_server.py @@ -35,13 +35,10 @@ MAX_PORTS = 2 serverPort = SERVER_PORT serverSocket = None -HostName = socket.gethostname() - # create passive socket serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) -host = socket.gethostname() -try: serverSocket.bind((host, 0)) +try: serverSocket.bind(('localhost', 0)) except socket.error as msg: print('bind fails: ' + str(msg)) From b8dcf8d149db5999d3db937822d3e374eca68b9f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 4 Feb 2019 11:00:57 -0800 Subject: [PATCH 0586/1436] tools/bpf: expose functions btf_ext__* as API functions The following set of functions, which manipulates .BTF.ext section, are exposed as API functions: . btf_ext__new . btf_ext__free . btf_ext__reloc_func_info . btf_ext__reloc_line_info . btf_ext__func_info_rec_size . btf_ext__line_info_rec_size These functions are useful for JIT based bpf codegen, e.g., bcc, to manipulate in-memory .BTF.ext sections. The signature of function btf_ext__reloc_func_info() is also changed to be the same as its definition in btf.c. Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.h | 24 ++++++++++++------------ tools/lib/bpf/libbpf.map | 6 ++++++ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index b1e8e54cc21d..418389e2a662 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -67,18 +67,18 @@ LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); -struct btf_ext *btf_ext__new(__u8 *data, __u32 size); -void btf_ext__free(struct btf_ext *btf_ext); -int btf_ext__reloc_func_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **func_info, __u32 *func_info_len); -int btf_ext__reloc_line_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **line_info, __u32 *cnt); -__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); -__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); +LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size); +LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); +LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **func_info, __u32 *cnt); +LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **line_info, __u32 *cnt); +LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); +LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); #ifdef __cplusplus } /* extern "C" */ diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 62c680fb13d1..46441c5f030b 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -133,4 +133,10 @@ LIBBPF_0.0.2 { bpf_map_lookup_elem_flags; bpf_object__find_map_fd_by_name; bpf_get_link_xdp_id; + btf_ext__free; + btf_ext__func_info_rec_size; + btf_ext__line_info_rec_size; + btf_ext__new; + btf_ext__reloc_func_info; + btf_ext__reloc_line_info; } LIBBPF_0.0.1; From 96408c43447aff5091a6938f29d8b6f2d0aa2064 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 4 Feb 2019 11:00:58 -0800 Subject: [PATCH 0587/1436] tools/bpf: implement libbpf btf__get_map_kv_tids() API function Currently, to get map key/value type id's, the macro BPF_ANNOTATE_KV_PAIR(, , ) needs to be defined in the bpf program for the corresponding map. During program/map loading time, the local static function bpf_map_find_btf_info() in libbpf.c is implemented to retrieve the key/value type ids given the map name. The patch refactored function bpf_map_find_btf_info() to create an API btf__get_map_kv_tids() which includes the bulk of implementation for the original function. The API btf__get_map_kv_tids() can be used by bcc, a JIT based bpf compilation system, which uses the same BPF_ANNOTATE_KV_PAIR to record map key/value types. Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 73 ++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/btf.h | 4 +++ tools/lib/bpf/libbpf.c | 72 +++++---------------------------------- tools/lib/bpf/libbpf.map | 1 + 4 files changed, 87 insertions(+), 63 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 51a0db05bf80..7ec0463354db 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) /* Copyright (c) 2018 Facebook */ +#include #include #include #include @@ -504,6 +505,78 @@ exit_free: return err; } +int btf__get_map_kv_tids(const struct btf *btf, char *map_name, + __u32 expected_key_size, __u32 expected_value_size, + __u32 *key_type_id, __u32 *value_type_id) +{ + const struct btf_type *container_type; + const struct btf_member *key, *value; + const size_t max_name = 256; + char container_name[max_name]; + __s64 key_size, value_size; + __s32 container_id; + + if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == + max_name) { + pr_warning("map:%s length of '____btf_map_%s' is too long\n", + map_name, map_name); + return -EINVAL; + } + + container_id = btf__find_by_name(btf, container_name); + if (container_id < 0) { + pr_warning("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n", + map_name, container_name); + return container_id; + } + + container_type = btf__type_by_id(btf, container_id); + if (!container_type) { + pr_warning("map:%s cannot find BTF type for container_id:%u\n", + map_name, container_id); + return -EINVAL; + } + + if (BTF_INFO_KIND(container_type->info) != BTF_KIND_STRUCT || + BTF_INFO_VLEN(container_type->info) < 2) { + pr_warning("map:%s container_name:%s is an invalid container struct\n", + map_name, container_name); + return -EINVAL; + } + + key = (struct btf_member *)(container_type + 1); + value = key + 1; + + key_size = btf__resolve_size(btf, key->type); + if (key_size < 0) { + pr_warning("map:%s invalid BTF key_type_size\n", map_name); + return key_size; + } + + if (expected_key_size != key_size) { + pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", + map_name, (__u32)key_size, expected_key_size); + return -EINVAL; + } + + value_size = btf__resolve_size(btf, value->type); + if (value_size < 0) { + pr_warning("map:%s invalid BTF value_type_size\n", map_name); + return value_size; + } + + if (expected_value_size != value_size) { + pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", + map_name, (__u32)value_size, expected_value_size); + return -EINVAL; + } + + *key_type_id = key->type; + *value_type_id = value->type; + + return 0; +} + struct btf_ext_sec_copy_param { __u32 off; __u32 len; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 418389e2a662..258c87e9f55d 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -66,6 +66,10 @@ LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); +LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, char *map_name, + __u32 expected_key_size, + __u32 expected_value_size, + __u32 *key_type_id, __u32 *value_type_id); LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size); LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ce209ab9a1a2..84ca6c2bea91 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1056,72 +1056,18 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf) { - const struct btf_type *container_type; - const struct btf_member *key, *value; struct bpf_map_def *def = &map->def; - const size_t max_name = 256; - char container_name[max_name]; - __s64 key_size, value_size; - __s32 container_id; + __u32 key_type_id, value_type_id; + int ret; - if (snprintf(container_name, max_name, "____btf_map_%s", map->name) == - max_name) { - pr_warning("map:%s length of '____btf_map_%s' is too long\n", - map->name, map->name); - return -EINVAL; - } + ret = btf__get_map_kv_tids(btf, map->name, def->key_size, + def->value_size, &key_type_id, + &value_type_id); + if (ret) + return ret; - container_id = btf__find_by_name(btf, container_name); - if (container_id < 0) { - pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n", - map->name, container_name); - return container_id; - } - - container_type = btf__type_by_id(btf, container_id); - if (!container_type) { - pr_warning("map:%s cannot find BTF type for container_id:%u\n", - map->name, container_id); - return -EINVAL; - } - - if (BTF_INFO_KIND(container_type->info) != BTF_KIND_STRUCT || - BTF_INFO_VLEN(container_type->info) < 2) { - pr_warning("map:%s container_name:%s is an invalid container struct\n", - map->name, container_name); - return -EINVAL; - } - - key = (struct btf_member *)(container_type + 1); - value = key + 1; - - key_size = btf__resolve_size(btf, key->type); - if (key_size < 0) { - pr_warning("map:%s invalid BTF key_type_size\n", - map->name); - return key_size; - } - - if (def->key_size != key_size) { - pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", - map->name, (__u32)key_size, def->key_size); - return -EINVAL; - } - - value_size = btf__resolve_size(btf, value->type); - if (value_size < 0) { - pr_warning("map:%s invalid BTF value_type_size\n", map->name); - return value_size; - } - - if (def->value_size != value_size) { - pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", - map->name, (__u32)value_size, def->value_size); - return -EINVAL; - } - - map->btf_key_type_id = key->type; - map->btf_value_type_id = value->type; + map->btf_key_type_id = key_type_id; + map->btf_value_type_id = value_type_id; return 0; } diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 46441c5f030b..7990e857e003 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -133,6 +133,7 @@ LIBBPF_0.0.2 { bpf_map_lookup_elem_flags; bpf_object__find_map_fd_by_name; bpf_get_link_xdp_id; + btf__get_map_kv_tids; btf_ext__free; btf_ext__func_info_rec_size; btf_ext__line_info_rec_size; From cdc306a5c9cd3607db5d018c6320cdd923c04373 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Sat, 13 Oct 2018 20:34:42 +0800 Subject: [PATCH 0588/1436] rds: make v3.1 as compat version Mark RDSv3.1 as compat version and add v4.1 version macro's. Subsequent patches enable TOS(Type of Service) feature which is tied with v4.1 for RDMA transport. Reviewed-by: Sowmini Varadhan Signed-off-by: Santosh Shilimkar [yanjun.zhu@oracle.com: Adapted original patch with ipv6 changes] Signed-off-by: Zhu Yanjun --- net/rds/connection.c | 1 + net/rds/ib_cm.c | 40 +++++++++++++++++++++++----------------- net/rds/rds.h | 4 ++++ net/rds/threads.c | 1 + 4 files changed, 29 insertions(+), 17 deletions(-) diff --git a/net/rds/connection.c b/net/rds/connection.c index 3bd2f4a5a30d..1ab14b68ecc8 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -139,6 +139,7 @@ static void __rds_conn_path_init(struct rds_connection *conn, atomic_set(&cp->cp_state, RDS_CONN_DOWN); cp->cp_send_gen = 0; cp->cp_reconnect_jiffies = 0; + cp->cp_conn->c_proposed_version = RDS_PROTOCOL_VERSION; INIT_DELAYED_WORK(&cp->cp_send_w, rds_send_worker); INIT_DELAYED_WORK(&cp->cp_recv_w, rds_recv_worker); INIT_DELAYED_WORK(&cp->cp_conn_w, rds_connect_worker); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index bfbb31f0c7fd..0eeae0910f06 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -133,23 +133,24 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even rds_ib_set_flow_control(conn, be32_to_cpu(credit)); } - if (conn->c_version < RDS_PROTOCOL(3, 1)) { - pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n", - &conn->c_laddr, &conn->c_faddr, - RDS_PROTOCOL_MAJOR(conn->c_version), - RDS_PROTOCOL_MINOR(conn->c_version)); - set_bit(RDS_DESTROY_PENDING, &conn->c_path[0].cp_flags); - rds_conn_destroy(conn); - return; - } else { - pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c> version %u.%u%s\n", - ic->i_active_side ? "Active" : "Passive", - &conn->c_laddr, &conn->c_faddr, - RDS_PROTOCOL_MAJOR(conn->c_version), - RDS_PROTOCOL_MINOR(conn->c_version), - ic->i_flowctl ? ", flow control" : ""); + if (conn->c_version < RDS_PROTOCOL_VERSION) { + if (conn->c_version != RDS_PROTOCOL_COMPAT_VERSION) { + pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n", + &conn->c_laddr, &conn->c_faddr, + RDS_PROTOCOL_MAJOR(conn->c_version), + RDS_PROTOCOL_MINOR(conn->c_version)); + rds_conn_destroy(conn); + return; + } } + pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c> version %u.%u%s\n", + ic->i_active_side ? "Active" : "Passive", + &conn->c_laddr, &conn->c_faddr, + RDS_PROTOCOL_MAJOR(conn->c_version), + RDS_PROTOCOL_MINOR(conn->c_version), + ic->i_flowctl ? ", flow control" : ""); + atomic_set(&ic->i_cq_quiesce, 0); /* Init rings and fill recv. this needs to wait until protocol @@ -184,6 +185,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even NULL); } + conn->c_proposed_version = conn->c_version; rds_connect_complete(conn); } @@ -667,6 +669,9 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6) version = RDS_PROTOCOL_3_0; while ((common >>= 1) != 0) version++; + } else if (RDS_PROTOCOL_COMPAT_VERSION == + RDS_PROTOCOL(major, minor)) { + version = RDS_PROTOCOL_COMPAT_VERSION; } else { if (isv6) printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI6c using incompatible protocol version %u.%u\n", @@ -861,7 +866,7 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6) /* If the peer doesn't do protocol negotiation, we must * default to RDSv3.0 */ - rds_ib_set_protocol(conn, RDS_PROTOCOL_3_0); + rds_ib_set_protocol(conn, RDS_PROTOCOL_VERSION); ic->i_flowctl = rds_ib_sysctl_flow_control; /* advertise flow control */ ret = rds_ib_setup_qp(conn); @@ -870,7 +875,8 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6) goto out; } - rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION, + rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, + conn->c_proposed_version, UINT_MAX, UINT_MAX, isv6); ret = rdma_connect(cm_id, &conn_param); if (ret) diff --git a/net/rds/rds.h b/net/rds/rds.h index 4ffe100ff5e6..660023f08553 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -19,10 +19,13 @@ */ #define RDS_PROTOCOL_3_0 0x0300 #define RDS_PROTOCOL_3_1 0x0301 +#define RDS_PROTOCOL_4_0 0x0400 +#define RDS_PROTOCOL_4_1 0x0401 #define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1 #define RDS_PROTOCOL_MAJOR(v) ((v) >> 8) #define RDS_PROTOCOL_MINOR(v) ((v) & 255) #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min) +#define RDS_PROTOCOL_COMPAT_VERSION RDS_PROTOCOL_3_1 /* The following ports, 16385, 18634, 18635, are registered with IANA as * the ports to be used for RDS over TCP and UDP. Currently, only RDS over @@ -151,6 +154,7 @@ struct rds_connection { struct rds_cong_map *c_fcong; /* Protocol version */ + unsigned int c_proposed_version; unsigned int c_version; possible_net_t c_net; diff --git a/net/rds/threads.c b/net/rds/threads.c index e64f9e4c3cda..32dc50f0a303 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -93,6 +93,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr) queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); } rcu_read_unlock(); + cp->cp_conn->c_proposed_version = RDS_PROTOCOL_VERSION; } EXPORT_SYMBOL_GPL(rds_connect_path_complete); From d021fabf525ffdaeb4e6f1cf50e1ba325ca5273b Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Tue, 23 Oct 2018 23:09:00 -0400 Subject: [PATCH 0589/1436] rds: rdma: add consumer reject For legacy protocol version incompatibility with non linux RDS, consumer reject reason being used to convey it to peer. But the choice of reject reason value as '1' was really poor. Anyway for interoperability reasons with shipping products, it needs to be supported. For any future versions, properly encoded reject reason should to be used. Reviewed-by: Sowmini Varadhan Signed-off-by: Santosh Shilimkar [yanjun.zhu@oracle.com: Adapted original patch with ipv6 changes] Signed-off-by: Zhu Yanjun --- net/rds/ib_cm.c | 6 ++++-- net/rds/rdma_transport.c | 12 ++++++++++++ net/rds/rdma_transport.h | 6 ++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 0eeae0910f06..a1c3ad380ec8 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -734,8 +734,10 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, /* Check whether the remote protocol version matches ours. */ version = rds_ib_protocol_compatible(event, isv6); - if (!version) + if (!version) { + err = RDS_RDMA_REJ_INCOMPAT; goto out; + } dp = event->param.conn.private_data; if (isv6) { @@ -851,7 +853,7 @@ out: if (conn) mutex_unlock(&conn->c_cm_lock); if (err) - rdma_reject(cm_id, NULL, 0); + rdma_reject(cm_id, &err, sizeof(int)); return destroy; } diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 6b0f57c83a2a..63cbc6b8560c 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -51,6 +51,8 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, struct rds_connection *conn = cm_id->context; struct rds_transport *trans; int ret = 0; + int *err; + u8 len; rdsdebug("conn %p id %p handling event %u (%s)\n", conn, cm_id, event->event, rdma_event_msg(event->event)); @@ -106,8 +108,18 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, break; case RDMA_CM_EVENT_REJECTED: + if (!conn) + break; + err = (int *)rdma_consumer_reject_data(cm_id, event, &len); + if (!err || (err && ((*err) == RDS_RDMA_REJ_INCOMPAT))) { + pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n", + &conn->c_laddr, &conn->c_faddr); + conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION; + rds_conn_drop(conn); + } rdsdebug("Connection rejected: %s\n", rdma_reject_msg(cm_id, event->status)); + break; /* FALLTHROUGH */ case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ROUTE_ERROR: diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h index 200d3134aaae..bfafd4a6d827 100644 --- a/net/rds/rdma_transport.h +++ b/net/rds/rdma_transport.h @@ -11,6 +11,12 @@ #define RDS_RDMA_RESOLVE_TIMEOUT_MS 5000 +/* Below reject reason is for legacy interoperability issue with non-linux + * RDS endpoints where older version incompatibility is conveyed via value 1. + * For future version(s), proper encoded reject reason should be be used. + */ +#define RDS_RDMA_REJ_INCOMPAT 1 + int rds_rdma_conn_connect(struct rds_connection *conn); int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event); From 3eb450367d0823226515ee24712ed08eccb33eb9 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Tue, 23 Oct 2018 23:21:14 -0400 Subject: [PATCH 0590/1436] rds: add type of service(tos) infrastructure RDS Service type (TOS) is user-defined and needs to be configured via RDS IOCTL interface. It must be set before initiating any traffic and once set the TOS can not be changed. All out-going traffic from the socket will be associated with its TOS. Reviewed-by: Sowmini Varadhan Signed-off-by: Santosh Shilimkar [yanjun.zhu@oracle.com: Adapted original patch with ipv6 changes] Signed-off-by: Zhu Yanjun --- include/uapi/linux/rds.h | 11 +++++++++++ net/rds/af_rds.c | 35 ++++++++++++++++++++++++++++++++++- net/rds/connection.c | 20 +++++++++++--------- net/rds/ib.c | 1 + net/rds/ib_cm.c | 2 +- net/rds/rdma_transport.c | 1 + net/rds/rds.h | 9 +++++++-- net/rds/recv.c | 1 + net/rds/send.c | 6 +++--- net/rds/tcp.c | 1 + net/rds/tcp_listen.c | 2 +- 11 files changed, 72 insertions(+), 17 deletions(-) diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h index 8b73cb603c5f..5d0f76c780e5 100644 --- a/include/uapi/linux/rds.h +++ b/include/uapi/linux/rds.h @@ -69,6 +69,12 @@ #define RDS_TRANS_COUNT 3 #define RDS_TRANS_NONE (~0) +/* IOCTLS commands for SOL_RDS */ +#define SIOCRDSSETTOS (SIOCPROTOPRIVATE) +#define SIOCRDSGETTOS (SIOCPROTOPRIVATE + 1) + +typedef __u8 rds_tos_t; + /* * Control message types for SOL_RDS. * @@ -149,6 +155,7 @@ struct rds_info_connection { __be32 faddr; __u8 transport[TRANSNAMSIZ]; /* null term ascii */ __u8 flags; + __u8 tos; } __attribute__((packed)); struct rds6_info_connection { @@ -171,6 +178,7 @@ struct rds_info_message { __be16 lport; __be16 fport; __u8 flags; + __u8 tos; } __attribute__((packed)); struct rds6_info_message { @@ -214,6 +222,7 @@ struct rds_info_tcp_socket { __u32 last_sent_nxt; __u32 last_expected_una; __u32 last_seen_una; + __u8 tos; } __attribute__((packed)); struct rds6_info_tcp_socket { @@ -240,6 +249,7 @@ struct rds_info_rdma_connection { __u32 max_send_sge; __u32 rdma_mr_max; __u32 rdma_mr_size; + __u8 tos; }; struct rds6_info_rdma_connection { @@ -253,6 +263,7 @@ struct rds6_info_rdma_connection { __u32 max_send_sge; __u32 rdma_mr_max; __u32 rdma_mr_size; + __u8 tos; }; /* RDS message Receive Path Latency points */ diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 65571a6273c3..904515858037 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -254,7 +254,38 @@ static __poll_t rds_poll(struct file *file, struct socket *sock, static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { - return -ENOIOCTLCMD; + struct rds_sock *rs = rds_sk_to_rs(sock->sk); + rds_tos_t tos; + + switch (cmd) { + case SIOCRDSSETTOS: + if (get_user(tos, (rds_tos_t __user *)arg)) + return -EFAULT; + + if (rs->rs_transport && + rs->rs_transport->t_type == RDS_TRANS_TCP) + tos = 0; + + spin_lock_bh(&rds_sock_lock); + if (rs->rs_tos || rs->rs_conn) { + spin_unlock_bh(&rds_sock_lock); + return -EINVAL; + } + rs->rs_tos = tos; + spin_unlock_bh(&rds_sock_lock); + break; + case SIOCRDSGETTOS: + spin_lock_bh(&rds_sock_lock); + tos = rs->rs_tos; + spin_unlock_bh(&rds_sock_lock); + if (put_user(tos, (rds_tos_t __user *)arg)) + return -EFAULT; + break; + default: + return -ENOIOCTLCMD; + } + + return 0; } static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval, @@ -650,6 +681,8 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol) spin_lock_init(&rs->rs_rdma_lock); rs->rs_rdma_keys = RB_ROOT; rs->rs_rx_traces = 0; + rs->rs_tos = 0; + rs->rs_conn = NULL; spin_lock_bh(&rds_sock_lock); list_add_tail(&rs->rs_item, &rds_sock_list); diff --git a/net/rds/connection.c b/net/rds/connection.c index 1ab14b68ecc8..7ea134f9a825 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -84,7 +84,7 @@ static struct rds_connection *rds_conn_lookup(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, struct rds_transport *trans, - int dev_if) + u8 tos, int dev_if) { struct rds_connection *conn, *ret = NULL; @@ -92,6 +92,7 @@ static struct rds_connection *rds_conn_lookup(struct net *net, if (ipv6_addr_equal(&conn->c_faddr, faddr) && ipv6_addr_equal(&conn->c_laddr, laddr) && conn->c_trans == trans && + conn->c_tos == tos && net == rds_conn_net(conn) && conn->c_dev_if == dev_if) { ret = conn; @@ -160,7 +161,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, struct rds_transport *trans, - gfp_t gfp, + gfp_t gfp, u8 tos, int is_outgoing, int dev_if) { @@ -172,7 +173,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1); rcu_read_lock(); - conn = rds_conn_lookup(net, head, laddr, faddr, trans, dev_if); + conn = rds_conn_lookup(net, head, laddr, faddr, trans, tos, dev_if); if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && @@ -206,6 +207,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, conn->c_isv6 = !ipv6_addr_v4mapped(laddr); conn->c_faddr = *faddr; conn->c_dev_if = dev_if; + conn->c_tos = tos; #if IS_ENABLED(CONFIG_IPV6) /* If the local address is link local, set c_bound_if to be the @@ -298,7 +300,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, struct rds_connection *found; found = rds_conn_lookup(net, head, laddr, faddr, trans, - dev_if); + tos, dev_if); if (found) { struct rds_conn_path *cp; int i; @@ -333,10 +335,10 @@ out: struct rds_connection *rds_conn_create(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, - struct rds_transport *trans, gfp_t gfp, - int dev_if) + struct rds_transport *trans, u8 tos, + gfp_t gfp, int dev_if) { - return __rds_conn_create(net, laddr, faddr, trans, gfp, 0, dev_if); + return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 0, dev_if); } EXPORT_SYMBOL_GPL(rds_conn_create); @@ -344,9 +346,9 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, struct rds_transport *trans, - gfp_t gfp, int dev_if) + u8 tos, gfp_t gfp, int dev_if) { - return __rds_conn_create(net, laddr, faddr, trans, gfp, 1, dev_if); + return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 1, dev_if); } EXPORT_SYMBOL_GPL(rds_conn_create_outgoing); diff --git a/net/rds/ib.c b/net/rds/ib.c index 9d7b7586f240..21b6588b71ca 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -301,6 +301,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, iinfo->src_addr = conn->c_laddr.s6_addr32[3]; iinfo->dst_addr = conn->c_faddr.s6_addr32[3]; + iinfo->tos = conn->c_tos; memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid)); memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index a1c3ad380ec8..70518e329a9e 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -786,7 +786,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, /* RDS/IB is not currently netns aware, thus init_net */ conn = rds_conn_create(&init_net, daddr6, saddr6, - &rds_ib_transport, GFP_KERNEL, ifindex); + &rds_ib_transport, 0, GFP_KERNEL, ifindex); if (IS_ERR(conn)) { rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn)); conn = NULL; diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 63cbc6b8560c..e37f91537d29 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -115,6 +115,7 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n", &conn->c_laddr, &conn->c_faddr); conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION; + conn->c_tos = 0; rds_conn_drop(conn); } rdsdebug("Connection rejected: %s\n", diff --git a/net/rds/rds.h b/net/rds/rds.h index 660023f08553..7e52b92092d7 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -158,6 +158,9 @@ struct rds_connection { unsigned int c_version; possible_net_t c_net; + /* TOS */ + u8 c_tos; + struct list_head c_map_item; unsigned long c_map_queued; @@ -652,6 +655,7 @@ struct rds_sock { u8 rs_rx_traces; u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX]; struct rds_msg_zcopy_queue rs_zcookie_queue; + u8 rs_tos; }; static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk) @@ -760,13 +764,14 @@ void rds_conn_exit(void); struct rds_connection *rds_conn_create(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, - struct rds_transport *trans, gfp_t gfp, + struct rds_transport *trans, + u8 tos, gfp_t gfp, int dev_if); struct rds_connection *rds_conn_create_outgoing(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, struct rds_transport *trans, - gfp_t gfp, int dev_if); + u8 tos, gfp_t gfp, int dev_if); void rds_conn_shutdown(struct rds_conn_path *cpath); void rds_conn_destroy(struct rds_connection *conn); void rds_conn_drop(struct rds_connection *conn); diff --git a/net/rds/recv.c b/net/rds/recv.c index 6bb6b16ca270..853de4876088 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -782,6 +782,7 @@ void rds_inc_info_copy(struct rds_incoming *inc, minfo.seq = be64_to_cpu(inc->i_hdr.h_sequence); minfo.len = be32_to_cpu(inc->i_hdr.h_len); + minfo.tos = inc->i_conn->c_tos; if (flip) { minfo.laddr = daddr; diff --git a/net/rds/send.c b/net/rds/send.c index fd8b687d5c05..c555e121b908 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1277,12 +1277,12 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* rds_conn_create has a spinlock that runs with IRQ off. * Caching the conn in the socket helps a lot. */ - if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr)) + if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr)) { conn = rs->rs_conn; - else { + } else { conn = rds_conn_create_outgoing(sock_net(sock->sk), &rs->rs_bound_addr, &daddr, - rs->rs_transport, + rs->rs_transport, 0, sock->sk->sk_allocation, scope_id); if (IS_ERR(conn)) { diff --git a/net/rds/tcp.c b/net/rds/tcp.c index c16f0a362c32..eb6851952cbf 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -267,6 +267,7 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, tsinfo.last_sent_nxt = tc->t_last_sent_nxt; tsinfo.last_expected_una = tc->t_last_expected_una; tsinfo.last_seen_una = tc->t_last_seen_una; + tsinfo.tos = tc->t_cpath->cp_conn->c_tos; rds_info_copy(iter, &tsinfo, sizeof(tsinfo)); } diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index c12203f646da..810a3a49e947 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -200,7 +200,7 @@ int rds_tcp_accept_one(struct socket *sock) conn = rds_conn_create(sock_net(sock->sk), my_addr, peer_addr, - &rds_tcp_transport, GFP_KERNEL, dev_if); + &rds_tcp_transport, 0, GFP_KERNEL, dev_if); if (IS_ERR(conn)) { ret = PTR_ERR(conn); From 56dc8bce9f761cf61258e25d96dec4072273d8db Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Sat, 13 Oct 2018 21:36:49 +0800 Subject: [PATCH 0591/1436] rds: add transport specific tos_map hook RDMA transport maps user tos to underline virtual lanes(VL) for IB or DSCP values. RDMA CM transport abstract thats for RDS. TCP transport makes use of default priority 0 and maps all user tos values to it. Reviewed-by: Sowmini Varadhan Signed-off-by: Santosh Shilimkar [yanjun.zhu@oracle.com: Adapted original patch with ipv6 changes] Signed-off-by: Zhu Yanjun --- net/rds/af_rds.c | 10 ++++++---- net/rds/ib.c | 10 ++++++++++ net/rds/rds.h | 1 + net/rds/tcp.c | 7 +++++++ 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 904515858037..d6cc97fbbbb0 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -255,16 +255,18 @@ static __poll_t rds_poll(struct file *file, struct socket *sock, static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct rds_sock *rs = rds_sk_to_rs(sock->sk); - rds_tos_t tos; + rds_tos_t utos, tos = 0; switch (cmd) { case SIOCRDSSETTOS: - if (get_user(tos, (rds_tos_t __user *)arg)) + if (get_user(utos, (rds_tos_t __user *)arg)) return -EFAULT; if (rs->rs_transport && - rs->rs_transport->t_type == RDS_TRANS_TCP) - tos = 0; + rs->rs_transport->get_tos_map) + tos = rs->rs_transport->get_tos_map(utos); + else + return -ENOIOCTLCMD; spin_lock_bh(&rds_sock_lock); if (rs->rs_tos || rs->rs_conn) { diff --git a/net/rds/ib.c b/net/rds/ib.c index 21b6588b71ca..2da9b75bad16 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -515,6 +515,15 @@ void rds_ib_exit(void) rds_ib_mr_exit(); } +static u8 rds_ib_get_tos_map(u8 tos) +{ + /* 1:1 user to transport map for RDMA transport. + * In future, if custom map is desired, hook can export + * user configurable map. + */ + return tos; +} + struct rds_transport rds_ib_transport = { .laddr_check = rds_ib_laddr_check, .xmit_path_complete = rds_ib_xmit_path_complete, @@ -537,6 +546,7 @@ struct rds_transport rds_ib_transport = { .sync_mr = rds_ib_sync_mr, .free_mr = rds_ib_free_mr, .flush_mrs = rds_ib_flush_mrs, + .get_tos_map = rds_ib_get_tos_map, .t_owner = THIS_MODULE, .t_name = "infiniband", .t_unloading = rds_ib_is_unloading, diff --git a/net/rds/rds.h b/net/rds/rds.h index 7e52b92092d7..0d8f67cadd74 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -574,6 +574,7 @@ struct rds_transport { void (*free_mr)(void *trans_private, int invalidate); void (*flush_mrs)(void); bool (*t_unloading)(struct rds_connection *conn); + u8 (*get_tos_map)(u8 tos); }; /* Bind hash table key length. It is the sum of the size of a struct diff --git a/net/rds/tcp.c b/net/rds/tcp.c index eb6851952cbf..fd2694174607 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -453,6 +453,12 @@ static void rds_tcp_destroy_conns(void) static void rds_tcp_exit(void); +static u8 rds_tcp_get_tos_map(u8 tos) +{ + /* all user tos mapped to default 0 for TCP transport */ + return 0; +} + struct rds_transport rds_tcp_transport = { .laddr_check = rds_tcp_laddr_check, .xmit_path_prepare = rds_tcp_xmit_path_prepare, @@ -467,6 +473,7 @@ struct rds_transport rds_tcp_transport = { .inc_free = rds_tcp_inc_free, .stats_info_copy = rds_tcp_stats_info_copy, .exit = rds_tcp_exit, + .get_tos_map = rds_tcp_get_tos_map, .t_owner = THIS_MODULE, .t_name = "tcp", .t_type = RDS_TRANS_TCP, From fd261ce6a30e01ad67c416e2c67e263024b3a6f9 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Sat, 13 Oct 2018 22:13:23 +0800 Subject: [PATCH 0592/1436] rds: rdma: update rdma transport for tos For RDMA transports, RDS TOS is an extension of IB QoS(Annex A13) to provide clients the ability to segregate traffic flows for different type of data. RDMA CM abstract it for ULPs using rdma_set_service_type(). Internally, each traffic flow is represented by a connection with all of its independent resources like that of a normal connection, and is differentiated by service type. In other words, there can be multiple qp connections between an IP pair and each supports a unique service type. The feature has been added from RDSv4.1 onwards and supports rolling upgrades. RDMA connection metadata also carries the tos information to set up SL on end to end context. The original code was developed by Bang Nguyen in downstream kernel back in 2.6.32 kernel days and it has evolved over period of time. Reviewed-by: Sowmini Varadhan Signed-off-by: Santosh Shilimkar [yanjun.zhu@oracle.com: Adapted original patch with ipv6 changes] Signed-off-by: Zhu Yanjun --- net/rds/ib.h | 4 +++- net/rds/ib_cm.c | 32 +++++++++++++++++--------------- net/rds/ib_recv.c | 4 ++-- net/rds/ib_send.c | 5 +++-- net/rds/rdma_transport.c | 1 + net/rds/send.c | 5 +++-- 6 files changed, 29 insertions(+), 22 deletions(-) diff --git a/net/rds/ib.h b/net/rds/ib.h index 71ff356ee702..752f92235a38 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -67,7 +67,9 @@ struct rds_ib_conn_priv_cmn { u8 ricpc_protocol_major; u8 ricpc_protocol_minor; __be16 ricpc_protocol_minor_mask; /* bitmask */ - __be32 ricpc_reserved1; + u8 ricpc_dp_toss; + u8 ripc_reserved1; + __be16 ripc_reserved2; __be64 ricpc_ack_seq; __be32 ricpc_credit; /* non-zero enables flow ctl */ }; diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 70518e329a9e..66c6eb56072b 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -144,9 +144,9 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even } } - pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c> version %u.%u%s\n", + pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c,%d> version %u.%u%s\n", ic->i_active_side ? "Active" : "Passive", - &conn->c_laddr, &conn->c_faddr, + &conn->c_laddr, &conn->c_faddr, conn->c_tos, RDS_PROTOCOL_MAJOR(conn->c_version), RDS_PROTOCOL_MINOR(conn->c_version), ic->i_flowctl ? ", flow control" : ""); @@ -222,6 +222,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); dp->ricp_v6.dp_ack_seq = cpu_to_be64(rds_ib_piggyb_ack(ic)); + dp->ricp_v6.dp_cmn.ricpc_dp_toss = conn->c_tos; conn_param->private_data = &dp->ricp_v6; conn_param->private_data_len = sizeof(dp->ricp_v6); @@ -236,6 +237,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); dp->ricp_v4.dp_ack_seq = cpu_to_be64(rds_ib_piggyb_ack(ic)); + dp->ricp_v4.dp_cmn.ricpc_dp_toss = conn->c_tos; conn_param->private_data = &dp->ricp_v4; conn_param->private_data_len = sizeof(dp->ricp_v4); @@ -391,10 +393,9 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data) rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); break; default: - rdsdebug("Fatal QP Event %u (%s) " - "- connection %pI6c->%pI6c, reconnecting\n", - event->event, ib_event_msg(event->event), - &conn->c_laddr, &conn->c_faddr); + rdsdebug("Fatal QP Event %u (%s) - connection %pI6c->%pI6c, reconnecting\n", + event->event, ib_event_msg(event->event), + &conn->c_laddr, &conn->c_faddr); rds_conn_drop(conn); break; } @@ -662,11 +663,11 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6) /* Even if len is crap *now* I still want to check it. -ASG */ if (event->param.conn.private_data_len < data_len || major == 0) - return RDS_PROTOCOL_3_0; + return RDS_PROTOCOL_4_0; common = be16_to_cpu(mask) & RDS_IB_SUPPORTED_PROTOCOLS; - if (major == 3 && common) { - version = RDS_PROTOCOL_3_0; + if (major == 4 && common) { + version = RDS_PROTOCOL_4_0; while ((common >>= 1) != 0) version++; } else if (RDS_PROTOCOL_COMPAT_VERSION == @@ -778,15 +779,16 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, daddr6 = &d_mapped_addr; } - rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid " - "0x%llx\n", saddr6, daddr6, - RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version), + rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid 0x%llx, tos:%d\n", + saddr6, daddr6, RDS_PROTOCOL_MAJOR(version), + RDS_PROTOCOL_MINOR(version), (unsigned long long)be64_to_cpu(lguid), - (unsigned long long)be64_to_cpu(fguid)); + (unsigned long long)be64_to_cpu(fguid), dp_cmn->ricpc_dp_toss); /* RDS/IB is not currently netns aware, thus init_net */ conn = rds_conn_create(&init_net, daddr6, saddr6, - &rds_ib_transport, 0, GFP_KERNEL, ifindex); + &rds_ib_transport, dp_cmn->ricpc_dp_toss, + GFP_KERNEL, ifindex); if (IS_ERR(conn)) { rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn)); conn = NULL; @@ -868,7 +870,7 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6) /* If the peer doesn't do protocol negotiation, we must * default to RDSv3.0 */ - rds_ib_set_protocol(conn, RDS_PROTOCOL_VERSION); + rds_ib_set_protocol(conn, RDS_PROTOCOL_4_1); ic->i_flowctl = rds_ib_sysctl_flow_control; /* advertise flow control */ ret = rds_ib_setup_qp(conn); diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 2f16146e4ec9..d395eec98959 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -986,9 +986,9 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, } else { /* We expect errors as the qp is drained during shutdown */ if (rds_conn_up(conn) || rds_conn_connecting(conn)) - rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n", + rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), disconnecting and reconnecting\n", &conn->c_laddr, &conn->c_faddr, - wc->status, + conn->c_tos, wc->status, ib_wc_status_msg(wc->status)); } diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 4e0c36acf866..09c46f2e97fa 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -305,8 +305,9 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) /* We expect errors as the qp is drained during shutdown */ if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) { - rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n", - &conn->c_laddr, &conn->c_faddr, wc->status, + rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), disconnecting and reconnecting\n", + &conn->c_laddr, &conn->c_faddr, + conn->c_tos, wc->status, ib_wc_status_msg(wc->status)); } } diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index e37f91537d29..46bce8389066 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -83,6 +83,7 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, break; case RDMA_CM_EVENT_ADDR_RESOLVED: + rdma_set_service_type(cm_id, conn->c_tos); /* XXX do we need to clean up if this fails? */ ret = rdma_resolve_route(cm_id, RDS_RDMA_RESOLVE_TIMEOUT_MS); diff --git a/net/rds/send.c b/net/rds/send.c index c555e121b908..166dd578c1cc 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1277,12 +1277,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* rds_conn_create has a spinlock that runs with IRQ off. * Caching the conn in the socket helps a lot. */ - if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr)) { + if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr) && + rs->rs_tos == rs->rs_conn->c_tos) { conn = rs->rs_conn; } else { conn = rds_conn_create_outgoing(sock_net(sock->sk), &rs->rs_bound_addr, &daddr, - rs->rs_transport, 0, + rs->rs_transport, rs->rs_tos, sock->sk->sk_allocation, scope_id); if (IS_ERR(conn)) { From 2b424cfc69728224fcb5fad138ea7260728e0901 Mon Sep 17 00:00:00 2001 From: Jun-Ru Chang Date: Tue, 29 Jan 2019 11:56:07 +0800 Subject: [PATCH 0593/1436] MIPS: Remove function size check in get_frame_info() Patch (b6c7a324df37b "MIPS: Fix get_frame_info() handling of microMIPS function size.") introduces additional function size check for microMIPS by only checking insn between ip and ip + func_size. However, func_size in get_frame_info() is always 0 if KALLSYMS is not enabled. This causes get_frame_info() to return immediately without calculating correct frame_size, which in turn causes "Can't analyze schedule() prologue" warning messages at boot time. This patch removes func_size check, and let the frame_size check run up to 128 insns for both MIPS and microMIPS. Signed-off-by: Jun-Ru Chang Signed-off-by: Tony Wu Signed-off-by: Paul Burton Fixes: b6c7a324df37b ("MIPS: Fix get_frame_info() handling of microMIPS function size.") Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: --- arch/mips/kernel/process.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 6829a064aac8..339870ed92f7 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -371,7 +371,7 @@ static inline int is_sp_move_ins(union mips_instruction *ip, int *frame_size) static int get_frame_info(struct mips_frame_info *info) { bool is_mmips = IS_ENABLED(CONFIG_CPU_MICROMIPS); - union mips_instruction insn, *ip, *ip_end; + union mips_instruction insn, *ip; const unsigned int max_insns = 128; unsigned int last_insn_size = 0; unsigned int i; @@ -384,10 +384,9 @@ static int get_frame_info(struct mips_frame_info *info) if (!ip) goto err; - ip_end = (void *)ip + info->func_size; - - for (i = 0; i < max_insns && ip < ip_end; i++) { + for (i = 0; i < max_insns; i++) { ip = (void *)ip + last_insn_size; + if (is_mmips && mm_insn_16bit(ip->halfword[0])) { insn.word = ip->halfword[0] << 16; last_insn_size = 2; From a8a1f7d09cfc7e18874786c7634c9e71384fcd4e Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 4 Feb 2019 16:20:55 -0800 Subject: [PATCH 0594/1436] libbpf: fix libbpf_print With the recent print rework we now have the following problem: pr_{warning,info,debug} expand to __pr which calls libbpf_print. libbpf_print does va_start and calls __libbpf_pr with va_list argument. In __base_pr we again do va_start. Because the next argument is a va_list, we don't get correct pointer to the argument (and print noting in my case, I don't know why it doesn't crash tbh). Fix this by changing libbpf_print_fn_t signature to accept va_list and remove unneeded calls to va_start in the existing users. Alternatively, this can we solved by exporting __libbpf_pr and changing __pr macro to (and killing libbpf_print): { if (__libbpf_pr) __libbpf_pr(level, "libbpf: " fmt, ##__VA_ARGS__) } Signed-off-by: Stanislav Fomichev Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 14 ++++---------- tools/lib/bpf/libbpf.h | 3 +-- tools/perf/util/bpf-loader.c | 10 ++-------- tools/testing/selftests/bpf/test_btf.c | 13 ++----------- tools/testing/selftests/bpf/test_libbpf_open.c | 10 ++-------- tools/testing/selftests/bpf/test_progs.c | 10 ++-------- 6 files changed, 13 insertions(+), 47 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 84ca6c2bea91..47969aa0faf8 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -54,22 +54,16 @@ #define __printf(a, b) __attribute__((format(printf, a, b))) -__printf(2, 3) -static int __base_pr(enum libbpf_print_level level, const char *format, ...) +static int __base_pr(enum libbpf_print_level level, const char *format, + va_list args) { - va_list args; - int err; - if (level == LIBBPF_DEBUG) return 0; - va_start(args, format); - err = vfprintf(stderr, format, args); - va_end(args); - return err; + return vfprintf(stderr, format, args); } -static __printf(2, 3) libbpf_print_fn_t __libbpf_pr = __base_pr; +static libbpf_print_fn_t __libbpf_pr = __base_pr; void libbpf_set_print(libbpf_print_fn_t fn) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 19dbc1bed960..69a7c25eaccc 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -54,8 +54,7 @@ enum libbpf_print_level { }; typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level, - const char *, ...) - __attribute__((format(printf, 2, 3))); + const char *, va_list ap); LIBBPF_API void libbpf_set_print(libbpf_print_fn_t fn); diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 38afdbe6a9e0..037d8ff6a634 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -25,15 +25,9 @@ #include "c++/clang-c.h" static int libbpf_perf_print(enum libbpf_print_level level __attribute__((unused)), - const char *fmt, ...) + const char *fmt, va_list args) { - va_list args; - int ret; - - va_start(args, fmt); - ret = veprintf(1, verbose, pr_fmt(fmt), args); - va_end(args); - return ret; + return veprintf(1, verbose, pr_fmt(fmt), args); } struct bpf_prog_priv { diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index aebaeff5a5a0..5afab823ffbe 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -52,19 +52,10 @@ static int count_result(int err) return err; } -#define __printf(a, b) __attribute__((format(printf, a, b))) - -__printf(2, 3) static int __base_pr(enum libbpf_print_level level __attribute__((unused)), - const char *format, ...) + const char *format, va_list args) { - va_list args; - int err; - - va_start(args, format); - err = vfprintf(stderr, format, args); - va_end(args); - return err; + return vfprintf(stderr, format, args); } #define BTF_INFO_ENC(kind, kind_flag, vlen) \ diff --git a/tools/testing/selftests/bpf/test_libbpf_open.c b/tools/testing/selftests/bpf/test_libbpf_open.c index b9ff3bf76544..1909ecf4d999 100644 --- a/tools/testing/selftests/bpf/test_libbpf_open.c +++ b/tools/testing/selftests/bpf/test_libbpf_open.c @@ -36,19 +36,13 @@ static void usage(char *argv[]) static bool debug = 0; static int libbpf_debug_print(enum libbpf_print_level level, - const char *fmt, ...) + const char *fmt, va_list args) { - va_list args; - int ret; - if (level == LIBBPF_DEBUG && !debug) return 0; - va_start(args, fmt); fprintf(stderr, "[%d] ", level); - ret = vfprintf(stderr, fmt, args); - va_end(args); - return ret; + return vfprintf(stderr, fmt, args); } #define EXIT_FAIL_LIBBPF EXIT_FAILURE diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 55d05102e7bf..c52bd90fbb34 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1785,18 +1785,12 @@ static void test_task_fd_query_tp(void) } static int libbpf_debug_print(enum libbpf_print_level level, - const char *format, ...) + const char *format, va_list args) { - va_list args; - int ret; - if (level == LIBBPF_DEBUG) return 0; - va_start(args, format); - ret = vfprintf(stderr, format, args); - va_end(args); - return ret; + return vfprintf(stderr, format, args); } static void test_reference_tracking() From 17ab4f61b8cd6f9c38e9d0b935d86d73b5d0d2b5 Mon Sep 17 00:00:00 2001 From: Rundong Ge Date: Sat, 2 Feb 2019 14:29:35 +0000 Subject: [PATCH 0595/1436] net: dsa: slave: Don't propagate flag changes on down slave interfaces The unbalance of master's promiscuity or allmulti will happen after ifdown and ifup a slave interface which is in a bridge. When we ifdown a slave interface , both the 'dsa_slave_close' and 'dsa_slave_change_rx_flags' will clear the master's flags. The flags of master will be decrease twice. In the other hand, if we ifup the slave interface again, since the slave's flags were cleared the 'dsa_slave_open' won't set the master's flag, only 'dsa_slave_change_rx_flags' that triggered by 'br_add_if' will set the master's flags. The flags of master is increase once. Only propagating flag changes when a slave interface is up makes sure this does not happen. The 'vlan_dev_change_rx_flags' had the same problem and was fixed, and changes here follows that fix. Fixes: 91da11f870f0 ("net: Distributed Switch Architecture protocol support") Signed-off-by: Rundong Ge Signed-off-by: David S. Miller --- net/dsa/slave.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index a3fcc1d01615..b5e44825d173 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -140,11 +140,14 @@ static int dsa_slave_close(struct net_device *dev) static void dsa_slave_change_rx_flags(struct net_device *dev, int change) { struct net_device *master = dsa_slave_to_master(dev); - - if (change & IFF_ALLMULTI) - dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); - if (change & IFF_PROMISC) - dev_set_promiscuity(master, dev->flags & IFF_PROMISC ? 1 : -1); + if (dev->flags & IFF_UP) { + if (change & IFF_ALLMULTI) + dev_set_allmulti(master, + dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(master, + dev->flags & IFF_PROMISC ? 1 : -1); + } } static void dsa_slave_set_rx_mode(struct net_device *dev) From c8101f7729daee251f4f6505f9d135ec08e1342f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 2 Feb 2019 17:53:29 +0000 Subject: [PATCH 0596/1436] net: dsa: Fix lockdep false positive splat Creating a macvtap on a DSA-backed interface results in the following splat when lockdep is enabled: [ 19.638080] IPv6: ADDRCONF(NETDEV_CHANGE): lan0: link becomes ready [ 23.041198] device lan0 entered promiscuous mode [ 23.043445] device eth0 entered promiscuous mode [ 23.049255] [ 23.049557] ============================================ [ 23.055021] WARNING: possible recursive locking detected [ 23.060490] 5.0.0-rc3-00013-g56c857a1b8d3 #118 Not tainted [ 23.066132] -------------------------------------------- [ 23.071598] ip/2861 is trying to acquire lock: [ 23.076171] 00000000f61990cb (_xmit_ETHER){+...}, at: dev_set_rx_mode+0x1c/0x38 [ 23.083693] [ 23.083693] but task is already holding lock: [ 23.089696] 00000000ecf0c3b4 (_xmit_ETHER){+...}, at: dev_uc_add+0x24/0x70 [ 23.096774] [ 23.096774] other info that might help us debug this: [ 23.103494] Possible unsafe locking scenario: [ 23.103494] [ 23.109584] CPU0 [ 23.112093] ---- [ 23.114601] lock(_xmit_ETHER); [ 23.117917] lock(_xmit_ETHER); [ 23.121233] [ 23.121233] *** DEADLOCK *** [ 23.121233] [ 23.127325] May be due to missing lock nesting notation [ 23.127325] [ 23.134315] 2 locks held by ip/2861: [ 23.137987] #0: 000000003b766c72 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x338/0x4e0 [ 23.146231] #1: 00000000ecf0c3b4 (_xmit_ETHER){+...}, at: dev_uc_add+0x24/0x70 [ 23.153757] [ 23.153757] stack backtrace: [ 23.158243] CPU: 0 PID: 2861 Comm: ip Not tainted 5.0.0-rc3-00013-g56c857a1b8d3 #118 [ 23.166212] Hardware name: Globalscale Marvell ESPRESSOBin Board (DT) [ 23.172843] Call trace: [ 23.175358] dump_backtrace+0x0/0x188 [ 23.179116] show_stack+0x14/0x20 [ 23.182524] dump_stack+0xb4/0xec [ 23.185928] __lock_acquire+0x123c/0x1860 [ 23.190048] lock_acquire+0xc8/0x248 [ 23.193724] _raw_spin_lock_bh+0x40/0x58 [ 23.197755] dev_set_rx_mode+0x1c/0x38 [ 23.201607] dev_set_promiscuity+0x3c/0x50 [ 23.205820] dsa_slave_change_rx_flags+0x5c/0x70 [ 23.210567] __dev_set_promiscuity+0x148/0x1e0 [ 23.215136] __dev_set_rx_mode+0x74/0x98 [ 23.219167] dev_uc_add+0x54/0x70 [ 23.222575] macvlan_open+0x170/0x1d0 [ 23.226336] __dev_open+0xe0/0x160 [ 23.229830] __dev_change_flags+0x16c/0x1b8 [ 23.234132] dev_change_flags+0x20/0x60 [ 23.238074] do_setlink+0x2d0/0xc50 [ 23.241658] __rtnl_newlink+0x5f8/0x6e8 [ 23.245601] rtnl_newlink+0x50/0x78 [ 23.249184] rtnetlink_rcv_msg+0x360/0x4e0 [ 23.253397] netlink_rcv_skb+0xe8/0x130 [ 23.257338] rtnetlink_rcv+0x14/0x20 [ 23.261012] netlink_unicast+0x190/0x210 [ 23.265043] netlink_sendmsg+0x288/0x350 [ 23.269075] sock_sendmsg+0x18/0x30 [ 23.272659] ___sys_sendmsg+0x29c/0x2c8 [ 23.276602] __sys_sendmsg+0x60/0xb8 [ 23.280276] __arm64_sys_sendmsg+0x1c/0x28 [ 23.284488] el0_svc_common+0xd8/0x138 [ 23.288340] el0_svc_handler+0x24/0x80 [ 23.292192] el0_svc+0x8/0xc This looks fairly harmless (no actual deadlock occurs), and is fixed in a similar way to c6894dec8ea9 ("bridge: fix lockdep addr_list_lock false positive splat") by putting the addr_list_lock in its own lockdep class. Signed-off-by: Marc Zyngier Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/master.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/dsa/master.c b/net/dsa/master.c index 71bb15f491c8..54f5551fb799 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -205,6 +205,8 @@ static void dsa_master_reset_mtu(struct net_device *dev) rtnl_unlock(); } +static struct lock_class_key dsa_master_addr_list_lock_key; + int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) { int ret; @@ -218,6 +220,8 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) wmb(); dev->dsa_ptr = cpu_dp; + lockdep_set_class(&dev->addr_list_lock, + &dsa_master_addr_list_lock_key); ret = dsa_master_ethtool_setup(dev); if (ret) From b6cd7f34ba1354498de03e6882c33f935b071e99 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Mon, 4 Feb 2019 19:32:10 +0100 Subject: [PATCH 0597/1436] scsi: target: make the pi_prot_format ConfigFS path readable pi_prot_format conversion to write-only caused userspace breakage. Make the ConfigFS path readable again and hardcode the "0\n" content, matching previous output. Fixes: 6baca7601bde ("scsi: target: drop unused pi_prot_format attribute storage") Link: https://bugzilla.redhat.com/show_bug.cgi?id=1667505 Reported-by: Lee Duncan Reported-by: Laura Abbott Reviewed-by: Bart Van Assche Signed-off-by: David Disseldorp Signed-off-by: Martin K. Petersen --- drivers/target/target_core_configfs.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 72016d0dfca5..8e7fffbb8802 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -852,6 +852,12 @@ static ssize_t pi_prot_type_store(struct config_item *item, return count; } +/* always zero, but attr needs to remain RW to avoid userspace breakage */ +static ssize_t pi_prot_format_show(struct config_item *item, char *page) +{ + return snprintf(page, PAGE_SIZE, "0\n"); +} + static ssize_t pi_prot_format_store(struct config_item *item, const char *page, size_t count) { @@ -1132,7 +1138,7 @@ CONFIGFS_ATTR(, emulate_3pc); CONFIGFS_ATTR(, emulate_pr); CONFIGFS_ATTR(, pi_prot_type); CONFIGFS_ATTR_RO(, hw_pi_prot_type); -CONFIGFS_ATTR_WO(, pi_prot_format); +CONFIGFS_ATTR(, pi_prot_format); CONFIGFS_ATTR(, pi_prot_verify); CONFIGFS_ATTR(, enforce_pr_isids); CONFIGFS_ATTR(, is_nonrot); From 88fc41c407158a7d2eaa4b2f7cfa289749d456c6 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 30 Jan 2019 15:54:58 +0900 Subject: [PATCH 0598/1436] scsi: sd_zbc: Fix zone information messages Commit bf5054569653 ("block: Introduce blk_revalidate_disk_zones()") inadvertently broke the message output of sd_zbc_print_zones() because the zone information initialization of the scsi disk structure was moved to the second scan run while sd_zbc_print_zones() is called on the first scan. This leads to the following incorrect message to be printed for any ZBC or ZAC zoned disks. "...[sdX] 4294967295 zones of 0 logical blocks + 1 runt zone" Fix this by initializing sdkp zone size and number of zones early on the first scan. This does not impact the execution of blk_revalidate_zones(). This functions is still called only once the block device capacity is set on the second revalidate run on boot, or if the disk zone configuration changed (i.e. the disk changed). Fixes: bf5054569653 ("block: Introduce blk_revalidate_disk_zones()") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/sd_zbc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 83365b29a4d8..fff86940388b 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -462,12 +462,16 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) sdkp->device->use_10_for_rw = 0; /* - * If something changed, revalidate the disk zone bitmaps once we have - * the capacity, that is on the second revalidate execution during disk - * scan and always during normal revalidate. + * Revalidate the disk zone bitmaps once the block device capacity is + * set on the second revalidate execution during disk scan and if + * something changed when executing a normal revalidate. */ - if (sdkp->first_scan) + if (sdkp->first_scan) { + sdkp->zone_blocks = zone_blocks; + sdkp->nr_zones = nr_zones; return 0; + } + if (sdkp->zone_blocks != zone_blocks || sdkp->nr_zones != nr_zones || disk->queue->nr_zones != nr_zones) { From d8f6382a7d026989029e2e50c515df954488459b Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Fri, 1 Feb 2019 14:42:28 +0000 Subject: [PATCH 0599/1436] Revert "scsi: libfc: Add WARN_ON() when deleting rports" This reverts commit bbc0f8bd88abefb0f27998f40a073634a3a2db89. It added a warning whose intent was to check whether the rport was still linked into the peer list. It doesn't work as intended and gives false positive warnings for two reasons: 1) If the rport is never linked into the peer list it will not be considered empty since the list_head is never initialized. 2) If the rport is deleted from the peer list using list_del_rcu(), then the list_head is in an undefined state and it is not considered empty. Signed-off-by: Ross Lagerwall Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/libfc/fc_rport.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index 9192a1d9dec6..dfba4921b265 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -184,7 +184,6 @@ void fc_rport_destroy(struct kref *kref) struct fc_rport_priv *rdata; rdata = container_of(kref, struct fc_rport_priv, kref); - WARN_ON(!list_empty(&rdata->peers)); kfree_rcu(rdata, rcu); } EXPORT_SYMBOL(fc_rport_destroy); From bb61b843ffd46978d7ca5095453e572714934eeb Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Wed, 30 Jan 2019 17:56:51 +0530 Subject: [PATCH 0600/1436] scsi: cxlflash: Prevent deadlock when adapter probe fails Presently when an error is encountered during probe of the cxlflash adapter, a deadlock is seen with cpu thread stuck inside cxlflash_remove(). Below is the trace of the deadlock as logged by khungtaskd: cxlflash 0006:00:00.0: cxlflash_probe: init_afu failed rc=-16 INFO: task kworker/80:1:890 blocked for more than 120 seconds. Not tainted 5.0.0-rc4-capi2-kexec+ #2 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kworker/80:1 D 0 890 2 0x00000808 Workqueue: events work_for_cpu_fn Call Trace: 0x4d72136320 (unreliable) __switch_to+0x2cc/0x460 __schedule+0x2bc/0xac0 schedule+0x40/0xb0 cxlflash_remove+0xec/0x640 [cxlflash] cxlflash_probe+0x370/0x8f0 [cxlflash] local_pci_probe+0x6c/0x140 work_for_cpu_fn+0x38/0x60 process_one_work+0x260/0x530 worker_thread+0x280/0x5d0 kthread+0x1a8/0x1b0 ret_from_kernel_thread+0x5c/0x80 INFO: task systemd-udevd:5160 blocked for more than 120 seconds. The deadlock occurs as cxlflash_remove() is called from cxlflash_probe() without setting 'cxlflash_cfg->state' to STATE_PROBED and the probe thread starts to wait on 'cxlflash_cfg->reset_waitq'. Since the device was never successfully probed the 'cxlflash_cfg->state' never changes from STATE_PROBING hence the deadlock occurs. We fix this deadlock by setting the variable 'cxlflash_cfg->state' to STATE_PROBED in case an error occurs during cxlflash_probe() and just before calling cxlflash_remove(). Cc: stable@vger.kernel.org Fixes: c21e0bbfc485("cxlflash: Base support for IBM CXL Flash Adapter") Signed-off-by: Vaibhav Jain Signed-off-by: Martin K. Petersen --- drivers/scsi/cxlflash/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index bfa13e3b191c..c8bad2c093b8 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -3687,6 +3687,7 @@ static int cxlflash_probe(struct pci_dev *pdev, host->max_cmd_len = CXLFLASH_MAX_CDB_LEN; cfg = shost_priv(host); + cfg->state = STATE_PROBING; cfg->host = host; rc = alloc_mem(cfg); if (rc) { @@ -3775,6 +3776,7 @@ out: return rc; out_remove: + cfg->state = STATE_PROBED; cxlflash_remove(pdev); goto out; } From ea5736805190e912903c27c9f17c7a4341a405e9 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 1 Feb 2019 16:26:08 +0000 Subject: [PATCH 0601/1436] arm64: kexec_file: handle empty command-line Calling strlen() on cmdline == NULL produces a kernel oops. Since having a NULL cmdline is valid, handle this case explicitly. Fixes: 52b2a8af7436 ("arm64: kexec_file: load initrd and device-tree") Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- arch/arm64/kernel/machine_kexec_file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index f2c211a6229b..58871333737a 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -120,10 +120,12 @@ static int create_dtb(struct kimage *image, { void *buf; size_t buf_size; + size_t cmdline_len; int ret; + cmdline_len = cmdline ? strlen(cmdline) : 0; buf_size = fdt_totalsize(initial_boot_params) - + strlen(cmdline) + DTB_EXTRA_SPACE; + + cmdline_len + DTB_EXTRA_SPACE; for (;;) { buf = vmalloc(buf_size); From c6e2bd956936d925748581e4d0294f10f1d92f2c Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 10 Jan 2019 07:59:16 -0800 Subject: [PATCH 0602/1436] i2c: omap: Use noirq system sleep pm ops to idle device for suspend We currently get the following error with pixcir_ts driver during a suspend resume cycle: omap_i2c 4802a000.i2c: controller timed out pixcir_ts 1-005c: pixcir_int_enable: can't read reg 0x34 : -110 pixcir_ts 1-005c: Failed to disable interrupt generation: -110 pixcir_ts 1-005c: Failed to stop dpm_run_callback(): pixcir_i2c_ts_resume+0x0/0x98 [pixcir_i2c_ts] returns -110 PM: Device 1-005c failed to resume: error -110 And at least am437x based devices with pixcir_ts will fail to resume to a touchscreen that is configured as the wakeup-source in device tree for these devices. This is because pixcir_ts tries to reconfigure it's registers for noirq suspend which fails. This also leaves i2c-omap in enabled state for suspend. Let's fix the pixcir_ts issue and make sure i2c-omap is suspended by adding SET_NOIRQ_SYSTEM_SLEEP_PM_OPS. Let's also get rid of some ifdefs while at it and replace them with __maybe_unused as SET_RUNTIME_PM_OPS and SET_NOIRQ_SYSTEM_SLEEP_PM_OPS already deal with the various PM Kconfig options. Reported-by: Keerthy Signed-off-by: Tony Lindgren Acked-by: Vignesh R Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-omap.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index b1086bfb0465..cd9c65f3d404 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1500,8 +1500,7 @@ static int omap_i2c_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int omap_i2c_runtime_suspend(struct device *dev) +static int __maybe_unused omap_i2c_runtime_suspend(struct device *dev) { struct omap_i2c_dev *omap = dev_get_drvdata(dev); @@ -1527,7 +1526,7 @@ static int omap_i2c_runtime_suspend(struct device *dev) return 0; } -static int omap_i2c_runtime_resume(struct device *dev) +static int __maybe_unused omap_i2c_runtime_resume(struct device *dev) { struct omap_i2c_dev *omap = dev_get_drvdata(dev); @@ -1542,20 +1541,18 @@ static int omap_i2c_runtime_resume(struct device *dev) } static const struct dev_pm_ops omap_i2c_pm_ops = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) SET_RUNTIME_PM_OPS(omap_i2c_runtime_suspend, omap_i2c_runtime_resume, NULL) }; -#define OMAP_I2C_PM_OPS (&omap_i2c_pm_ops) -#else -#define OMAP_I2C_PM_OPS NULL -#endif /* CONFIG_PM */ static struct platform_driver omap_i2c_driver = { .probe = omap_i2c_probe, .remove = omap_i2c_remove, .driver = { .name = "omap_i2c", - .pm = OMAP_I2C_PM_OPS, + .pm = &omap_i2c_pm_ops, .of_match_table = of_match_ptr(omap_i2c_of_match), }, }; From 728354c005c36eaf44b6e5552372b67e60d17f56 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 31 Jan 2019 10:55:37 +0100 Subject: [PATCH 0603/1436] drm/vmwgfx: Return error code from vmw_execbuf_copy_fence_user The function was unconditionally returning 0, and a caller would have to rely on the returned fence pointer being NULL to detect errors. However, the function vmw_execbuf_copy_fence_user() would expect a non-zero error code in that case and would BUG otherwise. So make sure we return a proper non-zero error code if the fence pointer returned is NULL. Cc: Fixes: ae2a104058e2: ("vmwgfx: Implement fence objects") Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index f2d13a72c05d..88b8178d4687 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -3570,7 +3570,7 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv, *p_fence = NULL; } - return 0; + return ret; } /** From 51fdbeb4ca1a8415c98f87cb877956ae83e71627 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 31 Jan 2019 10:52:21 +0100 Subject: [PATCH 0604/1436] drm/vmwgfx: Fix an uninitialized fence handle value if vmw_execbuf_fence_commands() fails, The handle value will be uninitialized and a bogus fence handle might be copied to user-space. Cc: Fixes: 2724b2d54cda: ("drm/vmwgfx: Use new validation interface for the modesetting code v2") Reported-by: Dan Carpenter Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul #v1 Reviewed-by: Sinclair Yeh #v1 Reviewed-by: Deepak Rawat --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index b351fb5214d3..5e257a600cea 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -2554,8 +2554,8 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv, user_fence_rep) { struct vmw_fence_obj *fence = NULL; - uint32_t handle; - int ret; + uint32_t handle = 0; + int ret = 0; if (file_priv || user_fence_rep || vmw_validation_has_bos(ctx) || out_fence) From 479d59026fe44f89fa67efa01a4d47e00808e688 Mon Sep 17 00:00:00 2001 From: Deepak Rawat Date: Fri, 21 Dec 2018 14:38:35 -0800 Subject: [PATCH 0605/1436] drm/vmwgfx: Also check for crtc status while checking for DU active During modeset check it is possible to have all crtc_state's in atomic state. Check for crtc enable status while checking for display unit active status. Only error if enabling a crtc while display unit is not active. Cc: Fixes: 9da6e26c0aae: ("drm/vmwgfx: Fix a layout race condition") Signed-off-by: Deepak Rawat Reviewed-by: Thomas Hellstrom Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 5e257a600cea..ed2f67822f45 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1646,7 +1646,7 @@ static int vmw_kms_check_topology(struct drm_device *dev, struct drm_connector_state *conn_state; struct vmw_connector_state *vmw_conn_state; - if (!du->pref_active) { + if (!du->pref_active && new_crtc_state->enable) { ret = -EINVAL; goto clean; } From 4cbfa1e6c09e98450aab3240e5119b0ab2c9795b Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 28 Jan 2019 10:31:33 +0100 Subject: [PATCH 0606/1436] drm/vmwgfx: Fix setting of dma masks Previously we set only the dma mask and not the coherent mask. Fix that. Also, for clarity, make sure both are initially set to 64 bits. Cc: Fixes: 0d00c488f3de: ("drm/vmwgfx: Fix the driver for large dma addresses") Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 3e2bcff34032..ae9df4432bfc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -600,13 +600,16 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) static int vmw_dma_masks(struct vmw_private *dev_priv) { struct drm_device *dev = dev_priv->dev; + int ret = 0; - if (intel_iommu_enabled && + ret = dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)); + if (dev_priv->map_mode != vmw_dma_phys && (sizeof(unsigned long) == 4 || vmw_restrict_dma_mask)) { DRM_INFO("Restricting DMA addresses to 44 bits.\n"); - return dma_set_mask(dev->dev, DMA_BIT_MASK(44)); + return dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(44)); } - return 0; + + return ret; } static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) From 9ddac734aa310c5fbc0ec93602335d2a39092451 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 17 Jan 2019 14:34:52 +0100 Subject: [PATCH 0607/1436] drm/vmwgfx: Improve on IOMMU detection instead of relying on intel_iommu_enabled, use the fact that the dma_map_ops::map_page != dma_direct_map_page. Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index ae9df4432bfc..7ef5dcb06104 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -26,6 +26,7 @@ **************************************************************************/ #include #include +#include #include #include "vmwgfx_drv.h" @@ -34,7 +35,6 @@ #include #include #include -#include #define VMWGFX_DRIVER_DESC "Linux drm driver for VMware graphics devices" #define VMWGFX_CHIP_SVGAII 0 @@ -545,6 +545,21 @@ static void vmw_get_initial_size(struct vmw_private *dev_priv) dev_priv->initial_height = height; } +/** + * vmw_assume_iommu - Figure out whether coherent dma-remapping might be + * taking place. + * @dev: Pointer to the struct drm_device. + * + * Return: true if iommu present, false otherwise. + */ +static bool vmw_assume_iommu(struct drm_device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev->dev); + + return !dma_is_direct(ops) && ops && + ops->map_page != dma_direct_map_page; +} + /** * vmw_dma_select_mode - Determine how DMA mappings should be set up for this * system. @@ -568,7 +583,7 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) if (vmw_force_coherent) dev_priv->map_mode = vmw_dma_alloc_coherent; - else if (intel_iommu_enabled) + else if (vmw_assume_iommu(dev_priv->dev)) dev_priv->map_mode = vmw_dma_map_populate; else if (!vmw_force_iommu) dev_priv->map_mode = vmw_dma_phys; From 15df03c661cb362366ecfc3a21820cb934f3e4ca Mon Sep 17 00:00:00 2001 From: Eli Cooper Date: Mon, 21 Jan 2019 18:45:27 +0800 Subject: [PATCH 0608/1436] netfilter: ipv6: Don't preserve original oif for loopback address Commit 508b09046c0f ("netfilter: ipv6: Preserve link scope traffic original oif") made ip6_route_me_harder() keep the original oif for link-local and multicast packets. However, it also affected packets for the loopback address because it used rt6_need_strict(). REDIRECT rules in the OUTPUT chain rewrite the destination to loopback address; thus its oif should not be preserved. This commit fixes the bug that redirected local packets are being dropped. Actually the packet was not exactly dropped; Instead it was sent out to the original oif rather than lo. When a packet with daddr ::1 is sent to the router, it is effectively dropped. Fixes: 508b09046c0f ("netfilter: ipv6: Preserve link scope traffic original oif") Signed-off-by: Eli Cooper Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 8b075f0bc351..6d0b1f3e927b 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -23,9 +23,11 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) struct sock *sk = sk_to_full_sk(skb->sk); unsigned int hh_len; struct dst_entry *dst; + int strict = (ipv6_addr_type(&iph->daddr) & + (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); struct flowi6 fl6 = { .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if : - rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0, + strict ? skb_dst(skb)->dev->ifindex : 0, .flowi6_mark = skb->mark, .flowi6_uid = sock_net_uid(net, sk), .daddr = iph->daddr, From 947e492c0fc2132ae5fca081a9c2952ccaab0404 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 5 Feb 2019 12:16:18 +0100 Subject: [PATCH 0609/1436] netfilter: nft_compat: don't use refcount_inc on newly allocated entry When I moved the refcount to refcount_t type I missed the fact that refcount_inc() will result in use-after-free warning with CONFIG_REFCOUNT_FULL=y builds. The correct fix would be to init the reference count to 1 at allocation time, but, unfortunately we cannot do this, as we can't undo that in case something else fails later in the batch. So only solution I see is to special-case the 'new entry' condition and replace refcount_inc() with a "delayed" refcount_set(1) in this case, as done here. The .activate callback can be removed to simplify things, we only need to make sure that deactivate() decrements/unlinks the entry from the list at end of transaction phase (commit or abort). Fixes: 12c44aba6618 ("netfilter: nft_compat: use refcnt_t type for nft_xt reference count") Reported-by: Jordan Glover Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 62 ++++++++++++++------------------------ 1 file changed, 23 insertions(+), 39 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 0732a2fc697c..fe64df848365 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -61,6 +61,21 @@ static struct nft_compat_net *nft_compat_pernet(struct net *net) return net_generic(net, nft_compat_net_id); } +static void nft_xt_get(struct nft_xt *xt) +{ + /* refcount_inc() warns on 0 -> 1 transition, but we can't + * init the reference count to 1 in .select_ops -- we can't + * undo such an increase when another expression inside the same + * rule fails afterwards. + */ + if (xt->listcnt == 0) + refcount_set(&xt->refcnt, 1); + else + refcount_inc(&xt->refcnt); + + xt->listcnt++; +} + static bool nft_xt_put(struct nft_xt *xt) { if (refcount_dec_and_test(&xt->refcnt)) { @@ -291,7 +306,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; nft_xt = container_of(expr->ops, struct nft_xt, ops); - refcount_inc(&nft_xt->refcnt); + nft_xt_get(nft_xt); return 0; } @@ -504,7 +519,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return ret; nft_xt = container_of(expr->ops, struct nft_xt, ops); - refcount_inc(&nft_xt->refcnt); + nft_xt_get(nft_xt); return 0; } @@ -558,45 +573,16 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) __nft_match_destroy(ctx, expr, nft_expr_priv(expr)); } -static void nft_compat_activate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - struct list_head *h) -{ - struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); - - if (xt->listcnt == 0) - list_add(&xt->head, h); - - xt->listcnt++; -} - -static void nft_compat_activate_mt(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_compat_net *cn = nft_compat_pernet(ctx->net); - - nft_compat_activate(ctx, expr, &cn->nft_match_list); -} - -static void nft_compat_activate_tg(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_compat_net *cn = nft_compat_pernet(ctx->net); - - nft_compat_activate(ctx, expr, &cn->nft_target_list); -} - static void nft_compat_deactivate(const struct nft_ctx *ctx, const struct nft_expr *expr, enum nft_trans_phase phase) { struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); - if (phase == NFT_TRANS_COMMIT) - return; - - if (--xt->listcnt == 0) - list_del_init(&xt->head); + if (phase == NFT_TRANS_ABORT || phase == NFT_TRANS_COMMIT) { + if (--xt->listcnt == 0) + list_del_init(&xt->head); + } } static void @@ -852,7 +838,6 @@ nft_match_select_ops(const struct nft_ctx *ctx, nft_match->ops.eval = nft_match_eval; nft_match->ops.init = nft_match_init; nft_match->ops.destroy = nft_match_destroy; - nft_match->ops.activate = nft_compat_activate_mt; nft_match->ops.deactivate = nft_compat_deactivate; nft_match->ops.dump = nft_match_dump; nft_match->ops.validate = nft_match_validate; @@ -870,7 +855,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, nft_match->ops.size = matchsize; - nft_match->listcnt = 1; + nft_match->listcnt = 0; list_add(&nft_match->head, &cn->nft_match_list); return &nft_match->ops; @@ -957,7 +942,6 @@ nft_target_select_ops(const struct nft_ctx *ctx, nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); nft_target->ops.init = nft_target_init; nft_target->ops.destroy = nft_target_destroy; - nft_target->ops.activate = nft_compat_activate_tg; nft_target->ops.deactivate = nft_compat_deactivate; nft_target->ops.dump = nft_target_dump; nft_target->ops.validate = nft_target_validate; @@ -968,7 +952,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, else nft_target->ops.eval = nft_target_eval_xt; - nft_target->listcnt = 1; + nft_target->listcnt = 0; list_add(&nft_target->head, &cn->nft_target_list); return &nft_target->ops; From 3e0b69bbed7161eee8702eee5dcdc18afc1dc982 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 25 Jan 2019 20:38:46 +0200 Subject: [PATCH 0610/1436] drm/i915: Fix skl srckey mask bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're incorrectly masking off the R/V channel enable bit from KEYMSK. Fix it up. Cc: Maarten Lankhorst Cc: Matt Roper Fixes: b20815255693 ("drm/i915: Add plane alpha blending support, v2.") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190125183846.28755-1-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper (cherry picked from commit 968bf969b47df2481022b9a05eaab02948eec088) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_sprite.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index d2e003d8f3db..5170a0f5fe7b 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -494,7 +494,7 @@ skl_program_plane(struct intel_plane *plane, keymax = (key->max_value & 0xffffff) | PLANE_KEYMAX_ALPHA(alpha); - keymsk = key->channel_mask & 0x3ffffff; + keymsk = key->channel_mask & 0x7ffffff; if (alpha < 0xff) keymsk |= PLANE_KEYMSK_ALPHA_ENABLE; From 2a121030d4ee3f84f60c6f415f9c44bffbcde81d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 25 Jan 2019 14:24:42 -0800 Subject: [PATCH 0611/1436] drm/i915: always return something on DDI clock selection Even if we don't have the correct clock and get a warning, we should not skip the return. v2: improve commit message (from Joonas) Fixes: 1fa11ee2d9d0 ("drm/i915/icl: start adding the TBT pll") Cc: Paulo Zanoni Cc: # v4.19+ Signed-off-by: Lucas De Marchi Reviewed-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20190125222444.19926-3-lucas.demarchi@intel.com (cherry picked from commit 7a61a6dec3dfb9f2e8c39a337580a3c3036c5cdf) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_ddi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index f3e1d6a0b7dd..4079050f9d6c 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1086,7 +1086,7 @@ static uint32_t icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, return DDI_CLK_SEL_TBT_810; default: MISSING_CASE(clock); - break; + return DDI_CLK_SEL_NONE; } case DPLL_ID_ICL_MGPLL1: case DPLL_ID_ICL_MGPLL2: From 8f2f350cbdb2c2fbff654cb778139144b48a59ba Mon Sep 17 00:00:00 2001 From: Tony Jones Date: Wed, 23 Jan 2019 16:52:29 -0800 Subject: [PATCH 0612/1436] perf script python: Add Python3 support to tests/attr.py Support both Python 2 and Python 3 in tests/attr.py The use of "except as" syntax implies the minimum supported Python2 version is now v2.6 Committer testing: $ make -C tools/perf PYTHON3=python install-bin Before: # perf test attr 16: Setup struct perf_event_attr : FAILED! 48: Synthesize attr update : Ok [root@quaco ~]# perf test -v attr 16: Setup struct perf_event_attr : --- start --- test child forked, pid 3121 File "/home/acme/libexec/perf-core/tests/attr.py", line 324 except Unsup, obj: ^ SyntaxError: invalid syntax test child finished with -1 ---- end ---- Setup struct perf_event_attr: FAILED! 48: Synthesize attr update : --- start --- test child forked, pid 3124 test child finished with 0 ---- end ---- Synthesize attr update: Ok # After: # perf test attr 16: Setup struct perf_event_attr : Ok 48: Synthesize attr update : Ok # Signed-off-by: Tony Jones Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Jonathan Corbet Cc: Ravi Bangoria Cc: Seeteena Thoufeek Link: http://lkml.kernel.org/r/20190124005229.16146-7-tonyj@suse.de Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py index 44090a9a19f3..e952127e4fb0 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/attr.py @@ -1,6 +1,8 @@ #! /usr/bin/python # SPDX-License-Identifier: GPL-2.0 +from __future__ import print_function + import os import sys import glob @@ -8,7 +10,11 @@ import optparse import tempfile import logging import shutil -import ConfigParser + +try: + import configparser +except ImportError: + import ConfigParser as configparser def data_equal(a, b): # Allow multiple values in assignment separated by '|' @@ -100,20 +106,20 @@ class Event(dict): def equal(self, other): for t in Event.terms: log.debug(" [%s] %s %s" % (t, self[t], other[t])); - if not self.has_key(t) or not other.has_key(t): + if t not in self or t not in other: return False if not data_equal(self[t], other[t]): return False return True def optional(self): - if self.has_key('optional') and self['optional'] == '1': + if 'optional' in self and self['optional'] == '1': return True return False def diff(self, other): for t in Event.terms: - if not self.has_key(t) or not other.has_key(t): + if t not in self or t not in other: continue if not data_equal(self[t], other[t]): log.warning("expected %s=%s, got %s" % (t, self[t], other[t])) @@ -134,7 +140,7 @@ class Event(dict): # - expected values assignments class Test(object): def __init__(self, path, options): - parser = ConfigParser.SafeConfigParser() + parser = configparser.SafeConfigParser() parser.read(path) log.warning("running '%s'" % path) @@ -193,7 +199,7 @@ class Test(object): return True def load_events(self, path, events): - parser_event = ConfigParser.SafeConfigParser() + parser_event = configparser.SafeConfigParser() parser_event.read(path) # The event record section header contains 'event' word, @@ -207,7 +213,7 @@ class Test(object): # Read parent event if there's any if (':' in section): base = section[section.index(':') + 1:] - parser_base = ConfigParser.SafeConfigParser() + parser_base = configparser.SafeConfigParser() parser_base.read(self.test_dir + '/' + base) base_items = parser_base.items('event') @@ -322,9 +328,9 @@ def run_tests(options): for f in glob.glob(options.test_dir + '/' + options.test): try: Test(f, options).run() - except Unsup, obj: + except Unsup as obj: log.warning("unsupp %s" % obj.getMsg()) - except Notest, obj: + except Notest as obj: log.warning("skipped %s" % obj.getMsg()) def setup_log(verbose): @@ -363,7 +369,7 @@ def main(): parser.add_option("-p", "--perf", action="store", type="string", dest="perf") parser.add_option("-v", "--verbose", - action="count", dest="verbose") + default=0, action="count", dest="verbose") options, args = parser.parse_args() if args: @@ -373,7 +379,7 @@ def main(): setup_log(options.verbose) if not options.test_dir: - print 'FAILED no -d option specified' + print('FAILED no -d option specified') sys.exit(-1) if not options.test: @@ -382,8 +388,8 @@ def main(): try: run_tests(options) - except Fail, obj: - print "FAILED %s" % obj.getMsg(); + except Fail as obj: + print("FAILED %s" % obj.getMsg()) sys.exit(-1) sys.exit(0) From a692933a87691681e880feb708081681ff32400a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 5 Feb 2019 07:19:11 -0600 Subject: [PATCH 0613/1436] signal: Always attempt to allocate siginfo for SIGSTOP Since 2.5.34 the code has had the potential to not allocate siginfo for SIGSTOP signals. Except for ptrace this is perfectly fine as only ptrace can use PTRACE_PEEK_SIGINFO and see what the contents of the delivered siginfo are. Users of PTRACE_PEEK_SIGINFO that care about the contents siginfo for SIGSTOP are rare, but they do exist. A seccomp self test has cared and lldb cares. Jack Andersen writes: > The patch titled > `signal: Never allocate siginfo for SIGKILL or SIGSTOP` > created a regression for users of PTRACE_GETSIGINFO needing to > discern signals that were raised via the tgkill syscall. > > A notable user of this tgkill+ptrace combination is lldb while > debugging a multithreaded program. Without the ability to detect a > SIGSTOP originating from tgkill, lldb does not have a way to > synchronize on a per-thread basis and falls back to SIGSTOP-ing the > entire process. Everyone affected by this please note. The kernel can still fail to allocate a siginfo structure. The allocation is with GFP_KERNEL and is best effort only. If memory is tight when the signal allocation comes in this will fail to allocate a siginfo. So I strongly recommend looking at more robust solutions for synchronizing with a single thread such as PTRACE_INTERRUPT. Or if that does not work persuading your friendly local kernel developer to build the interface you need. Reported-by: Tycho Andersen Reported-by: Kees Cook Reported-by: Jack Andersen Acked-by: Linus Torvalds Reviewed-by: Christian Brauner Cc: stable@vger.kernel.org Fixes: f149b3155744 ("signal: Never allocate siginfo for SIGKILL or SIGSTOP") Fixes: 6dfc88977e42 ("[PATCH] shared thread signals") History Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Signed-off-by: "Eric W. Biederman" --- kernel/signal.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index e1d7ad8e6ab1..9ca8e5278c8e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1057,10 +1057,9 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc result = TRACE_SIGNAL_DELIVERED; /* - * Skip useless siginfo allocation for SIGKILL SIGSTOP, - * and kernel threads. + * Skip useless siginfo allocation for SIGKILL and kernel threads. */ - if (sig_kernel_only(sig) || (t->flags & PF_KTHREAD)) + if ((sig == SIGKILL) || (t->flags & PF_KTHREAD)) goto out_set; /* From 69eaab04c675ef2d0127a80b3395aa90dfd1061f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 4 Feb 2019 17:29:44 -0800 Subject: [PATCH 0614/1436] btf: extract BTF type size calculation This pre-patch extracts calculation of amount of space taken by BTF type descriptor for later reuse by btf_dedup functionality. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- tools/lib/bpf/btf.c | 98 +++++++++++++++++++++------------------------ 1 file changed, 46 insertions(+), 52 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 7ec0463354db..06bd1a625ff4 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -182,6 +182,37 @@ static int btf_parse_str_sec(struct btf *btf) return 0; } +static int btf_type_size(struct btf_type *t) +{ + int base_size = sizeof(struct btf_type); + __u16 vlen = BTF_INFO_VLEN(t->info); + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + return base_size; + case BTF_KIND_INT: + return base_size + sizeof(__u32); + case BTF_KIND_ENUM: + return base_size + vlen * sizeof(struct btf_enum); + case BTF_KIND_ARRAY: + return base_size + sizeof(struct btf_array); + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + return base_size + vlen * sizeof(struct btf_member); + case BTF_KIND_FUNC_PROTO: + return base_size + vlen * sizeof(struct btf_param); + default: + pr_debug("Unsupported BTF_KIND:%u\n", BTF_INFO_KIND(t->info)); + return -EINVAL; + } +} + static int btf_parse_type_sec(struct btf *btf) { struct btf_header *hdr = btf->hdr; @@ -191,41 +222,13 @@ static int btf_parse_type_sec(struct btf *btf) while (next_type < end_type) { struct btf_type *t = next_type; - __u16 vlen = BTF_INFO_VLEN(t->info); + int type_size; int err; - next_type += sizeof(*t); - switch (BTF_INFO_KIND(t->info)) { - case BTF_KIND_INT: - next_type += sizeof(int); - break; - case BTF_KIND_ARRAY: - next_type += sizeof(struct btf_array); - break; - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - next_type += vlen * sizeof(struct btf_member); - break; - case BTF_KIND_ENUM: - next_type += vlen * sizeof(struct btf_enum); - break; - case BTF_KIND_FUNC_PROTO: - next_type += vlen * sizeof(struct btf_param); - break; - case BTF_KIND_FUNC: - case BTF_KIND_TYPEDEF: - case BTF_KIND_PTR: - case BTF_KIND_FWD: - case BTF_KIND_VOLATILE: - case BTF_KIND_CONST: - case BTF_KIND_RESTRICT: - break; - default: - pr_debug("Unsupported BTF_KIND:%u\n", - BTF_INFO_KIND(t->info)); - return -EINVAL; - } - + type_size = btf_type_size(t); + if (type_size < 0) + return type_size; + next_type += type_size; err = btf_add_type(btf, t); if (err) return err; @@ -252,21 +255,6 @@ static bool btf_type_is_void_or_null(const struct btf_type *t) return !t || btf_type_is_void(t); } -static __s64 btf_type_size(const struct btf_type *t) -{ - switch (BTF_INFO_KIND(t->info)) { - case BTF_KIND_INT: - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - case BTF_KIND_ENUM: - return t->size; - case BTF_KIND_PTR: - return sizeof(void *); - default: - return -EINVAL; - } -} - #define MAX_RESOLVE_DEPTH 32 __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) @@ -280,11 +268,16 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) t = btf__type_by_id(btf, type_id); for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); i++) { - size = btf_type_size(t); - if (size >= 0) - break; - switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_INT: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_ENUM: + size = t->size; + goto done; + case BTF_KIND_PTR: + size = sizeof(void *); + goto done; case BTF_KIND_TYPEDEF: case BTF_KIND_VOLATILE: case BTF_KIND_CONST: @@ -308,6 +301,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) if (size < 0) return -EINVAL; +done: if (nelems && size > UINT32_MAX / nelems) return -E2BIG; From d5caef5b56555bfa2ac0cf730f075864a023437e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 4 Feb 2019 17:29:45 -0800 Subject: [PATCH 0615/1436] btf: add BTF types deduplication algorithm This patch implements BTF types deduplication algorithm. It allows to greatly compress typical output of pahole's DWARF-to-BTF conversion or LLVM's compilation output by detecting and collapsing identical types emitted in isolation per compilation unit. Algorithm also resolves struct/union forward declarations into concrete BTF types representing referenced struct/union. If undesired, this resolution can be disabled through specifying corresponding options. Algorithm itself and its application to Linux kernel's BTF types is described in details at: https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- tools/lib/bpf/btf.c | 1741 ++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/btf.h | 7 + tools/lib/bpf/libbpf.map | 1 + 3 files changed, 1749 insertions(+) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 06bd1a625ff4..e5097be16018 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -849,3 +849,1744 @@ __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext) { return btf_ext->line_info.rec_size; } + +struct btf_dedup; + +static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts); +static void btf_dedup_free(struct btf_dedup *d); +static int btf_dedup_strings(struct btf_dedup *d); +static int btf_dedup_prim_types(struct btf_dedup *d); +static int btf_dedup_struct_types(struct btf_dedup *d); +static int btf_dedup_ref_types(struct btf_dedup *d); +static int btf_dedup_compact_types(struct btf_dedup *d); +static int btf_dedup_remap_types(struct btf_dedup *d); + +/* + * Deduplicate BTF types and strings. + * + * BTF dedup algorithm takes as an input `struct btf` representing `.BTF` ELF + * section with all BTF type descriptors and string data. It overwrites that + * memory in-place with deduplicated types and strings without any loss of + * information. If optional `struct btf_ext` representing '.BTF.ext' ELF section + * is provided, all the strings referenced from .BTF.ext section are honored + * and updated to point to the right offsets after deduplication. + * + * If function returns with error, type/string data might be garbled and should + * be discarded. + * + * More verbose and detailed description of both problem btf_dedup is solving, + * as well as solution could be found at: + * https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html + * + * Problem description and justification + * ===================================== + * + * BTF type information is typically emitted either as a result of conversion + * from DWARF to BTF or directly by compiler. In both cases, each compilation + * unit contains information about a subset of all the types that are used + * in an application. These subsets are frequently overlapping and contain a lot + * of duplicated information when later concatenated together into a single + * binary. This algorithm ensures that each unique type is represented by single + * BTF type descriptor, greatly reducing resulting size of BTF data. + * + * Compilation unit isolation and subsequent duplication of data is not the only + * problem. The same type hierarchy (e.g., struct and all the type that struct + * references) in different compilation units can be represented in BTF to + * various degrees of completeness (or, rather, incompleteness) due to + * struct/union forward declarations. + * + * Let's take a look at an example, that we'll use to better understand the + * problem (and solution). Suppose we have two compilation units, each using + * same `struct S`, but each of them having incomplete type information about + * struct's fields: + * + * // CU #1: + * struct S; + * struct A { + * int a; + * struct A* self; + * struct S* parent; + * }; + * struct B; + * struct S { + * struct A* a_ptr; + * struct B* b_ptr; + * }; + * + * // CU #2: + * struct S; + * struct A; + * struct B { + * int b; + * struct B* self; + * struct S* parent; + * }; + * struct S { + * struct A* a_ptr; + * struct B* b_ptr; + * }; + * + * In case of CU #1, BTF data will know only that `struct B` exist (but no + * more), but will know the complete type information about `struct A`. While + * for CU #2, it will know full type information about `struct B`, but will + * only know about forward declaration of `struct A` (in BTF terms, it will + * have `BTF_KIND_FWD` type descriptor with name `B`). + * + * This compilation unit isolation means that it's possible that there is no + * single CU with complete type information describing structs `S`, `A`, and + * `B`. Also, we might get tons of duplicated and redundant type information. + * + * Additional complication we need to keep in mind comes from the fact that + * types, in general, can form graphs containing cycles, not just DAGs. + * + * While algorithm does deduplication, it also merges and resolves type + * information (unless disabled throught `struct btf_opts`), whenever possible. + * E.g., in the example above with two compilation units having partial type + * information for structs `A` and `B`, the output of algorithm will emit + * a single copy of each BTF type that describes structs `A`, `B`, and `S` + * (as well as type information for `int` and pointers), as if they were defined + * in a single compilation unit as: + * + * struct A { + * int a; + * struct A* self; + * struct S* parent; + * }; + * struct B { + * int b; + * struct B* self; + * struct S* parent; + * }; + * struct S { + * struct A* a_ptr; + * struct B* b_ptr; + * }; + * + * Algorithm summary + * ================= + * + * Algorithm completes its work in 6 separate passes: + * + * 1. Strings deduplication. + * 2. Primitive types deduplication (int, enum, fwd). + * 3. Struct/union types deduplication. + * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func + * protos, and const/volatile/restrict modifiers). + * 5. Types compaction. + * 6. Types remapping. + * + * Algorithm determines canonical type descriptor, which is a single + * representative type for each truly unique type. This canonical type is the + * one that will go into final deduplicated BTF type information. For + * struct/unions, it is also the type that algorithm will merge additional type + * information into (while resolving FWDs), as it discovers it from data in + * other CUs. Each input BTF type eventually gets either mapped to itself, if + * that type is canonical, or to some other type, if that type is equivalent + * and was chosen as canonical representative. This mapping is stored in + * `btf_dedup->map` array. This map is also used to record STRUCT/UNION that + * FWD type got resolved to. + * + * To facilitate fast discovery of canonical types, we also maintain canonical + * index (`btf_dedup->dedup_table`), which maps type descriptor's signature hash + * (i.e., hashed kind, name, size, fields, etc) into a list of canonical types + * that match that signature. With sufficiently good choice of type signature + * hashing function, we can limit number of canonical types for each unique type + * signature to a very small number, allowing to find canonical type for any + * duplicated type very quickly. + * + * Struct/union deduplication is the most critical part and algorithm for + * deduplicating structs/unions is described in greater details in comments for + * `btf_dedup_is_equiv` function. + */ +int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts) +{ + struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts); + int err; + + if (IS_ERR(d)) { + pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d)); + return -EINVAL; + } + + err = btf_dedup_strings(d); + if (err < 0) { + pr_debug("btf_dedup_strings failed:%d\n", err); + goto done; + } + err = btf_dedup_prim_types(d); + if (err < 0) { + pr_debug("btf_dedup_prim_types failed:%d\n", err); + goto done; + } + err = btf_dedup_struct_types(d); + if (err < 0) { + pr_debug("btf_dedup_struct_types failed:%d\n", err); + goto done; + } + err = btf_dedup_ref_types(d); + if (err < 0) { + pr_debug("btf_dedup_ref_types failed:%d\n", err); + goto done; + } + err = btf_dedup_compact_types(d); + if (err < 0) { + pr_debug("btf_dedup_compact_types failed:%d\n", err); + goto done; + } + err = btf_dedup_remap_types(d); + if (err < 0) { + pr_debug("btf_dedup_remap_types failed:%d\n", err); + goto done; + } + +done: + btf_dedup_free(d); + return err; +} + +#define BTF_DEDUP_TABLE_SIZE_LOG 14 +#define BTF_DEDUP_TABLE_MOD ((1 << BTF_DEDUP_TABLE_SIZE_LOG) - 1) +#define BTF_UNPROCESSED_ID ((__u32)-1) +#define BTF_IN_PROGRESS_ID ((__u32)-2) + +struct btf_dedup_node { + struct btf_dedup_node *next; + __u32 type_id; +}; + +struct btf_dedup { + /* .BTF section to be deduped in-place */ + struct btf *btf; + /* + * Optional .BTF.ext section. When provided, any strings referenced + * from it will be taken into account when deduping strings + */ + struct btf_ext *btf_ext; + /* + * This is a map from any type's signature hash to a list of possible + * canonical representative type candidates. Hash collisions are + * ignored, so even types of various kinds can share same list of + * candidates, which is fine because we rely on subsequent + * btf_xxx_equal() checks to authoritatively verify type equality. + */ + struct btf_dedup_node **dedup_table; + /* Canonical types map */ + __u32 *map; + /* Hypothetical mapping, used during type graph equivalence checks */ + __u32 *hypot_map; + __u32 *hypot_list; + size_t hypot_cnt; + size_t hypot_cap; + /* Various option modifying behavior of algorithm */ + struct btf_dedup_opts opts; +}; + +struct btf_str_ptr { + const char *str; + __u32 new_off; + bool used; +}; + +struct btf_str_ptrs { + struct btf_str_ptr *ptrs; + const char *data; + __u32 cnt; + __u32 cap; +}; + +static inline __u32 hash_combine(__u32 h, __u32 value) +{ +/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ +#define GOLDEN_RATIO_PRIME 0x9e370001UL + return h * 37 + value * GOLDEN_RATIO_PRIME; +#undef GOLDEN_RATIO_PRIME +} + +#define for_each_hash_node(table, hash, node) \ + for (node = table[hash & BTF_DEDUP_TABLE_MOD]; node; node = node->next) + +static int btf_dedup_table_add(struct btf_dedup *d, __u32 hash, __u32 type_id) +{ + struct btf_dedup_node *node = malloc(sizeof(struct btf_dedup_node)); + + if (!node) + return -ENOMEM; + node->type_id = type_id; + node->next = d->dedup_table[hash & BTF_DEDUP_TABLE_MOD]; + d->dedup_table[hash & BTF_DEDUP_TABLE_MOD] = node; + return 0; +} + +static int btf_dedup_hypot_map_add(struct btf_dedup *d, + __u32 from_id, __u32 to_id) +{ + if (d->hypot_cnt == d->hypot_cap) { + __u32 *new_list; + + d->hypot_cap += max(16, d->hypot_cap / 2); + new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap); + if (!new_list) + return -ENOMEM; + d->hypot_list = new_list; + } + d->hypot_list[d->hypot_cnt++] = from_id; + d->hypot_map[from_id] = to_id; + return 0; +} + +static void btf_dedup_clear_hypot_map(struct btf_dedup *d) +{ + int i; + + for (i = 0; i < d->hypot_cnt; i++) + d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID; + d->hypot_cnt = 0; +} + +static void btf_dedup_table_free(struct btf_dedup *d) +{ + struct btf_dedup_node *head, *tmp; + int i; + + if (!d->dedup_table) + return; + + for (i = 0; i < (1 << BTF_DEDUP_TABLE_SIZE_LOG); i++) { + while (d->dedup_table[i]) { + tmp = d->dedup_table[i]; + d->dedup_table[i] = tmp->next; + free(tmp); + } + + head = d->dedup_table[i]; + while (head) { + tmp = head; + head = head->next; + free(tmp); + } + } + + free(d->dedup_table); + d->dedup_table = NULL; +} + +static void btf_dedup_free(struct btf_dedup *d) +{ + btf_dedup_table_free(d); + + free(d->map); + d->map = NULL; + + free(d->hypot_map); + d->hypot_map = NULL; + + free(d->hypot_list); + d->hypot_list = NULL; + + free(d); +} + +static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts) +{ + struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup)); + int i, err = 0; + + if (!d) + return ERR_PTR(-ENOMEM); + + d->btf = btf; + d->btf_ext = btf_ext; + + d->dedup_table = calloc(1 << BTF_DEDUP_TABLE_SIZE_LOG, + sizeof(struct btf_dedup_node *)); + if (!d->dedup_table) { + err = -ENOMEM; + goto done; + } + + d->map = malloc(sizeof(__u32) * (1 + btf->nr_types)); + if (!d->map) { + err = -ENOMEM; + goto done; + } + /* special BTF "void" type is made canonical immediately */ + d->map[0] = 0; + for (i = 1; i <= btf->nr_types; i++) + d->map[i] = BTF_UNPROCESSED_ID; + + d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types)); + if (!d->hypot_map) { + err = -ENOMEM; + goto done; + } + for (i = 0; i <= btf->nr_types; i++) + d->hypot_map[i] = BTF_UNPROCESSED_ID; + + d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds; + +done: + if (err) { + btf_dedup_free(d); + return ERR_PTR(err); + } + + return d; +} + +typedef int (*str_off_fn_t)(__u32 *str_off_ptr, void *ctx); + +/* + * Iterate over all possible places in .BTF and .BTF.ext that can reference + * string and pass pointer to it to a provided callback `fn`. + */ +static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx) +{ + void *line_data_cur, *line_data_end; + int i, j, r, rec_size; + struct btf_type *t; + + for (i = 1; i <= d->btf->nr_types; i++) { + t = d->btf->types[i]; + r = fn(&t->name_off, ctx); + if (r) + return r; + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + struct btf_member *m = (struct btf_member *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + for (j = 0; j < vlen; j++) { + r = fn(&m->name_off, ctx); + if (r) + return r; + m++; + } + break; + } + case BTF_KIND_ENUM: { + struct btf_enum *m = (struct btf_enum *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + for (j = 0; j < vlen; j++) { + r = fn(&m->name_off, ctx); + if (r) + return r; + m++; + } + break; + } + case BTF_KIND_FUNC_PROTO: { + struct btf_param *m = (struct btf_param *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + for (j = 0; j < vlen; j++) { + r = fn(&m->name_off, ctx); + if (r) + return r; + m++; + } + break; + } + default: + break; + } + } + + if (!d->btf_ext) + return 0; + + line_data_cur = d->btf_ext->line_info.info; + line_data_end = d->btf_ext->line_info.info + d->btf_ext->line_info.len; + rec_size = d->btf_ext->line_info.rec_size; + + while (line_data_cur < line_data_end) { + struct btf_ext_info_sec *sec = line_data_cur; + struct bpf_line_info_min *line_info; + __u32 num_info = sec->num_info; + + r = fn(&sec->sec_name_off, ctx); + if (r) + return r; + + line_data_cur += sizeof(struct btf_ext_info_sec); + for (i = 0; i < num_info; i++) { + line_info = line_data_cur; + r = fn(&line_info->file_name_off, ctx); + if (r) + return r; + r = fn(&line_info->line_off, ctx); + if (r) + return r; + line_data_cur += rec_size; + } + } + + return 0; +} + +static int str_sort_by_content(const void *a1, const void *a2) +{ + const struct btf_str_ptr *p1 = a1; + const struct btf_str_ptr *p2 = a2; + + return strcmp(p1->str, p2->str); +} + +static int str_sort_by_offset(const void *a1, const void *a2) +{ + const struct btf_str_ptr *p1 = a1; + const struct btf_str_ptr *p2 = a2; + + if (p1->str != p2->str) + return p1->str < p2->str ? -1 : 1; + return 0; +} + +static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem) +{ + const struct btf_str_ptr *p = pelem; + + if (str_ptr != p->str) + return (const char *)str_ptr < p->str ? -1 : 1; + return 0; +} + +static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx) +{ + struct btf_str_ptrs *strs; + struct btf_str_ptr *s; + + if (*str_off_ptr == 0) + return 0; + + strs = ctx; + s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, + sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); + if (!s) + return -EINVAL; + s->used = true; + return 0; +} + +static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx) +{ + struct btf_str_ptrs *strs; + struct btf_str_ptr *s; + + if (*str_off_ptr == 0) + return 0; + + strs = ctx; + s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, + sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); + if (!s) + return -EINVAL; + *str_off_ptr = s->new_off; + return 0; +} + +/* + * Dedup string and filter out those that are not referenced from either .BTF + * or .BTF.ext (if provided) sections. + * + * This is done by building index of all strings in BTF's string section, + * then iterating over all entities that can reference strings (e.g., type + * names, struct field names, .BTF.ext line info, etc) and marking corresponding + * strings as used. After that all used strings are deduped and compacted into + * sequential blob of memory and new offsets are calculated. Then all the string + * references are iterated again and rewritten using new offsets. + */ +static int btf_dedup_strings(struct btf_dedup *d) +{ + const struct btf_header *hdr = d->btf->hdr; + char *start = (char *)d->btf->nohdr_data + hdr->str_off; + char *end = start + d->btf->hdr->str_len; + char *p = start, *tmp_strs = NULL; + struct btf_str_ptrs strs = { + .cnt = 0, + .cap = 0, + .ptrs = NULL, + .data = start, + }; + int i, j, err = 0, grp_idx; + bool grp_used; + + /* build index of all strings */ + while (p < end) { + if (strs.cnt + 1 > strs.cap) { + struct btf_str_ptr *new_ptrs; + + strs.cap += max(strs.cnt / 2, 16); + new_ptrs = realloc(strs.ptrs, + sizeof(strs.ptrs[0]) * strs.cap); + if (!new_ptrs) { + err = -ENOMEM; + goto done; + } + strs.ptrs = new_ptrs; + } + + strs.ptrs[strs.cnt].str = p; + strs.ptrs[strs.cnt].used = false; + + p += strlen(p) + 1; + strs.cnt++; + } + + /* temporary storage for deduplicated strings */ + tmp_strs = malloc(d->btf->hdr->str_len); + if (!tmp_strs) { + err = -ENOMEM; + goto done; + } + + /* mark all used strings */ + strs.ptrs[0].used = true; + err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs); + if (err) + goto done; + + /* sort strings by context, so that we can identify duplicates */ + qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content); + + /* + * iterate groups of equal strings and if any instance in a group was + * referenced, emit single instance and remember new offset + */ + p = tmp_strs; + grp_idx = 0; + grp_used = strs.ptrs[0].used; + /* iterate past end to avoid code duplication after loop */ + for (i = 1; i <= strs.cnt; i++) { + /* + * when i == strs.cnt, we want to skip string comparison and go + * straight to handling last group of strings (otherwise we'd + * need to handle last group after the loop w/ duplicated code) + */ + if (i < strs.cnt && + !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) { + grp_used = grp_used || strs.ptrs[i].used; + continue; + } + + /* + * this check would have been required after the loop to handle + * last group of strings, but due to <= condition in a loop + * we avoid that duplication + */ + if (grp_used) { + int new_off = p - tmp_strs; + __u32 len = strlen(strs.ptrs[grp_idx].str); + + memmove(p, strs.ptrs[grp_idx].str, len + 1); + for (j = grp_idx; j < i; j++) + strs.ptrs[j].new_off = new_off; + p += len + 1; + } + + if (i < strs.cnt) { + grp_idx = i; + grp_used = strs.ptrs[i].used; + } + } + + /* replace original strings with deduped ones */ + d->btf->hdr->str_len = p - tmp_strs; + memmove(start, tmp_strs, d->btf->hdr->str_len); + end = start + d->btf->hdr->str_len; + + /* restore original order for further binary search lookups */ + qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset); + + /* remap string offsets */ + err = btf_for_each_str_off(d, btf_str_remap_offset, &strs); + if (err) + goto done; + + d->btf->hdr->str_len = end - start; + +done: + free(tmp_strs); + free(strs.ptrs); + return err; +} + +static __u32 btf_hash_common(struct btf_type *t) +{ + __u32 h; + + h = hash_combine(0, t->name_off); + h = hash_combine(h, t->info); + h = hash_combine(h, t->size); + return h; +} + +static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2) +{ + return t1->name_off == t2->name_off && + t1->info == t2->info && + t1->size == t2->size; +} + +/* Calculate type signature hash of INT. */ +static __u32 btf_hash_int(struct btf_type *t) +{ + __u32 info = *(__u32 *)(t + 1); + __u32 h; + + h = btf_hash_common(t); + h = hash_combine(h, info); + return h; +} + +/* Check structural equality of two INTs. */ +static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2) +{ + __u32 info1, info2; + + if (!btf_equal_common(t1, t2)) + return false; + info1 = *(__u32 *)(t1 + 1); + info2 = *(__u32 *)(t2 + 1); + return info1 == info2; +} + +/* Calculate type signature hash of ENUM. */ +static __u32 btf_hash_enum(struct btf_type *t) +{ + struct btf_enum *member = (struct btf_enum *)(t + 1); + __u32 vlen = BTF_INFO_VLEN(t->info); + __u32 h = btf_hash_common(t); + int i; + + for (i = 0; i < vlen; i++) { + h = hash_combine(h, member->name_off); + h = hash_combine(h, member->val); + member++; + } + return h; +} + +/* Check structural equality of two ENUMs. */ +static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_enum *m1, *m2; + __u16 vlen; + int i; + + if (!btf_equal_common(t1, t2)) + return false; + + vlen = BTF_INFO_VLEN(t1->info); + m1 = (struct btf_enum *)(t1 + 1); + m2 = (struct btf_enum *)(t2 + 1); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off || m1->val != m2->val) + return false; + m1++; + m2++; + } + return true; +} + +/* + * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs, + * as referenced type IDs equivalence is established separately during type + * graph equivalence check algorithm. + */ +static __u32 btf_hash_struct(struct btf_type *t) +{ + struct btf_member *member = (struct btf_member *)(t + 1); + __u32 vlen = BTF_INFO_VLEN(t->info); + __u32 h = btf_hash_common(t); + int i; + + for (i = 0; i < vlen; i++) { + h = hash_combine(h, member->name_off); + h = hash_combine(h, member->offset); + /* no hashing of referenced type ID, it can be unresolved yet */ + member++; + } + return h; +} + +/* + * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type + * IDs. This check is performed during type graph equivalence check and + * referenced types equivalence is checked separately. + */ +static bool btf_equal_struct(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_member *m1, *m2; + __u16 vlen; + int i; + + if (!btf_equal_common(t1, t2)) + return false; + + vlen = BTF_INFO_VLEN(t1->info); + m1 = (struct btf_member *)(t1 + 1); + m2 = (struct btf_member *)(t2 + 1); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off || m1->offset != m2->offset) + return false; + m1++; + m2++; + } + return true; +} + +/* + * Calculate type signature hash of ARRAY, including referenced type IDs, + * under assumption that they were already resolved to canonical type IDs and + * are not going to change. + */ +static __u32 btf_hash_array(struct btf_type *t) +{ + struct btf_array *info = (struct btf_array *)(t + 1); + __u32 h = btf_hash_common(t); + + h = hash_combine(h, info->type); + h = hash_combine(h, info->index_type); + h = hash_combine(h, info->nelems); + return h; +} + +/* + * Check exact equality of two ARRAYs, taking into account referenced + * type IDs, under assumption that they were already resolved to canonical + * type IDs and are not going to change. + * This function is called during reference types deduplication to compare + * ARRAY to potential canonical representative. + */ +static bool btf_equal_array(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_array *info1, *info2; + + if (!btf_equal_common(t1, t2)) + return false; + + info1 = (struct btf_array *)(t1 + 1); + info2 = (struct btf_array *)(t2 + 1); + return info1->type == info2->type && + info1->index_type == info2->index_type && + info1->nelems == info2->nelems; +} + +/* + * Check structural compatibility of two ARRAYs, ignoring referenced type + * IDs. This check is performed during type graph equivalence check and + * referenced types equivalence is checked separately. + */ +static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_array *info1, *info2; + + if (!btf_equal_common(t1, t2)) + return false; + + info1 = (struct btf_array *)(t1 + 1); + info2 = (struct btf_array *)(t2 + 1); + return info1->nelems == info2->nelems; +} + +/* + * Calculate type signature hash of FUNC_PROTO, including referenced type IDs, + * under assumption that they were already resolved to canonical type IDs and + * are not going to change. + */ +static inline __u32 btf_hash_fnproto(struct btf_type *t) +{ + struct btf_param *member = (struct btf_param *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + __u32 h = btf_hash_common(t); + int i; + + for (i = 0; i < vlen; i++) { + h = hash_combine(h, member->name_off); + h = hash_combine(h, member->type); + member++; + } + return h; +} + +/* + * Check exact equality of two FUNC_PROTOs, taking into account referenced + * type IDs, under assumption that they were already resolved to canonical + * type IDs and are not going to change. + * This function is called during reference types deduplication to compare + * FUNC_PROTO to potential canonical representative. + */ +static inline bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_param *m1, *m2; + __u16 vlen; + int i; + + if (!btf_equal_common(t1, t2)) + return false; + + vlen = BTF_INFO_VLEN(t1->info); + m1 = (struct btf_param *)(t1 + 1); + m2 = (struct btf_param *)(t2 + 1); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off || m1->type != m2->type) + return false; + m1++; + m2++; + } + return true; +} + +/* + * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type + * IDs. This check is performed during type graph equivalence check and + * referenced types equivalence is checked separately. + */ +static inline bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_param *m1, *m2; + __u16 vlen; + int i; + + /* skip return type ID */ + if (t1->name_off != t2->name_off || t1->info != t2->info) + return false; + + vlen = BTF_INFO_VLEN(t1->info); + m1 = (struct btf_param *)(t1 + 1); + m2 = (struct btf_param *)(t2 + 1); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off) + return false; + m1++; + m2++; + } + return true; +} + +/* + * Deduplicate primitive types, that can't reference other types, by calculating + * their type signature hash and comparing them with any possible canonical + * candidate. If no canonical candidate matches, type itself is marked as + * canonical and is added into `btf_dedup->dedup_table` as another candidate. + */ +static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) +{ + struct btf_type *t = d->btf->types[type_id]; + struct btf_type *cand; + struct btf_dedup_node *cand_node; + /* if we don't find equivalent type, then we are canonical */ + __u32 new_id = type_id; + __u32 h; + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_ARRAY: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_FUNC: + case BTF_KIND_FUNC_PROTO: + return 0; + + case BTF_KIND_INT: + h = btf_hash_int(t); + for_each_hash_node(d->dedup_table, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_int(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + + case BTF_KIND_ENUM: + h = btf_hash_enum(t); + for_each_hash_node(d->dedup_table, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_enum(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + + case BTF_KIND_FWD: + h = btf_hash_common(t); + for_each_hash_node(d->dedup_table, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_common(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + + default: + return -EINVAL; + } + + d->map[type_id] = new_id; + if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) + return -ENOMEM; + + return 0; +} + +static int btf_dedup_prim_types(struct btf_dedup *d) +{ + int i, err; + + for (i = 1; i <= d->btf->nr_types; i++) { + err = btf_dedup_prim_type(d, i); + if (err) + return err; + } + return 0; +} + +/* + * Check whether type is already mapped into canonical one (could be to itself). + */ +static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id) +{ + return d->map[type_id] <= BTF_MAX_TYPE; +} + +/* + * Resolve type ID into its canonical type ID, if any; otherwise return original + * type ID. If type is FWD and is resolved into STRUCT/UNION already, follow + * STRUCT/UNION link and resolve it into canonical type ID as well. + */ +static inline __u32 resolve_type_id(struct btf_dedup *d, __u32 type_id) +{ + while (is_type_mapped(d, type_id) && d->map[type_id] != type_id) + type_id = d->map[type_id]; + return type_id; +} + +/* + * Resolve FWD to underlying STRUCT/UNION, if any; otherwise return original + * type ID. + */ +static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id) +{ + __u32 orig_type_id = type_id; + + if (BTF_INFO_KIND(d->btf->types[type_id]->info) != BTF_KIND_FWD) + return type_id; + + while (is_type_mapped(d, type_id) && d->map[type_id] != type_id) + type_id = d->map[type_id]; + + if (BTF_INFO_KIND(d->btf->types[type_id]->info) != BTF_KIND_FWD) + return type_id; + + return orig_type_id; +} + + +static inline __u16 btf_fwd_kind(struct btf_type *t) +{ + return BTF_INFO_KFLAG(t->info) ? BTF_KIND_UNION : BTF_KIND_STRUCT; +} + +/* + * Check equivalence of BTF type graph formed by candidate struct/union (we'll + * call it "candidate graph" in this description for brevity) to a type graph + * formed by (potential) canonical struct/union ("canonical graph" for brevity + * here, though keep in mind that not all types in canonical graph are + * necessarily canonical representatives themselves, some of them might be + * duplicates or its uniqueness might not have been established yet). + * Returns: + * - >0, if type graphs are equivalent; + * - 0, if not equivalent; + * - <0, on error. + * + * Algorithm performs side-by-side DFS traversal of both type graphs and checks + * equivalence of BTF types at each step. If at any point BTF types in candidate + * and canonical graphs are not compatible structurally, whole graphs are + * incompatible. If types are structurally equivalent (i.e., all information + * except referenced type IDs is exactly the same), a mapping from `canon_id` to + * a `cand_id` is recored in hypothetical mapping (`btf_dedup->hypot_map`). + * If a type references other types, then those referenced types are checked + * for equivalence recursively. + * + * During DFS traversal, if we find that for current `canon_id` type we + * already have some mapping in hypothetical map, we check for two possible + * situations: + * - `canon_id` is mapped to exactly the same type as `cand_id`. This will + * happen when type graphs have cycles. In this case we assume those two + * types are equivalent. + * - `canon_id` is mapped to different type. This is contradiction in our + * hypothetical mapping, because same graph in canonical graph corresponds + * to two different types in candidate graph, which for equivalent type + * graphs shouldn't happen. This condition terminates equivalence check + * with negative result. + * + * If type graphs traversal exhausts types to check and find no contradiction, + * then type graphs are equivalent. + * + * When checking types for equivalence, there is one special case: FWD types. + * If FWD type resolution is allowed and one of the types (either from canonical + * or candidate graph) is FWD and other is STRUCT/UNION (depending on FWD's kind + * flag) and their names match, hypothetical mapping is updated to point from + * FWD to STRUCT/UNION. If graphs will be determined as equivalent successfully, + * this mapping will be used to record FWD -> STRUCT/UNION mapping permanently. + * + * Technically, this could lead to incorrect FWD to STRUCT/UNION resolution, + * if there are two exactly named (or anonymous) structs/unions that are + * compatible structurally, one of which has FWD field, while other is concrete + * STRUCT/UNION, but according to C sources they are different structs/unions + * that are referencing different types with the same name. This is extremely + * unlikely to happen, but btf_dedup API allows to disable FWD resolution if + * this logic is causing problems. + * + * Doing FWD resolution means that both candidate and/or canonical graphs can + * consists of portions of the graph that come from multiple compilation units. + * This is due to the fact that types within single compilation unit are always + * deduplicated and FWDs are already resolved, if referenced struct/union + * definiton is available. So, if we had unresolved FWD and found corresponding + * STRUCT/UNION, they will be from different compilation units. This + * consequently means that when we "link" FWD to corresponding STRUCT/UNION, + * type graph will likely have at least two different BTF types that describe + * same type (e.g., most probably there will be two different BTF types for the + * same 'int' primitive type) and could even have "overlapping" parts of type + * graph that describe same subset of types. + * + * This in turn means that our assumption that each type in canonical graph + * must correspond to exactly one type in candidate graph might not hold + * anymore and will make it harder to detect contradictions using hypothetical + * map. To handle this problem, we allow to follow FWD -> STRUCT/UNION + * resolution only in canonical graph. FWDs in candidate graphs are never + * resolved. To see why it's OK, let's check all possible situations w.r.t. FWDs + * that can occur: + * - Both types in canonical and candidate graphs are FWDs. If they are + * structurally equivalent, then they can either be both resolved to the + * same STRUCT/UNION or not resolved at all. In both cases they are + * equivalent and there is no need to resolve FWD on candidate side. + * - Both types in canonical and candidate graphs are concrete STRUCT/UNION, + * so nothing to resolve as well, algorithm will check equivalence anyway. + * - Type in canonical graph is FWD, while type in candidate is concrete + * STRUCT/UNION. In this case candidate graph comes from single compilation + * unit, so there is exactly one BTF type for each unique C type. After + * resolving FWD into STRUCT/UNION, there might be more than one BTF type + * in canonical graph mapping to single BTF type in candidate graph, but + * because hypothetical mapping maps from canonical to candidate types, it's + * alright, and we still maintain the property of having single `canon_id` + * mapping to single `cand_id` (there could be two different `canon_id` + * mapped to the same `cand_id`, but it's not contradictory). + * - Type in canonical graph is concrete STRUCT/UNION, while type in candidate + * graph is FWD. In this case we are just going to check compatibility of + * STRUCT/UNION and corresponding FWD, and if they are compatible, we'll + * assume that whatever STRUCT/UNION FWD resolves to must be equivalent to + * a concrete STRUCT/UNION from canonical graph. If the rest of type graphs + * turn out equivalent, we'll re-resolve FWD to concrete STRUCT/UNION from + * canonical graph. + */ +static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, + __u32 canon_id) +{ + struct btf_type *cand_type; + struct btf_type *canon_type; + __u32 hypot_type_id; + __u16 cand_kind; + __u16 canon_kind; + int i, eq; + + /* if both resolve to the same canonical, they must be equivalent */ + if (resolve_type_id(d, cand_id) == resolve_type_id(d, canon_id)) + return 1; + + canon_id = resolve_fwd_id(d, canon_id); + + hypot_type_id = d->hypot_map[canon_id]; + if (hypot_type_id <= BTF_MAX_TYPE) + return hypot_type_id == cand_id; + + if (btf_dedup_hypot_map_add(d, canon_id, cand_id)) + return -ENOMEM; + + cand_type = d->btf->types[cand_id]; + canon_type = d->btf->types[canon_id]; + cand_kind = BTF_INFO_KIND(cand_type->info); + canon_kind = BTF_INFO_KIND(canon_type->info); + + if (cand_type->name_off != canon_type->name_off) + return 0; + + /* FWD <--> STRUCT/UNION equivalence check, if enabled */ + if (!d->opts.dont_resolve_fwds + && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD) + && cand_kind != canon_kind) { + __u16 real_kind; + __u16 fwd_kind; + + if (cand_kind == BTF_KIND_FWD) { + real_kind = canon_kind; + fwd_kind = btf_fwd_kind(cand_type); + } else { + real_kind = cand_kind; + fwd_kind = btf_fwd_kind(canon_type); + } + return fwd_kind == real_kind; + } + + if (cand_type->info != canon_type->info) + return 0; + + switch (cand_kind) { + case BTF_KIND_INT: + return btf_equal_int(cand_type, canon_type); + + case BTF_KIND_ENUM: + return btf_equal_enum(cand_type, canon_type); + + case BTF_KIND_FWD: + return btf_equal_common(cand_type, canon_type); + + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + return btf_dedup_is_equiv(d, cand_type->type, canon_type->type); + + case BTF_KIND_ARRAY: { + struct btf_array *cand_arr, *canon_arr; + + if (!btf_compat_array(cand_type, canon_type)) + return 0; + cand_arr = (struct btf_array *)(cand_type + 1); + canon_arr = (struct btf_array *)(canon_type + 1); + eq = btf_dedup_is_equiv(d, + cand_arr->index_type, canon_arr->index_type); + if (eq <= 0) + return eq; + return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type); + } + + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + struct btf_member *cand_m, *canon_m; + __u16 vlen; + + if (!btf_equal_struct(cand_type, canon_type)) + return 0; + vlen = BTF_INFO_VLEN(cand_type->info); + cand_m = (struct btf_member *)(cand_type + 1); + canon_m = (struct btf_member *)(canon_type + 1); + for (i = 0; i < vlen; i++) { + eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type); + if (eq <= 0) + return eq; + cand_m++; + canon_m++; + } + + return 1; + } + + case BTF_KIND_FUNC_PROTO: { + struct btf_param *cand_p, *canon_p; + __u16 vlen; + + if (!btf_compat_fnproto(cand_type, canon_type)) + return 0; + eq = btf_dedup_is_equiv(d, cand_type->type, canon_type->type); + if (eq <= 0) + return eq; + vlen = BTF_INFO_VLEN(cand_type->info); + cand_p = (struct btf_param *)(cand_type + 1); + canon_p = (struct btf_param *)(canon_type + 1); + for (i = 0; i < vlen; i++) { + eq = btf_dedup_is_equiv(d, cand_p->type, canon_p->type); + if (eq <= 0) + return eq; + cand_p++; + canon_p++; + } + return 1; + } + + default: + return -EINVAL; + } + return 0; +} + +/* + * Use hypothetical mapping, produced by successful type graph equivalence + * check, to augment existing struct/union canonical mapping, where possible. + * + * If BTF_KIND_FWD resolution is allowed, this mapping is also used to record + * FWD -> STRUCT/UNION correspondence as well. FWD resolution is bidirectional: + * it doesn't matter if FWD type was part of canonical graph or candidate one, + * we are recording the mapping anyway. As opposed to carefulness required + * for struct/union correspondence mapping (described below), for FWD resolution + * it's not important, as by the time that FWD type (reference type) will be + * deduplicated all structs/unions will be deduped already anyway. + * + * Recording STRUCT/UNION mapping is purely a performance optimization and is + * not required for correctness. It needs to be done carefully to ensure that + * struct/union from candidate's type graph is not mapped into corresponding + * struct/union from canonical type graph that itself hasn't been resolved into + * canonical representative. The only guarantee we have is that canonical + * struct/union was determined as canonical and that won't change. But any + * types referenced through that struct/union fields could have been not yet + * resolved, so in case like that it's too early to establish any kind of + * correspondence between structs/unions. + * + * No canonical correspondence is derived for primitive types (they are already + * deduplicated completely already anyway) or reference types (they rely on + * stability of struct/union canonical relationship for equivalence checks). + */ +static void btf_dedup_merge_hypot_map(struct btf_dedup *d) +{ + __u32 cand_type_id, targ_type_id; + __u16 t_kind, c_kind; + __u32 t_id, c_id; + int i; + + for (i = 0; i < d->hypot_cnt; i++) { + cand_type_id = d->hypot_list[i]; + targ_type_id = d->hypot_map[cand_type_id]; + t_id = resolve_type_id(d, targ_type_id); + c_id = resolve_type_id(d, cand_type_id); + t_kind = BTF_INFO_KIND(d->btf->types[t_id]->info); + c_kind = BTF_INFO_KIND(d->btf->types[c_id]->info); + /* + * Resolve FWD into STRUCT/UNION. + * It's ok to resolve FWD into STRUCT/UNION that's not yet + * mapped to canonical representative (as opposed to + * STRUCT/UNION <--> STRUCT/UNION mapping logic below), because + * eventually that struct is going to be mapped and all resolved + * FWDs will automatically resolve to correct canonical + * representative. This will happen before ref type deduping, + * which critically depends on stability of these mapping. This + * stability is not a requirement for STRUCT/UNION equivalence + * checks, though. + */ + if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD) + d->map[c_id] = t_id; + else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD) + d->map[t_id] = c_id; + + if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) && + c_kind != BTF_KIND_FWD && + is_type_mapped(d, c_id) && + !is_type_mapped(d, t_id)) { + /* + * as a perf optimization, we can map struct/union + * that's part of type graph we just verified for + * equivalence. We can do that for struct/union that has + * canonical representative only, though. + */ + d->map[t_id] = c_id; + } + } +} + +/* + * Deduplicate struct/union types. + * + * For each struct/union type its type signature hash is calculated, taking + * into account type's name, size, number, order and names of fields, but + * ignoring type ID's referenced from fields, because they might not be deduped + * completely until after reference types deduplication phase. This type hash + * is used to iterate over all potential canonical types, sharing same hash. + * For each canonical candidate we check whether type graphs that they form + * (through referenced types in fields and so on) are equivalent using algorithm + * implemented in `btf_dedup_is_equiv`. If such equivalence is found and + * BTF_KIND_FWD resolution is allowed, then hypothetical mapping + * (btf_dedup->hypot_map) produced by aforementioned type graph equivalence + * algorithm is used to record FWD -> STRUCT/UNION mapping. It's also used to + * potentially map other structs/unions to their canonical representatives, + * if such relationship hasn't yet been established. This speeds up algorithm + * by eliminating some of the duplicate work. + * + * If no matching canonical representative was found, struct/union is marked + * as canonical for itself and is added into btf_dedup->dedup_table hash map + * for further look ups. + */ +static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) +{ + struct btf_dedup_node *cand_node; + struct btf_type *t; + /* if we don't find equivalent type, then we are canonical */ + __u32 new_id = type_id; + __u16 kind; + __u32 h; + + /* already deduped or is in process of deduping (loop detected) */ + if (d->map[type_id] <= BTF_MAX_TYPE) + return 0; + + t = d->btf->types[type_id]; + kind = BTF_INFO_KIND(t->info); + + if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION) + return 0; + + h = btf_hash_struct(t); + for_each_hash_node(d->dedup_table, h, cand_node) { + int eq; + + btf_dedup_clear_hypot_map(d); + eq = btf_dedup_is_equiv(d, type_id, cand_node->type_id); + if (eq < 0) + return eq; + if (!eq) + continue; + new_id = cand_node->type_id; + btf_dedup_merge_hypot_map(d); + break; + } + + d->map[type_id] = new_id; + if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) + return -ENOMEM; + + return 0; +} + +static int btf_dedup_struct_types(struct btf_dedup *d) +{ + int i, err; + + for (i = 1; i <= d->btf->nr_types; i++) { + err = btf_dedup_struct_type(d, i); + if (err) + return err; + } + return 0; +} + +/* + * Deduplicate reference type. + * + * Once all primitive and struct/union types got deduplicated, we can easily + * deduplicate all other (reference) BTF types. This is done in two steps: + * + * 1. Resolve all referenced type IDs into their canonical type IDs. This + * resolution can be done either immediately for primitive or struct/union types + * (because they were deduped in previous two phases) or recursively for + * reference types. Recursion will always terminate at either primitive or + * struct/union type, at which point we can "unwind" chain of reference types + * one by one. There is no danger of encountering cycles because in C type + * system the only way to form type cycle is through struct/union, so any chain + * of reference types, even those taking part in a type cycle, will inevitably + * reach struct/union at some point. + * + * 2. Once all referenced type IDs are resolved into canonical ones, BTF type + * becomes "stable", in the sense that no further deduplication will cause + * any changes to it. With that, it's now possible to calculate type's signature + * hash (this time taking into account referenced type IDs) and loop over all + * potential canonical representatives. If no match was found, current type + * will become canonical representative of itself and will be added into + * btf_dedup->dedup_table as another possible canonical representative. + */ +static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) +{ + struct btf_dedup_node *cand_node; + struct btf_type *t, *cand; + /* if we don't find equivalent type, then we are representative type */ + __u32 new_id = type_id; + __u32 h, ref_type_id; + + if (d->map[type_id] == BTF_IN_PROGRESS_ID) + return -ELOOP; + if (d->map[type_id] <= BTF_MAX_TYPE) + return resolve_type_id(d, type_id); + + t = d->btf->types[type_id]; + d->map[type_id] = BTF_IN_PROGRESS_ID; + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + ref_type_id = btf_dedup_ref_type(d, t->type); + if (ref_type_id < 0) + return ref_type_id; + t->type = ref_type_id; + + h = btf_hash_common(t); + for_each_hash_node(d->dedup_table, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_common(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + + case BTF_KIND_ARRAY: { + struct btf_array *info = (struct btf_array *)(t + 1); + + ref_type_id = btf_dedup_ref_type(d, info->type); + if (ref_type_id < 0) + return ref_type_id; + info->type = ref_type_id; + + ref_type_id = btf_dedup_ref_type(d, info->index_type); + if (ref_type_id < 0) + return ref_type_id; + info->index_type = ref_type_id; + + h = btf_hash_array(t); + for_each_hash_node(d->dedup_table, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_array(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + } + + case BTF_KIND_FUNC_PROTO: { + struct btf_param *param; + __u16 vlen; + int i; + + ref_type_id = btf_dedup_ref_type(d, t->type); + if (ref_type_id < 0) + return ref_type_id; + t->type = ref_type_id; + + vlen = BTF_INFO_VLEN(t->info); + param = (struct btf_param *)(t + 1); + for (i = 0; i < vlen; i++) { + ref_type_id = btf_dedup_ref_type(d, param->type); + if (ref_type_id < 0) + return ref_type_id; + param->type = ref_type_id; + param++; + } + + h = btf_hash_fnproto(t); + for_each_hash_node(d->dedup_table, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_fnproto(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + } + + default: + return -EINVAL; + } + + d->map[type_id] = new_id; + if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) + return -ENOMEM; + + return new_id; +} + +static int btf_dedup_ref_types(struct btf_dedup *d) +{ + int i, err; + + for (i = 1; i <= d->btf->nr_types; i++) { + err = btf_dedup_ref_type(d, i); + if (err < 0) + return err; + } + btf_dedup_table_free(d); + return 0; +} + +/* + * Compact types. + * + * After we established for each type its corresponding canonical representative + * type, we now can eliminate types that are not canonical and leave only + * canonical ones layed out sequentially in memory by copying them over + * duplicates. During compaction btf_dedup->hypot_map array is reused to store + * a map from original type ID to a new compacted type ID, which will be used + * during next phase to "fix up" type IDs, referenced from struct/union and + * reference types. + */ +static int btf_dedup_compact_types(struct btf_dedup *d) +{ + struct btf_type **new_types; + __u32 next_type_id = 1; + char *types_start, *p; + int i, len; + + /* we are going to reuse hypot_map to store compaction remapping */ + d->hypot_map[0] = 0; + for (i = 1; i <= d->btf->nr_types; i++) + d->hypot_map[i] = BTF_UNPROCESSED_ID; + + types_start = d->btf->nohdr_data + d->btf->hdr->type_off; + p = types_start; + + for (i = 1; i <= d->btf->nr_types; i++) { + if (d->map[i] != i) + continue; + + len = btf_type_size(d->btf->types[i]); + if (len < 0) + return len; + + memmove(p, d->btf->types[i], len); + d->hypot_map[i] = next_type_id; + d->btf->types[next_type_id] = (struct btf_type *)p; + p += len; + next_type_id++; + } + + /* shrink struct btf's internal types index and update btf_header */ + d->btf->nr_types = next_type_id - 1; + d->btf->types_size = d->btf->nr_types; + d->btf->hdr->type_len = p - types_start; + new_types = realloc(d->btf->types, + (1 + d->btf->nr_types) * sizeof(struct btf_type *)); + if (!new_types) + return -ENOMEM; + d->btf->types = new_types; + + /* make sure string section follows type information without gaps */ + d->btf->hdr->str_off = p - (char *)d->btf->nohdr_data; + memmove(p, d->btf->strings, d->btf->hdr->str_len); + d->btf->strings = p; + p += d->btf->hdr->str_len; + + d->btf->data_size = p - (char *)d->btf->data; + return 0; +} + +/* + * Figure out final (deduplicated and compacted) type ID for provided original + * `type_id` by first resolving it into corresponding canonical type ID and + * then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map, + * which is populated during compaction phase. + */ +static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id) +{ + __u32 resolved_type_id, new_type_id; + + resolved_type_id = resolve_type_id(d, type_id); + new_type_id = d->hypot_map[resolved_type_id]; + if (new_type_id > BTF_MAX_TYPE) + return -EINVAL; + return new_type_id; +} + +/* + * Remap referenced type IDs into deduped type IDs. + * + * After BTF types are deduplicated and compacted, their final type IDs may + * differ from original ones. The map from original to a corresponding + * deduped type ID is stored in btf_dedup->hypot_map and is populated during + * compaction phase. During remapping phase we are rewriting all type IDs + * referenced from any BTF type (e.g., struct fields, func proto args, etc) to + * their final deduped type IDs. + */ +static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id) +{ + struct btf_type *t = d->btf->types[type_id]; + int i, r; + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_INT: + case BTF_KIND_ENUM: + break; + + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + r = btf_dedup_remap_type_id(d, t->type); + if (r < 0) + return r; + t->type = r; + break; + + case BTF_KIND_ARRAY: { + struct btf_array *arr_info = (struct btf_array *)(t + 1); + + r = btf_dedup_remap_type_id(d, arr_info->type); + if (r < 0) + return r; + arr_info->type = r; + r = btf_dedup_remap_type_id(d, arr_info->index_type); + if (r < 0) + return r; + arr_info->index_type = r; + break; + } + + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + struct btf_member *member = (struct btf_member *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + for (i = 0; i < vlen; i++) { + r = btf_dedup_remap_type_id(d, member->type); + if (r < 0) + return r; + member->type = r; + member++; + } + break; + } + + case BTF_KIND_FUNC_PROTO: { + struct btf_param *param = (struct btf_param *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + r = btf_dedup_remap_type_id(d, t->type); + if (r < 0) + return r; + t->type = r; + + for (i = 0; i < vlen; i++) { + r = btf_dedup_remap_type_id(d, param->type); + if (r < 0) + return r; + param->type = r; + param++; + } + break; + } + + default: + return -EINVAL; + } + + return 0; +} + +static int btf_dedup_remap_types(struct btf_dedup *d) +{ + int i, r; + + for (i = 1; i <= d->btf->nr_types; i++) { + r = btf_dedup_remap_type(d, i); + if (r < 0) + return r; + } + return 0; +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 258c87e9f55d..c739de7ed993 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -84,6 +84,13 @@ LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf, LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); +struct btf_dedup_opts { + bool dont_resolve_fwds; +}; + +LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 7990e857e003..7e4a8c1e1c1c 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -133,6 +133,7 @@ LIBBPF_0.0.2 { bpf_map_lookup_elem_flags; bpf_object__find_map_fd_by_name; bpf_get_link_xdp_id; + btf__dedup; btf__get_map_kv_tids; btf_ext__free; btf_ext__func_info_rec_size; From 9c651127445c2fb3dca7b000bce6de97290288ff Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 4 Feb 2019 17:29:46 -0800 Subject: [PATCH 0616/1436] selftests/btf: add initial BTF dedup tests This patch sets up a new kind of tests (BTF dedup tests) and tests few aspects of BTF dedup algorithm. More complete set of tests will come in follow up patches. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- tools/lib/bpf/btf.c | 12 + tools/lib/bpf/btf.h | 3 + tools/lib/bpf/libbpf.map | 2 + tools/testing/selftests/bpf/test_btf.c | 535 ++++++++++++++++++++++++- 4 files changed, 537 insertions(+), 15 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index e5097be16018..4949f8840bda 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -237,6 +237,11 @@ static int btf_parse_type_sec(struct btf *btf) return 0; } +__u32 btf__get_nr_types(const struct btf *btf) +{ + return btf->nr_types; +} + const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id) { if (type_id > btf->nr_types) @@ -427,6 +432,13 @@ int btf__fd(const struct btf *btf) return btf->fd; } +void btf__get_strings(const struct btf *btf, const char **strings, + __u32 *str_len) +{ + *strings = btf->strings; + *str_len = btf->hdr->str_len; +} + const char *btf__name_by_offset(const struct btf *btf, __u32 offset) { if (offset < btf->hdr->str_len) diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index c739de7ed993..25a9d2db035d 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -59,11 +59,14 @@ LIBBPF_API void btf__free(struct btf *btf); LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); +LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf); LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__fd(const struct btf *btf); +LIBBPF_API void btf__get_strings(const struct btf *btf, const char **strings, + __u32 *str_len); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, char *map_name, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 7e4a8c1e1c1c..89c1149e32ee 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -135,6 +135,8 @@ LIBBPF_0.0.2 { bpf_get_link_xdp_id; btf__dedup; btf__get_map_kv_tids; + btf__get_nr_types; + btf__get_strings; btf_ext__free; btf_ext__func_info_rec_size; btf_ext__line_info_rec_size; diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 5afab823ffbe..30c3edde7e07 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -70,12 +70,21 @@ static int __base_pr(enum libbpf_print_level level __attribute__((unused)), BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ BTF_INT_ENC(encoding, bits_offset, bits) +#define BTF_FWD_ENC(name, kind_flag) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FWD, kind_flag, 0), 0) + #define BTF_ARRAY_ENC(type, index_type, nr_elems) \ (type), (index_type), (nr_elems) #define BTF_TYPE_ARRAY_ENC(type, index_type, nr_elems) \ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), \ BTF_ARRAY_ENC(type, index_type, nr_elems) +#define BTF_STRUCT_ENC(name, nr_elems, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, nr_elems), sz) + +#define BTF_UNION_ENC(name, nr_elems, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_UNION, 0, nr_elems), sz) + #define BTF_MEMBER_ENC(name, type, bits_offset) \ (name), (type), (bits_offset) #define BTF_ENUM_ENC(name, val) (name), (val) @@ -91,6 +100,12 @@ static int __base_pr(enum libbpf_print_level level __attribute__((unused)), #define BTF_CONST_ENC(type) \ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), type) +#define BTF_VOLATILE_ENC(type) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), type) + +#define BTF_RESTRICT_ENC(type) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_RESTRICT, 0, 0), type) + #define BTF_FUNC_PROTO_ENC(ret_type, nargs) \ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, nargs), ret_type) @@ -103,6 +118,10 @@ static int __base_pr(enum libbpf_print_level level __attribute__((unused)), #define BTF_END_RAW 0xdeadbeef #define NAME_TBD 0xdeadb33f +#define NAME_NTH(N) (0xffff0000 | N) +#define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xffff0000) +#define GET_NAME_NTH_IDX(X) (X & 0x0000ffff) + #define MAX_NR_RAW_U32 1024 #define BTF_LOG_BUF_SIZE 65535 @@ -111,12 +130,14 @@ static struct args { unsigned int file_test_num; unsigned int get_info_test_num; unsigned int info_raw_test_num; + unsigned int dedup_test_num; bool raw_test; bool file_test; bool get_info_test; bool pprint_test; bool always_log; bool info_raw_test; + bool dedup_test; } args; static char btf_log_buf[BTF_LOG_BUF_SIZE]; @@ -2827,11 +2848,13 @@ static void *btf_raw_create(const struct btf_header *hdr, const char **ret_next_str) { const char *next_str = str, *end_str = str + str_sec_size; + const char **strs_idx = NULL, **tmp_strs_idx; + int strs_cap = 0, strs_cnt = 0, next_str_idx = 0; unsigned int size_needed, offset; struct btf_header *ret_hdr; - int i, type_sec_size; + int i, type_sec_size, err = 0; uint32_t *ret_types; - void *raw_btf; + void *raw_btf = NULL; type_sec_size = get_raw_sec_size(raw_types); if (CHECK(type_sec_size < 0, "Cannot get nr_raw_types")) @@ -2846,17 +2869,44 @@ static void *btf_raw_create(const struct btf_header *hdr, memcpy(raw_btf, hdr, sizeof(*hdr)); offset = sizeof(*hdr); + /* Index strings */ + while ((next_str = get_next_str(next_str, end_str))) { + if (strs_cnt == strs_cap) { + strs_cap += max(16, strs_cap / 2); + tmp_strs_idx = realloc(strs_idx, + sizeof(*strs_idx) * strs_cap); + if (CHECK(!tmp_strs_idx, + "Cannot allocate memory for strs_idx")) { + err = -1; + goto done; + } + strs_idx = tmp_strs_idx; + } + strs_idx[strs_cnt++] = next_str; + next_str += strlen(next_str); + } + /* Copy type section */ ret_types = raw_btf + offset; for (i = 0; i < type_sec_size / sizeof(raw_types[0]); i++) { if (raw_types[i] == NAME_TBD) { - next_str = get_next_str(next_str, end_str); - if (CHECK(!next_str, "Error in getting next_str")) { - free(raw_btf); - return NULL; + if (CHECK(next_str_idx == strs_cnt, + "Error in getting next_str #%d", + next_str_idx)) { + err = -1; + goto done; } - ret_types[i] = next_str - str; - next_str += strlen(next_str); + ret_types[i] = strs_idx[next_str_idx++] - str; + } else if (IS_NAME_NTH(raw_types[i])) { + int idx = GET_NAME_NTH_IDX(raw_types[i]); + + if (CHECK(idx <= 0 || idx > strs_cnt, + "Error getting string #%d, strs_cnt:%d", + idx, strs_cnt)) { + err = -1; + goto done; + } + ret_types[i] = strs_idx[idx-1] - str; } else { ret_types[i] = raw_types[i]; } @@ -2873,8 +2923,17 @@ static void *btf_raw_create(const struct btf_header *hdr, *btf_size = size_needed; if (ret_next_str) - *ret_next_str = next_str; + *ret_next_str = + next_str_idx < strs_cnt ? strs_idx[next_str_idx] : NULL; +done: + if (err) { + if (raw_btf) + free(raw_btf); + if (strs_idx) + free(strs_idx); + return NULL; + } return raw_btf; } @@ -5543,20 +5602,450 @@ static int test_info_raw(void) return err; } +struct btf_raw_data { + __u32 raw_types[MAX_NR_RAW_U32]; + const char *str_sec; + __u32 str_sec_size; +}; + +struct btf_dedup_test { + const char *descr; + struct btf_raw_data input; + struct btf_raw_data expect; + struct btf_dedup_opts opts; +}; + +const struct btf_dedup_test dedup_tests[] = { + +{ + .descr = "dedup: unused strings filtering", + .input = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 64, 8), + BTF_END_RAW, + }, + BTF_STR_SEC("\0unused\0int\0foo\0bar\0long"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 64, 8), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0long"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: strings deduplication", + .input = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 64, 8), + BTF_TYPE_INT_ENC(NAME_NTH(3), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 64, 8), + BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0long int\0int\0long int\0int"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 64, 8), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0long int"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: struct example #1", + /* + * struct s { + * struct s *next; + * const int *a; + * int b[16]; + * int c; + * } + */ + .input = { + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* int[16] */ + BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */ + /* struct s { */ + BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [3] */ + BTF_MEMBER_ENC(NAME_NTH(3), 4, 0), /* struct s *next; */ + BTF_MEMBER_ENC(NAME_NTH(4), 5, 64), /* const int *a; */ + BTF_MEMBER_ENC(NAME_NTH(5), 2, 128), /* int b[16]; */ + BTF_MEMBER_ENC(NAME_NTH(6), 1, 640), /* int c; */ + /* ptr -> [3] struct s */ + BTF_PTR_ENC(3), /* [4] */ + /* ptr -> [6] const int */ + BTF_PTR_ENC(6), /* [5] */ + /* const -> [1] int */ + BTF_CONST_ENC(1), /* [6] */ + + /* full copy of the above */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [7] */ + BTF_TYPE_ARRAY_ENC(7, 7, 16), /* [8] */ + BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [9] */ + BTF_MEMBER_ENC(NAME_NTH(3), 10, 0), + BTF_MEMBER_ENC(NAME_NTH(4), 11, 64), + BTF_MEMBER_ENC(NAME_NTH(5), 8, 128), + BTF_MEMBER_ENC(NAME_NTH(6), 7, 640), + BTF_PTR_ENC(9), /* [10] */ + BTF_PTR_ENC(12), /* [11] */ + BTF_CONST_ENC(7), /* [12] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0"), + }, + .expect = { + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* int[16] */ + BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */ + /* struct s { */ + BTF_STRUCT_ENC(NAME_NTH(6), 4, 84), /* [3] */ + BTF_MEMBER_ENC(NAME_NTH(5), 4, 0), /* struct s *next; */ + BTF_MEMBER_ENC(NAME_NTH(1), 5, 64), /* const int *a; */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 128), /* int b[16]; */ + BTF_MEMBER_ENC(NAME_NTH(3), 1, 640), /* int c; */ + /* ptr -> [3] struct s */ + BTF_PTR_ENC(3), /* [4] */ + /* ptr -> [6] const int */ + BTF_PTR_ENC(6), /* [5] */ + /* const -> [1] int */ + BTF_CONST_ENC(1), /* [6] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0a\0b\0c\0int\0next\0s"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: all possible kinds (no duplicates)", + .input = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 8), /* [1] int */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), 4), /* [2] enum */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_FWD_ENC(NAME_TBD, 1 /* union kind_flag */), /* [3] fwd */ + BTF_TYPE_ARRAY_ENC(2, 1, 7), /* [4] array */ + BTF_STRUCT_ENC(NAME_TBD, 1, 4), /* [5] struct */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_UNION_ENC(NAME_TBD, 1, 4), /* [6] union */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [7] typedef */ + BTF_PTR_ENC(0), /* [8] ptr */ + BTF_CONST_ENC(8), /* [9] const */ + BTF_VOLATILE_ENC(8), /* [10] volatile */ + BTF_RESTRICT_ENC(8), /* [11] restrict */ + BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), + BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 8), /* [1] int */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), 4), /* [2] enum */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_FWD_ENC(NAME_TBD, 1 /* union kind_flag */), /* [3] fwd */ + BTF_TYPE_ARRAY_ENC(2, 1, 7), /* [4] array */ + BTF_STRUCT_ENC(NAME_TBD, 1, 4), /* [5] struct */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_UNION_ENC(NAME_TBD, 1, 4), /* [6] union */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [7] typedef */ + BTF_PTR_ENC(0), /* [8] ptr */ + BTF_CONST_ENC(8), /* [9] const */ + BTF_VOLATILE_ENC(8), /* [10] volatile */ + BTF_RESTRICT_ENC(8), /* [11] restrict */ + BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), + BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: no int duplicates", + .input = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8), + /* different name */ + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 32, 8), + /* different encoding */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_CHAR, 0, 32, 8), + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_BOOL, 0, 32, 8), + /* different bit offset */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 8, 32, 8), + /* different bit size */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8), + /* different byte size */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0some other int"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8), + /* different name */ + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 32, 8), + /* different encoding */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_CHAR, 0, 32, 8), + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_BOOL, 0, 32, 8), + /* different bit offset */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 8, 32, 8), + /* different bit size */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8), + /* different byte size */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0some other int"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, + +}; + +static int btf_type_size(const struct btf_type *t) +{ + int base_size = sizeof(struct btf_type); + __u16 vlen = BTF_INFO_VLEN(t->info); + __u16 kind = BTF_INFO_KIND(t->info); + + switch (kind) { + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + return base_size; + case BTF_KIND_INT: + return base_size + sizeof(__u32); + case BTF_KIND_ENUM: + return base_size + vlen * sizeof(struct btf_enum); + case BTF_KIND_ARRAY: + return base_size + sizeof(struct btf_array); + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + return base_size + vlen * sizeof(struct btf_member); + case BTF_KIND_FUNC_PROTO: + return base_size + vlen * sizeof(struct btf_param); + default: + fprintf(stderr, "Unsupported BTF_KIND:%u\n", kind); + return -EINVAL; + } +} + +static void dump_btf_strings(const char *strs, __u32 len) +{ + const char *cur = strs; + int i = 0; + + while (cur < strs + len) { + fprintf(stderr, "string #%d: '%s'\n", i, cur); + cur += strlen(cur) + 1; + i++; + } +} + +static int do_test_dedup(unsigned int test_num) +{ + const struct btf_dedup_test *test = &dedup_tests[test_num - 1]; + int err = 0, i; + __u32 test_nr_types, expect_nr_types, test_str_len, expect_str_len; + void *raw_btf; + unsigned int raw_btf_size; + struct btf *test_btf = NULL, *expect_btf = NULL; + const char *ret_test_next_str, *ret_expect_next_str; + const char *test_strs, *expect_strs; + const char *test_str_cur, *test_str_end; + const char *expect_str_cur, *expect_str_end; + + fprintf(stderr, "BTF dedup test[%u] (%s):", test_num, test->descr); + + raw_btf = btf_raw_create(&hdr_tmpl, test->input.raw_types, + test->input.str_sec, test->input.str_sec_size, + &raw_btf_size, &ret_test_next_str); + if (!raw_btf) + return -1; + test_btf = btf__new((__u8 *)raw_btf, raw_btf_size); + free(raw_btf); + if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld", + PTR_ERR(test_btf))) { + err = -1; + goto done; + } + + raw_btf = btf_raw_create(&hdr_tmpl, test->expect.raw_types, + test->expect.str_sec, + test->expect.str_sec_size, + &raw_btf_size, &ret_expect_next_str); + if (!raw_btf) + return -1; + expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size); + free(raw_btf); + if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld", + PTR_ERR(expect_btf))) { + err = -1; + goto done; + } + + err = btf__dedup(test_btf, NULL, &test->opts); + if (CHECK(err, "btf_dedup failed errno:%d", err)) { + err = -1; + goto done; + } + + btf__get_strings(test_btf, &test_strs, &test_str_len); + btf__get_strings(expect_btf, &expect_strs, &expect_str_len); + if (CHECK(test_str_len != expect_str_len, + "test_str_len:%u != expect_str_len:%u", + test_str_len, expect_str_len)) { + fprintf(stderr, "\ntest strings:\n"); + dump_btf_strings(test_strs, test_str_len); + fprintf(stderr, "\nexpected strings:\n"); + dump_btf_strings(expect_strs, expect_str_len); + err = -1; + goto done; + } + + test_str_cur = test_strs; + test_str_end = test_strs + test_str_len; + expect_str_cur = expect_strs; + expect_str_end = expect_strs + expect_str_len; + while (test_str_cur < test_str_end && expect_str_cur < expect_str_end) { + size_t test_len, expect_len; + + test_len = strlen(test_str_cur); + expect_len = strlen(expect_str_cur); + if (CHECK(test_len != expect_len, + "test_len:%zu != expect_len:%zu " + "(test_str:%s, expect_str:%s)", + test_len, expect_len, test_str_cur, expect_str_cur)) { + err = -1; + goto done; + } + if (CHECK(strcmp(test_str_cur, expect_str_cur), + "test_str:%s != expect_str:%s", + test_str_cur, expect_str_cur)) { + err = -1; + goto done; + } + test_str_cur += test_len + 1; + expect_str_cur += expect_len + 1; + } + if (CHECK(test_str_cur != test_str_end, + "test_str_cur:%p != test_str_end:%p", + test_str_cur, test_str_end)) { + err = -1; + goto done; + } + + test_nr_types = btf__get_nr_types(test_btf); + expect_nr_types = btf__get_nr_types(expect_btf); + if (CHECK(test_nr_types != expect_nr_types, + "test_nr_types:%u != expect_nr_types:%u", + test_nr_types, expect_nr_types)) { + err = -1; + goto done; + } + + for (i = 1; i <= test_nr_types; i++) { + const struct btf_type *test_type, *expect_type; + int test_size, expect_size; + + test_type = btf__type_by_id(test_btf, i); + expect_type = btf__type_by_id(expect_btf, i); + test_size = btf_type_size(test_type); + expect_size = btf_type_size(expect_type); + + if (CHECK(test_size != expect_size, + "type #%d: test_size:%d != expect_size:%u", + i, test_size, expect_size)) { + err = -1; + goto done; + } + if (CHECK(memcmp((void *)test_type, + (void *)expect_type, + test_size), + "type #%d: contents differ", i)) { + err = -1; + goto done; + } + } + +done: + if (!err) + fprintf(stderr, "OK"); + if (!IS_ERR(test_btf)) + btf__free(test_btf); + if (!IS_ERR(expect_btf)) + btf__free(expect_btf); + + return err; +} + +static int test_dedup(void) +{ + unsigned int i; + int err = 0; + + if (args.dedup_test_num) + return count_result(do_test_dedup(args.dedup_test_num)); + + for (i = 1; i <= ARRAY_SIZE(dedup_tests); i++) + err |= count_result(do_test_dedup(i)); + + return err; +} + static void usage(const char *cmd) { fprintf(stderr, "Usage: %s [-l] [[-r btf_raw_test_num (1 - %zu)] |\n" "\t[-g btf_get_info_test_num (1 - %zu)] |\n" "\t[-f btf_file_test_num (1 - %zu)] |\n" "\t[-k btf_prog_info_raw_test_num (1 - %zu)] |\n" - "\t[-p (pretty print test)]]\n", + "\t[-p (pretty print test)] |\n" + "\t[-d btf_dedup_test_num (1 - %zu)]]\n", cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests), - ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests)); + ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests), + ARRAY_SIZE(dedup_tests)); } static int parse_args(int argc, char **argv) { - const char *optstr = "lpk:f:r:g:"; + const char *optstr = "hlpk:f:r:g:d:"; int opt; while ((opt = getopt(argc, argv, optstr)) != -1) { @@ -5583,12 +6072,16 @@ static int parse_args(int argc, char **argv) args.info_raw_test_num = atoi(optarg); args.info_raw_test = true; break; + case 'd': + args.dedup_test_num = atoi(optarg); + args.dedup_test = true; + break; case 'h': usage(argv[0]); exit(0); default: - usage(argv[0]); - return -1; + usage(argv[0]); + return -1; } } @@ -5624,6 +6117,14 @@ static int parse_args(int argc, char **argv) return -1; } + if (args.dedup_test_num && + (args.dedup_test_num < 1 || + args.dedup_test_num > ARRAY_SIZE(dedup_tests))) { + fprintf(stderr, "BTF dedup test number must be [1 - %zu]\n", + ARRAY_SIZE(dedup_tests)); + return -1; + } + return 0; } @@ -5659,14 +6160,18 @@ int main(int argc, char **argv) if (args.info_raw_test) err |= test_info_raw(); + if (args.dedup_test) + err |= test_dedup(); + if (args.raw_test || args.get_info_test || args.file_test || - args.pprint_test || args.info_raw_test) + args.pprint_test || args.info_raw_test || args.dedup_test) goto done; err |= test_raw(); err |= test_get_info(); err |= test_file(); err |= test_info_raw(); + err |= test_dedup(); done: print_summary(); From 2353ecc6f91fd15b893fa01bf85a1c7a823ee4f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 5 Feb 2019 13:41:22 +0100 Subject: [PATCH 0617/1436] bpf, riscv: add BPF JIT for RV64G MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds a BPF JIT for RV64G. The JIT is a two-pass JIT, and has a dynamic prolog/epilogue (similar to the MIPS64 BPF JIT) instead of static ones (e.g. x86_64). At the moment the RISC-V Linux port does not support CONFIG_HAVE_KPROBES, which means that CONFIG_BPF_EVENTS is not supported. Thus, no tests involving BPF_PROG_TYPE_TRACEPOINT, BPF_PROG_TYPE_PERF_EVENT, BPF_PROG_TYPE_KPROBE and BPF_PROG_TYPE_RAW_TRACEPOINT passes. The implementation does not support "far branching" (>4KiB). Test results: # modprobe test_bpf test_bpf: Summary: 378 PASSED, 0 FAILED, [366/366 JIT'ed] # echo 1 > /proc/sys/kernel/unprivileged_bpf_disabled # ./test_verifier ... Summary: 761 PASSED, 507 SKIPPED, 2 FAILED Note that "test_verifier" was run with one build with CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y and one without, otherwise many of the the tests that require unaligned access were skipped. CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y: # echo 1 > /proc/sys/kernel/unprivileged_bpf_disabled # ./test_verifier | grep -c 'NOTE.*unknown align' 0 No CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS: # echo 1 > /proc/sys/kernel/unprivileged_bpf_disabled # ./test_verifier | grep -c 'NOTE.*unknown align' 59 The two failing test_verifier tests are: "ld_abs: vlan + abs, test 1" "ld_abs: jump around ld_abs" This is due to that "far branching" involved in those tests. All tests where done on QEMU (QEMU emulator version 3.1.50 (v3.1.0-688-g8ae951fbc106)). Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- arch/riscv/Kconfig | 1 + arch/riscv/Makefile | 2 +- arch/riscv/net/Makefile | 1 + arch/riscv/net/bpf_jit_comp.c | 1602 +++++++++++++++++++++++++++++++++ 4 files changed, 1605 insertions(+), 1 deletion(-) create mode 100644 arch/riscv/net/Makefile create mode 100644 arch/riscv/net/bpf_jit_comp.c diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index feeeaa60697c..e64c657060bb 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -49,6 +49,7 @@ config RISCV select RISCV_TIMER select GENERIC_IRQ_MULTI_HANDLER select ARCH_HAS_PTE_SPECIAL + select HAVE_EBPF_JIT if 64BIT config MMU def_bool y diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 4b594f2e4f7e..c6342e638ef7 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -77,7 +77,7 @@ KBUILD_IMAGE := $(boot)/Image.gz head-y := arch/riscv/kernel/head.o -core-y += arch/riscv/kernel/ arch/riscv/mm/ +core-y += arch/riscv/kernel/ arch/riscv/mm/ arch/riscv/net/ libs-y += arch/riscv/lib/ diff --git a/arch/riscv/net/Makefile b/arch/riscv/net/Makefile new file mode 100644 index 000000000000..a132220cc582 --- /dev/null +++ b/arch/riscv/net/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp.c new file mode 100644 index 000000000000..80b12aa5e10d --- /dev/null +++ b/arch/riscv/net/bpf_jit_comp.c @@ -0,0 +1,1602 @@ +// SPDX-License-Identifier: GPL-2.0 +/* BPF JIT compiler for RV64G + * + * Copyright(c) 2019 Björn Töpel + * + */ + +#include +#include +#include + +enum { + RV_REG_ZERO = 0, /* The constant value 0 */ + RV_REG_RA = 1, /* Return address */ + RV_REG_SP = 2, /* Stack pointer */ + RV_REG_GP = 3, /* Global pointer */ + RV_REG_TP = 4, /* Thread pointer */ + RV_REG_T0 = 5, /* Temporaries */ + RV_REG_T1 = 6, + RV_REG_T2 = 7, + RV_REG_FP = 8, + RV_REG_S1 = 9, /* Saved registers */ + RV_REG_A0 = 10, /* Function argument/return values */ + RV_REG_A1 = 11, /* Function arguments */ + RV_REG_A2 = 12, + RV_REG_A3 = 13, + RV_REG_A4 = 14, + RV_REG_A5 = 15, + RV_REG_A6 = 16, + RV_REG_A7 = 17, + RV_REG_S2 = 18, /* Saved registers */ + RV_REG_S3 = 19, + RV_REG_S4 = 20, + RV_REG_S5 = 21, + RV_REG_S6 = 22, + RV_REG_S7 = 23, + RV_REG_S8 = 24, + RV_REG_S9 = 25, + RV_REG_S10 = 26, + RV_REG_S11 = 27, + RV_REG_T3 = 28, /* Temporaries */ + RV_REG_T4 = 29, + RV_REG_T5 = 30, + RV_REG_T6 = 31, +}; + +#define RV_REG_TCC RV_REG_A6 +#define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if program do calls */ + +static const int regmap[] = { + [BPF_REG_0] = RV_REG_A5, + [BPF_REG_1] = RV_REG_A0, + [BPF_REG_2] = RV_REG_A1, + [BPF_REG_3] = RV_REG_A2, + [BPF_REG_4] = RV_REG_A3, + [BPF_REG_5] = RV_REG_A4, + [BPF_REG_6] = RV_REG_S1, + [BPF_REG_7] = RV_REG_S2, + [BPF_REG_8] = RV_REG_S3, + [BPF_REG_9] = RV_REG_S4, + [BPF_REG_FP] = RV_REG_S5, + [BPF_REG_AX] = RV_REG_T0, +}; + +enum { + RV_CTX_F_SEEN_TAIL_CALL = 0, + RV_CTX_F_SEEN_CALL = RV_REG_RA, + RV_CTX_F_SEEN_S1 = RV_REG_S1, + RV_CTX_F_SEEN_S2 = RV_REG_S2, + RV_CTX_F_SEEN_S3 = RV_REG_S3, + RV_CTX_F_SEEN_S4 = RV_REG_S4, + RV_CTX_F_SEEN_S5 = RV_REG_S5, + RV_CTX_F_SEEN_S6 = RV_REG_S6, +}; + +struct rv_jit_context { + struct bpf_prog *prog; + u32 *insns; /* RV insns */ + int ninsns; + int epilogue_offset; + int *offset; /* BPF to RV */ + unsigned long flags; + int stack_size; +}; + +struct rv_jit_data { + struct bpf_binary_header *header; + u8 *image; + struct rv_jit_context ctx; +}; + +static u8 bpf_to_rv_reg(int bpf_reg, struct rv_jit_context *ctx) +{ + u8 reg = regmap[bpf_reg]; + + switch (reg) { + case RV_CTX_F_SEEN_S1: + case RV_CTX_F_SEEN_S2: + case RV_CTX_F_SEEN_S3: + case RV_CTX_F_SEEN_S4: + case RV_CTX_F_SEEN_S5: + case RV_CTX_F_SEEN_S6: + __set_bit(reg, &ctx->flags); + } + return reg; +}; + +static bool seen_reg(int reg, struct rv_jit_context *ctx) +{ + switch (reg) { + case RV_CTX_F_SEEN_CALL: + case RV_CTX_F_SEEN_S1: + case RV_CTX_F_SEEN_S2: + case RV_CTX_F_SEEN_S3: + case RV_CTX_F_SEEN_S4: + case RV_CTX_F_SEEN_S5: + case RV_CTX_F_SEEN_S6: + return test_bit(reg, &ctx->flags); + } + return false; +} + +static void mark_call(struct rv_jit_context *ctx) +{ + __set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags); +} + +static bool seen_call(struct rv_jit_context *ctx) +{ + return test_bit(RV_CTX_F_SEEN_CALL, &ctx->flags); +} + +static void mark_tail_call(struct rv_jit_context *ctx) +{ + __set_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags); +} + +static bool seen_tail_call(struct rv_jit_context *ctx) +{ + return test_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags); +} + +static u8 rv_tail_call_reg(struct rv_jit_context *ctx) +{ + mark_tail_call(ctx); + + if (seen_call(ctx)) { + __set_bit(RV_CTX_F_SEEN_S6, &ctx->flags); + return RV_REG_S6; + } + return RV_REG_A6; +} + +static void emit(const u32 insn, struct rv_jit_context *ctx) +{ + if (ctx->insns) + ctx->insns[ctx->ninsns] = insn; + + ctx->ninsns++; +} + +static u32 rv_r_insn(u8 funct7, u8 rs2, u8 rs1, u8 funct3, u8 rd, u8 opcode) +{ + return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | + (rd << 7) | opcode; +} + +static u32 rv_i_insn(u16 imm11_0, u8 rs1, u8 funct3, u8 rd, u8 opcode) +{ + return (imm11_0 << 20) | (rs1 << 15) | (funct3 << 12) | (rd << 7) | + opcode; +} + +static u32 rv_s_insn(u16 imm11_0, u8 rs2, u8 rs1, u8 funct3, u8 opcode) +{ + u8 imm11_5 = imm11_0 >> 5, imm4_0 = imm11_0 & 0x1f; + + return (imm11_5 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | + (imm4_0 << 7) | opcode; +} + +static u32 rv_sb_insn(u16 imm12_1, u8 rs2, u8 rs1, u8 funct3, u8 opcode) +{ + u8 imm12 = ((imm12_1 & 0x800) >> 5) | ((imm12_1 & 0x3f0) >> 4); + u8 imm4_1 = ((imm12_1 & 0xf) << 1) | ((imm12_1 & 0x400) >> 10); + + return (imm12 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | + (imm4_1 << 7) | opcode; +} + +static u32 rv_u_insn(u32 imm31_12, u8 rd, u8 opcode) +{ + return (imm31_12 << 12) | (rd << 7) | opcode; +} + +static u32 rv_uj_insn(u32 imm20_1, u8 rd, u8 opcode) +{ + u32 imm; + + imm = (imm20_1 & 0x80000) | ((imm20_1 & 0x3ff) << 9) | + ((imm20_1 & 0x400) >> 2) | ((imm20_1 & 0x7f800) >> 11); + + return (imm << 12) | (rd << 7) | opcode; +} + +static u32 rv_amo_insn(u8 funct5, u8 aq, u8 rl, u8 rs2, u8 rs1, + u8 funct3, u8 rd, u8 opcode) +{ + u8 funct7 = (funct5 << 2) | (aq << 1) | rl; + + return rv_r_insn(funct7, rs2, rs1, funct3, rd, opcode); +} + +static u32 rv_addiw(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 0, rd, 0x1b); +} + +static u32 rv_addi(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 0, rd, 0x13); +} + +static u32 rv_addw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 0, rd, 0x3b); +} + +static u32 rv_add(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 0, rd, 0x33); +} + +static u32 rv_subw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x3b); +} + +static u32 rv_sub(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x33); +} + +static u32 rv_and(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 7, rd, 0x33); +} + +static u32 rv_or(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 6, rd, 0x33); +} + +static u32 rv_xor(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 4, rd, 0x33); +} + +static u32 rv_mulw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 0, rd, 0x3b); +} + +static u32 rv_mul(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 0, rd, 0x33); +} + +static u32 rv_divuw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 5, rd, 0x3b); +} + +static u32 rv_divu(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 5, rd, 0x33); +} + +static u32 rv_remuw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 7, rd, 0x3b); +} + +static u32 rv_remu(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 7, rd, 0x33); +} + +static u32 rv_sllw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 1, rd, 0x3b); +} + +static u32 rv_sll(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 1, rd, 0x33); +} + +static u32 rv_srlw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 5, rd, 0x3b); +} + +static u32 rv_srl(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 5, rd, 0x33); +} + +static u32 rv_sraw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x3b); +} + +static u32 rv_sra(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x33); +} + +static u32 rv_lui(u8 rd, u32 imm31_12) +{ + return rv_u_insn(imm31_12, rd, 0x37); +} + +static u32 rv_slli(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 1, rd, 0x13); +} + +static u32 rv_andi(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 7, rd, 0x13); +} + +static u32 rv_ori(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 6, rd, 0x13); +} + +static u32 rv_xori(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 4, rd, 0x13); +} + +static u32 rv_slliw(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 1, rd, 0x1b); +} + +static u32 rv_srliw(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 5, rd, 0x1b); +} + +static u32 rv_srli(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 5, rd, 0x13); +} + +static u32 rv_sraiw(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x1b); +} + +static u32 rv_srai(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x13); +} + +static u32 rv_jal(u8 rd, u32 imm20_1) +{ + return rv_uj_insn(imm20_1, rd, 0x6f); +} + +static u32 rv_jalr(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 0, rd, 0x67); +} + +static u32 rv_beq(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_sb_insn(imm12_1, rs2, rs1, 0, 0x63); +} + +static u32 rv_bltu(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_sb_insn(imm12_1, rs2, rs1, 6, 0x63); +} + +static u32 rv_bgeu(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_sb_insn(imm12_1, rs2, rs1, 7, 0x63); +} + +static u32 rv_bne(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_sb_insn(imm12_1, rs2, rs1, 1, 0x63); +} + +static u32 rv_blt(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_sb_insn(imm12_1, rs2, rs1, 4, 0x63); +} + +static u32 rv_bge(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_sb_insn(imm12_1, rs2, rs1, 5, 0x63); +} + +static u32 rv_sb(u8 rs1, u16 imm11_0, u8 rs2) +{ + return rv_s_insn(imm11_0, rs2, rs1, 0, 0x23); +} + +static u32 rv_sh(u8 rs1, u16 imm11_0, u8 rs2) +{ + return rv_s_insn(imm11_0, rs2, rs1, 1, 0x23); +} + +static u32 rv_sw(u8 rs1, u16 imm11_0, u8 rs2) +{ + return rv_s_insn(imm11_0, rs2, rs1, 2, 0x23); +} + +static u32 rv_sd(u8 rs1, u16 imm11_0, u8 rs2) +{ + return rv_s_insn(imm11_0, rs2, rs1, 3, 0x23); +} + +static u32 rv_lbu(u8 rd, u16 imm11_0, u8 rs1) +{ + return rv_i_insn(imm11_0, rs1, 4, rd, 0x03); +} + +static u32 rv_lhu(u8 rd, u16 imm11_0, u8 rs1) +{ + return rv_i_insn(imm11_0, rs1, 5, rd, 0x03); +} + +static u32 rv_lwu(u8 rd, u16 imm11_0, u8 rs1) +{ + return rv_i_insn(imm11_0, rs1, 6, rd, 0x03); +} + +static u32 rv_ld(u8 rd, u16 imm11_0, u8 rs1) +{ + return rv_i_insn(imm11_0, rs1, 3, rd, 0x03); +} + +static u32 rv_amoadd_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) +{ + return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f); +} + +static u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) +{ + return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f); +} + +static bool is_12b_int(s64 val) +{ + return -(1 << 11) <= val && val < (1 << 11); +} + +static bool is_13b_int(s64 val) +{ + return -(1 << 12) <= val && val < (1 << 12); +} + +static bool is_21b_int(s64 val) +{ + return -(1L << 20) <= val && val < (1L << 20); +} + +static bool is_32b_int(s64 val) +{ + return -(1L << 31) <= val && val < (1L << 31); +} + +static int is_12b_check(int off, int insn) +{ + if (!is_12b_int(off)) { + pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n", + insn, (int)off); + return -1; + } + return 0; +} + +static int is_13b_check(int off, int insn) +{ + if (!is_13b_int(off)) { + pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n", + insn, (int)off); + return -1; + } + return 0; +} + +static int is_21b_check(int off, int insn) +{ + if (!is_21b_int(off)) { + pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n", + insn, (int)off); + return -1; + } + return 0; +} + +static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx) +{ + /* Note that the immediate from the add is sign-extended, + * which means that we need to compensate this by adding 2^12, + * when the 12th bit is set. A simpler way of doing this, and + * getting rid of the check, is to just add 2**11 before the + * shift. The "Loading a 32-Bit constant" example from the + * "Computer Organization and Design, RISC-V edition" book by + * Patterson/Hennessy highlights this fact. + * + * This also means that we need to process LSB to MSB. + */ + s64 upper = (val + (1 << 11)) >> 12, lower = val & 0xfff; + int shift; + + if (is_32b_int(val)) { + if (upper) + emit(rv_lui(rd, upper), ctx); + + if (!upper) { + emit(rv_addi(rd, RV_REG_ZERO, lower), ctx); + return; + } + + emit(rv_addiw(rd, rd, lower), ctx); + return; + } + + shift = __ffs(upper); + upper >>= shift; + shift += 12; + + emit_imm(rd, upper, ctx); + + emit(rv_slli(rd, rd, shift), ctx); + if (lower) + emit(rv_addi(rd, rd, lower), ctx); +} + +static int rv_offset(int bpf_to, int bpf_from, struct rv_jit_context *ctx) +{ + int from = ctx->offset[bpf_from] - 1, to = ctx->offset[bpf_to]; + + return (to - from) << 2; +} + +static int epilogue_offset(struct rv_jit_context *ctx) +{ + int to = ctx->epilogue_offset, from = ctx->ninsns; + + return (to - from) << 2; +} + +static void __build_epilogue(u8 reg, struct rv_jit_context *ctx) +{ + int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8; + + if (seen_reg(RV_REG_RA, ctx)) { + emit(rv_ld(RV_REG_RA, store_offset, RV_REG_SP), ctx); + store_offset -= 8; + } + emit(rv_ld(RV_REG_FP, store_offset, RV_REG_SP), ctx); + store_offset -= 8; + if (seen_reg(RV_REG_S1, ctx)) { + emit(rv_ld(RV_REG_S1, store_offset, RV_REG_SP), ctx); + store_offset -= 8; + } + if (seen_reg(RV_REG_S2, ctx)) { + emit(rv_ld(RV_REG_S2, store_offset, RV_REG_SP), ctx); + store_offset -= 8; + } + if (seen_reg(RV_REG_S3, ctx)) { + emit(rv_ld(RV_REG_S3, store_offset, RV_REG_SP), ctx); + store_offset -= 8; + } + if (seen_reg(RV_REG_S4, ctx)) { + emit(rv_ld(RV_REG_S4, store_offset, RV_REG_SP), ctx); + store_offset -= 8; + } + if (seen_reg(RV_REG_S5, ctx)) { + emit(rv_ld(RV_REG_S5, store_offset, RV_REG_SP), ctx); + store_offset -= 8; + } + if (seen_reg(RV_REG_S6, ctx)) { + emit(rv_ld(RV_REG_S6, store_offset, RV_REG_SP), ctx); + store_offset -= 8; + } + + emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx); + /* Set return value. */ + emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx); + emit(rv_jalr(RV_REG_ZERO, reg, 0), ctx); +} + +static void emit_zext_32(u8 reg, struct rv_jit_context *ctx) +{ + emit(rv_slli(reg, reg, 32), ctx); + emit(rv_srli(reg, reg, 32), ctx); +} + +static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) +{ + int tc_ninsn, off, start_insn = ctx->ninsns; + u8 tcc = rv_tail_call_reg(ctx); + + /* a0: &ctx + * a1: &array + * a2: index + * + * if (index >= array->map.max_entries) + * goto out; + */ + tc_ninsn = insn ? ctx->offset[insn] - ctx->offset[insn - 1] : + ctx->offset[0]; + emit_zext_32(RV_REG_A2, ctx); + + off = offsetof(struct bpf_array, map.max_entries); + if (is_12b_check(off, insn)) + return -1; + emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx); + off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; + if (is_13b_check(off, insn)) + return -1; + emit(rv_bgeu(RV_REG_A2, RV_REG_T1, off >> 1), ctx); + + /* if (--TCC < 0) + * goto out; + */ + emit(rv_addi(RV_REG_T1, tcc, -1), ctx); + off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; + if (is_13b_check(off, insn)) + return -1; + emit(rv_blt(RV_REG_T1, RV_REG_ZERO, off >> 1), ctx); + + /* prog = array->ptrs[index]; + * if (!prog) + * goto out; + */ + emit(rv_slli(RV_REG_T2, RV_REG_A2, 3), ctx); + emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_A1), ctx); + off = offsetof(struct bpf_array, ptrs); + if (is_12b_check(off, insn)) + return -1; + emit(rv_ld(RV_REG_T2, off, RV_REG_T2), ctx); + off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; + if (is_13b_check(off, insn)) + return -1; + emit(rv_beq(RV_REG_T2, RV_REG_ZERO, off >> 1), ctx); + + /* goto *(prog->bpf_func + 4); */ + off = offsetof(struct bpf_prog, bpf_func); + if (is_12b_check(off, insn)) + return -1; + emit(rv_ld(RV_REG_T3, off, RV_REG_T2), ctx); + emit(rv_addi(RV_REG_T3, RV_REG_T3, 4), ctx); + emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx); + __build_epilogue(RV_REG_T3, ctx); + return 0; +} + +static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn, + struct rv_jit_context *ctx) +{ + u8 code = insn->code; + + switch (code) { + case BPF_JMP | BPF_JA: + case BPF_JMP | BPF_CALL: + case BPF_JMP | BPF_EXIT: + case BPF_JMP | BPF_TAIL_CALL: + break; + default: + *rd = bpf_to_rv_reg(insn->dst_reg, ctx); + } + + if (code & (BPF_ALU | BPF_X) || code & (BPF_ALU64 | BPF_X) || + code & (BPF_JMP | BPF_X) || code & (BPF_JMP32 | BPF_X) || + code & BPF_LDX || code & BPF_STX) + *rs = bpf_to_rv_reg(insn->src_reg, ctx); +} + +static int rv_offset_check(int *rvoff, s16 off, int insn, + struct rv_jit_context *ctx) +{ + *rvoff = rv_offset(insn + off, insn, ctx); + return is_13b_check(*rvoff, insn); +} + +static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx) +{ + emit(rv_addi(RV_REG_T2, *rd, 0), ctx); + emit_zext_32(RV_REG_T2, ctx); + emit(rv_addi(RV_REG_T1, *rs, 0), ctx); + emit_zext_32(RV_REG_T1, ctx); + *rd = RV_REG_T2; + *rs = RV_REG_T1; +} + +static void emit_sext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx) +{ + emit(rv_addiw(RV_REG_T2, *rd, 0), ctx); + emit(rv_addiw(RV_REG_T1, *rs, 0), ctx); + *rd = RV_REG_T2; + *rs = RV_REG_T1; +} + +static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx) +{ + emit(rv_addi(RV_REG_T2, *rd, 0), ctx); + emit_zext_32(RV_REG_T2, ctx); + emit_zext_32(RV_REG_T1, ctx); + *rd = RV_REG_T2; +} + +static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx) +{ + emit(rv_addiw(RV_REG_T2, *rd, 0), ctx); + *rd = RV_REG_T2; +} + +static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, + bool extra_pass) +{ + bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 || + BPF_CLASS(insn->code) == BPF_JMP; + int rvoff, i = insn - ctx->prog->insnsi; + u8 rd = -1, rs = -1, code = insn->code; + s16 off = insn->off; + s32 imm = insn->imm; + + init_regs(&rd, &rs, insn, ctx); + + switch (code) { + /* dst = src */ + case BPF_ALU | BPF_MOV | BPF_X: + case BPF_ALU64 | BPF_MOV | BPF_X: + emit(is64 ? rv_addi(rd, rs, 0) : rv_addiw(rd, rs, 0), ctx); + if (!is64) + emit_zext_32(rd, ctx); + break; + + /* dst = dst OP src */ + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU64 | BPF_ADD | BPF_X: + emit(is64 ? rv_add(rd, rd, rs) : rv_addw(rd, rd, rs), ctx); + break; + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU64 | BPF_SUB | BPF_X: + emit(is64 ? rv_sub(rd, rd, rs) : rv_subw(rd, rd, rs), ctx); + break; + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU64 | BPF_AND | BPF_X: + emit(rv_and(rd, rd, rs), ctx); + break; + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU64 | BPF_OR | BPF_X: + emit(rv_or(rd, rd, rs), ctx); + break; + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU64 | BPF_XOR | BPF_X: + emit(rv_xor(rd, rd, rs), ctx); + break; + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU64 | BPF_MUL | BPF_X: + emit(is64 ? rv_mul(rd, rd, rs) : rv_mulw(rd, rd, rs), ctx); + if (!is64) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_DIV | BPF_X: + emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx); + if (!is64) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_MOD | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_X: + emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx); + if (!is64) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_LSH | BPF_X: + case BPF_ALU64 | BPF_LSH | BPF_X: + emit(is64 ? rv_sll(rd, rd, rs) : rv_sllw(rd, rd, rs), ctx); + break; + case BPF_ALU | BPF_RSH | BPF_X: + case BPF_ALU64 | BPF_RSH | BPF_X: + emit(is64 ? rv_srl(rd, rd, rs) : rv_srlw(rd, rd, rs), ctx); + break; + case BPF_ALU | BPF_ARSH | BPF_X: + case BPF_ALU64 | BPF_ARSH | BPF_X: + emit(is64 ? rv_sra(rd, rd, rs) : rv_sraw(rd, rd, rs), ctx); + break; + + /* dst = -dst */ + case BPF_ALU | BPF_NEG: + case BPF_ALU64 | BPF_NEG: + emit(is64 ? rv_sub(rd, RV_REG_ZERO, rd) : + rv_subw(rd, RV_REG_ZERO, rd), ctx); + break; + + /* dst = BSWAP##imm(dst) */ + case BPF_ALU | BPF_END | BPF_FROM_LE: + { + int shift = 64 - imm; + + emit(rv_slli(rd, rd, shift), ctx); + emit(rv_srli(rd, rd, shift), ctx); + break; + } + case BPF_ALU | BPF_END | BPF_FROM_BE: + emit(rv_addi(RV_REG_T2, RV_REG_ZERO, 0), ctx); + + emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); + emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); + emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); + emit(rv_srli(rd, rd, 8), ctx); + if (imm == 16) + goto out_be; + + emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); + emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); + emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); + emit(rv_srli(rd, rd, 8), ctx); + + emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); + emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); + emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); + emit(rv_srli(rd, rd, 8), ctx); + if (imm == 32) + goto out_be; + + emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); + emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); + emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); + emit(rv_srli(rd, rd, 8), ctx); + + emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); + emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); + emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); + emit(rv_srli(rd, rd, 8), ctx); + + emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); + emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); + emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); + emit(rv_srli(rd, rd, 8), ctx); + + emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); + emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); + emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); + emit(rv_srli(rd, rd, 8), ctx); +out_be: + emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); + emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); + + emit(rv_addi(rd, RV_REG_T2, 0), ctx); + break; + + /* dst = imm */ + case BPF_ALU | BPF_MOV | BPF_K: + case BPF_ALU64 | BPF_MOV | BPF_K: + emit_imm(rd, imm, ctx); + if (!is64) + emit_zext_32(rd, ctx); + break; + + /* dst = dst OP imm */ + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU64 | BPF_ADD | BPF_K: + if (is_12b_int(imm)) { + emit(is64 ? rv_addi(rd, rd, imm) : + rv_addiw(rd, rd, imm), ctx); + } else { + emit_imm(RV_REG_T1, imm, ctx); + emit(is64 ? rv_add(rd, rd, RV_REG_T1) : + rv_addw(rd, rd, RV_REG_T1), ctx); + } + if (!is64) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU64 | BPF_SUB | BPF_K: + if (is_12b_int(-imm)) { + emit(is64 ? rv_addi(rd, rd, -imm) : + rv_addiw(rd, rd, -imm), ctx); + } else { + emit_imm(RV_REG_T1, imm, ctx); + emit(is64 ? rv_sub(rd, rd, RV_REG_T1) : + rv_subw(rd, rd, RV_REG_T1), ctx); + } + if (!is64) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU64 | BPF_AND | BPF_K: + if (is_12b_int(imm)) { + emit(rv_andi(rd, rd, imm), ctx); + } else { + emit_imm(RV_REG_T1, imm, ctx); + emit(rv_and(rd, rd, RV_REG_T1), ctx); + } + if (!is64) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_OR | BPF_K: + if (is_12b_int(imm)) { + emit(rv_ori(rd, rd, imm), ctx); + } else { + emit_imm(RV_REG_T1, imm, ctx); + emit(rv_or(rd, rd, RV_REG_T1), ctx); + } + if (!is64) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + if (is_12b_int(imm)) { + emit(rv_xori(rd, rd, imm), ctx); + } else { + emit_imm(RV_REG_T1, imm, ctx); + emit(rv_xor(rd, rd, RV_REG_T1), ctx); + } + if (!is64) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU64 | BPF_MUL | BPF_K: + emit_imm(RV_REG_T1, imm, ctx); + emit(is64 ? rv_mul(rd, rd, RV_REG_T1) : + rv_mulw(rd, rd, RV_REG_T1), ctx); + if (!is64) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_DIV | BPF_K: + emit_imm(RV_REG_T1, imm, ctx); + emit(is64 ? rv_divu(rd, rd, RV_REG_T1) : + rv_divuw(rd, rd, RV_REG_T1), ctx); + if (!is64) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_MOD | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_K: + emit_imm(RV_REG_T1, imm, ctx); + emit(is64 ? rv_remu(rd, rd, RV_REG_T1) : + rv_remuw(rd, rd, RV_REG_T1), ctx); + if (!is64) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU64 | BPF_LSH | BPF_K: + emit(is64 ? rv_slli(rd, rd, imm) : rv_slliw(rd, rd, imm), ctx); + break; + case BPF_ALU | BPF_RSH | BPF_K: + case BPF_ALU64 | BPF_RSH | BPF_K: + emit(is64 ? rv_srli(rd, rd, imm) : rv_srliw(rd, rd, imm), ctx); + break; + case BPF_ALU | BPF_ARSH | BPF_K: + case BPF_ALU64 | BPF_ARSH | BPF_K: + emit(is64 ? rv_srai(rd, rd, imm) : rv_sraiw(rd, rd, imm), ctx); + break; + + /* JUMP off */ + case BPF_JMP | BPF_JA: + rvoff = rv_offset(i + off, i, ctx); + if (!is_21b_int(rvoff)) { + pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n", + i, rvoff); + return -1; + } + + emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx); + break; + + /* IF (dst COND src) JUMP off */ + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_X: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + if (!is64) + emit_zext_32_rd_rs(&rd, &rs, ctx); + emit(rv_beq(rd, rs, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_X: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + if (!is64) + emit_zext_32_rd_rs(&rd, &rs, ctx); + emit(rv_bltu(rs, rd, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + if (!is64) + emit_zext_32_rd_rs(&rd, &rs, ctx); + emit(rv_bltu(rd, rs, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + if (!is64) + emit_zext_32_rd_rs(&rd, &rs, ctx); + emit(rv_bgeu(rd, rs, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + if (!is64) + emit_zext_32_rd_rs(&rd, &rs, ctx); + emit(rv_bgeu(rs, rd, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + if (!is64) + emit_zext_32_rd_rs(&rd, &rs, ctx); + emit(rv_bne(rd, rs, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_X: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + if (!is64) + emit_sext_32_rd_rs(&rd, &rs, ctx); + emit(rv_blt(rs, rd, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + if (!is64) + emit_sext_32_rd_rs(&rd, &rs, ctx); + emit(rv_blt(rd, rs, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + if (!is64) + emit_sext_32_rd_rs(&rd, &rs, ctx); + emit(rv_bge(rd, rs, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + if (!is64) + emit_sext_32_rd_rs(&rd, &rs, ctx); + emit(rv_bge(rs, rd, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_X: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + if (!is64) + emit_zext_32_rd_rs(&rd, &rs, ctx); + emit(rv_and(RV_REG_T1, rd, rs), ctx); + emit(rv_bne(RV_REG_T1, RV_REG_ZERO, rvoff >> 1), ctx); + break; + + /* IF (dst COND imm) JUMP off */ + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_K: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + emit_imm(RV_REG_T1, imm, ctx); + if (!is64) + emit_zext_32_rd_t1(&rd, ctx); + emit(rv_beq(rd, RV_REG_T1, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + emit_imm(RV_REG_T1, imm, ctx); + if (!is64) + emit_zext_32_rd_t1(&rd, ctx); + emit(rv_bltu(RV_REG_T1, rd, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + emit_imm(RV_REG_T1, imm, ctx); + if (!is64) + emit_zext_32_rd_t1(&rd, ctx); + emit(rv_bltu(rd, RV_REG_T1, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + emit_imm(RV_REG_T1, imm, ctx); + if (!is64) + emit_zext_32_rd_t1(&rd, ctx); + emit(rv_bgeu(rd, RV_REG_T1, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + emit_imm(RV_REG_T1, imm, ctx); + if (!is64) + emit_zext_32_rd_t1(&rd, ctx); + emit(rv_bgeu(RV_REG_T1, rd, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + emit_imm(RV_REG_T1, imm, ctx); + if (!is64) + emit_zext_32_rd_t1(&rd, ctx); + emit(rv_bne(rd, RV_REG_T1, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + emit_imm(RV_REG_T1, imm, ctx); + if (!is64) + emit_sext_32_rd(&rd, ctx); + emit(rv_blt(RV_REG_T1, rd, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + emit_imm(RV_REG_T1, imm, ctx); + if (!is64) + emit_sext_32_rd(&rd, ctx); + emit(rv_blt(rd, RV_REG_T1, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + emit_imm(RV_REG_T1, imm, ctx); + if (!is64) + emit_sext_32_rd(&rd, ctx); + emit(rv_bge(rd, RV_REG_T1, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + emit_imm(RV_REG_T1, imm, ctx); + if (!is64) + emit_sext_32_rd(&rd, ctx); + emit(rv_bge(RV_REG_T1, rd, rvoff >> 1), ctx); + break; + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + if (rv_offset_check(&rvoff, off, i, ctx)) + return -1; + emit_imm(RV_REG_T1, imm, ctx); + if (!is64) + emit_zext_32_rd_t1(&rd, ctx); + emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx); + emit(rv_bne(RV_REG_T1, RV_REG_ZERO, rvoff >> 1), ctx); + break; + + /* function call */ + case BPF_JMP | BPF_CALL: + { + bool fixed; + int i, ret; + u64 addr; + + mark_call(ctx); + ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr, + &fixed); + if (ret < 0) + return ret; + if (fixed) { + emit_imm(RV_REG_T1, addr, ctx); + } else { + i = ctx->ninsns; + emit_imm(RV_REG_T1, addr, ctx); + for (i = ctx->ninsns - i; i < 8; i++) { + /* nop */ + emit(rv_addi(RV_REG_ZERO, RV_REG_ZERO, 0), + ctx); + } + } + emit(rv_jalr(RV_REG_RA, RV_REG_T1, 0), ctx); + rd = bpf_to_rv_reg(BPF_REG_0, ctx); + emit(rv_addi(rd, RV_REG_A0, 0), ctx); + break; + } + /* tail call */ + case BPF_JMP | BPF_TAIL_CALL: + if (emit_bpf_tail_call(i, ctx)) + return -1; + break; + + /* function return */ + case BPF_JMP | BPF_EXIT: + if (i == ctx->prog->len - 1) + break; + + rvoff = epilogue_offset(ctx); + if (is_21b_check(rvoff, i)) + return -1; + emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx); + break; + + /* dst = imm64 */ + case BPF_LD | BPF_IMM | BPF_DW: + { + struct bpf_insn insn1 = insn[1]; + u64 imm64; + + imm64 = (u64)insn1.imm << 32 | (u32)imm; + emit_imm(rd, imm64, ctx); + return 1; + } + + /* LDX: dst = *(size *)(src + off) */ + case BPF_LDX | BPF_MEM | BPF_B: + if (is_12b_int(off)) { + emit(rv_lbu(rd, off, rs), ctx); + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); + emit(rv_lbu(rd, 0, RV_REG_T1), ctx); + break; + case BPF_LDX | BPF_MEM | BPF_H: + if (is_12b_int(off)) { + emit(rv_lhu(rd, off, rs), ctx); + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); + emit(rv_lhu(rd, 0, RV_REG_T1), ctx); + break; + case BPF_LDX | BPF_MEM | BPF_W: + if (is_12b_int(off)) { + emit(rv_lwu(rd, off, rs), ctx); + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); + emit(rv_lwu(rd, 0, RV_REG_T1), ctx); + break; + case BPF_LDX | BPF_MEM | BPF_DW: + if (is_12b_int(off)) { + emit(rv_ld(rd, off, rs), ctx); + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); + emit(rv_ld(rd, 0, RV_REG_T1), ctx); + break; + + /* ST: *(size *)(dst + off) = imm */ + case BPF_ST | BPF_MEM | BPF_B: + emit_imm(RV_REG_T1, imm, ctx); + if (is_12b_int(off)) { + emit(rv_sb(rd, off, RV_REG_T1), ctx); + break; + } + + emit_imm(RV_REG_T2, off, ctx); + emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx); + emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx); + break; + + case BPF_ST | BPF_MEM | BPF_H: + emit_imm(RV_REG_T1, imm, ctx); + if (is_12b_int(off)) { + emit(rv_sh(rd, off, RV_REG_T1), ctx); + break; + } + + emit_imm(RV_REG_T2, off, ctx); + emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx); + emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx); + break; + case BPF_ST | BPF_MEM | BPF_W: + emit_imm(RV_REG_T1, imm, ctx); + if (is_12b_int(off)) { + emit(rv_sw(rd, off, RV_REG_T1), ctx); + break; + } + + emit_imm(RV_REG_T2, off, ctx); + emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx); + emit(rv_sw(RV_REG_T2, 0, RV_REG_T1), ctx); + break; + case BPF_ST | BPF_MEM | BPF_DW: + emit_imm(RV_REG_T1, imm, ctx); + if (is_12b_int(off)) { + emit(rv_sd(rd, off, RV_REG_T1), ctx); + break; + } + + emit_imm(RV_REG_T2, off, ctx); + emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx); + emit(rv_sd(RV_REG_T2, 0, RV_REG_T1), ctx); + break; + + /* STX: *(size *)(dst + off) = src */ + case BPF_STX | BPF_MEM | BPF_B: + if (is_12b_int(off)) { + emit(rv_sb(rd, off, rs), ctx); + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx); + emit(rv_sb(RV_REG_T1, 0, rs), ctx); + break; + case BPF_STX | BPF_MEM | BPF_H: + if (is_12b_int(off)) { + emit(rv_sh(rd, off, rs), ctx); + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx); + emit(rv_sh(RV_REG_T1, 0, rs), ctx); + break; + case BPF_STX | BPF_MEM | BPF_W: + if (is_12b_int(off)) { + emit(rv_sw(rd, off, rs), ctx); + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx); + emit(rv_sw(RV_REG_T1, 0, rs), ctx); + break; + case BPF_STX | BPF_MEM | BPF_DW: + if (is_12b_int(off)) { + emit(rv_sd(rd, off, rs), ctx); + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx); + emit(rv_sd(RV_REG_T1, 0, rs), ctx); + break; + /* STX XADD: lock *(u32 *)(dst + off) += src */ + case BPF_STX | BPF_XADD | BPF_W: + /* STX XADD: lock *(u64 *)(dst + off) += src */ + case BPF_STX | BPF_XADD | BPF_DW: + if (off) { + if (is_12b_int(off)) { + emit(rv_addi(RV_REG_T1, rd, off), ctx); + } else { + emit_imm(RV_REG_T1, off, ctx); + emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx); + } + + rd = RV_REG_T1; + } + + emit(BPF_SIZE(code) == BPF_W ? + rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0) : + rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0), ctx); + break; + default: + pr_err("bpf-jit: unknown opcode %02x\n", code); + return -EINVAL; + } + + return 0; +} + +static void build_prologue(struct rv_jit_context *ctx) +{ + int stack_adjust = 0, store_offset, bpf_stack_adjust; + + if (seen_reg(RV_REG_RA, ctx)) + stack_adjust += 8; + stack_adjust += 8; /* RV_REG_FP */ + if (seen_reg(RV_REG_S1, ctx)) + stack_adjust += 8; + if (seen_reg(RV_REG_S2, ctx)) + stack_adjust += 8; + if (seen_reg(RV_REG_S3, ctx)) + stack_adjust += 8; + if (seen_reg(RV_REG_S4, ctx)) + stack_adjust += 8; + if (seen_reg(RV_REG_S5, ctx)) + stack_adjust += 8; + if (seen_reg(RV_REG_S6, ctx)) + stack_adjust += 8; + + stack_adjust = round_up(stack_adjust, 16); + bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); + stack_adjust += bpf_stack_adjust; + + store_offset = stack_adjust - 8; + + /* First instruction is always setting the tail-call-counter + * (TCC) register. This instruction is skipped for tail calls. + */ + emit(rv_addi(RV_REG_TCC, RV_REG_ZERO, MAX_TAIL_CALL_CNT), ctx); + + emit(rv_addi(RV_REG_SP, RV_REG_SP, -stack_adjust), ctx); + + if (seen_reg(RV_REG_RA, ctx)) { + emit(rv_sd(RV_REG_SP, store_offset, RV_REG_RA), ctx); + store_offset -= 8; + } + emit(rv_sd(RV_REG_SP, store_offset, RV_REG_FP), ctx); + store_offset -= 8; + if (seen_reg(RV_REG_S1, ctx)) { + emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S1), ctx); + store_offset -= 8; + } + if (seen_reg(RV_REG_S2, ctx)) { + emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S2), ctx); + store_offset -= 8; + } + if (seen_reg(RV_REG_S3, ctx)) { + emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S3), ctx); + store_offset -= 8; + } + if (seen_reg(RV_REG_S4, ctx)) { + emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S4), ctx); + store_offset -= 8; + } + if (seen_reg(RV_REG_S5, ctx)) { + emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S5), ctx); + store_offset -= 8; + } + if (seen_reg(RV_REG_S6, ctx)) { + emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S6), ctx); + store_offset -= 8; + } + + emit(rv_addi(RV_REG_FP, RV_REG_SP, stack_adjust), ctx); + + if (bpf_stack_adjust) + emit(rv_addi(RV_REG_S5, RV_REG_SP, bpf_stack_adjust), ctx); + + /* Program contains calls and tail calls, so RV_REG_TCC need + * to be saved across calls. + */ + if (seen_tail_call(ctx) && seen_call(ctx)) + emit(rv_addi(RV_REG_TCC_SAVED, RV_REG_TCC, 0), ctx); + + ctx->stack_size = stack_adjust; +} + +static void build_epilogue(struct rv_jit_context *ctx) +{ + __build_epilogue(RV_REG_RA, ctx); +} + +static int build_body(struct rv_jit_context *ctx, bool extra_pass) +{ + const struct bpf_prog *prog = ctx->prog; + int i; + + for (i = 0; i < prog->len; i++) { + const struct bpf_insn *insn = &prog->insnsi[i]; + int ret; + + ret = emit_insn(insn, ctx, extra_pass); + if (ret > 0) { + i++; + if (ctx->insns == NULL) + ctx->offset[i] = ctx->ninsns; + continue; + } + if (ctx->insns == NULL) + ctx->offset[i] = ctx->ninsns; + if (ret) + return ret; + } + return 0; +} + +static void bpf_fill_ill_insns(void *area, unsigned int size) +{ + memset(area, 0, size); +} + +static void bpf_flush_icache(void *start, void *end) +{ + flush_icache_range((unsigned long)start, (unsigned long)end); +} + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +{ + bool tmp_blinded = false, extra_pass = false; + struct bpf_prog *tmp, *orig_prog = prog; + struct rv_jit_data *jit_data; + struct rv_jit_context *ctx; + unsigned int image_size; + + if (!prog->jit_requested) + return orig_prog; + + tmp = bpf_jit_blind_constants(prog); + if (IS_ERR(tmp)) + return orig_prog; + if (tmp != prog) { + tmp_blinded = true; + prog = tmp; + } + + jit_data = prog->aux->jit_data; + if (!jit_data) { + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + prog = orig_prog; + goto out; + } + prog->aux->jit_data = jit_data; + } + + ctx = &jit_data->ctx; + + if (ctx->offset) { + extra_pass = true; + image_size = sizeof(u32) * ctx->ninsns; + goto skip_init_ctx; + } + + ctx->prog = prog; + ctx->offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); + if (!ctx->offset) { + prog = orig_prog; + goto out_offset; + } + + /* First pass generates the ctx->offset, but does not emit an image. */ + if (build_body(ctx, extra_pass)) { + prog = orig_prog; + goto out_offset; + } + build_prologue(ctx); + ctx->epilogue_offset = ctx->ninsns; + build_epilogue(ctx); + + /* Allocate image, now that we know the size. */ + image_size = sizeof(u32) * ctx->ninsns; + jit_data->header = bpf_jit_binary_alloc(image_size, &jit_data->image, + sizeof(u32), + bpf_fill_ill_insns); + if (!jit_data->header) { + prog = orig_prog; + goto out_offset; + } + + /* Second, real pass, that acutally emits the image. */ + ctx->insns = (u32 *)jit_data->image; +skip_init_ctx: + ctx->ninsns = 0; + + build_prologue(ctx); + if (build_body(ctx, extra_pass)) { + bpf_jit_binary_free(jit_data->header); + prog = orig_prog; + goto out_offset; + } + build_epilogue(ctx); + + if (bpf_jit_enable > 1) + bpf_jit_dump(prog->len, image_size, 2, ctx->insns); + + prog->bpf_func = (void *)ctx->insns; + prog->jited = 1; + prog->jited_len = image_size; + + bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns); + + if (!prog->is_func || extra_pass) { +out_offset: + kfree(ctx->offset); + kfree(jit_data); + prog->aux->jit_data = NULL; + } +out: + if (tmp_blinded) + bpf_jit_prog_release_other(prog, prog == orig_prog ? + tmp : orig_prog); + return prog; +} From 8a9e0aff8844ab580aad0678e9a1478882d8972a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 5 Feb 2019 13:41:23 +0100 Subject: [PATCH 0618/1436] MAINTAINERS: add RISC-V BPF JIT maintainer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Björn Töpel as RISC-V BPF JIT maintainer. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 019a2bcfbd09..b4491132b9ce 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2907,6 +2907,12 @@ L: netdev@vger.kernel.org S: Maintained F: arch/powerpc/net/ +BPF JIT for RISC-V (RV64G) +M: Björn Töpel +L: netdev@vger.kernel.org +S: Maintained +F: arch/riscv/net/ + BPF JIT for S390 M: Martin Schwidefsky M: Heiko Carstens From e8cb0167ae684ed2f73cd880c385b74ef2ae702e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 5 Feb 2019 13:41:24 +0100 Subject: [PATCH 0619/1436] bpf, doc: add RISC-V JIT to BPF documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update Documentation/networking/filter.txt and Documentation/sysctl/net.txt to mention RISC-V. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- Documentation/networking/filter.txt | 16 +++++++++------- Documentation/sysctl/net.txt | 1 + 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 01603bc2eff1..b5e060edfc38 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -464,10 +464,11 @@ breakpoints: 0 1 JIT compiler ------------ -The Linux kernel has a built-in BPF JIT compiler for x86_64, SPARC, PowerPC, -ARM, ARM64, MIPS and s390 and can be enabled through CONFIG_BPF_JIT. The JIT -compiler is transparently invoked for each attached filter from user space -or for internal kernel users if it has been previously enabled by root: +The Linux kernel has a built-in BPF JIT compiler for x86_64, SPARC, +PowerPC, ARM, ARM64, MIPS, RISC-V and s390 and can be enabled through +CONFIG_BPF_JIT. The JIT compiler is transparently invoked for each +attached filter from user space or for internal kernel users if it has +been previously enabled by root: echo 1 > /proc/sys/net/core/bpf_jit_enable @@ -603,9 +604,10 @@ got from bpf_prog_create(), and 'ctx' the given context (e.g. skb pointer). All constraints and restrictions from bpf_check_classic() apply before a conversion to the new layout is being done behind the scenes! -Currently, the classic BPF format is being used for JITing on most 32-bit -architectures, whereas x86-64, aarch64, s390x, powerpc64, sparc64, arm32 perform -JIT compilation from eBPF instruction set. +Currently, the classic BPF format is being used for JITing on most +32-bit architectures, whereas x86-64, aarch64, s390x, powerpc64, +sparc64, arm32, riscv (RV64G) perform JIT compilation from eBPF +instruction set. Some core changes of the new internal format: diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index bc0680706870..2ae91d3873bb 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -52,6 +52,7 @@ two flavors of JITs, the newer eBPF JIT currently supported on: - sparc64 - mips64 - s390x + - riscv And the older cBPF JIT supported on the following archs: - mips From e2c6f50e48849298bed694de03cceb537d95cdc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 5 Feb 2019 13:41:25 +0100 Subject: [PATCH 0620/1436] selftests/bpf: add "any alignment" annotation for some tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RISC-V does, in-general, not have "efficient unaligned access". When testing the RISC-V BPF JIT, some selftests failed in the verification due to misaligned access. Annotate these tests with the F_NEEDS_EFFICIENT_UNALIGNED_ACCESS flag. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- .../selftests/bpf/verifier/ctx_sk_msg.c | 1 + .../testing/selftests/bpf/verifier/ctx_skb.c | 1 + tools/testing/selftests/bpf/verifier/jmp32.c | 22 +++++++++++++++++++ tools/testing/selftests/bpf/verifier/jset.c | 2 ++ .../selftests/bpf/verifier/spill_fill.c | 1 + .../selftests/bpf/verifier/spin_lock.c | 2 ++ .../selftests/bpf/verifier/value_ptr_arith.c | 4 ++++ 7 files changed, 33 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c b/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c index b0195770da6a..c6c69220a569 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c +++ b/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c @@ -100,6 +100,7 @@ .errstr = "invalid bpf_context access", .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_MSG, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid read past end of SK_MSG", diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c index 881f1c7f57a1..c660deb582f1 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_skb.c +++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c @@ -687,6 +687,7 @@ }, .errstr = "invalid bpf_context access", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "check skb->hash half load not permitted, unaligned 3", diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c index ceb39ffa0e88..f0961c58581e 100644 --- a/tools/testing/selftests/bpf/verifier/jmp32.c +++ b/tools/testing/selftests/bpf/verifier/jmp32.c @@ -27,6 +27,7 @@ .data64 = { 1ULL << 63 | 1, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jset32: BPF_X", @@ -58,6 +59,7 @@ .data64 = { 1ULL << 63 | 1, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jset32: min/max deduction", @@ -93,6 +95,7 @@ .data64 = { -1, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jeq32: BPF_X", @@ -119,6 +122,7 @@ .data64 = { 1ULL << 63 | 1, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jeq32: min/max deduction", @@ -154,6 +158,7 @@ .data64 = { -1, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jne32: BPF_X", @@ -180,6 +185,7 @@ .data64 = { 1ULL << 63 | 2, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jne32: min/max deduction", @@ -218,6 +224,7 @@ .data64 = { 0, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jge32: BPF_X", @@ -244,6 +251,7 @@ .data64 = { (UINT_MAX - 1) | 2ULL << 32, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jge32: min/max deduction", @@ -284,6 +292,7 @@ .data64 = { 0, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jgt32: BPF_X", @@ -310,6 +319,7 @@ .data64 = { (UINT_MAX - 1) | 2ULL << 32, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jgt32: min/max deduction", @@ -350,6 +360,7 @@ .data64 = { INT_MAX, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jle32: BPF_X", @@ -376,6 +387,7 @@ .data64 = { UINT_MAX, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jle32: min/max deduction", @@ -416,6 +428,7 @@ .data64 = { INT_MAX - 1, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jlt32: BPF_X", @@ -442,6 +455,7 @@ .data64 = { (INT_MAX - 1) | 3ULL << 32, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jlt32: min/max deduction", @@ -482,6 +496,7 @@ .data64 = { -2, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jsge32: BPF_X", @@ -508,6 +523,7 @@ .data64 = { -2, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jsge32: min/max deduction", @@ -548,6 +564,7 @@ .data64 = { 1, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jsgt32: BPF_X", @@ -574,6 +591,7 @@ .data64 = { 0x7fffffff, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jsgt32: min/max deduction", @@ -614,6 +632,7 @@ .data64 = { 1, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jsle32: BPF_X", @@ -640,6 +659,7 @@ .data64 = { 0x7fffffff | 2ULL << 32, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jsle32: min/max deduction", @@ -680,6 +700,7 @@ .data64 = { 1, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jslt32: BPF_X", @@ -706,6 +727,7 @@ .data64 = { 0x7fffffff | 2ULL << 32, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jslt32: min/max deduction", diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c index 7e14037acfaf..8dcd4e0383d5 100644 --- a/tools/testing/selftests/bpf/verifier/jset.c +++ b/tools/testing/selftests/bpf/verifier/jset.c @@ -53,6 +53,7 @@ .data64 = { ~0ULL, } }, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jset: sign-extend", @@ -70,6 +71,7 @@ .result = ACCEPT, .retval = 2, .data = { 1, 0, 0, 0, 0, 0, 0, 1, }, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jset: known const compare", diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index d58db72fdfe8..45d43bf82f26 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -46,6 +46,7 @@ .errstr_unpriv = "attempt to corrupt spilled", .errstr = "R0 invalid mem access 'inv", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "check corrupted spill/fill, LSB", diff --git a/tools/testing/selftests/bpf/verifier/spin_lock.c b/tools/testing/selftests/bpf/verifier/spin_lock.c index d829eef372a4..781621facae4 100644 --- a/tools/testing/selftests/bpf/verifier/spin_lock.c +++ b/tools/testing/selftests/bpf/verifier/spin_lock.c @@ -83,6 +83,7 @@ .result_unpriv = REJECT, .errstr_unpriv = "", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "spin_lock: test4 direct ld/st", @@ -112,6 +113,7 @@ .result_unpriv = REJECT, .errstr_unpriv = "", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "spin_lock: test5 call within a locked region", diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index 9ab5ace83e02..4b721a77bebb 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -512,6 +512,7 @@ .fixup_map_array_48b = { 3 }, .result = ACCEPT, .retval = 0xabcdef12, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map access: unknown scalar += value_ptr, 3", @@ -537,6 +538,7 @@ .result_unpriv = REJECT, .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 0xabcdef12, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map access: unknown scalar += value_ptr, 4", @@ -559,6 +561,7 @@ .result = REJECT, .errstr = "R1 max value is outside of the array range", .errstr_unpriv = "R1 pointer arithmetic of map value goes out of range", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map access: value_ptr += unknown scalar, 1", @@ -598,6 +601,7 @@ .fixup_map_array_48b = { 3 }, .result = ACCEPT, .retval = 0xabcdef12, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map access: value_ptr += unknown scalar, 3", From 3bff2407fbd28fd55ad5b5cccd98fc0c9598f23b Mon Sep 17 00:00:00 2001 From: Udo Eberhardt Date: Tue, 5 Feb 2019 17:20:47 +0100 Subject: [PATCH 0621/1436] ALSA: usb-audio: Add support for new T+A USB DAC This patch adds the T+A VID to the generic check in order to enable native DSD support for T+A devices. This works with the new T+A USB DAC model SD3100HV and will also work with future devices which support the XMOS/Thesycon style DSD format. Signed-off-by: Udo Eberhardt Cc: Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index bb8372833fc2..7e65fe853ee3 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1567,6 +1567,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case 0x20b1: /* XMOS based devices */ case 0x152a: /* Thesycon devices */ case 0x25ce: /* Mytek devices */ + case 0x2ab6: /* T+A devices */ if (fp->dsd_raw) return SNDRV_PCM_FMTBIT_DSD_U32_BE; break; From 82eaa1fa0448da1852d7b80832e67e80a08dcc27 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Mon, 17 Dec 2018 11:40:06 +0200 Subject: [PATCH 0622/1436] net/mlx5e: FPGA, fix Innova IPsec TX offload data path performance At Innova IPsec TX offload data path a special software parser metadata is used to pass some packet attributes to the hardware, this metadata is passed using the Ethernet control segment of a WQE (a HW descriptor) header. The cited commit might nullify this header, hence the metadata is lost, this caused a significant performance drop during hw offloading operation. Fix by restoring the metadata at the Ethernet control segment in case it was nullified. Fixes: 37fdffb217a4 ("net/mlx5: WQ, fixes for fragmented WQ buffers API") Signed-off-by: Raed Salem Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 598ad7e4d5c9..0e55cd1f2e98 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -387,8 +387,14 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); if (unlikely(contig_wqebbs_room < num_wqebbs)) { +#ifdef CONFIG_MLX5_EN_IPSEC + struct mlx5_wqe_eth_seg cur_eth = wqe->eth; +#endif mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); mlx5e_sq_fetch_wqe(sq, &wqe, &pi); +#ifdef CONFIG_MLX5_EN_IPSEC + wqe->eth = cur_eth; +#endif } /* fill wqe */ From 6363651d6dd79694d586f4ae68967036dd14bc3a Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 10 Jan 2019 20:37:36 +0200 Subject: [PATCH 0623/1436] net/mlx5e: Properly set steering match levels for offloaded TC decap rules The match level computed by the driver gets to be wrong for decap rules with wildcarded inner packet match such as: tc filter add dev vxlan_sys_4789 protocol all parent ffff: prio 2 flower enc_dst_ip 192.168.0.9 enc_key_id 100 enc_dst_port 4789 action tunnel_key unset action mirred egress redirect dev eth1 The FW errs for a missing matching meta-data indicator for the outer headers (where we do have a match), and a wrong matching meta-data indicator for the inner headers (where we don't have a match). Fix that by taking into account the matching on the tunnel info and relating the match level of the encapsulated packet to the firmware inner headers indicator in case of decap. As for vxlan we mandate a match on the tunnel udp dst port, and in general we practically madndate a match on the source or dest ip for any IP tunnel, the fix was done in a minimal manner around the tunnel match parsing code. Fixes: d708f902989b ('net/mlx5e: Get the required HW match level while parsing TC flow matches') Signed-off-by: Or Gerlitz Reported-by: Slava Ovsiienko Reviewed-by: Jianbo Liu Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 4 +++- .../ethernet/mellanox/mlx5/core/en/tc_tun.h | 2 +- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 18 ++++++++++-------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + .../mellanox/mlx5/core/eswitch_offloads.c | 17 +++++++++-------- 5 files changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 046948ead152..a3750af074a4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -612,16 +612,18 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f, void *headers_c, - void *headers_v) + void *headers_v, u8 *match_level) { int tunnel_type; int err = 0; tunnel_type = mlx5e_tc_tun_get_type(filter_dev); if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { + *match_level = MLX5_MATCH_L4; err = mlx5e_tc_tun_parse_vxlan(priv, spec, f, headers_c, headers_v); } else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { + *match_level = MLX5_MATCH_L3; err = mlx5e_tc_tun_parse_gretap(priv, spec, f, headers_c, headers_v); } else { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index 706ce7bf15e7..b63f15de899d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -39,6 +39,6 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f, void *headers_c, - void *headers_v); + void *headers_v, u8 *match_level); #endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cae6c6d48984..043896e13ffa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1302,7 +1302,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, static int parse_tunnel_attr(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f, - struct net_device *filter_dev) + struct net_device *filter_dev, u8 *match_level) { struct netlink_ext_ack *extack = f->common.extack; void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@ -1317,7 +1317,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, int err = 0; err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, - headers_c, headers_v); + headers_c, headers_v, match_level); if (err) { NL_SET_ERR_MSG_MOD(extack, "failed to parse tunnel attributes"); @@ -1426,7 +1426,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f, struct net_device *filter_dev, - u8 *match_level) + u8 *match_level, u8 *tunnel_match_level) { struct netlink_ext_ack *extack = f->common.extack; void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@ -1477,7 +1477,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, switch (key->addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: case FLOW_DISSECTOR_KEY_IPV6_ADDRS: - if (parse_tunnel_attr(priv, spec, f, filter_dev)) + if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) return -EOPNOTSUPP; break; default: @@ -1826,11 +1826,11 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_core_dev *dev = priv->mdev; struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; + u8 match_level, tunnel_match_level = MLX5_MATCH_NONE; struct mlx5_eswitch_rep *rep; - u8 match_level; int err; - err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level); + err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level, &tunnel_match_level); if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { rep = rpriv->rep; @@ -1846,10 +1846,12 @@ static int parse_cls_flower(struct mlx5e_priv *priv, } } - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { flow->esw_attr->match_level = match_level; - else + flow->esw_attr->tunnel_match_level = tunnel_match_level; + } else { flow->nic_attr->match_level = match_level; + } return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 9c89eea9b2c3..748ff178a1d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -312,6 +312,7 @@ struct mlx5_esw_flow_attr { } dests[MLX5_MAX_FLOW_FWD_VPORTS]; u32 mod_hdr_id; u8 match_level; + u8 tunnel_match_level; struct mlx5_fc *counter; u32 chain; u16 prio; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 53065b6ae593..d4e6fe5b9300 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -160,14 +160,15 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_eswitch_owner_vhca_id); - if (attr->match_level == MLX5_MATCH_NONE) - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; - else - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | - MLX5_MATCH_MISC_PARAMETERS; - - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) - spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { + if (attr->tunnel_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + if (attr->match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; + } else if (attr->match_level != MLX5_MATCH_NONE) { + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_id = attr->mod_hdr_id; From 1651925d403e077e3fc86f961905e27c6810e132 Mon Sep 17 00:00:00 2001 From: Guy Shattah Date: Mon, 28 Jan 2019 13:58:07 +0000 Subject: [PATCH 0624/1436] net/mlx5e: Use the inner headers to determine tc/pedit offload limitation on decap flows In packets that need to be decaped the internal headers have to be checked, not the external ones. Fixes: bdd66ac0aeed ("net/mlx5e: Disallow TC offloading of unsupported match/action combinations") Signed-off-by: Guy Shattah Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 043896e13ffa..1c3c9fa26b55 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2181,6 +2181,7 @@ static bool csum_offload_supported(struct mlx5e_priv *priv, static bool modify_header_match_supported(struct mlx5_flow_spec *spec, struct tcf_exts *exts, + u32 actions, struct netlink_ext_ack *extack) { const struct tc_action *a; @@ -2190,7 +2191,11 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, u16 ethertype; int nkeys, i; - headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + if (actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers); + else + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); /* for non-IP we only re-write MACs, so we're okay */ @@ -2247,7 +2252,7 @@ static bool actions_match_supported(struct mlx5e_priv *priv, if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) return modify_header_match_supported(&parse_attr->spec, exts, - extack); + actions, extack); return true; } From 9c0644ee4aa8792f1e60a2b014b4710faaddafeb Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 1 Feb 2019 17:13:57 -0500 Subject: [PATCH 0625/1436] virtio: drop internal struct from UAPI There's no reason to expose struct vring_packed in UAPI - if we do we won't be able to change or drop it, and it's not part of any interface. Let's move it to virtio_ring.c Cc: Tiwei Bie Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 7 ++++++- include/uapi/linux/virtio_ring.h | 10 ---------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 27d3f057493e..a0b07c331255 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -152,7 +152,12 @@ struct vring_virtqueue { /* Available for packed ring */ struct { /* Actual memory layout for this queue. */ - struct vring_packed vring; + struct { + unsigned int num; + struct vring_packed_desc *desc; + struct vring_packed_desc_event *driver; + struct vring_packed_desc_event *device; + } vring; /* Driver ring wrap counter. */ bool avail_wrap_counter; diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index 2414f8af26b3..4c4e24c291a5 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -213,14 +213,4 @@ struct vring_packed_desc { __le16 flags; }; -struct vring_packed { - unsigned int num; - - struct vring_packed_desc *desc; - - struct vring_packed_desc_event *driver; - - struct vring_packed_desc_event *device; -}; - #endif /* _UAPI_LINUX_VIRTIO_RING_H */ From 4f2ab5e1d13d6aa77c55f4914659784efd776eb4 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 5 Feb 2019 16:29:40 +0000 Subject: [PATCH 0626/1436] ALSA: compress: Fix stop handling on compressed capture streams It is normal user behaviour to start, stop, then start a stream again without closing it. Currently this works for compressed playback streams but not capture ones. The states on a compressed capture stream go directly from OPEN to PREPARED, unlike a playback stream which moves to SETUP and waits for a write of data before moving to PREPARED. Currently however, when a stop is sent the state is set to SETUP for both types of streams. This leaves a capture stream in the situation where a new start can't be sent as that requires the state to be PREPARED and a new set_params can't be sent as that requires the state to be OPEN. The only option being to close the stream, and then reopen. Correct this issues by allowing snd_compr_drain_notify to set the state depending on the stream direction, as we already do in set_params. Fixes: 49bb6402f1aa ("ALSA: compress_core: Add support for capture streams") Signed-off-by: Charles Keepax Cc: Signed-off-by: Takashi Iwai --- include/sound/compress_driver.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/sound/compress_driver.h b/include/sound/compress_driver.h index 0cdc3999ecfa..c5188ff724d1 100644 --- a/include/sound/compress_driver.h +++ b/include/sound/compress_driver.h @@ -173,7 +173,11 @@ static inline void snd_compr_drain_notify(struct snd_compr_stream *stream) if (snd_BUG_ON(!stream)) return; - stream->runtime->state = SNDRV_PCM_STATE_SETUP; + if (stream->direction == SND_COMPRESS_PLAYBACK) + stream->runtime->state = SNDRV_PCM_STATE_SETUP; + else + stream->runtime->state = SNDRV_PCM_STATE_PREPARED; + wake_up(&stream->runtime->sleep); } From ad4635153034c20c6f6e211e2ed3fd38b658649a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 30 Jan 2019 12:55:52 +0100 Subject: [PATCH 0627/1436] mtd: Make sure mtd->erasesize is valid even if the partition is of size 0 Commit 33f45c44d68b ("mtd: Do not allow MTD devices with inconsistent erase properties") introduced a check to make sure ->erasesize and ->_erase values are consistent with the MTD_NO_ERASE flag. This patch did not take the 0 bytes partition case into account which can happen when the defined partition is outside the flash device memory range. Fix that by setting the partition erasesize to the parent erasesize. Fixes: 33f45c44d68b ("mtd: Do not allow MTD devices with inconsistent erase properties") Reported-by: Geert Uytterhoeven Cc: Cc: Geert Uytterhoeven Signed-off-by: Boris Brezillon Tested-by: Geert Uytterhoeven --- drivers/mtd/mtdpart.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index e6d9467f6be0..37f174ccbcec 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -480,6 +480,10 @@ static struct mtd_part *allocate_partition(struct mtd_info *parent, /* let's register it anyway to preserve ordering */ slave->offset = 0; slave->mtd.size = 0; + + /* Initialize ->erasesize to make add_mtd_device() happy. */ + slave->mtd.erasesize = parent->erasesize; + printk(KERN_ERR"mtd: partition \"%s\" is out of reach -- disabled\n", part->name); goto out_register; From 0a5f49cbf9d6ad3721c16f8a6d823363ea7a160f Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 30 Jan 2019 15:21:16 -0500 Subject: [PATCH 0628/1436] drm/amdgpu: use spin_lock_irqsave to protect vm_manager.pasid_idr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amdgpu_vm_get_task_info is called from interrupt handler and sched timeout workqueue, we should use irq version spin_lock to avoid deadlock. Signed-off-by: Philip Yang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d2ea5ce2cefb..7c108e687683 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -3363,14 +3363,15 @@ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, struct amdgpu_task_info *task_info) { struct amdgpu_vm *vm; + unsigned long flags; - spin_lock(&adev->vm_manager.pasid_lock); + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); vm = idr_find(&adev->vm_manager.pasid_idr, pasid); if (vm) *task_info = vm->task_info; - spin_unlock(&adev->vm_manager.pasid_lock); + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); } /** From 6d3d8065bb14dcd3d36b63f81fff9bc7f4388bda Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 21 Jan 2019 16:52:15 -0700 Subject: [PATCH 0629/1436] drm/amdkfd: Fix if preprocessor statement above kfd_fill_iolink_info_for_cpu Clang warns: drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_crat.c:866:5: warning: 'CONFIG_X86_64' is not defined, evaluates to 0 [-Wundef] ^ 1 warning generated. Fixes: d1c234e2cd10 ("drm/amdkfd: Allow building KFD on ARM64 (v2)") Signed-off-by: Nathan Chancellor Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 5d85ff341385..2e7c44955f43 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -863,7 +863,7 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size, return 0; } -#if CONFIG_X86_64 +#ifdef CONFIG_X86_64 static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, uint32_t *num_entries, struct crat_subtype_iolink *sub_type_hdr) From 7fad8da1ae23171de0ea3cdb2c620f71eb2ab983 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 31 Jan 2019 13:58:21 -0500 Subject: [PATCH 0630/1436] drm/amd/display: Attach VRR properties for eDP connectors [Why] eDP was missing in the checks for supported VRR connectors. [How] Attach the properties for eDP connectors too. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202449 Signed-off-by: Nicholas Kazlauskas Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f4fa40c387d3..0b392bfca284 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4082,7 +4082,8 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, } if (connector_type == DRM_MODE_CONNECTOR_HDMIA || - connector_type == DRM_MODE_CONNECTOR_DisplayPort) { + connector_type == DRM_MODE_CONNECTOR_DisplayPort || + connector_type == DRM_MODE_CONNECTOR_eDP) { drm_connector_attach_vrr_capable_property( &aconnector->base); } From 3079f340caa72a3707357c28e8653120757baeb1 Mon Sep 17 00:00:00 2001 From: Amit Kucheria Date: Mon, 21 Jan 2019 15:12:22 +0530 Subject: [PATCH 0631/1436] thermal: of-thermal: Print name of device node with error Make it easier to debug devicetree definition in case of errors. Signed-off-by: Amit Kucheria Signed-off-by: Eduardo Valentin --- drivers/thermal/of-thermal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c index 4bfdb4a1e47d..2df059cc07e2 100644 --- a/drivers/thermal/of-thermal.c +++ b/drivers/thermal/of-thermal.c @@ -867,14 +867,14 @@ __init *thermal_of_build_thermal_zone(struct device_node *np) ret = of_property_read_u32(np, "polling-delay-passive", &prop); if (ret < 0) { - pr_err("missing polling-delay-passive property\n"); + pr_err("%pOFn: missing polling-delay-passive property\n", np); goto free_tz; } tz->passive_delay = prop; ret = of_property_read_u32(np, "polling-delay", &prop); if (ret < 0) { - pr_err("missing polling-delay property\n"); + pr_err("%pOFn: missing polling-delay property\n", np); goto free_tz; } tz->polling_delay = prop; From bf78f133cd39e0ed41551150909e41513958a738 Mon Sep 17 00:00:00 2001 From: Amit Kucheria Date: Mon, 21 Jan 2019 15:12:23 +0530 Subject: [PATCH 0632/1436] thermal: cpu_cooling: Clarify error message Make it clear that it is a failure if the cpufreq driver was unable to register as a cooling device. Makes it easier to find in logs and grepping for words like fail, err, warn. Signed-off-by: Amit Kucheria Acked-by: Viresh Kumar Signed-off-by: Eduardo Valentin --- drivers/thermal/cpu_cooling.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index dfd23245f778..6fff16113628 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -774,7 +774,7 @@ of_cpufreq_cooling_register(struct cpufreq_policy *policy) cdev = __cpufreq_cooling_register(np, policy, capacitance); if (IS_ERR(cdev)) { - pr_err("cpu_cooling: cpu%d is not running as cooling device: %ld\n", + pr_err("cpu_cooling: cpu%d failed to register as cooling device: %ld\n", policy->cpu, PTR_ERR(cdev)); cdev = NULL; } From 59f58708c5047289589cbf6ee95146b76cf57d1e Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 11 Dec 2018 15:59:37 +0800 Subject: [PATCH 0633/1436] e1000e: Exclude device from suspend direct complete optimization e1000e sets different WoL settings in system suspend callback and runtime suspend callback. The suspend direct complete optimization leaves e1000e in runtime suspended state with wrong WoL setting during system suspend. To fix this, we need to disable suspend direct complete optimization to let e1000e always use suspend callback to set correct WoL during system suspend. Signed-off-by: Kai-Heng Feng Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 189f231075c2..e19e53d97ac5 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -7351,6 +7351,8 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) e1000_print_device_info(adapter); + dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP); + if (pci_dev_run_wake(pdev)) pm_runtime_put_noidle(&pdev->dev); From 803cc52323e0c863d3f08740b8c156593a5ca1e9 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Tue, 11 Dec 2018 18:55:41 +0200 Subject: [PATCH 0634/1436] igc: Remove unreachable code from igc_phy.c file Address community comment. Remove the unreachable code leads to the static checker warning. PHY functionality will be added later per demand. Reported by Dan Carpenter. Signed-off-by: Sasha Neftin Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igc/igc_phy.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c index 38e43e6fc1c7..4c8f96a9a148 100644 --- a/drivers/net/ethernet/intel/igc/igc_phy.c +++ b/drivers/net/ethernet/intel/igc/igc_phy.c @@ -152,7 +152,6 @@ void igc_power_down_phy_copper(struct igc_hw *hw) s32 igc_check_downshift(struct igc_hw *hw) { struct igc_phy_info *phy = &hw->phy; - u16 phy_data, offset, mask; s32 ret_val; switch (phy->type) { @@ -161,15 +160,8 @@ s32 igc_check_downshift(struct igc_hw *hw) /* speed downshift not supported */ phy->speed_downgraded = false; ret_val = 0; - goto out; } - ret_val = phy->ops.read_reg(hw, offset, &phy_data); - - if (!ret_val) - phy->speed_downgraded = (phy_data & mask) ? true : false; - -out: return ret_val; } From facd86390be23f0a956136a63d415d90f300fb88 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 12 Dec 2018 14:44:24 -0800 Subject: [PATCH 0635/1436] docs/networking: fix formatting of Intel drivers documentation The documentation of Intel drivers is missing the heading adornment for document titles. This causes the generated html to have TOC entries from these documents to appear as top level TOC entries: * Linux* Base Driver for Intel(R) Ethernet Network Connection * Contents * Identifying Your Adapter * Command Line Parameters * AutoNeg * Duplex ... Add overline heading adornment to document titles. Signed-off-by: Mike Rapoport Signed-off-by: Jeff Kirsher --- Documentation/networking/device_drivers/intel/e100.rst | 1 + Documentation/networking/device_drivers/intel/e1000.rst | 1 + Documentation/networking/device_drivers/intel/e1000e.rst | 1 + Documentation/networking/device_drivers/intel/fm10k.rst | 1 + Documentation/networking/device_drivers/intel/i40e.rst | 1 + Documentation/networking/device_drivers/intel/iavf.rst | 1 + Documentation/networking/device_drivers/intel/ice.rst | 1 + Documentation/networking/device_drivers/intel/igb.rst | 1 + Documentation/networking/device_drivers/intel/igbvf.rst | 1 + Documentation/networking/device_drivers/intel/ixgb.rst | 1 + Documentation/networking/device_drivers/intel/ixgbe.rst | 1 + Documentation/networking/device_drivers/intel/ixgbevf.rst | 1 + 12 files changed, 12 insertions(+) diff --git a/Documentation/networking/device_drivers/intel/e100.rst b/Documentation/networking/device_drivers/intel/e100.rst index 5e2839b4ec92..2b9f4887beda 100644 --- a/Documentation/networking/device_drivers/intel/e100.rst +++ b/Documentation/networking/device_drivers/intel/e100.rst @@ -1,5 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0+ +============================================================== Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters ============================================================== diff --git a/Documentation/networking/device_drivers/intel/e1000.rst b/Documentation/networking/device_drivers/intel/e1000.rst index 6379d4d20771..956560b6e745 100644 --- a/Documentation/networking/device_drivers/intel/e1000.rst +++ b/Documentation/networking/device_drivers/intel/e1000.rst @@ -1,5 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0+ +=========================================================== Linux* Base Driver for Intel(R) Ethernet Network Connection =========================================================== diff --git a/Documentation/networking/device_drivers/intel/e1000e.rst b/Documentation/networking/device_drivers/intel/e1000e.rst index 33554e5416c5..01999f05509c 100644 --- a/Documentation/networking/device_drivers/intel/e1000e.rst +++ b/Documentation/networking/device_drivers/intel/e1000e.rst @@ -1,5 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0+ +====================================================== Linux* Driver for Intel(R) Ethernet Network Connection ====================================================== diff --git a/Documentation/networking/device_drivers/intel/fm10k.rst b/Documentation/networking/device_drivers/intel/fm10k.rst index bf5e5942f28d..ac3269e34f55 100644 --- a/Documentation/networking/device_drivers/intel/fm10k.rst +++ b/Documentation/networking/device_drivers/intel/fm10k.rst @@ -1,5 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0+ +============================================================== Linux* Base Driver for Intel(R) Ethernet Multi-host Controller ============================================================== diff --git a/Documentation/networking/device_drivers/intel/i40e.rst b/Documentation/networking/device_drivers/intel/i40e.rst index 0cc16c525d10..848fd388fa6e 100644 --- a/Documentation/networking/device_drivers/intel/i40e.rst +++ b/Documentation/networking/device_drivers/intel/i40e.rst @@ -1,5 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0+ +================================================================== Linux* Base Driver for the Intel(R) Ethernet Controller 700 Series ================================================================== diff --git a/Documentation/networking/device_drivers/intel/iavf.rst b/Documentation/networking/device_drivers/intel/iavf.rst index f8b42b64eb28..2d0c3baa1752 100644 --- a/Documentation/networking/device_drivers/intel/iavf.rst +++ b/Documentation/networking/device_drivers/intel/iavf.rst @@ -1,5 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0+ +================================================================== Linux* Base Driver for Intel(R) Ethernet Adaptive Virtual Function ================================================================== diff --git a/Documentation/networking/device_drivers/intel/ice.rst b/Documentation/networking/device_drivers/intel/ice.rst index 4d118b827bbb..c220aa2711c6 100644 --- a/Documentation/networking/device_drivers/intel/ice.rst +++ b/Documentation/networking/device_drivers/intel/ice.rst @@ -1,5 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0+ +=================================================================== Linux* Base Driver for the Intel(R) Ethernet Connection E800 Series =================================================================== diff --git a/Documentation/networking/device_drivers/intel/igb.rst b/Documentation/networking/device_drivers/intel/igb.rst index e87a4a72ea2d..fc8cfaa5dcfa 100644 --- a/Documentation/networking/device_drivers/intel/igb.rst +++ b/Documentation/networking/device_drivers/intel/igb.rst @@ -1,5 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0+ +=========================================================== Linux* Base Driver for Intel(R) Ethernet Network Connection =========================================================== diff --git a/Documentation/networking/device_drivers/intel/igbvf.rst b/Documentation/networking/device_drivers/intel/igbvf.rst index a8a9ffa4f8d3..9cddabe8108e 100644 --- a/Documentation/networking/device_drivers/intel/igbvf.rst +++ b/Documentation/networking/device_drivers/intel/igbvf.rst @@ -1,5 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0+ +============================================================ Linux* Base Virtual Function Driver for Intel(R) 1G Ethernet ============================================================ diff --git a/Documentation/networking/device_drivers/intel/ixgb.rst b/Documentation/networking/device_drivers/intel/ixgb.rst index 8bd80e27843d..945018207a92 100644 --- a/Documentation/networking/device_drivers/intel/ixgb.rst +++ b/Documentation/networking/device_drivers/intel/ixgb.rst @@ -1,5 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0+ +===================================================================== Linux Base Driver for 10 Gigabit Intel(R) Ethernet Network Connection ===================================================================== diff --git a/Documentation/networking/device_drivers/intel/ixgbe.rst b/Documentation/networking/device_drivers/intel/ixgbe.rst index 86d887a63606..c7d25483fedb 100644 --- a/Documentation/networking/device_drivers/intel/ixgbe.rst +++ b/Documentation/networking/device_drivers/intel/ixgbe.rst @@ -1,5 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0+ +============================================================================= Linux* Base Driver for the Intel(R) Ethernet 10 Gigabit PCI Express Adapters ============================================================================= diff --git a/Documentation/networking/device_drivers/intel/ixgbevf.rst b/Documentation/networking/device_drivers/intel/ixgbevf.rst index 56cde6366c2f..5d4977360157 100644 --- a/Documentation/networking/device_drivers/intel/ixgbevf.rst +++ b/Documentation/networking/device_drivers/intel/ixgbevf.rst @@ -1,5 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0+ +============================================================= Linux* Base Virtual Function Driver for Intel(R) 10G Ethernet ============================================================= From a8890c38abb2f1706672cafb4094d3726430b335 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Tue, 18 Dec 2018 11:29:54 +0200 Subject: [PATCH 0636/1436] igc: Fix code redundancy Remove redundant igc_check_for_link_base code and replace it with an igc_check_for_copper_link method. Fix duplication of IGC_ADVTXD_PAYLEN_SHIFT mask declaration. Remove obsolete IGC_SCVPC register definition. Signed-off-by: Sasha Neftin Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igc/igc_base.c | 20 ++------------------ drivers/net/ethernet/intel/igc/igc_base.h | 3 --- drivers/net/ethernet/intel/igc/igc_regs.h | 1 - 3 files changed, 2 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c index df40af759542..19ff987922d2 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.c +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -53,22 +53,6 @@ out: return ret_val; } -/** - * igc_check_for_link_base - Check for link - * @hw: pointer to the HW structure - * - * If sgmii is enabled, then use the pcs register to determine link, otherwise - * use the generic interface for determining link. - */ -static s32 igc_check_for_link_base(struct igc_hw *hw) -{ - s32 ret_val = 0; - - ret_val = igc_check_for_copper_link(hw); - - return ret_val; -} - /** * igc_reset_hw_base - Reset hardware * @hw: pointer to the HW structure @@ -265,7 +249,7 @@ static s32 igc_init_phy_params_base(struct igc_hw *hw) if (ret_val) return ret_val; - igc_check_for_link_base(hw); + igc_check_for_copper_link(hw); /* Verify phy id and set remaining function pointers */ switch (phy->id) { @@ -512,7 +496,7 @@ void igc_rx_fifo_flush_base(struct igc_hw *hw) static struct igc_mac_operations igc_mac_ops_base = { .init_hw = igc_init_hw_base, - .check_for_link = igc_check_for_link_base, + .check_for_link = igc_check_for_copper_link, .rar_set = igc_rar_set, .read_mac_addr = igc_read_mac_addr_base, .get_speed_and_duplex = igc_get_link_up_info_base, diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h index 35588fa7b8c5..a5d3f57274a8 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.h +++ b/drivers/net/ethernet/intel/igc/igc_base.h @@ -90,9 +90,6 @@ union igc_adv_rx_desc { } wb; /* writeback */ }; -/* Adv Transmit Descriptor Config Masks */ -#define IGC_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ - /* Additional Transmit Descriptor Control definitions */ #define IGC_TXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Tx Queue */ diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index a1bd3216c906..f8c835283377 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -188,7 +188,6 @@ #define IGC_HGOTCL 0x04130 /* Host Good Octets Transmit Count Low */ #define IGC_HGOTCH 0x04134 /* Host Good Octets Transmit Count High */ #define IGC_LENERRS 0x04138 /* Length Errors Count */ -#define IGC_SCVPC 0x04228 /* SerDes/SGMII Code Violation Pkt Count */ #define IGC_HRMPC 0x0A018 /* Header Redirection Missed Packet Count */ /* Management registers */ From 2242281d6998af1b3cbf216904f4a6c78530ad3b Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Wed, 2 Jan 2019 20:20:33 +0100 Subject: [PATCH 0637/1436] ixgbe: remove magic constant in ixgbe_reset_hw_82599() ixgbe_reset_hw_82599() resets the value of hw->mac.num_rar_entries to pre-defined value of 128. Let's get rid of that hardcoded literal, and use IXGBE_82599_RAR_ENTRIES instead, the same way the normal initialization path does. Signed-off-by: Jiri Kosina Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index 1e49716f52bc..109f8de5a1c2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -1048,7 +1048,7 @@ mac_reset_top: * clear the multicast table. Also reset num_rar_entries to 128, * since we modify this value when programming the SAN MAC address. */ - hw->mac.num_rar_entries = 128; + hw->mac.num_rar_entries = IXGBE_82599_RAR_ENTRIES; hw->mac.ops.init_rx_addrs(hw); /* Store the permanent SAN mac address */ From ea888b03e3d784fc6617e032a1c01a2d1a04d164 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 3 Jan 2019 16:49:02 -0800 Subject: [PATCH 0638/1436] fm10k: TRIVIAL cleanup of extra spacing in function comment The function comment for fm10k_iov_msg_msix_pf has an extra space in a sentence, which is unnecessary. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c index 8f0a99b6a537..cb4d02629b86 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c @@ -1148,7 +1148,7 @@ static void fm10k_iov_update_stats_pf(struct fm10k_hw *hw, * @results: Pointer array to message, results[0] is pointer to message * @mbx: Pointer to mailbox information structure * - * This function is a default handler for MSI-X requests from the VF. The + * This function is a default handler for MSI-X requests from the VF. The * assumption is that in this case it is acceptable to just directly * hand off the message from the VF to the underlying shared code. **/ From 979eff22c9f49b1882e417bdb750488f80302611 Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Thu, 3 Jan 2019 17:17:03 -0800 Subject: [PATCH 0639/1436] e1000e: fix a missing check for return value The change is based on the issue found by Kangjie Lu where we not checking the return value of a register read/write which could result in a NULL pointer dereference if the read/write fails. Since we are only trying to disable the far-end loopback, if the read and write of register fails, we do not want to bail out of the function. We just want to log that it failed to disable and continue on. CC: Sasha Neftin CC: Kangjie Lu Signed-off-by: Jeff Kirsher Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/e1000e/80003es2lan.c | 33 +++++++++++++------ 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c index 257bd59bc9c6..f86d55657959 100644 --- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c +++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c @@ -696,11 +696,16 @@ static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw) ret_val = e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, &kum_reg_data); - if (ret_val) - return ret_val; - kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE; - e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, - kum_reg_data); + if (!ret_val) { + kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE; + ret_val = e1000_write_kmrn_reg_80003es2lan(hw, + E1000_KMRNCTRLSTA_INBAND_PARAM, + kum_reg_data); + if (ret_val) + e_dbg("Error disabling far-end loopback\n"); + } else { + e_dbg("Error disabling far-end loopback\n"); + } ret_val = e1000e_get_auto_rd_done(hw); if (ret_val) @@ -754,11 +759,19 @@ static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw) return ret_val; /* Disable IBIST slave mode (far-end loopback) */ - e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, - &kum_reg_data); - kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE; - e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, - kum_reg_data); + ret_val = + e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, + &kum_reg_data); + if (!ret_val) { + kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE; + ret_val = e1000_write_kmrn_reg_80003es2lan(hw, + E1000_KMRNCTRLSTA_INBAND_PARAM, + kum_reg_data); + if (ret_val) + e_dbg("Error disabling far-end loopback\n"); + } else { + e_dbg("Error disabling far-end loopback\n"); + } /* Set the transmit descriptor write-back policy */ reg_data = er32(TXDCTL(0)); From 75c05a74e745ae7d663b04d75777af80ada2233c Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Feb 2019 00:02:58 +0100 Subject: [PATCH 0640/1436] net: dsa: mv88e6xxx: Fix counting of ATU violations The ATU port vector contains a bit per port of the switch. The code wrongly used it as a port number, and incremented a port counter. This resulted in the wrong interfaces counter being incremented, and potentially going off the end of the array of ports. Fix this by using the source port ID for the violation, which really is a port number. Reported-by: Chris Healy Tested-by: Chris Healy Fixes: 65f60e4582bd ("net: dsa: mv88e6xxx: Keep ATU/VTU violation statistics") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/global1_atu.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/global1_atu.c b/drivers/net/dsa/mv88e6xxx/global1_atu.c index 5200e4bdce93..ea243840ee0f 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_atu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_atu.c @@ -314,6 +314,7 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id) { struct mv88e6xxx_chip *chip = dev_id; struct mv88e6xxx_atu_entry entry; + int spid; int err; u16 val; @@ -336,6 +337,8 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id) if (err) goto out; + spid = entry.state; + if (val & MV88E6XXX_G1_ATU_OP_AGE_OUT_VIOLATION) { dev_err_ratelimited(chip->dev, "ATU age out violation for %pM\n", @@ -344,23 +347,23 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id) if (val & MV88E6XXX_G1_ATU_OP_MEMBER_VIOLATION) { dev_err_ratelimited(chip->dev, - "ATU member violation for %pM portvec %x\n", - entry.mac, entry.portvec); - chip->ports[entry.portvec].atu_member_violation++; + "ATU member violation for %pM portvec %x spid %d\n", + entry.mac, entry.portvec, spid); + chip->ports[spid].atu_member_violation++; } if (val & MV88E6XXX_G1_ATU_OP_MISS_VIOLATION) { dev_err_ratelimited(chip->dev, - "ATU miss violation for %pM portvec %x\n", - entry.mac, entry.portvec); - chip->ports[entry.portvec].atu_miss_violation++; + "ATU miss violation for %pM portvec %x spid %d\n", + entry.mac, entry.portvec, spid); + chip->ports[spid].atu_miss_violation++; } if (val & MV88E6XXX_G1_ATU_OP_FULL_VIOLATION) { dev_err_ratelimited(chip->dev, - "ATU full violation for %pM portvec %x\n", - entry.mac, entry.portvec); - chip->ports[entry.portvec].atu_full_violation++; + "ATU full violation for %pM portvec %x spid %d\n", + entry.mac, entry.portvec, spid); + chip->ports[spid].atu_full_violation++; } mutex_unlock(&chip->reg_lock); From bdcc5bc25548ef6b08e2e43937148f907c212292 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Feb 2019 15:38:44 -0800 Subject: [PATCH 0641/1436] mISDN: fix a race in dev_expire_timer() Since mISDN_close() uses dev->pending to iterate over active timers, there is a chance that one timer got removed from the ->pending list in dev_expire_timer() but that the thread has not called yet wake_up_interruptible() So mISDN_close() could miss this and free dev before completion of at least one dev_expire_timer() syzbot was able to catch this race : BUG: KASAN: use-after-free in register_lock_class+0x140c/0x1bf0 kernel/locking/lockdep.c:827 Write of size 8 at addr ffff88809fc18948 by task syz-executor1/24769 CPU: 1 PID: 24769 Comm: syz-executor1 Not tainted 5.0.0-rc5 #60 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 __asan_report_store8_noabort+0x17/0x20 mm/kasan/generic_report.c:140 register_lock_class+0x140c/0x1bf0 kernel/locking/lockdep.c:827 __lock_acquire+0x11f/0x4700 kernel/locking/lockdep.c:3224 lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3841 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x95/0xcd kernel/locking/spinlock.c:152 __wake_up_common_lock+0xc7/0x190 kernel/sched/wait.c:120 __wake_up+0xe/0x10 kernel/sched/wait.c:145 dev_expire_timer+0xe4/0x3b0 drivers/isdn/mISDN/timerdev.c:174 call_timer_fn+0x190/0x720 kernel/time/timer.c:1325 protocol 88fb is buggy, dev hsr_slave_0 protocol 88fb is buggy, dev hsr_slave_1 expire_timers kernel/time/timer.c:1362 [inline] __run_timers kernel/time/timer.c:1681 [inline] __run_timers kernel/time/timer.c:1649 [inline] run_timer_softirq+0x652/0x1700 kernel/time/timer.c:1694 __do_softirq+0x266/0x95a kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0x180/0x1d0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0x14a/0x570 arch/x86/kernel/apic/apic.c:1062 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:807 RIP: 0010:__sanitizer_cov_trace_pc+0x26/0x50 kernel/kcov.c:101 Code: 90 90 90 90 55 48 89 e5 48 8b 75 08 65 48 8b 04 25 40 ee 01 00 65 8b 15 98 12 92 7e 81 e2 00 01 1f 00 75 2b 8b 90 d8 12 00 00 <83> fa 02 75 20 48 8b 88 e0 12 00 00 8b 80 dc 12 00 00 48 8b 11 48 RSP: 0018:ffff8880589b7a60 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: ffff888087ce25c0 RBX: 0000000000000001 RCX: ffffffff818f8ca3 RDX: 0000000000000000 RSI: ffffffff818f8b48 RDI: 0000000000000001 RBP: ffff8880589b7a60 R08: ffff888087ce25c0 R09: ffffed1015d25bd0 R10: ffffed1015d25bcf R11: ffff8880ae92de7b R12: ffffea0001ae4680 R13: ffffea0001ae4688 R14: 0000000000000000 R15: ffffea0001b41648 PageIdle include/linux/page-flags.h:398 [inline] page_is_idle include/linux/page_idle.h:29 [inline] mark_page_accessed+0x618/0x1140 mm/swap.c:398 touch_buffer fs/buffer.c:59 [inline] __find_get_block+0x312/0xcc0 fs/buffer.c:1298 sb_find_get_block include/linux/buffer_head.h:338 [inline] recently_deleted fs/ext4/ialloc.c:682 [inline] find_inode_bit.isra.0+0x202/0x510 fs/ext4/ialloc.c:722 __ext4_new_inode+0x14ad/0x52c0 fs/ext4/ialloc.c:914 ext4_symlink+0x3f8/0xbe0 fs/ext4/namei.c:3096 vfs_symlink fs/namei.c:4126 [inline] vfs_symlink+0x378/0x5d0 fs/namei.c:4112 do_symlinkat+0x22b/0x290 fs/namei.c:4153 __do_sys_symlink fs/namei.c:4172 [inline] __se_sys_symlink fs/namei.c:4170 [inline] __x64_sys_symlink+0x59/0x80 fs/namei.c:4170 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457b67 Code: 0f 1f 00 b8 5c 00 00 00 0f 05 48 3d 01 f0 ff ff 0f 83 6d bb fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 58 00 00 00 0f 05 <48> 3d 01 f0 ff ff 0f 83 4d bb fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fff045ce0f8 EFLAGS: 00000202 ORIG_RAX: 0000000000000058 RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 0000000000457b67 RDX: 00007fff045ce173 RSI: 00000000004bd63f RDI: 00007fff045ce160 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000013 R10: 0000000000000075 R11: 0000000000000202 R12: 0000000000000000 R13: 0000000000000001 R14: 000000000000029b R15: 0000000000000001 Allocated by task 24763: save_stack+0x45/0xd0 mm/kasan/common.c:73 set_track mm/kasan/common.c:85 [inline] __kasan_kmalloc mm/kasan/common.c:496 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:469 kasan_kmalloc+0x9/0x10 mm/kasan/common.c:504 kmem_cache_alloc_trace+0x151/0x760 mm/slab.c:3609 kmalloc include/linux/slab.h:545 [inline] mISDN_open+0x9a/0x270 drivers/isdn/mISDN/timerdev.c:59 misc_open+0x398/0x4c0 drivers/char/misc.c:141 chrdev_open+0x247/0x6b0 fs/char_dev.c:417 do_dentry_open+0x47d/0x1130 fs/open.c:771 vfs_open+0xa0/0xd0 fs/open.c:880 do_last fs/namei.c:3418 [inline] path_openat+0x10d7/0x4690 fs/namei.c:3534 do_filp_open+0x1a1/0x280 fs/namei.c:3564 do_sys_open+0x3fe/0x5d0 fs/open.c:1063 __do_sys_openat fs/open.c:1090 [inline] __se_sys_openat fs/open.c:1084 [inline] __x64_sys_openat+0x9d/0x100 fs/open.c:1084 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 24762: save_stack+0x45/0xd0 mm/kasan/common.c:73 set_track mm/kasan/common.c:85 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:458 kasan_slab_free+0xe/0x10 mm/kasan/common.c:466 __cache_free mm/slab.c:3487 [inline] kfree+0xcf/0x230 mm/slab.c:3806 mISDN_close+0x2a1/0x390 drivers/isdn/mISDN/timerdev.c:97 __fput+0x2df/0x8d0 fs/file_table.c:278 ____fput+0x16/0x20 fs/file_table.c:309 task_work_run+0x14a/0x1c0 kernel/task_work.c:113 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_usermode_loop+0x273/0x2c0 arch/x86/entry/common.c:166 prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline] syscall_return_slowpath arch/x86/entry/common.c:268 [inline] do_syscall_64+0x52d/0x610 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff88809fc18900 which belongs to the cache kmalloc-192 of size 192 The buggy address is located 72 bytes inside of 192-byte region [ffff88809fc18900, ffff88809fc189c0) The buggy address belongs to the page: page:ffffea00027f0600 count:1 mapcount:0 mapping:ffff88812c3f0040 index:0xffff88809fc18000 flags: 0x1fffc0000000200(slab) raw: 01fffc0000000200 ffffea000269f648 ffffea00029f7408 ffff88812c3f0040 raw: ffff88809fc18000 ffff88809fc18000 000000010000000b 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88809fc18800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff88809fc18880: 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88809fc18900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88809fc18980: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff88809fc18a00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Signed-off-by: Eric Dumazet Cc: Karsten Keil Reported-by: syzbot Signed-off-by: David S. Miller --- drivers/isdn/mISDN/timerdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/mISDN/timerdev.c b/drivers/isdn/mISDN/timerdev.c index 211ed6cffd10..578978711887 100644 --- a/drivers/isdn/mISDN/timerdev.c +++ b/drivers/isdn/mISDN/timerdev.c @@ -170,8 +170,8 @@ dev_expire_timer(struct timer_list *t) spin_lock_irqsave(&timer->dev->lock, flags); if (timer->id >= 0) list_move_tail(&timer->list, &timer->dev->expired); - spin_unlock_irqrestore(&timer->dev->lock, flags); wake_up_interruptible(&timer->dev->wait); + spin_unlock_irqrestore(&timer->dev->lock, flags); } static int From 55fdbeaa2db8b271db767240fba24a60bd232528 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Mon, 7 Jan 2019 16:40:17 +0200 Subject: [PATCH 0642/1436] igc: Remove unused code Remove unused igc_adv_data_desc definition from igc_base.h file. Descriptors definition will be added per demand. Signed-off-by: Sasha Neftin Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igc/igc_base.h | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h index a5d3f57274a8..76d4991d7284 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.h +++ b/drivers/net/ethernet/intel/igc/igc_base.h @@ -36,28 +36,6 @@ union igc_adv_tx_desc { #define IGC_RAR_ENTRIES 16 -struct igc_adv_data_desc { - __le64 buffer_addr; /* Address of the descriptor's data buffer */ - union { - u32 data; - struct { - u32 datalen:16; /* Data buffer length */ - u32 rsvd:4; - u32 dtyp:4; /* Descriptor type */ - u32 dcmd:8; /* Descriptor command */ - } config; - } lower; - union { - u32 data; - struct { - u32 status:4; /* Descriptor status */ - u32 idx:4; - u32 popts:6; /* Packet Options */ - u32 paylen:18; /* Payload length */ - } options; - } upper; -}; - /* Receive Descriptor - Advanced */ union igc_adv_rx_desc { struct { From 439c71f7d2ca07cb5bc3ee0686f1e8a3de45b9cd Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Sun, 13 Jan 2019 11:22:31 +0200 Subject: [PATCH 0643/1436] igc: Remove unneeded code Remove the 'igc_get_link_up_info_base method' from igc_base.c file. Use the 'igc_get_speed_and_duplex_copper' method directly and reduce the code redundancy. Signed-off-by: Sasha Neftin Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igc/igc_base.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c index 19ff987922d2..a31ec1682090 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.c +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -333,26 +333,6 @@ static void igc_release_phy_base(struct igc_hw *hw) hw->mac.ops.release_swfw_sync(hw, mask); } -/** - * igc_get_link_up_info_base - Get link speed/duplex info - * @hw: pointer to the HW structure - * @speed: stores the current speed - * @duplex: stores the current duplex - * - * This is a wrapper function, if using the serial gigabit media independent - * interface, use PCS to retrieve the link speed and duplex information. - * Otherwise, use the generic function to get the link speed and duplex info. - */ -static s32 igc_get_link_up_info_base(struct igc_hw *hw, u16 *speed, - u16 *duplex) -{ - s32 ret_val; - - ret_val = igc_get_speed_and_duplex_copper(hw, speed, duplex); - - return ret_val; -} - /** * igc_init_hw_base - Initialize hardware * @hw: pointer to the HW structure @@ -499,7 +479,7 @@ static struct igc_mac_operations igc_mac_ops_base = { .check_for_link = igc_check_for_copper_link, .rar_set = igc_rar_set, .read_mac_addr = igc_read_mac_addr_base, - .get_speed_and_duplex = igc_get_link_up_info_base, + .get_speed_and_duplex = igc_get_speed_and_duplex_copper, }; static const struct igc_phy_operations igc_phy_ops_base = { From 0f9e980bf5ee1a97e2e401c846b2af989eb21c61 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 14 Jan 2019 16:29:30 +0300 Subject: [PATCH 0644/1436] e1000e: fix cyclic resets at link up with active tx I'm seeing series of e1000e resets (sometimes endless) at system boot if something generates tx traffic at this time. In my case this is netconsole who sends message "e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames" from e1000e itself. As result e1000_watchdog_task sees used tx buffer while carrier is off and start this reset cycle again. [ 17.794359] e1000e: eth1 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: None [ 17.794714] IPv6: ADDRCONF(NETDEV_CHANGE): eth1: link becomes ready [ 22.936455] e1000e 0000:02:00.0 eth1: changing MTU from 1500 to 9000 [ 23.033336] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames [ 26.102364] e1000e: eth1 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: None [ 27.174495] 8021q: 802.1Q VLAN Support v1.8 [ 27.174513] 8021q: adding VLAN 0 to HW filter on device eth1 [ 30.671724] cgroup: cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation [ 30.898564] netpoll: netconsole: local port 6666 [ 30.898566] netpoll: netconsole: local IPv6 address 2a02:6b8:0:80b:beae:c5ff:fe28:23f8 [ 30.898567] netpoll: netconsole: interface 'eth1' [ 30.898568] netpoll: netconsole: remote port 6666 [ 30.898568] netpoll: netconsole: remote IPv6 address 2a02:6b8:b000:605c:e61d:2dff:fe03:3790 [ 30.898569] netpoll: netconsole: remote ethernet address b0:a8:6e:f4:ff:c0 [ 30.917747] console [netcon0] enabled [ 30.917749] netconsole: network logging started [ 31.453353] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames [ 34.185730] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames [ 34.321840] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames [ 34.465822] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames [ 34.597423] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames [ 34.745417] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames [ 34.877356] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames [ 35.005441] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames [ 35.157376] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames [ 35.289362] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames [ 35.417441] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames [ 37.790342] e1000e: eth1 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: None This patch flushes tx buffers only once when carrier is off rather than at each watchdog iteration. Signed-off-by: Konstantin Khlebnikov Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index e19e53d97ac5..736fa51878f8 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5309,8 +5309,13 @@ static void e1000_watchdog_task(struct work_struct *work) /* 8000ES2LAN requires a Rx packet buffer work-around * on link down event; reset the controller to flush * the Rx packet buffer. + * + * If the link is lost the controller stops DMA, but + * if there is queued Tx work it cannot be done. So + * reset the controller to flush the Tx packet buffers. */ - if (adapter->flags & FLAG_RX_NEEDS_RESTART) + if ((adapter->flags & FLAG_RX_NEEDS_RESTART) || + e1000_desc_unused(tx_ring) + 1 < tx_ring->count) adapter->flags |= FLAG_RESTART_NOW; else pm_schedule_suspend(netdev->dev.parent, @@ -5333,14 +5338,6 @@ link_up: adapter->gotc_old = adapter->stats.gotc; spin_unlock(&adapter->stats64_lock); - /* If the link is lost the controller stops DMA, but - * if there is queued Tx work it cannot be done. So - * reset the controller to flush the Tx packet buffers. - */ - if (!netif_carrier_ok(netdev) && - (e1000_desc_unused(tx_ring) + 1 < tx_ring->count)) - adapter->flags |= FLAG_RESTART_NOW; - /* If reset is necessary, do it outside of interrupt context. */ if (adapter->flags & FLAG_RESTART_NOW) { schedule_work(&adapter->reset_task); From 109f599663b9d24f0045b1f4f576896c136942ec Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Tue, 15 Jan 2019 15:21:09 +0200 Subject: [PATCH 0645/1436] igc: Remove the 'igc_read_mac_addr_base' method Remove the redundant 'igc_read_mac_addr_base' method and use the 'igc_read_mac_addr' method directly instead. Signed-off-by: Sasha Neftin Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igc/igc_base.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c index a31ec1682090..51a667c16dea 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.c +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -371,19 +371,6 @@ static s32 igc_init_hw_base(struct igc_hw *hw) return ret_val; } -/** - * igc_read_mac_addr_base - Read device MAC address - * @hw: pointer to the HW structure - */ -static s32 igc_read_mac_addr_base(struct igc_hw *hw) -{ - s32 ret_val = 0; - - ret_val = igc_read_mac_addr(hw); - - return ret_val; -} - /** * igc_power_down_phy_copper_base - Remove link during PHY power down * @hw: pointer to the HW structure @@ -478,7 +465,7 @@ static struct igc_mac_operations igc_mac_ops_base = { .init_hw = igc_init_hw_base, .check_for_link = igc_check_for_copper_link, .rar_set = igc_rar_set, - .read_mac_addr = igc_read_mac_addr_base, + .read_mac_addr = igc_read_mac_addr, .get_speed_and_duplex = igc_get_speed_and_duplex_copper, }; From 200a1a1a7e91eee978c76238f6ef526e3071a81c Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Tue, 15 Jan 2019 15:21:18 +0200 Subject: [PATCH 0646/1436] igc: Remove the 'igc_get_phy_id_base' method Remove the redundant 'igc_get_phy_id_base' method and use the 'igc_get_phy_id' method directly instead. Signed-off-by: Sasha Neftin Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igc/igc_base.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c index 51a667c16dea..fe9a9666c70f 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.c +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -107,22 +107,6 @@ static s32 igc_reset_hw_base(struct igc_hw *hw) return ret_val; } -/** - * igc_get_phy_id_base - Retrieve PHY addr and id - * @hw: pointer to the HW structure - * - * Retrieves the PHY address and ID for both PHY's which do and do not use - * sgmi interface. - */ -static s32 igc_get_phy_id_base(struct igc_hw *hw) -{ - s32 ret_val = 0; - - ret_val = igc_get_phy_id(hw); - - return ret_val; -} - /** * igc_init_nvm_params_base - Init NVM func ptrs. * @hw: pointer to the HW structure @@ -245,7 +229,7 @@ static s32 igc_init_phy_params_base(struct igc_hw *hw) goto out; } - ret_val = igc_get_phy_id_base(hw); + ret_val = igc_get_phy_id(hw); if (ret_val) return ret_val; From a865d22d593ff1310665d582e08ec78b3632cca8 Mon Sep 17 00:00:00 2001 From: Todd Fujinaka Date: Wed, 23 Jan 2019 14:56:30 -0800 Subject: [PATCH 0647/1436] igb: Bump version number With recent changes, need to bump the driver version to reflect the changes. Signed-off-by: Todd Fujinaka Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index dfa357b1a9d6..d35cc9697e2d 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -39,7 +39,7 @@ #include "igb.h" #define MAJ 5 -#define MIN 4 +#define MIN 6 #define BUILD 0 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ __stringify(BUILD) "-k" From 8c5ad0dae93c9931dc32b9f4a98e73922c6ab2e0 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Wed, 30 Jan 2019 19:13:14 +0200 Subject: [PATCH 0648/1436] igc: Add ethtool support This patch adds basic ethtool support to the device to allow for configuration. Signed-off-by: Sasha Neftin Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igc/Makefile | 3 +- drivers/net/ethernet/intel/igc/igc.h | 34 +- drivers/net/ethernet/intel/igc/igc_base.c | 1 + drivers/net/ethernet/intel/igc/igc_defines.h | 4 + drivers/net/ethernet/intel/igc/igc_ethtool.c | 1032 ++++++++++++++++++ drivers/net/ethernet/intel/igc/igc_hw.h | 1 + drivers/net/ethernet/intel/igc/igc_main.c | 109 +- drivers/net/ethernet/intel/igc/igc_regs.h | 3 + 8 files changed, 1169 insertions(+), 18 deletions(-) create mode 100644 drivers/net/ethernet/intel/igc/igc_ethtool.c diff --git a/drivers/net/ethernet/intel/igc/Makefile b/drivers/net/ethernet/intel/igc/Makefile index 4387f6ba8e67..88c6f88baac5 100644 --- a/drivers/net/ethernet/intel/igc/Makefile +++ b/drivers/net/ethernet/intel/igc/Makefile @@ -7,4 +7,5 @@ obj-$(CONFIG_IGC) += igc.o -igc-objs := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o +igc-objs := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o \ +igc_ethtool.o diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index b1039dd3dd13..80faccc34cda 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -13,19 +13,43 @@ #include "igc_hw.h" -/* main */ +/* forward declaration */ +void igc_set_ethtool_ops(struct net_device *); + +struct igc_adapter; +struct igc_ring; + +void igc_up(struct igc_adapter *adapter); +void igc_down(struct igc_adapter *adapter); +int igc_setup_tx_resources(struct igc_ring *ring); +int igc_setup_rx_resources(struct igc_ring *ring); +void igc_free_tx_resources(struct igc_ring *ring); +void igc_free_rx_resources(struct igc_ring *ring); +unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter); +void igc_set_flag_queue_pairs(struct igc_adapter *adapter, + const u32 max_rss_queues); +int igc_reinit_queues(struct igc_adapter *adapter); +bool igc_has_link(struct igc_adapter *adapter); +void igc_reset(struct igc_adapter *adapter); +int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx); + extern char igc_driver_name[]; extern char igc_driver_version[]; +#define IGC_REGS_LEN 740 +#define IGC_RETA_SIZE 128 + /* Interrupt defines */ #define IGC_START_ITR 648 /* ~6000 ints/sec */ #define IGC_FLAG_HAS_MSI BIT(0) -#define IGC_FLAG_QUEUE_PAIRS BIT(4) +#define IGC_FLAG_QUEUE_PAIRS BIT(3) +#define IGC_FLAG_DMAC BIT(4) #define IGC_FLAG_NEED_LINK_UPDATE BIT(9) #define IGC_FLAG_MEDIA_RESET BIT(10) #define IGC_FLAG_MAS_ENABLE BIT(12) #define IGC_FLAG_HAS_MSIX BIT(13) #define IGC_FLAG_VLAN_PROMISC BIT(15) +#define IGC_FLAG_RX_LEGACY BIT(16) #define IGC_START_ITR 648 /* ~6000 ints/sec */ #define IGC_4K_ITR 980 @@ -60,6 +84,7 @@ extern char igc_driver_version[]; #define IGC_RXBUFFER_2048 2048 #define IGC_RXBUFFER_3072 3072 +#define AUTO_ALL_MODES 0 #define IGC_RX_HDR_LEN IGC_RXBUFFER_256 /* RX and TX descriptor control thresholds. @@ -340,6 +365,8 @@ struct igc_adapter { struct igc_mac_addr *mac_table; + u8 rss_indir_tbl[IGC_RETA_SIZE]; + unsigned long link_check_timeout; struct igc_info ei; }; @@ -418,6 +445,9 @@ static inline s32 igc_read_phy_reg(struct igc_hw *hw, u32 offset, u16 *data) return 0; } +/* forward declaration */ +void igc_reinit_locked(struct igc_adapter *); + #define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring)) #define IGC_TXD_DCMD (IGC_ADVTXD_DCMD_EOP | IGC_ADVTXD_DCMD_RS) diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c index fe9a9666c70f..51a8b8769c67 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.c +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -131,6 +131,7 @@ static s32 igc_init_nvm_params_base(struct igc_hw *hw) if (size > 15) size = 15; + nvm->type = igc_nvm_eeprom_spi; nvm->word_size = BIT(size); nvm->opcode_bits = 8; nvm->delay_usec = 1; diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 8740754ea1fd..7d1bdcd1225a 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -4,6 +4,10 @@ #ifndef _IGC_DEFINES_H_ #define _IGC_DEFINES_H_ +/* Number of Transmit and Receive Descriptors must be a multiple of 8 */ +#define REQ_TX_DESCRIPTOR_MULTIPLE 8 +#define REQ_RX_DESCRIPTOR_MULTIPLE 8 + #define IGC_CTRL_EXT_DRV_LOAD 0x10000000 /* Drv loaded bit for FW */ /* PCI Bus Info */ diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c new file mode 100644 index 000000000000..eff37a6c0afa --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -0,0 +1,1032 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Intel Corporation */ + +/* ethtool support for igc */ +#include + +#include "igc.h" + +static const char igc_priv_flags_strings[][ETH_GSTRING_LEN] = { +#define IGC_PRIV_FLAGS_LEGACY_RX BIT(0) + "legacy-rx", +}; + +#define IGC_PRIV_FLAGS_STR_LEN ARRAY_SIZE(igc_priv_flags_strings) + +static void igc_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + + strlcpy(drvinfo->driver, igc_driver_name, sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, igc_driver_version, sizeof(drvinfo->version)); + + /* add fw_version here */ + strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), + sizeof(drvinfo->bus_info)); + + drvinfo->n_priv_flags = IGC_PRIV_FLAGS_STR_LEN; +} + +static int igc_get_regs_len(struct net_device *netdev) +{ + return IGC_REGS_LEN * sizeof(u32); +} + +static void igc_get_regs(struct net_device *netdev, + struct ethtool_regs *regs, void *p) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_hw *hw = &adapter->hw; + u32 *regs_buff = p; + u8 i; + + memset(p, 0, IGC_REGS_LEN * sizeof(u32)); + + regs->version = (1u << 24) | (hw->revision_id << 16) | hw->device_id; + + /* General Registers */ + regs_buff[0] = rd32(IGC_CTRL); + regs_buff[1] = rd32(IGC_STATUS); + regs_buff[2] = rd32(IGC_CTRL_EXT); + regs_buff[3] = rd32(IGC_MDIC); + regs_buff[4] = rd32(IGC_CONNSW); + + /* NVM Register */ + regs_buff[5] = rd32(IGC_EECD); + + /* Interrupt */ + /* Reading EICS for EICR because they read the + * same but EICS does not clear on read + */ + regs_buff[6] = rd32(IGC_EICS); + regs_buff[7] = rd32(IGC_EICS); + regs_buff[8] = rd32(IGC_EIMS); + regs_buff[9] = rd32(IGC_EIMC); + regs_buff[10] = rd32(IGC_EIAC); + regs_buff[11] = rd32(IGC_EIAM); + /* Reading ICS for ICR because they read the + * same but ICS does not clear on read + */ + regs_buff[12] = rd32(IGC_ICS); + regs_buff[13] = rd32(IGC_ICS); + regs_buff[14] = rd32(IGC_IMS); + regs_buff[15] = rd32(IGC_IMC); + regs_buff[16] = rd32(IGC_IAC); + regs_buff[17] = rd32(IGC_IAM); + + /* Flow Control */ + regs_buff[18] = rd32(IGC_FCAL); + regs_buff[19] = rd32(IGC_FCAH); + regs_buff[20] = rd32(IGC_FCTTV); + regs_buff[21] = rd32(IGC_FCRTL); + regs_buff[22] = rd32(IGC_FCRTH); + regs_buff[23] = rd32(IGC_FCRTV); + + /* Receive */ + regs_buff[24] = rd32(IGC_RCTL); + regs_buff[25] = rd32(IGC_RXCSUM); + regs_buff[26] = rd32(IGC_RLPML); + regs_buff[27] = rd32(IGC_RFCTL); + + /* Transmit */ + regs_buff[28] = rd32(IGC_TCTL); + regs_buff[29] = rd32(IGC_TIPG); + + /* Wake Up */ + + /* MAC */ + + /* Statistics */ + regs_buff[30] = adapter->stats.crcerrs; + regs_buff[31] = adapter->stats.algnerrc; + regs_buff[32] = adapter->stats.symerrs; + regs_buff[33] = adapter->stats.rxerrc; + regs_buff[34] = adapter->stats.mpc; + regs_buff[35] = adapter->stats.scc; + regs_buff[36] = adapter->stats.ecol; + regs_buff[37] = adapter->stats.mcc; + regs_buff[38] = adapter->stats.latecol; + regs_buff[39] = adapter->stats.colc; + regs_buff[40] = adapter->stats.dc; + regs_buff[41] = adapter->stats.tncrs; + regs_buff[42] = adapter->stats.sec; + regs_buff[43] = adapter->stats.htdpmc; + regs_buff[44] = adapter->stats.rlec; + regs_buff[45] = adapter->stats.xonrxc; + regs_buff[46] = adapter->stats.xontxc; + regs_buff[47] = adapter->stats.xoffrxc; + regs_buff[48] = adapter->stats.xofftxc; + regs_buff[49] = adapter->stats.fcruc; + regs_buff[50] = adapter->stats.prc64; + regs_buff[51] = adapter->stats.prc127; + regs_buff[52] = adapter->stats.prc255; + regs_buff[53] = adapter->stats.prc511; + regs_buff[54] = adapter->stats.prc1023; + regs_buff[55] = adapter->stats.prc1522; + regs_buff[56] = adapter->stats.gprc; + regs_buff[57] = adapter->stats.bprc; + regs_buff[58] = adapter->stats.mprc; + regs_buff[59] = adapter->stats.gptc; + regs_buff[60] = adapter->stats.gorc; + regs_buff[61] = adapter->stats.gotc; + regs_buff[62] = adapter->stats.rnbc; + regs_buff[63] = adapter->stats.ruc; + regs_buff[64] = adapter->stats.rfc; + regs_buff[65] = adapter->stats.roc; + regs_buff[66] = adapter->stats.rjc; + regs_buff[67] = adapter->stats.mgprc; + regs_buff[68] = adapter->stats.mgpdc; + regs_buff[69] = adapter->stats.mgptc; + regs_buff[70] = adapter->stats.tor; + regs_buff[71] = adapter->stats.tot; + regs_buff[72] = adapter->stats.tpr; + regs_buff[73] = adapter->stats.tpt; + regs_buff[74] = adapter->stats.ptc64; + regs_buff[75] = adapter->stats.ptc127; + regs_buff[76] = adapter->stats.ptc255; + regs_buff[77] = adapter->stats.ptc511; + regs_buff[78] = adapter->stats.ptc1023; + regs_buff[79] = adapter->stats.ptc1522; + regs_buff[80] = adapter->stats.mptc; + regs_buff[81] = adapter->stats.bptc; + regs_buff[82] = adapter->stats.tsctc; + regs_buff[83] = adapter->stats.iac; + regs_buff[84] = adapter->stats.rpthc; + regs_buff[85] = adapter->stats.hgptc; + regs_buff[86] = adapter->stats.hgorc; + regs_buff[87] = adapter->stats.hgotc; + regs_buff[88] = adapter->stats.lenerrs; + regs_buff[89] = adapter->stats.scvpc; + regs_buff[90] = adapter->stats.hrmpc; + + for (i = 0; i < 4; i++) + regs_buff[91 + i] = rd32(IGC_SRRCTL(i)); + for (i = 0; i < 4; i++) + regs_buff[95 + i] = rd32(IGC_PSRTYPE(i)); + for (i = 0; i < 4; i++) + regs_buff[99 + i] = rd32(IGC_RDBAL(i)); + for (i = 0; i < 4; i++) + regs_buff[103 + i] = rd32(IGC_RDBAH(i)); + for (i = 0; i < 4; i++) + regs_buff[107 + i] = rd32(IGC_RDLEN(i)); + for (i = 0; i < 4; i++) + regs_buff[111 + i] = rd32(IGC_RDH(i)); + for (i = 0; i < 4; i++) + regs_buff[115 + i] = rd32(IGC_RDT(i)); + for (i = 0; i < 4; i++) + regs_buff[119 + i] = rd32(IGC_RXDCTL(i)); + + for (i = 0; i < 10; i++) + regs_buff[123 + i] = rd32(IGC_EITR(i)); + for (i = 0; i < 16; i++) + regs_buff[139 + i] = rd32(IGC_RAL(i)); + for (i = 0; i < 16; i++) + regs_buff[145 + i] = rd32(IGC_RAH(i)); + + for (i = 0; i < 4; i++) + regs_buff[149 + i] = rd32(IGC_TDBAL(i)); + for (i = 0; i < 4; i++) + regs_buff[152 + i] = rd32(IGC_TDBAH(i)); + for (i = 0; i < 4; i++) + regs_buff[156 + i] = rd32(IGC_TDLEN(i)); + for (i = 0; i < 4; i++) + regs_buff[160 + i] = rd32(IGC_TDH(i)); + for (i = 0; i < 4; i++) + regs_buff[164 + i] = rd32(IGC_TDT(i)); + for (i = 0; i < 4; i++) + regs_buff[168 + i] = rd32(IGC_TXDCTL(i)); +} + +static u32 igc_get_msglevel(struct net_device *netdev) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + + return adapter->msg_enable; +} + +static void igc_set_msglevel(struct net_device *netdev, u32 data) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + + adapter->msg_enable = data; +} + +static int igc_nway_reset(struct net_device *netdev) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + + if (netif_running(netdev)) + igc_reinit_locked(adapter); + return 0; +} + +static u32 igc_get_link(struct net_device *netdev) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_mac_info *mac = &adapter->hw.mac; + + /* If the link is not reported up to netdev, interrupts are disabled, + * and so the physical link state may have changed since we last + * looked. Set get_link_status to make sure that the true link + * state is interrogated, rather than pulling a cached and possibly + * stale link state from the driver. + */ + if (!netif_carrier_ok(netdev)) + mac->get_link_status = 1; + + return igc_has_link(adapter); +} + +static int igc_get_eeprom_len(struct net_device *netdev) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + + return adapter->hw.nvm.word_size * 2; +} + +static int igc_get_eeprom(struct net_device *netdev, + struct ethtool_eeprom *eeprom, u8 *bytes) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_hw *hw = &adapter->hw; + int first_word, last_word; + u16 *eeprom_buff; + int ret_val = 0; + u16 i; + + if (eeprom->len == 0) + return -EINVAL; + + eeprom->magic = hw->vendor_id | (hw->device_id << 16); + + first_word = eeprom->offset >> 1; + last_word = (eeprom->offset + eeprom->len - 1) >> 1; + + eeprom_buff = kmalloc_array(last_word - first_word + 1, sizeof(u16), + GFP_KERNEL); + if (!eeprom_buff) + return -ENOMEM; + + if (hw->nvm.type == igc_nvm_eeprom_spi) { + ret_val = hw->nvm.ops.read(hw, first_word, + last_word - first_word + 1, + eeprom_buff); + } else { + for (i = 0; i < last_word - first_word + 1; i++) { + ret_val = hw->nvm.ops.read(hw, first_word + i, 1, + &eeprom_buff[i]); + if (ret_val) + break; + } + } + + /* Device's eeprom is always little-endian, word addressable */ + for (i = 0; i < last_word - first_word + 1; i++) + le16_to_cpus(&eeprom_buff[i]); + + memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1), + eeprom->len); + kfree(eeprom_buff); + + return ret_val; +} + +static int igc_set_eeprom(struct net_device *netdev, + struct ethtool_eeprom *eeprom, u8 *bytes) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_hw *hw = &adapter->hw; + int max_len, first_word, last_word, ret_val = 0; + u16 *eeprom_buff; + void *ptr; + u16 i; + + if (eeprom->len == 0) + return -EOPNOTSUPP; + + if (hw->mac.type >= igc_i225 && + !igc_get_flash_presence_i225(hw)) { + return -EOPNOTSUPP; + } + + if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16))) + return -EFAULT; + + max_len = hw->nvm.word_size * 2; + + first_word = eeprom->offset >> 1; + last_word = (eeprom->offset + eeprom->len - 1) >> 1; + eeprom_buff = kmalloc(max_len, GFP_KERNEL); + if (!eeprom_buff) + return -ENOMEM; + + ptr = (void *)eeprom_buff; + + if (eeprom->offset & 1) { + /* need read/modify/write of first changed EEPROM word + * only the second byte of the word is being modified + */ + ret_val = hw->nvm.ops.read(hw, first_word, 1, + &eeprom_buff[0]); + ptr++; + } + if (((eeprom->offset + eeprom->len) & 1) && ret_val == 0) { + /* need read/modify/write of last changed EEPROM word + * only the first byte of the word is being modified + */ + ret_val = hw->nvm.ops.read(hw, last_word, 1, + &eeprom_buff[last_word - first_word]); + } + + /* Device's eeprom is always little-endian, word addressable */ + for (i = 0; i < last_word - first_word + 1; i++) + le16_to_cpus(&eeprom_buff[i]); + + memcpy(ptr, bytes, eeprom->len); + + for (i = 0; i < last_word - first_word + 1; i++) + eeprom_buff[i] = cpu_to_le16(eeprom_buff[i]); + + ret_val = hw->nvm.ops.write(hw, first_word, + last_word - first_word + 1, eeprom_buff); + + /* Update the checksum if nvm write succeeded */ + if (ret_val == 0) + hw->nvm.ops.update(hw); + + /* check if need: igc_set_fw_version(adapter); */ + kfree(eeprom_buff); + return ret_val; +} + +static void igc_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + + ring->rx_max_pending = IGC_MAX_RXD; + ring->tx_max_pending = IGC_MAX_TXD; + ring->rx_pending = adapter->rx_ring_count; + ring->tx_pending = adapter->tx_ring_count; +} + +static int igc_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_ring *temp_ring; + u16 new_rx_count, new_tx_count; + int i, err = 0; + + if (ring->rx_mini_pending || ring->rx_jumbo_pending) + return -EINVAL; + + new_rx_count = min_t(u32, ring->rx_pending, IGC_MAX_RXD); + new_rx_count = max_t(u16, new_rx_count, IGC_MIN_RXD); + new_rx_count = ALIGN(new_rx_count, REQ_RX_DESCRIPTOR_MULTIPLE); + + new_tx_count = min_t(u32, ring->tx_pending, IGC_MAX_TXD); + new_tx_count = max_t(u16, new_tx_count, IGC_MIN_TXD); + new_tx_count = ALIGN(new_tx_count, REQ_TX_DESCRIPTOR_MULTIPLE); + + if (new_tx_count == adapter->tx_ring_count && + new_rx_count == adapter->rx_ring_count) { + /* nothing to do */ + return 0; + } + + while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + + if (!netif_running(adapter->netdev)) { + for (i = 0; i < adapter->num_tx_queues; i++) + adapter->tx_ring[i]->count = new_tx_count; + for (i = 0; i < adapter->num_rx_queues; i++) + adapter->rx_ring[i]->count = new_rx_count; + adapter->tx_ring_count = new_tx_count; + adapter->rx_ring_count = new_rx_count; + goto clear_reset; + } + + if (adapter->num_tx_queues > adapter->num_rx_queues) + temp_ring = vmalloc(array_size(sizeof(struct igc_ring), + adapter->num_tx_queues)); + else + temp_ring = vmalloc(array_size(sizeof(struct igc_ring), + adapter->num_rx_queues)); + + if (!temp_ring) { + err = -ENOMEM; + goto clear_reset; + } + + igc_down(adapter); + + /* We can't just free everything and then setup again, + * because the ISRs in MSI-X mode get passed pointers + * to the Tx and Rx ring structs. + */ + if (new_tx_count != adapter->tx_ring_count) { + for (i = 0; i < adapter->num_tx_queues; i++) { + memcpy(&temp_ring[i], adapter->tx_ring[i], + sizeof(struct igc_ring)); + + temp_ring[i].count = new_tx_count; + err = igc_setup_tx_resources(&temp_ring[i]); + if (err) { + while (i) { + i--; + igc_free_tx_resources(&temp_ring[i]); + } + goto err_setup; + } + } + + for (i = 0; i < adapter->num_tx_queues; i++) { + igc_free_tx_resources(adapter->tx_ring[i]); + + memcpy(adapter->tx_ring[i], &temp_ring[i], + sizeof(struct igc_ring)); + } + + adapter->tx_ring_count = new_tx_count; + } + + if (new_rx_count != adapter->rx_ring_count) { + for (i = 0; i < adapter->num_rx_queues; i++) { + memcpy(&temp_ring[i], adapter->rx_ring[i], + sizeof(struct igc_ring)); + + temp_ring[i].count = new_rx_count; + err = igc_setup_rx_resources(&temp_ring[i]); + if (err) { + while (i) { + i--; + igc_free_rx_resources(&temp_ring[i]); + } + goto err_setup; + } + } + + for (i = 0; i < adapter->num_rx_queues; i++) { + igc_free_rx_resources(adapter->rx_ring[i]); + + memcpy(adapter->rx_ring[i], &temp_ring[i], + sizeof(struct igc_ring)); + } + + adapter->rx_ring_count = new_rx_count; + } +err_setup: + igc_up(adapter); + vfree(temp_ring); +clear_reset: + clear_bit(__IGC_RESETTING, &adapter->state); + return err; +} + +static void igc_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_hw *hw = &adapter->hw; + + pause->autoneg = + (adapter->fc_autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE); + + if (hw->fc.current_mode == igc_fc_rx_pause) { + pause->rx_pause = 1; + } else if (hw->fc.current_mode == igc_fc_tx_pause) { + pause->tx_pause = 1; + } else if (hw->fc.current_mode == igc_fc_full) { + pause->rx_pause = 1; + pause->tx_pause = 1; + } +} + +static int igc_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_hw *hw = &adapter->hw; + int retval = 0; + + adapter->fc_autoneg = pause->autoneg; + + while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + + if (adapter->fc_autoneg == AUTONEG_ENABLE) { + hw->fc.requested_mode = igc_fc_default; + if (netif_running(adapter->netdev)) { + igc_down(adapter); + igc_up(adapter); + } else { + igc_reset(adapter); + } + } else { + if (pause->rx_pause && pause->tx_pause) + hw->fc.requested_mode = igc_fc_full; + else if (pause->rx_pause && !pause->tx_pause) + hw->fc.requested_mode = igc_fc_rx_pause; + else if (!pause->rx_pause && pause->tx_pause) + hw->fc.requested_mode = igc_fc_tx_pause; + else if (!pause->rx_pause && !pause->tx_pause) + hw->fc.requested_mode = igc_fc_none; + + hw->fc.current_mode = hw->fc.requested_mode; + + retval = ((hw->phy.media_type == igc_media_type_copper) ? + igc_force_mac_fc(hw) : igc_setup_link(hw)); + } + + clear_bit(__IGC_RESETTING, &adapter->state); + return retval; +} + +static int igc_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + + if (adapter->rx_itr_setting <= 3) + ec->rx_coalesce_usecs = adapter->rx_itr_setting; + else + ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2; + + if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) { + if (adapter->tx_itr_setting <= 3) + ec->tx_coalesce_usecs = adapter->tx_itr_setting; + else + ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2; + } + + return 0; +} + +static int igc_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + int i; + + if (ec->rx_max_coalesced_frames || + ec->rx_coalesce_usecs_irq || + ec->rx_max_coalesced_frames_irq || + ec->tx_max_coalesced_frames || + ec->tx_coalesce_usecs_irq || + ec->stats_block_coalesce_usecs || + ec->use_adaptive_rx_coalesce || + ec->use_adaptive_tx_coalesce || + ec->pkt_rate_low || + ec->rx_coalesce_usecs_low || + ec->rx_max_coalesced_frames_low || + ec->tx_coalesce_usecs_low || + ec->tx_max_coalesced_frames_low || + ec->pkt_rate_high || + ec->rx_coalesce_usecs_high || + ec->rx_max_coalesced_frames_high || + ec->tx_coalesce_usecs_high || + ec->tx_max_coalesced_frames_high || + ec->rate_sample_interval) + return -ENOTSUPP; + + if (ec->rx_coalesce_usecs > IGC_MAX_ITR_USECS || + (ec->rx_coalesce_usecs > 3 && + ec->rx_coalesce_usecs < IGC_MIN_ITR_USECS) || + ec->rx_coalesce_usecs == 2) + return -EINVAL; + + if (ec->tx_coalesce_usecs > IGC_MAX_ITR_USECS || + (ec->tx_coalesce_usecs > 3 && + ec->tx_coalesce_usecs < IGC_MIN_ITR_USECS) || + ec->tx_coalesce_usecs == 2) + return -EINVAL; + + if ((adapter->flags & IGC_FLAG_QUEUE_PAIRS) && ec->tx_coalesce_usecs) + return -EINVAL; + + /* If ITR is disabled, disable DMAC */ + if (ec->rx_coalesce_usecs == 0) { + if (adapter->flags & IGC_FLAG_DMAC) + adapter->flags &= ~IGC_FLAG_DMAC; + } + + /* convert to rate of irq's per second */ + if (ec->rx_coalesce_usecs && ec->rx_coalesce_usecs <= 3) + adapter->rx_itr_setting = ec->rx_coalesce_usecs; + else + adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2; + + /* convert to rate of irq's per second */ + if (adapter->flags & IGC_FLAG_QUEUE_PAIRS) + adapter->tx_itr_setting = adapter->rx_itr_setting; + else if (ec->tx_coalesce_usecs && ec->tx_coalesce_usecs <= 3) + adapter->tx_itr_setting = ec->tx_coalesce_usecs; + else + adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2; + + for (i = 0; i < adapter->num_q_vectors; i++) { + struct igc_q_vector *q_vector = adapter->q_vector[i]; + + q_vector->tx.work_limit = adapter->tx_work_limit; + if (q_vector->rx.ring) + q_vector->itr_val = adapter->rx_itr_setting; + else + q_vector->itr_val = adapter->tx_itr_setting; + if (q_vector->itr_val && q_vector->itr_val <= 3) + q_vector->itr_val = IGC_START_ITR; + q_vector->set_itr = 1; + } + + return 0; +} + +void igc_write_rss_indir_tbl(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 reg = IGC_RETA(0); + u32 shift = 0; + int i = 0; + + while (i < IGC_RETA_SIZE) { + u32 val = 0; + int j; + + for (j = 3; j >= 0; j--) { + val <<= 8; + val |= adapter->rss_indir_tbl[i + j]; + } + + wr32(reg, val << shift); + reg += 4; + i += 4; + } +} + +static u32 igc_get_rxfh_indir_size(struct net_device *netdev) +{ + return IGC_RETA_SIZE; +} + +static int igc_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + int i; + + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + if (!indir) + return 0; + for (i = 0; i < IGC_RETA_SIZE; i++) + indir[i] = adapter->rss_indir_tbl[i]; + + return 0; +} + +static int igc_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + u32 num_queues; + int i; + + /* We do not allow change in unsupported parameters */ + if (key || + (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + if (!indir) + return 0; + + num_queues = adapter->rss_queues; + + /* Verify user input. */ + for (i = 0; i < IGC_RETA_SIZE; i++) + if (indir[i] >= num_queues) + return -EINVAL; + + for (i = 0; i < IGC_RETA_SIZE; i++) + adapter->rss_indir_tbl[i] = indir[i]; + + igc_write_rss_indir_tbl(adapter); + + return 0; +} + +static unsigned int igc_max_channels(struct igc_adapter *adapter) +{ + return igc_get_max_rss_queues(adapter); +} + +static void igc_get_channels(struct net_device *netdev, + struct ethtool_channels *ch) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + + /* Report maximum channels */ + ch->max_combined = igc_max_channels(adapter); + + /* Report info for other vector */ + if (adapter->flags & IGC_FLAG_HAS_MSIX) { + ch->max_other = NON_Q_VECTORS; + ch->other_count = NON_Q_VECTORS; + } + + ch->combined_count = adapter->rss_queues; +} + +static int igc_set_channels(struct net_device *netdev, + struct ethtool_channels *ch) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + unsigned int count = ch->combined_count; + unsigned int max_combined = 0; + + /* Verify they are not requesting separate vectors */ + if (!count || ch->rx_count || ch->tx_count) + return -EINVAL; + + /* Verify other_count is valid and has not been changed */ + if (ch->other_count != NON_Q_VECTORS) + return -EINVAL; + + /* Verify the number of channels doesn't exceed hw limits */ + max_combined = igc_max_channels(adapter); + if (count > max_combined) + return -EINVAL; + + if (count != adapter->rss_queues) { + adapter->rss_queues = count; + igc_set_flag_queue_pairs(adapter, max_combined); + + /* Hardware has to reinitialize queues and interrupts to + * match the new configuration. + */ + return igc_reinit_queues(adapter); + } + + return 0; +} + +static u32 igc_get_priv_flags(struct net_device *netdev) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + u32 priv_flags = 0; + + if (adapter->flags & IGC_FLAG_RX_LEGACY) + priv_flags |= IGC_PRIV_FLAGS_LEGACY_RX; + + return priv_flags; +} + +static int igc_set_priv_flags(struct net_device *netdev, u32 priv_flags) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + unsigned int flags = adapter->flags; + + flags &= ~IGC_FLAG_RX_LEGACY; + if (priv_flags & IGC_PRIV_FLAGS_LEGACY_RX) + flags |= IGC_FLAG_RX_LEGACY; + + if (flags != adapter->flags) { + adapter->flags = flags; + + /* reset interface to repopulate queues */ + if (netif_running(netdev)) + igc_reinit_locked(adapter); + } + + return 0; +} + +static int igc_ethtool_begin(struct net_device *netdev) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + + pm_runtime_get_sync(&adapter->pdev->dev); + return 0; +} + +static void igc_ethtool_complete(struct net_device *netdev) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + + pm_runtime_put(&adapter->pdev->dev); +} + +static int igc_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_hw *hw = &adapter->hw; + u32 status; + u32 speed; + + ethtool_link_ksettings_zero_link_mode(cmd, supported); + ethtool_link_ksettings_zero_link_mode(cmd, advertising); + + /* supported link modes */ + ethtool_link_ksettings_add_link_mode(cmd, supported, 10baseT_Half); + ethtool_link_ksettings_add_link_mode(cmd, supported, 10baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, supported, 100baseT_Half); + ethtool_link_ksettings_add_link_mode(cmd, supported, 100baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, supported, 1000baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, supported, 2500baseT_Full); + + /* twisted pair */ + cmd->base.port = PORT_TP; + cmd->base.phy_address = hw->phy.addr; + + /* advertising link modes */ + ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Half); + ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Half); + ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, 2500baseT_Full); + + /* set autoneg settings */ + if (hw->mac.autoneg == 1) { + ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + Autoneg); + } + + switch (hw->fc.requested_mode) { + case igc_fc_full: + ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); + break; + case igc_fc_rx_pause: + ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + Asym_Pause); + break; + case igc_fc_tx_pause: + ethtool_link_ksettings_add_link_mode(cmd, advertising, + Asym_Pause); + break; + default: + ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + Asym_Pause); + } + + status = rd32(IGC_STATUS); + + if (status & IGC_STATUS_LU) { + if (status & IGC_STATUS_SPEED_1000) { + /* For I225, STATUS will indicate 1G speed in both + * 1 Gbps and 2.5 Gbps link modes. + * An additional bit is used + * to differentiate between 1 Gbps and 2.5 Gbps. + */ + if (hw->mac.type == igc_i225 && + (status & IGC_STATUS_SPEED_2500)) { + speed = SPEED_2500; + hw_dbg("2500 Mbs, "); + } else { + speed = SPEED_1000; + hw_dbg("1000 Mbs, "); + } + } else if (status & IGC_STATUS_SPEED_100) { + speed = SPEED_100; + hw_dbg("100 Mbs, "); + } else { + speed = SPEED_10; + hw_dbg("10 Mbs, "); + } + if ((status & IGC_STATUS_FD) || + hw->phy.media_type != igc_media_type_copper) + cmd->base.duplex = DUPLEX_FULL; + else + cmd->base.duplex = DUPLEX_HALF; + } else { + speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; + } + cmd->base.speed = speed; + if (hw->mac.autoneg) + cmd->base.autoneg = AUTONEG_ENABLE; + else + cmd->base.autoneg = AUTONEG_DISABLE; + + /* MDI-X => 2; MDI =>1; Invalid =>0 */ + if (hw->phy.media_type == igc_media_type_copper) + cmd->base.eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X : + ETH_TP_MDI; + else + cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; + + if (hw->phy.mdix == AUTO_ALL_MODES) + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + else + cmd->base.eth_tp_mdix_ctrl = hw->phy.mdix; + + return 0; +} + +static int igc_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_hw *hw = &adapter->hw; + u32 advertising; + + /* When adapter in resetting mode, autoneg/speed/duplex + * cannot be changed + */ + if (igc_check_reset_block(hw)) { + dev_err(&adapter->pdev->dev, + "Cannot change link characteristics when reset is active.\n"); + return -EINVAL; + } + + /* MDI setting is only allowed when autoneg enabled because + * some hardware doesn't allow MDI setting when speed or + * duplex is forced. + */ + if (cmd->base.eth_tp_mdix_ctrl) { + if (cmd->base.eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO && + cmd->base.autoneg != AUTONEG_ENABLE) { + dev_err(&adapter->pdev->dev, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n"); + return -EINVAL; + } + } + + while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + + if (cmd->base.autoneg == AUTONEG_ENABLE) { + hw->mac.autoneg = 1; + hw->phy.autoneg_advertised = advertising; + if (adapter->fc_autoneg) + hw->fc.requested_mode = igc_fc_default; + } else { + /* calling this overrides forced MDI setting */ + dev_info(&adapter->pdev->dev, + "Force mode currently not supported\n"); + } + + /* MDI-X => 2; MDI => 1; Auto => 3 */ + if (cmd->base.eth_tp_mdix_ctrl) { + /* fix up the value for auto (3 => 0) as zero is mapped + * internally to auto + */ + if (cmd->base.eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO) + hw->phy.mdix = AUTO_ALL_MODES; + else + hw->phy.mdix = cmd->base.eth_tp_mdix_ctrl; + } + + /* reset the link */ + if (netif_running(adapter->netdev)) { + igc_down(adapter); + igc_up(adapter); + } else { + igc_reset(adapter); + } + + clear_bit(__IGC_RESETTING, &adapter->state); + + return 0; +} + +static const struct ethtool_ops igc_ethtool_ops = { + .get_drvinfo = igc_get_drvinfo, + .get_regs_len = igc_get_regs_len, + .get_regs = igc_get_regs, + .get_msglevel = igc_get_msglevel, + .set_msglevel = igc_set_msglevel, + .nway_reset = igc_nway_reset, + .get_link = igc_get_link, + .get_eeprom_len = igc_get_eeprom_len, + .get_eeprom = igc_get_eeprom, + .set_eeprom = igc_set_eeprom, + .get_ringparam = igc_get_ringparam, + .set_ringparam = igc_set_ringparam, + .get_pauseparam = igc_get_pauseparam, + .set_pauseparam = igc_set_pauseparam, + .get_coalesce = igc_get_coalesce, + .set_coalesce = igc_set_coalesce, + .get_rxfh_indir_size = igc_get_rxfh_indir_size, + .get_rxfh = igc_get_rxfh, + .set_rxfh = igc_set_rxfh, + .get_channels = igc_get_channels, + .set_channels = igc_set_channels, + .get_priv_flags = igc_get_priv_flags, + .set_priv_flags = igc_set_priv_flags, + .begin = igc_ethtool_begin, + .complete = igc_ethtool_complete, + .get_link_ksettings = igc_get_link_ksettings, + .set_link_ksettings = igc_set_link_ksettings, +}; + +void igc_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &igc_ethtool_ops; +} diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h index c50414f48f0d..7c88b7bd4799 100644 --- a/drivers/net/ethernet/intel/igc/igc_hw.h +++ b/drivers/net/ethernet/intel/igc/igc_hw.h @@ -55,6 +55,7 @@ enum igc_media_type { enum igc_nvm_type { igc_nvm_unknown = 0, + igc_nvm_eeprom_spi, igc_nvm_flash_hw, igc_nvm_invm, }; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index f20183037fb2..11c75433fe22 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -12,6 +12,8 @@ #define DRV_VERSION "0.0.1-k" #define DRV_SUMMARY "Intel(R) 2.5G Ethernet Linux Driver" +#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) + static int debug = -1; MODULE_AUTHOR("Intel Corporation, "); @@ -66,7 +68,7 @@ enum latency_range { latency_invalid = 255 }; -static void igc_reset(struct igc_adapter *adapter) +void igc_reset(struct igc_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; struct igc_hw *hw = &adapter->hw; @@ -150,7 +152,7 @@ static void igc_get_hw_control(struct igc_adapter *adapter) * * Free all transmit software resources */ -static void igc_free_tx_resources(struct igc_ring *tx_ring) +void igc_free_tx_resources(struct igc_ring *tx_ring) { igc_clean_tx_ring(tx_ring); @@ -261,7 +263,7 @@ static void igc_clean_all_tx_rings(struct igc_adapter *adapter) * * Return 0 on success, negative on failure */ -static int igc_setup_tx_resources(struct igc_ring *tx_ring) +int igc_setup_tx_resources(struct igc_ring *tx_ring) { struct device *dev = tx_ring->dev; int size = 0; @@ -381,7 +383,7 @@ static void igc_clean_all_rx_rings(struct igc_adapter *adapter) * * Free all receive software resources */ -static void igc_free_rx_resources(struct igc_ring *rx_ring) +void igc_free_rx_resources(struct igc_ring *rx_ring) { igc_clean_rx_ring(rx_ring); @@ -418,7 +420,7 @@ static void igc_free_all_rx_resources(struct igc_adapter *adapter) * * Returns 0 on success, negative on failure */ -static int igc_setup_rx_resources(struct igc_ring *rx_ring) +int igc_setup_rx_resources(struct igc_ring *rx_ring) { struct device *dev = rx_ring->dev; int size, desc_len; @@ -1703,7 +1705,7 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) * igc_up - Open the interface and prepare it to handle traffic * @adapter: board private structure */ -static void igc_up(struct igc_adapter *adapter) +void igc_up(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; int i = 0; @@ -1748,7 +1750,7 @@ static void igc_nfc_filter_exit(struct igc_adapter *adapter) * igc_down - Close the interface * @adapter: board private structure */ -static void igc_down(struct igc_adapter *adapter) +void igc_down(struct igc_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct igc_hw *hw = &adapter->hw; @@ -1810,7 +1812,7 @@ static void igc_down(struct igc_adapter *adapter) igc_clean_all_rx_rings(adapter); } -static void igc_reinit_locked(struct igc_adapter *adapter) +void igc_reinit_locked(struct igc_adapter *adapter) { WARN_ON(in_interrupt()); while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) @@ -1922,7 +1924,7 @@ static void igc_configure(struct igc_adapter *adapter) /** * igc_rar_set_index - Sync RAL[index] and RAH[index] registers with MAC table - * @adapter: Pointer to adapter structure + * @adapter: address of board private structure * @index: Index of the RAR entry which need to be synced with MAC table */ static void igc_rar_set_index(struct igc_adapter *adapter, u32 index) @@ -2298,7 +2300,7 @@ static void igc_update_phy_info(struct timer_list *t) * igc_has_link - check shared code for link and determine up/down * @adapter: pointer to driver private info */ -static bool igc_has_link(struct igc_adapter *adapter) +bool igc_has_link(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; bool link_active = false; @@ -3501,6 +3503,57 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg) return value; } +int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx) +{ + struct pci_dev *pdev = adapter->pdev; + struct igc_mac_info *mac = &adapter->hw.mac; + + mac->autoneg = 0; + + /* Make sure dplx is at most 1 bit and lsb of speed is not set + * for the switch() below to work + */ + if ((spd & 1) || (dplx & ~1)) + goto err_inval; + + switch (spd + dplx) { + case SPEED_10 + DUPLEX_HALF: + mac->forced_speed_duplex = ADVERTISE_10_HALF; + break; + case SPEED_10 + DUPLEX_FULL: + mac->forced_speed_duplex = ADVERTISE_10_FULL; + break; + case SPEED_100 + DUPLEX_HALF: + mac->forced_speed_duplex = ADVERTISE_100_HALF; + break; + case SPEED_100 + DUPLEX_FULL: + mac->forced_speed_duplex = ADVERTISE_100_FULL; + break; + case SPEED_1000 + DUPLEX_FULL: + mac->autoneg = 1; + adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; + break; + case SPEED_1000 + DUPLEX_HALF: /* not supported */ + goto err_inval; + case SPEED_2500 + DUPLEX_FULL: + mac->autoneg = 1; + adapter->hw.phy.autoneg_advertised = ADVERTISE_2500_FULL; + break; + case SPEED_2500 + DUPLEX_HALF: /* not supported */ + default: + goto err_inval; + } + + /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ + adapter->hw.phy.mdix = AUTO_ALL_MODES; + + return 0; + +err_inval: + dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n"); + return -EINVAL; +} + /** * igc_probe - Device Initialization Routine * @pdev: PCI device information struct @@ -3568,7 +3621,7 @@ static int igc_probe(struct pci_dev *pdev, hw = &adapter->hw; hw->back = adapter; adapter->port_num = hw->bus.func; - adapter->msg_enable = GENMASK(debug - 1, 0); + adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); err = pci_save_state(pdev); if (err) @@ -3584,7 +3637,7 @@ static int igc_probe(struct pci_dev *pdev, hw->hw_addr = adapter->io_addr; netdev->netdev_ops = &igc_netdev_ops; - + igc_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; netdev->mem_start = pci_resource_start(pdev, 0); @@ -3744,8 +3797,8 @@ static struct pci_driver igc_driver = { .remove = igc_remove, }; -static void igc_set_flag_queue_pairs(struct igc_adapter *adapter, - const u32 max_rss_queues) +void igc_set_flag_queue_pairs(struct igc_adapter *adapter, + const u32 max_rss_queues) { /* Determine if we need to pair queues. */ /* If rss_queues > half of max_rss_queues, pair the queues in @@ -3757,7 +3810,7 @@ static void igc_set_flag_queue_pairs(struct igc_adapter *adapter, adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS; } -static unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter) +unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter) { unsigned int max_rss_queues; @@ -3836,6 +3889,32 @@ static int igc_sw_init(struct igc_adapter *adapter) return 0; } +/** + * igc_reinit_queues - return error + * @adapter: pointer to adapter structure + */ +int igc_reinit_queues(struct igc_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct pci_dev *pdev = adapter->pdev; + int err = 0; + + if (netif_running(netdev)) + igc_close(netdev); + + igc_reset_interrupt_capability(adapter); + + if (igc_init_interrupt_scheme(adapter, true)) { + dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); + return -ENOMEM; + } + + if (netif_running(netdev)) + err = igc_open(netdev); + + return err; +} + /** * igc_get_hw_dev - return device * @hw: pointer to hardware structure diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index f8c835283377..5afe7a8d3faf 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -80,6 +80,9 @@ /* MSI-X Table Register Descriptions */ #define IGC_PBACL 0x05B68 /* MSIx PBA Clear - R/W 1 to clear */ +/* Redirection Table - RW Array */ +#define IGC_RETA(_i) (0x05C00 + ((_i) * 4)) + /* Receive Register Descriptions */ #define IGC_RCTL 0x00100 /* Rx Control - RW */ #define IGC_SRRCTL(_n) (0x0C00C + ((_n) * 0x40)) From 206dafb0a3a1defec131be77b7c3e14b2731fdf1 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 5 Feb 2019 14:28:44 -0800 Subject: [PATCH 0649/1436] tools/bpf: fix a selftest test_btf failure Commit 9c651127445c ("selftests/btf: add initial BTF dedup tests") added dedup tests in test_btf.c. It broke the raw test: BTF raw test[71] (func proto (Bad arg name_off)): btf_raw_create:2905:FAIL Error getting string #65535, strs_cnt:1 The test itself encodes invalid func_proto parameter name offset 0xffffFFFF as a negative test for the kernel. The above commit changed the meaning of that offset and resulted in a user space error. #define NAME_NTH(N) (0xffff0000 | N) #define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xffff0000) #define GET_NAME_NTH_IDX(X) (X & 0x0000ffff) Currently, the kernel permits maximum name offset 0xffff. Set the test name off as 0x0fffFFFF to trigger the kernel verification failure. Cc: Andrii Nakryiko Fixes: 9c651127445c ("selftests/btf: add initial BTF dedup tests") Signed-off-by: Yonghong Song Acked-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_btf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 30c3edde7e07..447acc34db94 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -1978,7 +1978,7 @@ static struct btf_raw_test raw_tests[] = { /* void (*)(int a, unsigned int ) */ BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(0xffffffff, 2), + BTF_FUNC_PROTO_ARG_ENC(0x0fffffff, 2), BTF_END_RAW, }, .str_sec = "\0a", From a6c109a6b746509f5030d20d57b35e566cf3a04f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 5 Feb 2019 11:48:22 -0800 Subject: [PATCH 0650/1436] tools/bpf: add const qualifier to btf__get_map_kv_tids() map_name parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 96408c43447a ("tools/bpf: implement libbpf btf__get_map_kv_tids() API function") added the API function btf__get_map_kv_tids(): btf__get_map_kv_tids(const struct btf *btf, char *map_name, ...) The parameter map_name has type "char *". This is okay inside libbpf library since the map_name is from bpf_map->name which also has type "char *". This will be problematic if the caller for map_name already has attribute "const", e.g., from C++ string.c_str(). It will result in either a warning or an error. /home/yhs/work/bcc/src/cc/btf.cc:166:51: error: invalid conversion from ‘const char*’ to ‘char*’ [-fpermissive] return btf__get_map_kv_tids(btf_, map_name.c_str() This patch added "const" attributes to map_name parameter. Fixes: 96408c43447a ("tools/bpf: implement libbpf btf__get_map_kv_tids() API function") Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 2 +- tools/lib/bpf/btf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 4949f8840bda..3b3a2959d03a 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -511,7 +511,7 @@ exit_free: return err; } -int btf__get_map_kv_tids(const struct btf *btf, char *map_name, +int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, __u32 expected_key_size, __u32 expected_value_size, __u32 *key_type_id, __u32 *value_type_id) { diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 25a9d2db035d..b393da90cc85 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -69,7 +69,7 @@ LIBBPF_API void btf__get_strings(const struct btf *btf, const char **strings, __u32 *str_len); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); -LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, char *map_name, +LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, __u32 expected_key_size, __u32 expected_value_size, __u32 *key_type_id, __u32 *value_type_id); From f7748e2952387f1a67db3bc618050149427420c9 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 5 Feb 2019 21:38:30 -0800 Subject: [PATCH 0651/1436] tools/bpf: silence a libbpf unnecessary warning Commit 96408c43447a ("tools/bpf: implement libbpf btf__get_map_kv_tids() API function") refactored function bpf_map_find_btf_info() and moved bulk of implementation into btf.c as btf__get_map_kv_tids(). This change introduced a bug such that test_btf will print out the following warning although the test passed: BTF libbpf test[2] (test_btf_nokv.o): libbpf: map:btf_map container_name:____btf_map_btf_map cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR? Previously, the error message is guarded with pr_debug(). Commit 96408c43447a changed it to pr_warning() and hence caused the warning. Restoring to pr_debug() for the message fixed the issue. Fixes: 96408c43447a ("tools/bpf: implement libbpf btf__get_map_kv_tids() API function") Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 3b3a2959d03a..ab6528c935a1 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -531,8 +531,8 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, container_id = btf__find_by_name(btf, container_name); if (container_id < 0) { - pr_warning("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n", - map_name, container_name); + pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n", + map_name, container_name); return container_id; } From c97617a81a7616d49bc3700959e08c6c6f447093 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 5 Feb 2019 17:57:27 +0100 Subject: [PATCH 0652/1436] ALSA: hda/ca0132 - Fix build error without CONFIG_PCI A call of pci_iounmap() call without CONFIG_PCI leads to a build error on some architectures. We tried to address this and add a check of IS_ENABLED(CONFIG_PCI), but this still doesn't seem enough for sh. Ideally we should fix it globally, it's really a corner case, so let's paper over it with a simpler ifdef. Fixes: 1e73359a24fa ("ALSA: hda/ca0132 - make pci_iounmap() call conditional") Reported-by: Kuninori Morimoto Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_ca0132.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index e5bdbc245682..29882bda7632 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -8451,8 +8451,10 @@ static void ca0132_free(struct hda_codec *codec) ca0132_exit_chip(codec); snd_hda_power_down(codec); - if (IS_ENABLED(CONFIG_PCI) && spec->mem_base) +#ifdef CONFIG_PCI + if (spec->mem_base) pci_iounmap(codec->bus->pci, spec->mem_base); +#endif kfree(spec->spec_init_verbs); kfree(codec->spec); } From d028a646e84b9b131e4ff2cb5bbdd3825d141028 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 5 Feb 2019 16:18:46 +0200 Subject: [PATCH 0653/1436] drm/i915: Try to sanitize bogus DPLL state left over by broken SNB BIOSen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain SNB machines (eg. ASUS K53SV) seem to have a broken BIOS which misprograms the hardware badly when encountering a suitably high resolution display. The programmed pipe timings are somewhat bonkers and the DPLL is totally misprogrammed (P divider == 0). That will result in atomic commit timeouts as apparently the pipe is sufficiently stuck to not signal vblank interrupts. IIRC something like this was also observed on some other SNB machine years ago (might have been a Dell XPS 8300) but a BIOS update cured it. Sadly looks like this was never fixed for the ASUS K53SV as the latest BIOS (K53SV.320 11/11/2011) is still broken. The quickest way to deal with this seems to be to shut down the pipe+ports+DPLL. Unfortunately doing this during the normal sanitization phase isn't quite soon enough as we already spew several WARNs about the bogus hardware state. But it's better than hanging the boot for a few dozen seconds. Since this is limited to a few old machines it doesn't seem entirely worthwile to try and rework the readout+sanitization code to handle it more gracefully. v2: Fix potential NULL deref (kbuild test robot) Constify has_bogus_dpll_config() Cc: stable@vger.kernel.org # v4.20+ Cc: Daniel Kamil Kozar Reported-by: Daniel Kamil Kozar Tested-by: Daniel Kamil Kozar Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109245 Fixes: 516a49cc1946 ("drm/i915: Fix assert_plane() warning on bootup with external display") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190111174950.10681-1-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola (cherry picked from commit 7bed8adcd9f86231bb69bbc02f88ad89330f99e3) Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190205141846.6053-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 50 ++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3da9c0f9e948..248128126422 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15415,16 +15415,45 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, } } +static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + /* + * Some SNB BIOSen (eg. ASUS K53SV) are known to misprogram + * the hardware when a high res displays plugged in. DPLL P + * divider is zero, and the pipe timings are bonkers. We'll + * try to disable everything in that case. + * + * FIXME would be nice to be able to sanitize this state + * without several WARNs, but for now let's take the easy + * road. + */ + return IS_GEN6(dev_priv) && + crtc_state->base.active && + crtc_state->shared_dpll && + crtc_state->port_clock == 0; +} + static void intel_sanitize_encoder(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_connector *connector; + struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc_state *crtc_state = crtc ? + to_intel_crtc_state(crtc->base.state) : NULL; /* We need to check both for a crtc link (meaning that the * encoder is active and trying to read from a pipe) and the * pipe itself being active. */ - bool has_active_crtc = encoder->base.crtc && - to_intel_crtc(encoder->base.crtc)->active; + bool has_active_crtc = crtc_state && + crtc_state->base.active; + + if (crtc_state && has_bogus_dpll_config(crtc_state)) { + DRM_DEBUG_KMS("BIOS has misprogrammed the hardware. Disabling pipe %c\n", + pipe_name(crtc->pipe)); + has_active_crtc = false; + } connector = intel_encoder_find_connector(encoder); if (connector && !has_active_crtc) { @@ -15435,16 +15464,25 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) /* Connector is active, but has no active pipe. This is * fallout from our resume register restoring. Disable * the encoder manually again. */ - if (encoder->base.crtc) { - struct drm_crtc_state *crtc_state = encoder->base.crtc->state; + if (crtc_state) { + struct drm_encoder *best_encoder; DRM_DEBUG_KMS("[ENCODER:%d:%s] manually disabled\n", encoder->base.base.id, encoder->base.name); + + /* avoid oopsing in case the hooks consult best_encoder */ + best_encoder = connector->base.state->best_encoder; + connector->base.state->best_encoder = &encoder->base; + if (encoder->disable) - encoder->disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); + encoder->disable(encoder, crtc_state, + connector->base.state); if (encoder->post_disable) - encoder->post_disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state); + encoder->post_disable(encoder, crtc_state, + connector->base.state); + + connector->base.state->best_encoder = best_encoder; } encoder->base.crtc = NULL; From d5d27fd9826b59979b184ec288e4812abac0e988 Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Tue, 5 Feb 2019 16:52:51 +0100 Subject: [PATCH 0654/1436] mtd: rawnand: gpmi: fix MX28 bus master lockup problem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Disable BCH soft reset according to MX23 erratum #2847 ("BCH soft reset may cause bus master lock up") for MX28 too. It has the same problem. Observed problem: once per 100,000+ MX28 reboots NAND read failed on DMA timeout errors: [ 1.770823] UBI: attaching mtd3 to ubi0 [ 2.768088] gpmi_nand: DMA timeout, last DMA :1 [ 3.958087] gpmi_nand: BCH timeout, last DMA :1 [ 4.156033] gpmi_nand: Error in ECC-based read: -110 [ 4.161136] UBI warning: ubi_io_read: error -110 while reading 64 bytes from PEB 0:0, read only 0 bytes, retry [ 4.171283] step 1 error [ 4.173846] gpmi_nand: Chip: 0, Error -1 Without BCH soft reset we successfully executed 1,000,000 MX28 reboots. I have a quote from NXP regarding this problem, from July 18th 2016: "As the i.MX23 and i.MX28 are of the same generation, they share many characteristics. Unfortunately, also the erratas may be shared. In case of the documented erratas and the workarounds, you can also apply the workaround solution of one device on the other one. This have been reported, but I’m afraid that there are not an estimated date for updating the Errata documents. Please accept our apologies for any inconveniences this may cause." Fixes: 6f2a6a52560a ("mtd: nand: gpmi: reset BCH earlier, too, to avoid NAND startup problems") Cc: stable@vger.kernel.org Signed-off-by: Manfred Schlaegl Signed-off-by: Martin Kepplinger Reviewed-by: Miquel Raynal Reviewed-by: Fabio Estevam Acked-by: Han Xu Signed-off-by: Boris Brezillon --- drivers/mtd/nand/raw/gpmi-nand/gpmi-lib.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-lib.c index bd4cfac6b5aa..a4768df5083f 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-lib.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-lib.c @@ -155,9 +155,10 @@ int gpmi_init(struct gpmi_nand_data *this) /* * Reset BCH here, too. We got failures otherwise :( - * See later BCH reset for explanation of MX23 handling + * See later BCH reset for explanation of MX23 and MX28 handling */ - ret = gpmi_reset_block(r->bch_regs, GPMI_IS_MX23(this)); + ret = gpmi_reset_block(r->bch_regs, + GPMI_IS_MX23(this) || GPMI_IS_MX28(this)); if (ret) goto err_out; @@ -263,12 +264,10 @@ int bch_set_geometry(struct gpmi_nand_data *this) /* * Due to erratum #2847 of the MX23, the BCH cannot be soft reset on this * chip, otherwise it will lock up. So we skip resetting BCH on the MX23. - * On the other hand, the MX28 needs the reset, because one case has been - * seen where the BCH produced ECC errors constantly after 10000 - * consecutive reboots. The latter case has not been seen on the MX23 - * yet, still we don't know if it could happen there as well. + * and MX28. */ - ret = gpmi_reset_block(r->bch_regs, GPMI_IS_MX23(this)); + ret = gpmi_reset_block(r->bch_regs, + GPMI_IS_MX23(this) || GPMI_IS_MX28(this)); if (ret) goto err_out; From 0acd99282bef617fdbc4dff29359fe8160f00846 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Feb 2019 07:59:41 +0200 Subject: [PATCH 0655/1436] cfg80211: pmsr: fix MAC address setting When we *don't* have a MAC address attribute, we shouldn't try to use this - this was intended to copy the local MAC address instead, so fix it. Fixes: 9bb7e0f24e7e ("cfg80211: add peer measurement with FTM initiator API") Signed-off-by: Johannes Berg --- net/wireless/pmsr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index de9286703280..f2e388e329fd 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -256,8 +256,7 @@ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info) if (err) goto out_err; } else { - memcpy(req->mac_addr, nla_data(info->attrs[NL80211_ATTR_MAC]), - ETH_ALEN); + memcpy(req->mac_addr, wdev_address(wdev), ETH_ALEN); memset(req->mac_addr_mask, 0xff, ETH_ALEN); } From 73350424bec9c76cf42d4d502ff156c7d5daf191 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Feb 2019 08:03:10 +0200 Subject: [PATCH 0656/1436] cfg80211: pmsr: fix abort locking When we destroy the interface we already hold the wdev->mtx while calling cfg80211_pmsr_wdev_down(), which assumes this isn't true and flushes the worker that takes the lock, thus leading to a deadlock. Fix this by refactoring the worker and calling its code in cfg80211_pmsr_wdev_down() directly. We still need to flush the work later to make sure it's not still running and will crash, but it will not do anything. Fixes: 9bb7e0f24e7e ("cfg80211: add peer measurement with FTM initiator API") Signed-off-by: Johannes Berg --- net/wireless/core.c | 2 ++ net/wireless/pmsr.c | 22 +++++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index 623dfe5e211c..b36ad8efb5e5 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1068,6 +1068,8 @@ static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync) ASSERT_RTNL(); + flush_work(&wdev->pmsr_free_wk); + nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE); list_del_rcu(&wdev->list); diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index f2e388e329fd..78c3f5633692 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -529,14 +529,14 @@ free: } EXPORT_SYMBOL_GPL(cfg80211_pmsr_report); -void cfg80211_pmsr_free_wk(struct work_struct *work) +static void cfg80211_pmsr_process_abort(struct wireless_dev *wdev) { - struct wireless_dev *wdev = container_of(work, struct wireless_dev, - pmsr_free_wk); struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_pmsr_request *req, *tmp; LIST_HEAD(free_list); + lockdep_assert_held(&wdev->mtx); + spin_lock_bh(&wdev->pmsr_lock); list_for_each_entry_safe(req, tmp, &wdev->pmsr_list, list) { if (req->nl_portid) @@ -546,14 +546,22 @@ void cfg80211_pmsr_free_wk(struct work_struct *work) spin_unlock_bh(&wdev->pmsr_lock); list_for_each_entry_safe(req, tmp, &free_list, list) { - wdev_lock(wdev); rdev_abort_pmsr(rdev, wdev, req); - wdev_unlock(wdev); kfree(req); } } +void cfg80211_pmsr_free_wk(struct work_struct *work) +{ + struct wireless_dev *wdev = container_of(work, struct wireless_dev, + pmsr_free_wk); + + wdev_lock(wdev); + cfg80211_pmsr_process_abort(wdev); + wdev_unlock(wdev); +} + void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev) { struct cfg80211_pmsr_request *req; @@ -567,8 +575,8 @@ void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev) spin_unlock_bh(&wdev->pmsr_lock); if (found) - schedule_work(&wdev->pmsr_free_wk); - flush_work(&wdev->pmsr_free_wk); + cfg80211_pmsr_process_abort(wdev); + WARN_ON(!list_empty(&wdev->pmsr_list)); } From 4df04ac9b37f278c48bb696289aff8f81226af4b Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 11 Jan 2019 05:50:33 +0200 Subject: [PATCH 0657/1436] drm/omap: dsi: Fix crash in DSI debug dumps Reading any of the DSI debugfs files results in a crash, as wrong pointer is passed to the dump functions, and the dump functions use a wrong pointer. This patch fixes DSI debug dumps. Fixes: f3ed97f9ae7d ("drm/omap: dsi: Simplify debugfs implementation") Signed-off-by: Tomi Valkeinen Reviewed-by: Laurent Pinchart Signed-off-by: Laurent Pinchart Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20190111035120.20668-3-laurent.pinchart@ideasonboard.com --- drivers/gpu/drm/omapdrm/dss/dsi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c index 00a9c2ab9e6c..277f9dd2ec8c 100644 --- a/drivers/gpu/drm/omapdrm/dss/dsi.c +++ b/drivers/gpu/drm/omapdrm/dss/dsi.c @@ -1406,7 +1406,7 @@ static void dsi_pll_disable(struct dss_pll *pll) static int dsi_dump_dsi_clocks(struct seq_file *s, void *p) { - struct dsi_data *dsi = p; + struct dsi_data *dsi = s->private; struct dss_pll_clock_info *cinfo = &dsi->pll.cinfo; enum dss_clk_source dispc_clk_src, dsi_clk_src; int dsi_module = dsi->module_id; @@ -1467,7 +1467,7 @@ static int dsi_dump_dsi_clocks(struct seq_file *s, void *p) #ifdef CONFIG_OMAP2_DSS_COLLECT_IRQ_STATS static int dsi_dump_dsi_irqs(struct seq_file *s, void *p) { - struct dsi_data *dsi = p; + struct dsi_data *dsi = s->private; unsigned long flags; struct dsi_irq_stats stats; @@ -1558,7 +1558,7 @@ static int dsi_dump_dsi_irqs(struct seq_file *s, void *p) static int dsi_dump_dsi_regs(struct seq_file *s, void *p) { - struct dsi_data *dsi = p; + struct dsi_data *dsi = s->private; if (dsi_runtime_get(dsi)) return 0; @@ -5083,15 +5083,15 @@ static int dsi_bind(struct device *dev, struct device *master, void *data) snprintf(name, sizeof(name), "dsi%u_regs", dsi->module_id + 1); dsi->debugfs.regs = dss_debugfs_create_file(dss, name, - dsi_dump_dsi_regs, &dsi); + dsi_dump_dsi_regs, dsi); #ifdef CONFIG_OMAP2_DSS_COLLECT_IRQ_STATS snprintf(name, sizeof(name), "dsi%u_irqs", dsi->module_id + 1); dsi->debugfs.irqs = dss_debugfs_create_file(dss, name, - dsi_dump_dsi_irqs, &dsi); + dsi_dump_dsi_irqs, dsi); #endif snprintf(name, sizeof(name), "dsi%u_clks", dsi->module_id + 1); dsi->debugfs.clks = dss_debugfs_create_file(dss, name, - dsi_dump_dsi_clocks, &dsi); + dsi_dump_dsi_clocks, dsi); return 0; } From 0940c52742de0d2f70ba687bfd5fe8aa38c5f27d Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 11 Jan 2019 05:50:34 +0200 Subject: [PATCH 0658/1436] drm/omap: dsi: Fix OF platform depopulate Commit edb715dffdee ("drm/omap: dss: dsi: Move initialization code from bind to probe") moved the of_platform_populate() call from dsi_bind() to dsi_probe(), but failed to move the corresponding of_platform_depopulate() from dsi_unbind() to dsi_remove(). This results in OF child devices being potentially removed multiple times. Fix it by placing the of_platform_depopulate() call where it belongs. Fixes: edb715dffdee ("drm/omap: dss: dsi: Move initialization code from bind to probe") Signed-off-by: Laurent Pinchart Reviewed-by: Laurent Pinchart Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20190111035120.20668-4-laurent.pinchart@ideasonboard.com --- drivers/gpu/drm/omapdrm/dss/dsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c index 277f9dd2ec8c..b5685018d830 100644 --- a/drivers/gpu/drm/omapdrm/dss/dsi.c +++ b/drivers/gpu/drm/omapdrm/dss/dsi.c @@ -5104,8 +5104,6 @@ static void dsi_unbind(struct device *dev, struct device *master, void *data) dss_debugfs_remove_file(dsi->debugfs.irqs); dss_debugfs_remove_file(dsi->debugfs.regs); - of_platform_depopulate(dev); - WARN_ON(dsi->scp_clk_refcount > 0); dss_pll_unregister(&dsi->pll); @@ -5457,6 +5455,8 @@ static int dsi_remove(struct platform_device *pdev) dsi_uninit_output(dsi); + of_platform_depopulate(&pdev->dev); + pm_runtime_disable(&pdev->dev); if (dsi->vdds_dsi_reg != NULL && dsi->vdds_dsi_enabled) { From 6297388e1eddd2f1345cea5892156223995bcf2d Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 11 Jan 2019 05:50:35 +0200 Subject: [PATCH 0659/1436] drm/omap: dsi: Hack-fix DSI bus flags Since commit b4935e3a3cfa ("drm/omap: Store bus flags in the omap_dss_device structure") video mode flags are managed by the omapdss (and later omapdrm) core based on bus flags stored in omap_dss_device. This works fine for all devices whose video modes are set by the omapdss and omapdrm core, but breaks DSI operation as the DSI still uses legacy code paths and sets the DISPC timings manually. To fix the problem properly we should move the DSI encoder to the new encoder model. This will however require a considerable amount of work. Restore DSI operation by adding back video mode flags handling in the DSI encoder driver as a hack in the meantime. Fixes: b4935e3a3cfa ("drm/omap: Store bus flags in the omap_dss_device structure") Signed-off-by: Laurent Pinchart Reviewed-by: Laurent Pinchart Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20190111035120.20668-5-laurent.pinchart@ideasonboard.com --- drivers/gpu/drm/omapdrm/dss/dsi.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c index b5685018d830..64fb788b6647 100644 --- a/drivers/gpu/drm/omapdrm/dss/dsi.c +++ b/drivers/gpu/drm/omapdrm/dss/dsi.c @@ -4751,6 +4751,17 @@ static int dsi_set_config(struct omap_dss_device *dssdev, dsi->vm.flags |= DISPLAY_FLAGS_HSYNC_HIGH; dsi->vm.flags &= ~DISPLAY_FLAGS_VSYNC_LOW; dsi->vm.flags |= DISPLAY_FLAGS_VSYNC_HIGH; + /* + * HACK: These flags should be handled through the omap_dss_device bus + * flags, but this will only be possible when the DSI encoder will be + * converted to the omapdrm-managed encoder model. + */ + dsi->vm.flags &= ~DISPLAY_FLAGS_PIXDATA_NEGEDGE; + dsi->vm.flags |= DISPLAY_FLAGS_PIXDATA_POSEDGE; + dsi->vm.flags &= ~DISPLAY_FLAGS_DE_LOW; + dsi->vm.flags |= DISPLAY_FLAGS_DE_HIGH; + dsi->vm.flags &= ~DISPLAY_FLAGS_SYNC_POSEDGE; + dsi->vm.flags |= DISPLAY_FLAGS_SYNC_NEGEDGE; dss_mgr_set_timings(&dsi->output, &dsi->vm); From 52158f009742b4726121cf51f6d5fd8590ef7996 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Feb 2019 20:03:20 -0800 Subject: [PATCH 0660/1436] selftests/bpf: fix the expected messages Recent changes added extack to program replacement path, expect extack instead of generic messages. Fixes: 01dde20ce04b ("xdp: Provide extack messages when prog attachment failed") Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_offload.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index d59642e70f56..a564d1a5a1b8 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -842,7 +842,9 @@ try: ret, _, err = sim.set_xdp(obj, "generic", force=True, fail=False, include_stderr=True) fail(ret == 0, "Replaced XDP program with a program in different mode") - fail(err.count("File exists") != 1, "Replaced driver XDP with generic") + check_extack(err, + "native and generic XDP can't be active at the same time.", + args) ret, _, err = sim.set_xdp(obj, "", force=True, fail=False, include_stderr=True) fail(ret == 0, "Replaced XDP program with a program in different mode") @@ -957,7 +959,8 @@ try: start_test("Test multi-attachment XDP - replace...") ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True) - fail(err.count("busy") != 1, "Replaced one of programs without -force") + fail(ret == 0, "Replaced one of programs without -force") + check_extack(err, "XDP program already attached.", args) start_test("Test multi-attachment XDP - detach...") ret, _, err = sim.unset_xdp("drv", force=True, From 9ee963d6a1a03a4302b230cd21476a3c269af284 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Feb 2019 20:03:21 -0800 Subject: [PATCH 0661/1436] net: xdp: allow generic and driver XDP on one interface Since commit a25717d2b604 ("xdp: support simultaneous driver and hw XDP attachment") users can load an XDP program for offload and in native driver mode simultaneously. Allow a similar mix of offload and SKB mode/generic XDP. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- net/core/dev.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index bfa4be42afff..78c3b48392e1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7976,11 +7976,13 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, enum bpf_netdev_command query; struct bpf_prog *prog = NULL; bpf_op_t bpf_op, bpf_chk; + bool offload; int err; ASSERT_RTNL(); - query = flags & XDP_FLAGS_HW_MODE ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG; + offload = flags & XDP_FLAGS_HW_MODE; + query = offload ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG; bpf_op = bpf_chk = ops->ndo_bpf; if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) { @@ -7993,8 +7995,7 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, bpf_chk = generic_xdp_install; if (fd >= 0) { - if (__dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG) || - __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG_HW)) { + if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) { NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time"); return -EEXIST; } @@ -8009,8 +8010,7 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, if (IS_ERR(prog)) return PTR_ERR(prog); - if (!(flags & XDP_FLAGS_HW_MODE) && - bpf_prog_is_dev_bound(prog->aux)) { + if (!offload && bpf_prog_is_dev_bound(prog->aux)) { NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported"); bpf_prog_put(prog); return -EINVAL; From d7f356380241ad5d34756eb551fffcf351ac969b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Feb 2019 20:03:22 -0800 Subject: [PATCH 0662/1436] selftests/bpf: print traceback when test fails Figuring out which exact check in test_offload.py takes more time than it should. Print the traceback (to the screen and the logs). Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_offload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index a564d1a5a1b8..c379b36a5443 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -23,6 +23,7 @@ import string import struct import subprocess import time +import traceback logfile = None log_level = 1 @@ -78,7 +79,9 @@ def fail(cond, msg): if not cond: return print("FAIL: " + msg) - log("FAIL: " + msg, "", level=1) + tb = "".join(traceback.extract_stack().format()) + print(tb) + log("FAIL: " + msg, tb, level=1) os.sys.exit(1) def start_test(msg): From 06ea9e63cd2307213652496df089cc950f9aacc6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Feb 2019 20:03:23 -0800 Subject: [PATCH 0663/1436] selftests/bpf: add test for mixing generic and offload XDP Add simple sanity check for enabling generic and offload XDP, simply reuse the native and offload checks. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_offload.py | 116 +++++++++++--------- 1 file changed, 62 insertions(+), 54 deletions(-) diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index c379b36a5443..13fe12d09704 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -603,6 +603,65 @@ def test_spurios_extack(sim, obj, skip_hw, needle): include_stderr=True) check_no_extack(res, needle) +def test_multi_prog(sim, obj, modename, modeid): + start_test("Test multi-attachment XDP - %s + offload..." % + (modename or "default", )) + sim.set_xdp(obj, "offload") + xdp = sim.ip_link_show(xdp=True)["xdp"] + offloaded = sim.dfs_read("bpf_offloaded_id") + fail("prog" not in xdp, "Base program not reported in single program mode") + fail(len(xdp["attached"]) != 1, + "Wrong attached program count with one program") + + sim.set_xdp(obj, modename) + two_xdps = sim.ip_link_show(xdp=True)["xdp"] + offloaded2 = sim.dfs_read("bpf_offloaded_id") + + fail(two_xdps["mode"] != 4, "Bad mode reported with multiple programs") + fail("prog" in two_xdps, "Base program reported in multi program mode") + fail(xdp["attached"][0] not in two_xdps["attached"], + "Offload program not reported after other activated") + fail(len(two_xdps["attached"]) != 2, + "Wrong attached program count with two programs") + fail(two_xdps["attached"][0]["prog"]["id"] == + two_xdps["attached"][1]["prog"]["id"], + "Offloaded and other programs have the same id") + fail(offloaded != offloaded2, + "Offload ID changed after loading other program") + + start_test("Test multi-attachment XDP - replace...") + ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True) + fail(ret == 0, "Replaced one of programs without -force") + check_extack(err, "XDP program already attached.", args) + + if modename == "" or modename == "drv": + othermode = "" if modename == "drv" else "drv" + start_test("Test multi-attachment XDP - detach...") + ret, _, err = sim.unset_xdp(othermode, force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Removed program with a bad mode") + check_extack(err, "program loaded with different flags.", args) + + sim.unset_xdp("offload") + xdp = sim.ip_link_show(xdp=True)["xdp"] + offloaded = sim.dfs_read("bpf_offloaded_id") + + fail(xdp["mode"] != modeid, "Bad mode reported after multiple programs") + fail("prog" not in xdp, + "Base program not reported after multi program mode") + fail(xdp["attached"][0] not in two_xdps["attached"], + "Offload program not reported after other activated") + fail(len(xdp["attached"]) != 1, + "Wrong attached program count with remaining programs") + fail(offloaded != "0", "Offload ID reported with only other program left") + + start_test("Test multi-attachment XDP - device remove...") + sim.set_xdp(obj, "offload") + sim.remove() + + sim = NetdevSim() + sim.set_ethtool_tc_offloads(True) + return sim # Parse command line parser = argparse.ArgumentParser() @@ -936,60 +995,9 @@ try: rm(pin_file) bpftool_prog_list_wait(expected=0) - start_test("Test multi-attachment XDP - attach...") - sim.set_xdp(obj, "offload") - xdp = sim.ip_link_show(xdp=True)["xdp"] - offloaded = sim.dfs_read("bpf_offloaded_id") - fail("prog" not in xdp, "Base program not reported in single program mode") - fail(len(ipl["xdp"]["attached"]) != 1, - "Wrong attached program count with one program") - - sim.set_xdp(obj, "") - two_xdps = sim.ip_link_show(xdp=True)["xdp"] - offloaded2 = sim.dfs_read("bpf_offloaded_id") - - fail(two_xdps["mode"] != 4, "Bad mode reported with multiple programs") - fail("prog" in two_xdps, "Base program reported in multi program mode") - fail(xdp["attached"][0] not in two_xdps["attached"], - "Offload program not reported after driver activated") - fail(len(two_xdps["attached"]) != 2, - "Wrong attached program count with two programs") - fail(two_xdps["attached"][0]["prog"]["id"] == - two_xdps["attached"][1]["prog"]["id"], - "offloaded and drv programs have the same id") - fail(offloaded != offloaded2, - "offload ID changed after loading driver program") - - start_test("Test multi-attachment XDP - replace...") - ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True) - fail(ret == 0, "Replaced one of programs without -force") - check_extack(err, "XDP program already attached.", args) - - start_test("Test multi-attachment XDP - detach...") - ret, _, err = sim.unset_xdp("drv", force=True, - fail=False, include_stderr=True) - fail(ret == 0, "Removed program with a bad mode") - check_extack(err, "program loaded with different flags.", args) - - sim.unset_xdp("offload") - xdp = sim.ip_link_show(xdp=True)["xdp"] - offloaded = sim.dfs_read("bpf_offloaded_id") - - fail(xdp["mode"] != 1, "Bad mode reported after multiple programs") - fail("prog" not in xdp, - "Base program not reported after multi program mode") - fail(xdp["attached"][0] not in two_xdps["attached"], - "Offload program not reported after driver activated") - fail(len(ipl["xdp"]["attached"]) != 1, - "Wrong attached program count with remaining programs") - fail(offloaded != "0", "offload ID reported with only driver program left") - - start_test("Test multi-attachment XDP - device remove...") - sim.set_xdp(obj, "offload") - sim.remove() - - sim = NetdevSim() - sim.set_ethtool_tc_offloads(True) + sim = test_multi_prog(sim, obj, "", 1) + sim = test_multi_prog(sim, obj, "drv", 1) + sim = test_multi_prog(sim, obj, "generic", 2) start_test("Test mixing of TC and XDP...") sim.tc_add_ingress() From 42a40e840d4499d06371096ec96fdb7b7018fabf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Feb 2019 20:03:24 -0800 Subject: [PATCH 0664/1436] selftests/bpf: test reading the offloaded program Test adding the offloaded program after the other program is already installed. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_offload.py | 29 ++++++++++++++------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 13fe12d09704..84bea3985d64 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -592,6 +592,15 @@ def check_verifier_log(output, reference): return fail(True, "Missing or incorrect message from netdevsim in verifier log") +def check_multi_basic(two_xdps): + fail(two_xdps["mode"] != 4, "Bad mode reported with multiple programs") + fail("prog" in two_xdps, "Base program reported in multi program mode") + fail(len(two_xdps["attached"]) != 2, + "Wrong attached program count with two programs") + fail(two_xdps["attached"][0]["prog"]["id"] == + two_xdps["attached"][1]["prog"]["id"], + "Offloaded and other programs have the same id") + def test_spurios_extack(sim, obj, skip_hw, needle): res = sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=skip_hw, include_stderr=True) @@ -615,17 +624,12 @@ def test_multi_prog(sim, obj, modename, modeid): sim.set_xdp(obj, modename) two_xdps = sim.ip_link_show(xdp=True)["xdp"] - offloaded2 = sim.dfs_read("bpf_offloaded_id") - fail(two_xdps["mode"] != 4, "Bad mode reported with multiple programs") - fail("prog" in two_xdps, "Base program reported in multi program mode") fail(xdp["attached"][0] not in two_xdps["attached"], "Offload program not reported after other activated") - fail(len(two_xdps["attached"]) != 2, - "Wrong attached program count with two programs") - fail(two_xdps["attached"][0]["prog"]["id"] == - two_xdps["attached"][1]["prog"]["id"], - "Offloaded and other programs have the same id") + check_multi_basic(two_xdps) + + offloaded2 = sim.dfs_read("bpf_offloaded_id") fail(offloaded != offloaded2, "Offload ID changed after loading other program") @@ -655,8 +659,15 @@ def test_multi_prog(sim, obj, modename, modeid): "Wrong attached program count with remaining programs") fail(offloaded != "0", "Offload ID reported with only other program left") - start_test("Test multi-attachment XDP - device remove...") + start_test("Test multi-attachment XDP - reattach...") sim.set_xdp(obj, "offload") + two_xdps = sim.ip_link_show(xdp=True)["xdp"] + + fail(xdp["attached"][0] not in two_xdps["attached"], + "Other program not reported after offload activated") + check_multi_basic(two_xdps) + + start_test("Test multi-attachment XDP - device remove...") sim.remove() sim = NetdevSim() From a5f2d082702c7c3223e56249dfb1ad20746139e1 Mon Sep 17 00:00:00 2001 From: Prashant Bhole Date: Wed, 6 Feb 2019 10:47:23 +0900 Subject: [PATCH 0665/1436] tools: bpftool: doc, fix incorrect text Documentation about cgroup, feature, prog uses wrong header 'MAP COMMANDS' while listing commands. This patch corrects the header in respective doc files. Signed-off-by: Prashant Bhole Acked-by: Yonghong Song Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 4 ++-- tools/bpf/bpftool/Documentation/bpftool-feature.rst | 4 ++-- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index d43fce568ef7..9bb9ace54ba8 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -17,8 +17,8 @@ SYNOPSIS *COMMANDS* := { **show** | **list** | **tree** | **attach** | **detach** | **help** } -MAP COMMANDS -============= +CGROUP COMMANDS +=============== | **bpftool** **cgroup { show | list }** *CGROUP* | **bpftool** **cgroup tree** [*CGROUP_ROOT*] diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst index 8d489a26e3c9..82de03dd8f52 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst @@ -16,8 +16,8 @@ SYNOPSIS *COMMANDS* := { **probe** | **help** } -MAP COMMANDS -============= +FEATURE COMMANDS +================ | **bpftool** **feature probe** [*COMPONENT*] [**macros** [**prefix** *PREFIX*]] | **bpftool** **feature help** diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 13b56102f528..7e59495cb028 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -18,7 +18,7 @@ SYNOPSIS { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | **loadall** | **help** } -MAP COMMANDS +PROG COMMANDS ============= | **bpftool** **prog { show | list }** [*PROG*] From dd9cef43c222df7c0d76d34451808e789952379d Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 5 Feb 2019 15:12:34 -0200 Subject: [PATCH 0666/1436] bpf: test_maps: fix possible out of bound access warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling test_maps selftest with GCC-8, it warns that an array might be indexed with a negative value, which could cause a negative out of bound access, depending on parameters of the function. This is the GCC-8 warning: gcc -Wall -O2 -I../../../include/uapi -I../../../lib -I../../../lib/bpf -I../../../../include/generated -DHAVE_GENHDR -I../../../include test_maps.c /home/breno/Devel/linux/tools/testing/selftests/bpf/libbpf.a -lcap -lelf -lrt -lpthread -o /home/breno/Devel/linux/tools/testing/selftests/bpf/test_maps In file included from test_maps.c:16: test_maps.c: In function ‘run_all_tests’: test_maps.c:1079:10: warning: array subscript -1 is below array bounds of ‘pid_t[ + 1]’ [-Warray-bounds] assert(waitpid(pid[i], &status, 0) == pid[i]); ^~~~~~~~~~~~~~~~~~~~~~~~~~~ test_maps.c:1059:6: warning: array subscript -1 is below array bounds of ‘pid_t[ + 1]’ [-Warray-bounds] pid[i] = fork(); ~~~^~~ This patch simply guarantees that the task(s) variables are unsigned, thus, they could never be a negative number (which they are not in current code anyway), hence avoiding an out of bound access warning. Signed-off-by: Breno Leitao Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_maps.c | 27 +++++++++++++------------ 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index e7798dd97f4b..3c627771f965 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -45,7 +45,7 @@ static int map_flags; } \ }) -static void test_hashmap(int task, void *data) +static void test_hashmap(unsigned int task, void *data) { long long key, next_key, first_key, value; int fd; @@ -135,7 +135,7 @@ static void test_hashmap(int task, void *data) close(fd); } -static void test_hashmap_sizes(int task, void *data) +static void test_hashmap_sizes(unsigned int task, void *data) { int fd, i, j; @@ -155,7 +155,7 @@ static void test_hashmap_sizes(int task, void *data) } } -static void test_hashmap_percpu(int task, void *data) +static void test_hashmap_percpu(unsigned int task, void *data) { unsigned int nr_cpus = bpf_num_possible_cpus(); BPF_DECLARE_PERCPU(long, value); @@ -282,7 +282,7 @@ static int helper_fill_hashmap(int max_entries) return fd; } -static void test_hashmap_walk(int task, void *data) +static void test_hashmap_walk(unsigned int task, void *data) { int fd, i, max_entries = 1000; long long key, value, next_key; @@ -353,7 +353,7 @@ static void test_hashmap_zero_seed(void) close(second); } -static void test_arraymap(int task, void *data) +static void test_arraymap(unsigned int task, void *data) { int key, next_key, fd; long long value; @@ -408,7 +408,7 @@ static void test_arraymap(int task, void *data) close(fd); } -static void test_arraymap_percpu(int task, void *data) +static void test_arraymap_percpu(unsigned int task, void *data) { unsigned int nr_cpus = bpf_num_possible_cpus(); BPF_DECLARE_PERCPU(long, values); @@ -504,7 +504,7 @@ static void test_arraymap_percpu_many_keys(void) close(fd); } -static void test_devmap(int task, void *data) +static void test_devmap(unsigned int task, void *data) { int fd; __u32 key, value; @@ -519,7 +519,7 @@ static void test_devmap(int task, void *data) close(fd); } -static void test_queuemap(int task, void *data) +static void test_queuemap(unsigned int task, void *data) { const int MAP_SIZE = 32; __u32 vals[MAP_SIZE + MAP_SIZE/2], val; @@ -577,7 +577,7 @@ static void test_queuemap(int task, void *data) close(fd); } -static void test_stackmap(int task, void *data) +static void test_stackmap(unsigned int task, void *data) { const int MAP_SIZE = 32; __u32 vals[MAP_SIZE + MAP_SIZE/2], val; @@ -642,7 +642,7 @@ static void test_stackmap(int task, void *data) #define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o" #define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o" #define SOCKMAP_TCP_MSG_PROG "./sockmap_tcp_msg_prog.o" -static void test_sockmap(int tasks, void *data) +static void test_sockmap(unsigned int tasks, void *data) { struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_msg, *bpf_map_break; int map_fd_msg = 0, map_fd_rx = 0, map_fd_tx = 0, map_fd_break; @@ -1268,10 +1268,11 @@ static void test_map_large(void) } #define run_parallel(N, FN, DATA) \ - printf("Fork %d tasks to '" #FN "'\n", N); \ + printf("Fork %u tasks to '" #FN "'\n", N); \ __run_parallel(N, FN, DATA) -static void __run_parallel(int tasks, void (*fn)(int task, void *data), +static void __run_parallel(unsigned int tasks, + void (*fn)(unsigned int task, void *data), void *data) { pid_t pid[tasks]; @@ -1312,7 +1313,7 @@ static void test_map_stress(void) #define DO_UPDATE 1 #define DO_DELETE 0 -static void test_update_delete(int fn, void *data) +static void test_update_delete(unsigned int fn, void *data) { int do_update = ((int *)data)[1]; int fd = ((int *)data)[0]; From d49d92ace4974510dd5845ff91860823cf30dc08 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 5 Feb 2019 23:42:23 +0800 Subject: [PATCH 0667/1436] mmc: sunxi: Disable HS-DDR mode for H5 eMMC controller by default Some H5 boards seem to not have proper trace lengths for eMMC to be able to use the default setting for the delay chains under HS-DDR mode. These include the Bananapi M2+ H5 and NanoPi NEO Core2. However the Libre Computer ALL-H3-CC-H5 works just fine. For the H5 (at least for now), default to not enabling HS-DDR modes in the driver, and expect the device tree to signal HS-DDR capability on boards that work. Reported-by: Chris Blake Fixes: 07bafc1e3536 ("mmc: sunxi: Use new timing mode for A64 eMMC controller") Cc: Acked-by: Maxime Ripard Signed-off-by: Chen-Yu Tsai Signed-off-by: Ulf Hansson --- drivers/mmc/host/sunxi-mmc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c index 279e326e397e..7415af8c8ff6 100644 --- a/drivers/mmc/host/sunxi-mmc.c +++ b/drivers/mmc/host/sunxi-mmc.c @@ -1399,7 +1399,16 @@ static int sunxi_mmc_probe(struct platform_device *pdev) mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED | MMC_CAP_ERASE | MMC_CAP_SDIO_IRQ; - if (host->cfg->clk_delays || host->use_new_timings) + /* + * Some H5 devices do not have signal traces precise enough to + * use HS DDR mode for their eMMC chips. + * + * We still enable HS DDR modes for all the other controller + * variants that support them. + */ + if ((host->cfg->clk_delays || host->use_new_timings) && + !of_device_is_compatible(pdev->dev.of_node, + "allwinner,sun50i-h5-emmc")) mmc->caps |= MMC_CAP_1_8V_DDR | MMC_CAP_3_3V_DDR; ret = mmc_of_parse(mmc); From d6f11e7d91f2ac85f66194fe3ef8789b49901d64 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 5 Feb 2019 23:42:24 +0800 Subject: [PATCH 0668/1436] mmc: sunxi: Filter out unsupported modes declared in the device tree The MMC device tree bindings include properties used to signal various signalling speed modes. Until now the sunxi driver was accepting them without any further filtering, while the sunxi device trees were not actually using them. Since some of the H5 boards can not run at higher speed modes stably, we are resorting to declaring the higher speed modes per-board. Regardless, having boards declare modes and blindly following them, even without proper support in the driver, is generally a bad thing. Filter out all unsupported modes from the capabilities mask after the device tree properties have been parsed. Cc: Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Ulf Hansson --- drivers/mmc/host/sunxi-mmc.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c index 7415af8c8ff6..70fadc976795 100644 --- a/drivers/mmc/host/sunxi-mmc.c +++ b/drivers/mmc/host/sunxi-mmc.c @@ -1415,6 +1415,21 @@ static int sunxi_mmc_probe(struct platform_device *pdev) if (ret) goto error_free_dma; + /* + * If we don't support delay chains in the SoC, we can't use any + * of the higher speed modes. Mask them out in case the device + * tree specifies the properties for them, which gets added to + * the caps by mmc_of_parse() above. + */ + if (!(host->cfg->clk_delays || host->use_new_timings)) { + mmc->caps &= ~(MMC_CAP_3_3V_DDR | MMC_CAP_1_8V_DDR | + MMC_CAP_1_2V_DDR | MMC_CAP_UHS); + mmc->caps2 &= ~MMC_CAP2_HS200; + } + + /* TODO: This driver doesn't support HS400 mode yet */ + mmc->caps2 &= ~MMC_CAP2_HS400; + ret = sunxi_mmc_init_host(host); if (ret) goto error_free_dma; From ec51f8ee1e63498e9f521ec0e5a6d04622bb2c67 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Tue, 5 Feb 2019 14:13:35 -0500 Subject: [PATCH 0669/1436] aio: initialize kiocb private in case any filesystems expect it. A recent optimization had left private uninitialized. Fixes: 2bc4ca9bb600 ("aio: don't zero entire aio_kiocb aio_get_req()") Reviewed-by: Christoph Hellwig Signed-off-by: Mike Marshall Signed-off-by: Jens Axboe --- fs/aio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/aio.c b/fs/aio.c index b906ff70c90f..aaaaf4d12c73 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1436,6 +1436,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) if (unlikely(!req->ki_filp)) return -EBADF; req->ki_complete = aio_complete_rw; + req->private = NULL; req->ki_pos = iocb->aio_offset; req->ki_flags = iocb_flags(req->ki_filp); if (iocb->aio_flags & IOCB_FLAG_RESFD) From e7ad43c3eda6a1690c4c3c341f95dc1c6898da83 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 28 Jan 2019 09:46:07 -0700 Subject: [PATCH 0670/1436] nvme: lock NS list changes while handling command effects If a controller supports the NS Change Notification, the namespace scan_work is automatically triggered after attaching a new namespace. Occasionally the namespace scan_work may append the new namespace to the list before the admin command effects handling is completed. The effects handling unfreezes namespaces, but if it unfreezes the newly attached namespace, its request_queue freeze depth will be off and we'll hit the warning in blk_mq_unfreeze_queue(). On the next namespace add, we will fail to freeze that queue due to the previous bad accounting and deadlock waiting for frozen. Fix that by preventing scan work from altering the namespace list while command effects handling needs to pair freeze with unfreeze. Reported-by: Wen Xiong Tested-by: Wen Xiong Signed-off-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 8 +++++++- drivers/nvme/host/nvme.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 150e49723c15..6a9dd68c0f4f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1253,6 +1253,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, * effects say only one namespace is affected. */ if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { + mutex_lock(&ctrl->scan_lock); nvme_start_freeze(ctrl); nvme_wait_freeze(ctrl); } @@ -1281,8 +1282,10 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) */ if (effects & NVME_CMD_EFFECTS_LBCC) nvme_update_formats(ctrl); - if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) + if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { nvme_unfreeze(ctrl); + mutex_unlock(&ctrl->scan_lock); + } if (effects & NVME_CMD_EFFECTS_CCC) nvme_init_identify(ctrl); if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) @@ -3401,6 +3404,7 @@ static void nvme_scan_work(struct work_struct *work) if (nvme_identify_ctrl(ctrl, &id)) return; + mutex_lock(&ctrl->scan_lock); nn = le32_to_cpu(id->nn); if (ctrl->vs >= NVME_VS(1, 1, 0) && !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) { @@ -3409,6 +3413,7 @@ static void nvme_scan_work(struct work_struct *work) } nvme_scan_ns_sequential(ctrl, nn); out_free_id: + mutex_unlock(&ctrl->scan_lock); kfree(id); down_write(&ctrl->namespaces_rwsem); list_sort(NULL, &ctrl->namespaces, ns_cmp); @@ -3652,6 +3657,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ctrl->state = NVME_CTRL_NEW; spin_lock_init(&ctrl->lock); + mutex_init(&ctrl->scan_lock); INIT_LIST_HEAD(&ctrl->namespaces); init_rwsem(&ctrl->namespaces_rwsem); ctrl->dev = dev; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ab961bdeea89..c4a1bb41abf0 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -154,6 +154,7 @@ struct nvme_ctrl { enum nvme_ctrl_state state; bool identified; spinlock_t lock; + struct mutex scan_lock; const struct nvme_ctrl_ops *ops; struct request_queue *admin_q; struct request_queue *connect_q; From 5c959d73dba6495ec01d04c206ee679d61ccb2b0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 23 Jan 2019 18:46:11 -0700 Subject: [PATCH 0671/1436] nvme-pci: fix rapid add remove sequence A surprise removal may fail to tear down request queues if it is racing with the initial asynchronous probe. If that happens, the remove path won't see the queue resources to tear down, and the controller reset path may create a new request queue on a removed device, but will not be able to make forward progress, deadlocking the pci removal. Protect setting up non-blocking resources from a shutdown by holding the same mutex, and transition to the CONNECTING state after these resources are initialized so the probe path may see the dead controller state before dispatching new IO. Link: https://bugzilla.kernel.org/show_bug.cgi?id=202081 Reported-by: Alex Gagniuc Signed-off-by: Keith Busch Tested-by: Alex Gagniuc Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9bc585415d9b..022ea1ee63f8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2557,16 +2557,7 @@ static void nvme_reset_work(struct work_struct *work) if (dev->ctrl.ctrl_config & NVME_CC_ENABLE) nvme_dev_disable(dev, false); - /* - * Introduce CONNECTING state from nvme-fc/rdma transports to mark the - * initializing procedure here. - */ - if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) { - dev_warn(dev->ctrl.device, - "failed to mark controller CONNECTING\n"); - goto out; - } - + mutex_lock(&dev->shutdown_lock); result = nvme_pci_enable(dev); if (result) goto out; @@ -2585,6 +2576,17 @@ static void nvme_reset_work(struct work_struct *work) */ dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1; dev->ctrl.max_segments = NVME_MAX_SEGS; + mutex_unlock(&dev->shutdown_lock); + + /* + * Introduce CONNECTING state from nvme-fc/rdma transports to mark the + * initializing procedure here. + */ + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) { + dev_warn(dev->ctrl.device, + "failed to mark controller CONNECTING\n"); + goto out; + } result = nvme_init_identify(&dev->ctrl); if (result) From c16e12010060c6c7a31f08b4a99513064cb53b7d Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 5 Feb 2019 10:22:27 -0600 Subject: [PATCH 0672/1436] ASoC: dapm: fix out-of-bounds accesses to DAPM lookup tables KASAN reports and additional traces point to out-of-bounds accesses to the dapm_up_seq and dapm_down_seq lookup tables. The indices used are larger than the array definition. Fix by adding missing entries for the new widget types in these two lookup tables, and align them with PGA values. Also the sequences for the following widgets were not defined. Since their values defaulted to zero, assign them explicitly snd_soc_dapm_input snd_soc_dapm_output snd_soc_dapm_vmid snd_soc_dapm_siggen snd_soc_dapm_sink Fixes: 8a70b4544ef4 ('ASoC: dapm: Add new widget type for constructing DAPM graphs on DSPs.'). Signed-off-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/soc-dapm.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 2c4c13419539..20bad755888b 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -70,12 +70,16 @@ static int dapm_up_seq[] = { [snd_soc_dapm_clock_supply] = 1, [snd_soc_dapm_supply] = 2, [snd_soc_dapm_micbias] = 3, + [snd_soc_dapm_vmid] = 3, [snd_soc_dapm_dai_link] = 2, [snd_soc_dapm_dai_in] = 4, [snd_soc_dapm_dai_out] = 4, [snd_soc_dapm_aif_in] = 4, [snd_soc_dapm_aif_out] = 4, [snd_soc_dapm_mic] = 5, + [snd_soc_dapm_siggen] = 5, + [snd_soc_dapm_input] = 5, + [snd_soc_dapm_output] = 5, [snd_soc_dapm_mux] = 6, [snd_soc_dapm_demux] = 6, [snd_soc_dapm_dac] = 7, @@ -83,11 +87,19 @@ static int dapm_up_seq[] = { [snd_soc_dapm_mixer] = 8, [snd_soc_dapm_mixer_named_ctl] = 8, [snd_soc_dapm_pga] = 9, + [snd_soc_dapm_buffer] = 9, + [snd_soc_dapm_scheduler] = 9, + [snd_soc_dapm_effect] = 9, + [snd_soc_dapm_src] = 9, + [snd_soc_dapm_asrc] = 9, + [snd_soc_dapm_encoder] = 9, + [snd_soc_dapm_decoder] = 9, [snd_soc_dapm_adc] = 10, [snd_soc_dapm_out_drv] = 11, [snd_soc_dapm_hp] = 11, [snd_soc_dapm_spk] = 11, [snd_soc_dapm_line] = 11, + [snd_soc_dapm_sink] = 11, [snd_soc_dapm_kcontrol] = 12, [snd_soc_dapm_post] = 13, }; @@ -100,13 +112,25 @@ static int dapm_down_seq[] = { [snd_soc_dapm_spk] = 3, [snd_soc_dapm_line] = 3, [snd_soc_dapm_out_drv] = 3, + [snd_soc_dapm_sink] = 3, [snd_soc_dapm_pga] = 4, + [snd_soc_dapm_buffer] = 4, + [snd_soc_dapm_scheduler] = 4, + [snd_soc_dapm_effect] = 4, + [snd_soc_dapm_src] = 4, + [snd_soc_dapm_asrc] = 4, + [snd_soc_dapm_encoder] = 4, + [snd_soc_dapm_decoder] = 4, [snd_soc_dapm_switch] = 5, [snd_soc_dapm_mixer_named_ctl] = 5, [snd_soc_dapm_mixer] = 5, [snd_soc_dapm_dac] = 6, [snd_soc_dapm_mic] = 7, + [snd_soc_dapm_siggen] = 7, + [snd_soc_dapm_input] = 7, + [snd_soc_dapm_output] = 7, [snd_soc_dapm_micbias] = 8, + [snd_soc_dapm_vmid] = 8, [snd_soc_dapm_mux] = 9, [snd_soc_dapm_demux] = 9, [snd_soc_dapm_aif_in] = 10, From d9111d36024de07784f2e1ba2ccf70b16035f378 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 5 Feb 2019 09:46:43 +0900 Subject: [PATCH 0673/1436] ASoC: rsnd: fixup rsnd_ssi_master_clk_start() user count check commit 4d230d1271064 ("ASoC: rsnd: fixup not to call clk_get/set under non-atomic") added new rsnd_ssi_prepare() and moved rsnd_ssi_master_clk_start() to .prepare. But, ssi user count (= ssi->usrcnt) is incremented at .init (= rsnd_ssi_init()). Because of these timing exchange, ssi->usrcnt check at rsnd_ssi_master_clk_start() should be adjusted. Otherwise, 2nd master clock setup will be no check. This patch fixup this issue. Fixes: commit 4d230d1271064 ("ASoC: rsnd: fixup not to call clk_get/set under non-atomic") Reported-by: Yusuke Goda Reported-by: Valentine Barshak Signed-off-by: Kuninori Morimoto Tested-by: Yusuke Goda Signed-off-by: Mark Brown --- sound/soc/sh/rcar/ssi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index 45ef295743ec..f5afab631abb 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -286,7 +286,7 @@ static int rsnd_ssi_master_clk_start(struct rsnd_mod *mod, if (rsnd_ssi_is_multi_slave(mod, io)) return 0; - if (ssi->usrcnt > 1) { + if (ssi->usrcnt > 0) { if (ssi->rate != rate) { dev_err(dev, "SSI parent/child should use same rate\n"); return -EINVAL; From 1878f0dcbff0cd07f62602deb160a44d69a8f146 Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Wed, 6 Feb 2019 07:36:40 +0100 Subject: [PATCH 0674/1436] net: phy: provide full set of accessor functions to MMD registers This adds full set of locked and unlocked accessor functions to read and write PHY MMD registers and/or bitfields. Set of functions exactly matches what is already available for PHY legacy registers. Signed-off-by: Nikita Yushchenko Signed-off-by: Andrew Lunn Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy-core.c | 146 ++++++++++++++++++++++++++++++------- include/linux/phy.h | 134 ++++++++++++++++++++++++++++------ 2 files changed, 229 insertions(+), 51 deletions(-) diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 909b3344babf..7d6aad287f84 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -414,15 +414,15 @@ static void mmd_phy_indirect(struct mii_bus *bus, int phy_addr, int devad, } /** - * phy_read_mmd - Convenience function for reading a register + * __phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. * @phydev: The phy_device struct * @devad: The MMD to read from (0..31) * @regnum: The register on the MMD to read (0..65535) * - * Same rules as for phy_read(); + * Same rules as for __phy_read(); */ -int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) +int __phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) { int val; @@ -434,22 +434,80 @@ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) } else if (phydev->is_c45) { u32 addr = MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff); - val = mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, addr); + val = __mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, addr); } else { struct mii_bus *bus = phydev->mdio.bus; int phy_addr = phydev->mdio.addr; - mutex_lock(&bus->mdio_lock); mmd_phy_indirect(bus, phy_addr, devad, regnum); /* Read the content of the MMD's selected register */ val = __mdiobus_read(bus, phy_addr, MII_MMD_DATA); - mutex_unlock(&bus->mdio_lock); } return val; } +EXPORT_SYMBOL(__phy_read_mmd); + +/** + * phy_read_mmd - Convenience function for reading a register + * from an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to read from + * @regnum: The register on the MMD to read + * + * Same rules as for phy_read(); + */ +int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) +{ + int ret; + + mutex_lock(&phydev->mdio.bus->mdio_lock); + ret = __phy_read_mmd(phydev, devad, regnum); + mutex_unlock(&phydev->mdio.bus->mdio_lock); + + return ret; +} EXPORT_SYMBOL(phy_read_mmd); +/** + * __phy_write_mmd - Convenience function for writing a register + * on an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to read from + * @regnum: The register on the MMD to read + * @val: value to write to @regnum + * + * Same rules as for __phy_write(); + */ +int __phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val) +{ + int ret; + + if (regnum > (u16)~0 || devad > 32) + return -EINVAL; + + if (phydev->drv->write_mmd) { + ret = phydev->drv->write_mmd(phydev, devad, regnum, val); + } else if (phydev->is_c45) { + u32 addr = MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff); + + ret = __mdiobus_write(phydev->mdio.bus, phydev->mdio.addr, + addr, val); + } else { + struct mii_bus *bus = phydev->mdio.bus; + int phy_addr = phydev->mdio.addr; + + mmd_phy_indirect(bus, phy_addr, devad, regnum); + + /* Write the data into MMD's selected register */ + __mdiobus_write(bus, phy_addr, MII_MMD_DATA, val); + + ret = 0; + } + return ret; +} +EXPORT_SYMBOL(__phy_write_mmd); + /** * phy_write_mmd - Convenience function for writing a register * on an MMD on a given PHY. @@ -464,29 +522,10 @@ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val) { int ret; - if (regnum > (u16)~0 || devad > 32) - return -EINVAL; + mutex_lock(&phydev->mdio.bus->mdio_lock); + ret = __phy_write_mmd(phydev, devad, regnum, val); + mutex_unlock(&phydev->mdio.bus->mdio_lock); - if (phydev->drv->write_mmd) { - ret = phydev->drv->write_mmd(phydev, devad, regnum, val); - } else if (phydev->is_c45) { - u32 addr = MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff); - - ret = mdiobus_write(phydev->mdio.bus, phydev->mdio.addr, - addr, val); - } else { - struct mii_bus *bus = phydev->mdio.bus; - int phy_addr = phydev->mdio.addr; - - mutex_lock(&bus->mdio_lock); - mmd_phy_indirect(bus, phy_addr, devad, regnum); - - /* Write the data into MMD's selected register */ - __mdiobus_write(bus, phy_addr, MII_MMD_DATA, val); - mutex_unlock(&bus->mdio_lock); - - ret = 0; - } return ret; } EXPORT_SYMBOL(phy_write_mmd); @@ -538,6 +577,57 @@ int phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set) } EXPORT_SYMBOL_GPL(phy_modify); +/** + * __phy_modify_mmd - Convenience function for modifying a register on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: new value of bits set in mask to write to @regnum + * + * Unlocked helper function which allows a MMD register to be modified as + * new register value = (old register value & ~mask) | set + */ +int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set) +{ + int ret; + + ret = __phy_read_mmd(phydev, devad, regnum); + if (ret < 0) + return ret; + + ret = __phy_write_mmd(phydev, devad, regnum, (ret & ~mask) | set); + + return ret < 0 ? ret : 0; +} +EXPORT_SYMBOL_GPL(__phy_modify_mmd); + +/** + * phy_modify_mmd - Convenience function for modifying a register on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: new value of bits set in mask to write to @regnum + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + */ +int phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set) +{ + int ret; + + mutex_lock(&phydev->mdio.bus->mdio_lock); + ret = __phy_modify_mmd(phydev, devad, regnum, mask, set); + mutex_unlock(&phydev->mdio.bus->mdio_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(phy_modify_mmd); + static int __phy_read_page(struct phy_device *phydev) { return phydev->drv->read_page(phydev); diff --git a/include/linux/phy.h b/include/linux/phy.h index 70f83d0d7469..237dd035858a 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -692,17 +692,6 @@ static inline bool phy_is_started(struct phy_device *phydev) void phy_resolve_aneg_linkmode(struct phy_device *phydev); -/** - * phy_read_mmd - Convenience function for reading a register - * from an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to read from - * @regnum: The register on the MMD to read - * - * Same rules as for phy_read(); - */ -int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); - /** * phy_read - Convenience function for reading a given PHY register * @phydev: the phy_device struct @@ -758,9 +747,60 @@ static inline int __phy_write(struct phy_device *phydev, u32 regnum, u16 val) val); } +/** + * phy_read_mmd - Convenience function for reading a register + * from an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to read from + * @regnum: The register on the MMD to read + * + * Same rules as for phy_read(); + */ +int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); + +/** + * __phy_read_mmd - Convenience function for reading a register + * from an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to read from + * @regnum: The register on the MMD to read + * + * Same rules as for __phy_read(); + */ +int __phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); + +/** + * phy_write_mmd - Convenience function for writing a register + * on an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to write to + * @regnum: The register on the MMD to read + * @val: value to write to @regnum + * + * Same rules as for phy_write(); + */ +int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); + +/** + * __phy_write_mmd - Convenience function for writing a register + * on an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to write to + * @regnum: The register on the MMD to read + * @val: value to write to @regnum + * + * Same rules as for __phy_write(); + */ +int __phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); + int __phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set); int phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set); +int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set); +int phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set); + /** * __phy_set_bits - Convenience function for setting bits in a PHY register * @phydev: the phy_device struct @@ -810,6 +850,66 @@ static inline int phy_clear_bits(struct phy_device *phydev, u32 regnum, u16 val) return phy_modify(phydev, regnum, val, 0); } +/** + * __phy_set_bits_mmd - Convenience function for setting bits in a register + * on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @val: bits to set + * + * The caller must have taken the MDIO bus lock. + */ +static inline int __phy_set_bits_mmd(struct phy_device *phydev, int devad, + u32 regnum, u16 val) +{ + return __phy_modify_mmd(phydev, devad, regnum, 0, val); +} + +/** + * __phy_clear_bits_mmd - Convenience function for clearing bits in a register + * on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @val: bits to clear + * + * The caller must have taken the MDIO bus lock. + */ +static inline int __phy_clear_bits_mmd(struct phy_device *phydev, int devad, + u32 regnum, u16 val) +{ + return __phy_modify_mmd(phydev, devad, regnum, val, 0); +} + +/** + * phy_set_bits_mmd - Convenience function for setting bits in a register + * on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @val: bits to set + */ +static inline int phy_set_bits_mmd(struct phy_device *phydev, int devad, + u32 regnum, u16 val) +{ + return phy_modify_mmd(phydev, devad, regnum, 0, val); +} + +/** + * phy_clear_bits_mmd - Convenience function for clearing bits in a register + * on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @val: bits to clear + */ +static inline int phy_clear_bits_mmd(struct phy_device *phydev, int devad, + u32 regnum, u16 val) +{ + return phy_modify_mmd(phydev, devad, regnum, val, 0); +} + /** * phy_interrupt_is_valid - Convenience function for testing a given PHY irq * @phydev: the phy_device struct @@ -886,18 +986,6 @@ static inline bool phy_is_pseudo_fixed_link(struct phy_device *phydev) return phydev->is_pseudo_fixed_link; } -/** - * phy_write_mmd - Convenience function for writing a register - * on an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to read from - * @regnum: The register on the MMD to read - * @val: value to write to @regnum - * - * Same rules as for phy_write(); - */ -int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); - int phy_save_page(struct phy_device *phydev); int phy_select_page(struct phy_device *phydev, int page); int phy_restore_page(struct phy_device *phydev, int oldpage, int ret); From b52c018ddccfed4fded59546c502ddb1ea8c7ba1 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 6 Feb 2019 07:38:43 +0100 Subject: [PATCH 0675/1436] net: phy: make use of new MMD accessors Make use of the new MMD accessors. v2: - fix SoB Signed-off-by: Andrew Lunn Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/dp83867.c | 47 +++++++++++++----------------------- drivers/net/phy/dp83tc811.c | 15 +++--------- drivers/net/phy/marvell10g.c | 38 ++++++++--------------------- drivers/net/phy/phy-c45.c | 21 +++------------- drivers/net/phy/phy.c | 11 +++------ 5 files changed, 38 insertions(+), 94 deletions(-) diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 8a8d9f606b3e..fc09c5c1a4de 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -127,17 +127,13 @@ static int dp83867_config_port_mirroring(struct phy_device *phydev) { struct dp83867_private *dp83867 = (struct dp83867_private *)phydev->priv; - u16 val; - - val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4); if (dp83867->port_mirroring == DP83867_PORT_MIRROING_EN) - val |= DP83867_CFG4_PORT_MIRROR_EN; + phy_set_bits_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4, + DP83867_CFG4_PORT_MIRROR_EN); else - val &= ~DP83867_CFG4_PORT_MIRROR_EN; - - phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4, val); - + phy_clear_bits_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4, + DP83867_CFG4_PORT_MIRROR_EN); return 0; } @@ -222,11 +218,9 @@ static int dp83867_config_init(struct phy_device *phydev) } /* RX_DV/RX_CTRL strapped in mode 1 or mode 2 workaround */ - if (dp83867->rxctrl_strap_quirk) { - val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4); - val &= ~BIT(7); - phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4, val); - } + if (dp83867->rxctrl_strap_quirk) + phy_clear_bits_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4, + BIT(7)); if (phy_interface_is_rgmii(phydev)) { val = phy_read(phydev, MII_DP83867_PHYCTRL); @@ -275,17 +269,11 @@ static int dp83867_config_init(struct phy_device *phydev) phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_RGMIIDCTL, delay); - if (dp83867->io_impedance >= 0) { - val = phy_read_mmd(phydev, DP83867_DEVADDR, - DP83867_IO_MUX_CFG); - - val &= ~DP83867_IO_MUX_CFG_IO_IMPEDANCE_CTRL; - val |= dp83867->io_impedance & - DP83867_IO_MUX_CFG_IO_IMPEDANCE_CTRL; - - phy_write_mmd(phydev, DP83867_DEVADDR, - DP83867_IO_MUX_CFG, val); - } + if (dp83867->io_impedance >= 0) + phy_modify_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG, + DP83867_IO_MUX_CFG_IO_IMPEDANCE_CTRL, + dp83867->io_impedance & + DP83867_IO_MUX_CFG_IO_IMPEDANCE_CTRL); } /* Enable Interrupt output INT_OE in CFG3 register */ @@ -299,12 +287,11 @@ static int dp83867_config_init(struct phy_device *phydev) dp83867_config_port_mirroring(phydev); /* Clock output selection if muxing property is set */ - if (dp83867->clk_output_sel != DP83867_CLK_O_SEL_REF_CLK) { - val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG); - val &= ~DP83867_IO_MUX_CFG_CLK_O_SEL_MASK; - val |= (dp83867->clk_output_sel << DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT); - phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG, val); - } + if (dp83867->clk_output_sel != DP83867_CLK_O_SEL_REF_CLK) + phy_modify_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG, + DP83867_IO_MUX_CFG_CLK_O_SEL_MASK, + dp83867->clk_output_sel << + DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT); return 0; } diff --git a/drivers/net/phy/dp83tc811.c b/drivers/net/phy/dp83tc811.c index da13356999e5..e9704af1d239 100644 --- a/drivers/net/phy/dp83tc811.c +++ b/drivers/net/phy/dp83tc811.c @@ -144,11 +144,8 @@ static int dp83811_set_wol(struct phy_device *phydev, phy_write_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_CFG, value); } else { - value = phy_read_mmd(phydev, DP83811_DEVADDR, - MII_DP83811_WOL_CFG); - value &= ~DP83811_WOL_EN; - phy_write_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_CFG, - value); + phy_clear_bits_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_CFG, + DP83811_WOL_EN); } return 0; @@ -328,14 +325,10 @@ static int dp83811_suspend(struct phy_device *phydev) static int dp83811_resume(struct phy_device *phydev) { - int value; - genphy_resume(phydev); - value = phy_read_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_CFG); - - phy_write_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_CFG, value | - DP83811_WOL_CLR_INDICATION); + phy_set_bits_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_CFG, + DP83811_WOL_CLR_INDICATION); return 0; } diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 38cfc923a88a..5531f9cabc63 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -58,24 +58,6 @@ struct mv3310_priv { char *hwmon_name; }; -static int mv3310_modify(struct phy_device *phydev, int devad, u16 reg, - u16 mask, u16 bits) -{ - int old, val, ret; - - old = phy_read_mmd(phydev, devad, reg); - if (old < 0) - return old; - - val = (old & ~mask) | (bits & mask); - if (val == old) - return 0; - - ret = phy_write_mmd(phydev, devad, reg, val); - - return ret < 0 ? ret : 1; -} - #ifdef CONFIG_HWMON static umode_t mv3310_hwmon_is_visible(const void *data, enum hwmon_sensor_types type, @@ -159,8 +141,8 @@ static int mv3310_hwmon_config(struct phy_device *phydev, bool enable) return ret; val = enable ? MV_V2_TEMP_CTRL_SAMPLE : MV_V2_TEMP_CTRL_DISABLE; - ret = mv3310_modify(phydev, MDIO_MMD_VEND2, MV_V2_TEMP_CTRL, - MV_V2_TEMP_CTRL_MASK, val); + ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, MV_V2_TEMP_CTRL, + MV_V2_TEMP_CTRL_MASK, val); return ret < 0 ? ret : 0; } @@ -363,18 +345,18 @@ static int mv3310_config_aneg(struct phy_device *phydev) linkmode_and(phydev->advertising, phydev->advertising, phydev->supported); - ret = mv3310_modify(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE, - ADVERTISE_ALL | ADVERTISE_100BASE4 | - ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM, - linkmode_adv_to_mii_adv_t(phydev->advertising)); + ret = phy_modify_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE, + ADVERTISE_ALL | ADVERTISE_100BASE4 | + ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM, + linkmode_adv_to_mii_adv_t(phydev->advertising)); if (ret < 0) return ret; if (ret > 0) changed = true; reg = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising); - ret = mv3310_modify(phydev, MDIO_MMD_AN, MV_AN_CTRL1000, - ADVERTISE_1000FULL | ADVERTISE_1000HALF, reg); + ret = phy_modify_mmd(phydev, MDIO_MMD_AN, MV_AN_CTRL1000, + ADVERTISE_1000FULL | ADVERTISE_1000HALF, reg); if (ret < 0) return ret; if (ret > 0) @@ -387,8 +369,8 @@ static int mv3310_config_aneg(struct phy_device *phydev) else reg = 0; - ret = mv3310_modify(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL, - MDIO_AN_10GBT_CTRL_ADV10G, reg); + ret = phy_modify_mmd(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL, + MDIO_AN_10GBT_CTRL_ADV10G, reg); if (ret < 0) return ret; if (ret > 0) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 03af927fa5ad..519866679c91 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -75,15 +75,9 @@ EXPORT_SYMBOL_GPL(genphy_c45_pma_setup_forced); */ int genphy_c45_an_disable_aneg(struct phy_device *phydev) { - int val; - val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1); - if (val < 0) - return val; - - val &= ~(MDIO_AN_CTRL1_ENABLE | MDIO_AN_CTRL1_RESTART); - - return phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1, val); + return phy_clear_bits_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1, + MDIO_AN_CTRL1_ENABLE | MDIO_AN_CTRL1_RESTART); } EXPORT_SYMBOL_GPL(genphy_c45_an_disable_aneg); @@ -97,15 +91,8 @@ EXPORT_SYMBOL_GPL(genphy_c45_an_disable_aneg); */ int genphy_c45_restart_aneg(struct phy_device *phydev) { - int val; - - val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1); - if (val < 0) - return val; - - val |= MDIO_AN_CTRL1_ENABLE | MDIO_AN_CTRL1_RESTART; - - return phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1, val); + return phy_set_bits_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1, + MDIO_AN_CTRL1_ENABLE | MDIO_AN_CTRL1_RESTART); } EXPORT_SYMBOL_GPL(genphy_c45_restart_aneg); diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index d12aa512b7f5..89ead29cec68 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1060,17 +1060,12 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) if (!phy_check_valid(phydev->speed, phydev->duplex, common)) goto eee_exit_err; - if (clk_stop_enable) { + if (clk_stop_enable) /* Configure the PHY to stop receiving xMII * clock while it is signaling LPI. */ - int val = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1); - if (val < 0) - return val; - - val |= MDIO_PCS_CTRL1_CLKSTOP_EN; - phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1, val); - } + phy_set_bits_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1, + MDIO_PCS_CTRL1_CLKSTOP_EN); return 0; /* EEE supported */ } From 45b13b424faafb81c8c44541f093a682fdabdefc Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 6 Feb 2019 14:52:53 +0300 Subject: [PATCH 0676/1436] x86/boot/compressed/64: Do not corrupt EDX on EFER.LME=1 setting RDMSR in the trampoline code overwrites EDX but that register is used to indicate whether 5-level paging has to be enabled and if clobbered, leads to failure to boot on a 5-level paging machine. Preserve EDX on the stack while we are dealing with EFER. Fixes: b677dfae5aa1 ("x86/boot/compressed/64: Set EFER.LME=1 in 32-bit trampoline before returning to long mode") Reported-by: Kyle D Pelton Signed-off-by: Kirill A. Shutemov Signed-off-by: Borislav Petkov Cc: dave.hansen@linux.intel.com Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Wei Huang Cc: x86-ml Link: https://lkml.kernel.org/r/20190206115253.1907-1-kirill.shutemov@linux.intel.com --- arch/x86/boot/compressed/head_64.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index f105ae8651c9..f62e347862cc 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -602,10 +602,12 @@ ENTRY(trampoline_32bit_src) 3: /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ pushl %ecx + pushl %edx movl $MSR_EFER, %ecx rdmsr btsl $_EFER_LME, %eax wrmsr + popl %edx popl %ecx /* Enable PAE and LA57 (if required) paging modes */ From 8f2566225ae2d62d532bb1810ed74fa4bbc5bbdb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 2 Feb 2019 12:50:43 +0100 Subject: [PATCH 0677/1436] flow_offload: add flow_rule and flow_match structures and use them This patch wraps the dissector key and mask - that flower uses to represent the matching side - around the flow_match structure. To avoid a follow up patch that would edit the same LoCs in the drivers, this patch also wraps this new flow match structure around the flow rule object. This new structure will also contain the flow actions in follow up patches. This introduces two new interfaces: bool flow_rule_match_key(rule, dissector_id) that returns true if a given matching key is set on, and: flow_rule_match_XYZ(rule, &match); To fetch the matching side XYZ into the match container structure, to retrieve the key and the mask with one single call. Signed-off-by: Pablo Neira Ayuso Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 172 ++++---- .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 190 ++++---- drivers/net/ethernet/intel/i40e/i40e_main.c | 176 +++----- drivers/net/ethernet/intel/iavf/iavf_main.c | 191 ++++----- drivers/net/ethernet/intel/igb/igb_main.c | 64 ++- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 68 ++- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 380 +++++++--------- .../ethernet/mellanox/mlxsw/spectrum_flower.c | 202 ++++----- .../ethernet/netronome/nfp/flower/action.c | 11 +- .../net/ethernet/netronome/nfp/flower/match.c | 405 +++++++++--------- .../ethernet/netronome/nfp/flower/offload.c | 147 +++---- .../net/ethernet/qlogic/qede/qede_filter.c | 81 ++-- include/net/flow_offload.h | 115 +++++ include/net/pkt_cls.h | 11 +- net/core/Makefile | 2 +- net/core/flow_offload.c | 143 +++++++ net/sched/cls_flower.c | 47 +- 17 files changed, 1196 insertions(+), 1209 deletions(-) create mode 100644 include/net/flow_offload.h create mode 100644 net/core/flow_offload.c diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index c683b5e96b1d..90a2170c5138 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -177,18 +177,12 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, return 0; } -#define GET_KEY(flow_cmd, key_type) \ - skb_flow_dissector_target((flow_cmd)->dissector, key_type,\ - (flow_cmd)->key) -#define GET_MASK(flow_cmd, key_type) \ - skb_flow_dissector_target((flow_cmd)->dissector, key_type,\ - (flow_cmd)->mask) - static int bnxt_tc_parse_flow(struct bnxt *bp, struct tc_cls_flower_offload *tc_flow_cmd, struct bnxt_tc_flow *flow) { - struct flow_dissector *dissector = tc_flow_cmd->dissector; + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(tc_flow_cmd); + struct flow_dissector *dissector = rule->match.dissector; /* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */ if ((dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) == 0 || @@ -198,140 +192,120 @@ static int bnxt_tc_parse_flow(struct bnxt *bp, return -EOPNOTSUPP; } - if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key = - GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_BASIC); - struct flow_dissector_key_basic *mask = - GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_BASIC); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; - flow->l2_key.ether_type = key->n_proto; - flow->l2_mask.ether_type = mask->n_proto; + flow_rule_match_basic(rule, &match); + flow->l2_key.ether_type = match.key->n_proto; + flow->l2_mask.ether_type = match.mask->n_proto; - if (key->n_proto == htons(ETH_P_IP) || - key->n_proto == htons(ETH_P_IPV6)) { - flow->l4_key.ip_proto = key->ip_proto; - flow->l4_mask.ip_proto = mask->ip_proto; + if (match.key->n_proto == htons(ETH_P_IP) || + match.key->n_proto == htons(ETH_P_IPV6)) { + flow->l4_key.ip_proto = match.key->ip_proto; + flow->l4_mask.ip_proto = match.mask->ip_proto; } } - if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { - struct flow_dissector_key_eth_addrs *key = - GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ETH_ADDRS); - struct flow_dissector_key_eth_addrs *mask = - GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ETH_ADDRS); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + flow_rule_match_eth_addrs(rule, &match); flow->flags |= BNXT_TC_FLOW_FLAGS_ETH_ADDRS; - ether_addr_copy(flow->l2_key.dmac, key->dst); - ether_addr_copy(flow->l2_mask.dmac, mask->dst); - ether_addr_copy(flow->l2_key.smac, key->src); - ether_addr_copy(flow->l2_mask.smac, mask->src); + ether_addr_copy(flow->l2_key.dmac, match.key->dst); + ether_addr_copy(flow->l2_mask.dmac, match.mask->dst); + ether_addr_copy(flow->l2_key.smac, match.key->src); + ether_addr_copy(flow->l2_mask.smac, match.mask->src); } - if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN)) { - struct flow_dissector_key_vlan *key = - GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_VLAN); - struct flow_dissector_key_vlan *mask = - GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_VLAN); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + flow_rule_match_vlan(rule, &match); flow->l2_key.inner_vlan_tci = - cpu_to_be16(VLAN_TCI(key->vlan_id, key->vlan_priority)); + cpu_to_be16(VLAN_TCI(match.key->vlan_id, + match.key->vlan_priority)); flow->l2_mask.inner_vlan_tci = - cpu_to_be16((VLAN_TCI(mask->vlan_id, mask->vlan_priority))); + cpu_to_be16((VLAN_TCI(match.mask->vlan_id, + match.mask->vlan_priority))); flow->l2_key.inner_vlan_tpid = htons(ETH_P_8021Q); flow->l2_mask.inner_vlan_tpid = htons(0xffff); flow->l2_key.num_vlans = 1; } - if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { - struct flow_dissector_key_ipv4_addrs *key = - GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV4_ADDRS); - struct flow_dissector_key_ipv4_addrs *mask = - GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV4_ADDRS); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { + struct flow_match_ipv4_addrs match; + flow_rule_match_ipv4_addrs(rule, &match); flow->flags |= BNXT_TC_FLOW_FLAGS_IPV4_ADDRS; - flow->l3_key.ipv4.daddr.s_addr = key->dst; - flow->l3_mask.ipv4.daddr.s_addr = mask->dst; - flow->l3_key.ipv4.saddr.s_addr = key->src; - flow->l3_mask.ipv4.saddr.s_addr = mask->src; - } else if (dissector_uses_key(dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { - struct flow_dissector_key_ipv6_addrs *key = - GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV6_ADDRS); - struct flow_dissector_key_ipv6_addrs *mask = - GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV6_ADDRS); + flow->l3_key.ipv4.daddr.s_addr = match.key->dst; + flow->l3_mask.ipv4.daddr.s_addr = match.mask->dst; + flow->l3_key.ipv4.saddr.s_addr = match.key->src; + flow->l3_mask.ipv4.saddr.s_addr = match.mask->src; + } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { + struct flow_match_ipv6_addrs match; + flow_rule_match_ipv6_addrs(rule, &match); flow->flags |= BNXT_TC_FLOW_FLAGS_IPV6_ADDRS; - flow->l3_key.ipv6.daddr = key->dst; - flow->l3_mask.ipv6.daddr = mask->dst; - flow->l3_key.ipv6.saddr = key->src; - flow->l3_mask.ipv6.saddr = mask->src; + flow->l3_key.ipv6.daddr = match.key->dst; + flow->l3_mask.ipv6.daddr = match.mask->dst; + flow->l3_key.ipv6.saddr = match.key->src; + flow->l3_mask.ipv6.saddr = match.mask->src; } - if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_PORTS)) { - struct flow_dissector_key_ports *key = - GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_PORTS); - struct flow_dissector_key_ports *mask = - GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_PORTS); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + flow_rule_match_ports(rule, &match); flow->flags |= BNXT_TC_FLOW_FLAGS_PORTS; - flow->l4_key.ports.dport = key->dst; - flow->l4_mask.ports.dport = mask->dst; - flow->l4_key.ports.sport = key->src; - flow->l4_mask.ports.sport = mask->src; + flow->l4_key.ports.dport = match.key->dst; + flow->l4_mask.ports.dport = match.mask->dst; + flow->l4_key.ports.sport = match.key->src; + flow->l4_mask.ports.sport = match.mask->src; } - if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ICMP)) { - struct flow_dissector_key_icmp *key = - GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ICMP); - struct flow_dissector_key_icmp *mask = - GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ICMP); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) { + struct flow_match_icmp match; + flow_rule_match_icmp(rule, &match); flow->flags |= BNXT_TC_FLOW_FLAGS_ICMP; - flow->l4_key.icmp.type = key->type; - flow->l4_key.icmp.code = key->code; - flow->l4_mask.icmp.type = mask->type; - flow->l4_mask.icmp.code = mask->code; + flow->l4_key.icmp.type = match.key->type; + flow->l4_key.icmp.code = match.key->code; + flow->l4_mask.icmp.type = match.mask->type; + flow->l4_mask.icmp.code = match.mask->code; } - if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { - struct flow_dissector_key_ipv4_addrs *key = - GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS); - struct flow_dissector_key_ipv4_addrs *mask = - GET_MASK(tc_flow_cmd, - FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + struct flow_match_ipv4_addrs match; + flow_rule_match_enc_ipv4_addrs(rule, &match); flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS; - flow->tun_key.u.ipv4.dst = key->dst; - flow->tun_mask.u.ipv4.dst = mask->dst; - flow->tun_key.u.ipv4.src = key->src; - flow->tun_mask.u.ipv4.src = mask->src; - } else if (dissector_uses_key(dissector, + flow->tun_key.u.ipv4.dst = match.key->dst; + flow->tun_mask.u.ipv4.dst = match.mask->dst; + flow->tun_key.u.ipv4.src = match.key->src; + flow->tun_mask.u.ipv4.src = match.mask->src; + } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { return -EOPNOTSUPP; } - if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_dissector_key_keyid *key = - GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_KEYID); - struct flow_dissector_key_keyid *mask = - GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_KEYID); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid match; + flow_rule_match_enc_keyid(rule, &match); flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ID; - flow->tun_key.tun_id = key32_to_tunnel_id(key->keyid); - flow->tun_mask.tun_id = key32_to_tunnel_id(mask->keyid); + flow->tun_key.tun_id = key32_to_tunnel_id(match.key->keyid); + flow->tun_mask.tun_id = key32_to_tunnel_id(match.mask->keyid); } - if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { - struct flow_dissector_key_ports *key = - GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_PORTS); - struct flow_dissector_key_ports *mask = - GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_PORTS); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + struct flow_match_ports match; + flow_rule_match_enc_ports(rule, &match); flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_PORTS; - flow->tun_key.tp_dst = key->dst; - flow->tun_mask.tp_dst = mask->dst; - flow->tun_key.tp_src = key->src; - flow->tun_mask.tp_src = mask->src; + flow->tun_key.tp_dst = match.key->dst; + flow->tun_mask.tp_dst = match.mask->dst; + flow->tun_key.tp_src = match.key->src; + flow->tun_mask.tp_src = match.mask->src; } return bnxt_tc_parse_actions(bp, &flow->actions, tc_flow_cmd->exts); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index c116f96956fe..39c5af5dad3d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -83,28 +83,23 @@ static void cxgb4_process_flow_match(struct net_device *dev, struct tc_cls_flower_offload *cls, struct ch_filter_specification *fs) { + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(cls); u16 addr_type = 0; - if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { - struct flow_dissector_key_control *key = - skb_flow_dissector_target(cls->dissector, - FLOW_DISSECTOR_KEY_CONTROL, - cls->key); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; - addr_type = key->addr_type; + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; } - if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key = - skb_flow_dissector_target(cls->dissector, - FLOW_DISSECTOR_KEY_BASIC, - cls->key); - struct flow_dissector_key_basic *mask = - skb_flow_dissector_target(cls->dissector, - FLOW_DISSECTOR_KEY_BASIC, - cls->mask); - u16 ethtype_key = ntohs(key->n_proto); - u16 ethtype_mask = ntohs(mask->n_proto); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + u16 ethtype_key, ethtype_mask; + + flow_rule_match_basic(rule, &match); + ethtype_key = ntohs(match.key->n_proto); + ethtype_mask = ntohs(match.mask->n_proto); if (ethtype_key == ETH_P_ALL) { ethtype_key = 0; @@ -116,115 +111,89 @@ static void cxgb4_process_flow_match(struct net_device *dev, fs->val.ethtype = ethtype_key; fs->mask.ethtype = ethtype_mask; - fs->val.proto = key->ip_proto; - fs->mask.proto = mask->ip_proto; + fs->val.proto = match.key->ip_proto; + fs->mask.proto = match.mask->ip_proto; } if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - struct flow_dissector_key_ipv4_addrs *key = - skb_flow_dissector_target(cls->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - cls->key); - struct flow_dissector_key_ipv4_addrs *mask = - skb_flow_dissector_target(cls->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - cls->mask); + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(rule, &match); fs->type = 0; - memcpy(&fs->val.lip[0], &key->dst, sizeof(key->dst)); - memcpy(&fs->val.fip[0], &key->src, sizeof(key->src)); - memcpy(&fs->mask.lip[0], &mask->dst, sizeof(mask->dst)); - memcpy(&fs->mask.fip[0], &mask->src, sizeof(mask->src)); + memcpy(&fs->val.lip[0], &match.key->dst, sizeof(match.key->dst)); + memcpy(&fs->val.fip[0], &match.key->src, sizeof(match.key->src)); + memcpy(&fs->mask.lip[0], &match.mask->dst, sizeof(match.mask->dst)); + memcpy(&fs->mask.fip[0], &match.mask->src, sizeof(match.mask->src)); /* also initialize nat_lip/fip to same values */ - memcpy(&fs->nat_lip[0], &key->dst, sizeof(key->dst)); - memcpy(&fs->nat_fip[0], &key->src, sizeof(key->src)); - + memcpy(&fs->nat_lip[0], &match.key->dst, sizeof(match.key->dst)); + memcpy(&fs->nat_fip[0], &match.key->src, sizeof(match.key->src)); } if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - struct flow_dissector_key_ipv6_addrs *key = - skb_flow_dissector_target(cls->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - cls->key); - struct flow_dissector_key_ipv6_addrs *mask = - skb_flow_dissector_target(cls->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - cls->mask); + struct flow_match_ipv6_addrs match; + flow_rule_match_ipv6_addrs(rule, &match); fs->type = 1; - memcpy(&fs->val.lip[0], key->dst.s6_addr, sizeof(key->dst)); - memcpy(&fs->val.fip[0], key->src.s6_addr, sizeof(key->src)); - memcpy(&fs->mask.lip[0], mask->dst.s6_addr, sizeof(mask->dst)); - memcpy(&fs->mask.fip[0], mask->src.s6_addr, sizeof(mask->src)); + memcpy(&fs->val.lip[0], match.key->dst.s6_addr, + sizeof(match.key->dst)); + memcpy(&fs->val.fip[0], match.key->src.s6_addr, + sizeof(match.key->src)); + memcpy(&fs->mask.lip[0], match.mask->dst.s6_addr, + sizeof(match.mask->dst)); + memcpy(&fs->mask.fip[0], match.mask->src.s6_addr, + sizeof(match.mask->src)); /* also initialize nat_lip/fip to same values */ - memcpy(&fs->nat_lip[0], key->dst.s6_addr, sizeof(key->dst)); - memcpy(&fs->nat_fip[0], key->src.s6_addr, sizeof(key->src)); + memcpy(&fs->nat_lip[0], match.key->dst.s6_addr, + sizeof(match.key->dst)); + memcpy(&fs->nat_fip[0], match.key->src.s6_addr, + sizeof(match.key->src)); } - if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_PORTS)) { - struct flow_dissector_key_ports *key, *mask; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; - key = skb_flow_dissector_target(cls->dissector, - FLOW_DISSECTOR_KEY_PORTS, - cls->key); - mask = skb_flow_dissector_target(cls->dissector, - FLOW_DISSECTOR_KEY_PORTS, - cls->mask); - fs->val.lport = cpu_to_be16(key->dst); - fs->mask.lport = cpu_to_be16(mask->dst); - fs->val.fport = cpu_to_be16(key->src); - fs->mask.fport = cpu_to_be16(mask->src); + flow_rule_match_ports(rule, &match); + fs->val.lport = cpu_to_be16(match.key->dst); + fs->mask.lport = cpu_to_be16(match.mask->dst); + fs->val.fport = cpu_to_be16(match.key->src); + fs->mask.fport = cpu_to_be16(match.mask->src); /* also initialize nat_lport/fport to same values */ - fs->nat_lport = cpu_to_be16(key->dst); - fs->nat_fport = cpu_to_be16(key->src); + fs->nat_lport = cpu_to_be16(match.key->dst); + fs->nat_fport = cpu_to_be16(match.key->src); } - if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_IP)) { - struct flow_dissector_key_ip *key, *mask; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { + struct flow_match_ip match; - key = skb_flow_dissector_target(cls->dissector, - FLOW_DISSECTOR_KEY_IP, - cls->key); - mask = skb_flow_dissector_target(cls->dissector, - FLOW_DISSECTOR_KEY_IP, - cls->mask); - fs->val.tos = key->tos; - fs->mask.tos = mask->tos; + flow_rule_match_ip(rule, &match); + fs->val.tos = match.key->tos; + fs->mask.tos = match.mask->tos; } - if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_dissector_key_keyid *key, *mask; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid match; - key = skb_flow_dissector_target(cls->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - cls->key); - mask = skb_flow_dissector_target(cls->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - cls->mask); - fs->val.vni = be32_to_cpu(key->keyid); - fs->mask.vni = be32_to_cpu(mask->keyid); + flow_rule_match_enc_keyid(rule, &match); + fs->val.vni = be32_to_cpu(match.key->keyid); + fs->mask.vni = be32_to_cpu(match.mask->keyid); if (fs->mask.vni) { fs->val.encap_vld = 1; fs->mask.encap_vld = 1; } } - if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_VLAN)) { - struct flow_dissector_key_vlan *key, *mask; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; u16 vlan_tci, vlan_tci_mask; - key = skb_flow_dissector_target(cls->dissector, - FLOW_DISSECTOR_KEY_VLAN, - cls->key); - mask = skb_flow_dissector_target(cls->dissector, - FLOW_DISSECTOR_KEY_VLAN, - cls->mask); - vlan_tci = key->vlan_id | (key->vlan_priority << - VLAN_PRIO_SHIFT); - vlan_tci_mask = mask->vlan_id | (mask->vlan_priority << - VLAN_PRIO_SHIFT); + flow_rule_match_vlan(rule, &match); + vlan_tci = match.key->vlan_id | (match.key->vlan_priority << + VLAN_PRIO_SHIFT); + vlan_tci_mask = match.mask->vlan_id | (match.mask->vlan_priority << + VLAN_PRIO_SHIFT); fs->val.ivlan = vlan_tci; fs->mask.ivlan = vlan_tci_mask; @@ -255,10 +224,12 @@ static void cxgb4_process_flow_match(struct net_device *dev, static int cxgb4_validate_flow_match(struct net_device *dev, struct tc_cls_flower_offload *cls) { + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(cls); + struct flow_dissector *dissector = rule->match.dissector; u16 ethtype_mask = 0; u16 ethtype_key = 0; - if (cls->dissector->used_keys & + if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | @@ -268,36 +239,29 @@ static int cxgb4_validate_flow_match(struct net_device *dev, BIT(FLOW_DISSECTOR_KEY_VLAN) | BIT(FLOW_DISSECTOR_KEY_IP))) { netdev_warn(dev, "Unsupported key used: 0x%x\n", - cls->dissector->used_keys); + dissector->used_keys); return -EOPNOTSUPP; } - if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key = - skb_flow_dissector_target(cls->dissector, - FLOW_DISSECTOR_KEY_BASIC, - cls->key); - struct flow_dissector_key_basic *mask = - skb_flow_dissector_target(cls->dissector, - FLOW_DISSECTOR_KEY_BASIC, - cls->mask); - ethtype_key = ntohs(key->n_proto); - ethtype_mask = ntohs(mask->n_proto); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + ethtype_key = ntohs(match.key->n_proto); + ethtype_mask = ntohs(match.mask->n_proto); } - if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_IP)) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { u16 eth_ip_type = ethtype_key & ethtype_mask; - struct flow_dissector_key_ip *mask; + struct flow_match_ip match; if (eth_ip_type != ETH_P_IP && eth_ip_type != ETH_P_IPV6) { netdev_err(dev, "IP Key supported only with IPv4/v6"); return -EINVAL; } - mask = skb_flow_dissector_target(cls->dissector, - FLOW_DISSECTOR_KEY_IP, - cls->mask); - if (mask->ttl) { + flow_rule_match_ip(rule, &match); + if (match.mask->ttl) { netdev_warn(dev, "ttl match unsupported for offload"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 5c6731b97059..44856a84738d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7169,11 +7169,13 @@ static int i40e_parse_cls_flower(struct i40e_vsi *vsi, struct tc_cls_flower_offload *f, struct i40e_cloud_filter *filter) { + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_dissector *dissector = rule->match.dissector; u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0; struct i40e_pf *pf = vsi->back; u8 field_flags = 0; - if (f->dissector->used_keys & + if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | @@ -7183,143 +7185,109 @@ static int i40e_parse_cls_flower(struct i40e_vsi *vsi, BIT(FLOW_DISSECTOR_KEY_PORTS) | BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) { dev_err(&pf->pdev->dev, "Unsupported key used: 0x%x\n", - f->dissector->used_keys); + dissector->used_keys); return -EOPNOTSUPP; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_dissector_key_keyid *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->key); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid match; - struct flow_dissector_key_keyid *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->mask); - - if (mask->keyid != 0) + flow_rule_match_enc_keyid(rule, &match); + if (match.mask->keyid != 0) field_flags |= I40E_CLOUD_FIELD_TEN_ID; - filter->tenant_id = be32_to_cpu(key->keyid); + filter->tenant_id = be32_to_cpu(match.key->keyid); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->key); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; - struct flow_dissector_key_basic *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->mask); - - n_proto_key = ntohs(key->n_proto); - n_proto_mask = ntohs(mask->n_proto); + flow_rule_match_basic(rule, &match); + n_proto_key = ntohs(match.key->n_proto); + n_proto_mask = ntohs(match.mask->n_proto); if (n_proto_key == ETH_P_ALL) { n_proto_key = 0; n_proto_mask = 0; } filter->n_proto = n_proto_key & n_proto_mask; - filter->ip_proto = key->ip_proto; + filter->ip_proto = match.key->ip_proto; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { - struct flow_dissector_key_eth_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->key); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; - struct flow_dissector_key_eth_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->mask); + flow_rule_match_eth_addrs(rule, &match); /* use is_broadcast and is_zero to check for all 0xf or 0 */ - if (!is_zero_ether_addr(mask->dst)) { - if (is_broadcast_ether_addr(mask->dst)) { + if (!is_zero_ether_addr(match.mask->dst)) { + if (is_broadcast_ether_addr(match.mask->dst)) { field_flags |= I40E_CLOUD_FIELD_OMAC; } else { dev_err(&pf->pdev->dev, "Bad ether dest mask %pM\n", - mask->dst); + match.mask->dst); return I40E_ERR_CONFIG; } } - if (!is_zero_ether_addr(mask->src)) { - if (is_broadcast_ether_addr(mask->src)) { + if (!is_zero_ether_addr(match.mask->src)) { + if (is_broadcast_ether_addr(match.mask->src)) { field_flags |= I40E_CLOUD_FIELD_IMAC; } else { dev_err(&pf->pdev->dev, "Bad ether src mask %pM\n", - mask->src); + match.mask->src); return I40E_ERR_CONFIG; } } - ether_addr_copy(filter->dst_mac, key->dst); - ether_addr_copy(filter->src_mac, key->src); + ether_addr_copy(filter->dst_mac, match.key->dst); + ether_addr_copy(filter->src_mac, match.key->src); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { - struct flow_dissector_key_vlan *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->key); - struct flow_dissector_key_vlan *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; - if (mask->vlan_id) { - if (mask->vlan_id == VLAN_VID_MASK) { + flow_rule_match_vlan(rule, &match); + if (match.mask->vlan_id) { + if (match.mask->vlan_id == VLAN_VID_MASK) { field_flags |= I40E_CLOUD_FIELD_IVLAN; } else { dev_err(&pf->pdev->dev, "Bad vlan mask 0x%04x\n", - mask->vlan_id); + match.mask->vlan_id); return I40E_ERR_CONFIG; } } - filter->vlan_id = cpu_to_be16(key->vlan_id); + filter->vlan_id = cpu_to_be16(match.key->vlan_id); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { - struct flow_dissector_key_control *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CONTROL, - f->key); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; - addr_type = key->addr_type; + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; } if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - struct flow_dissector_key_ipv4_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->key); - struct flow_dissector_key_ipv4_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->mask); + struct flow_match_ipv4_addrs match; - if (mask->dst) { - if (mask->dst == cpu_to_be32(0xffffffff)) { + flow_rule_match_ipv4_addrs(rule, &match); + if (match.mask->dst) { + if (match.mask->dst == cpu_to_be32(0xffffffff)) { field_flags |= I40E_CLOUD_FIELD_IIP; } else { dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4b\n", - &mask->dst); + &match.mask->dst); return I40E_ERR_CONFIG; } } - if (mask->src) { - if (mask->src == cpu_to_be32(0xffffffff)) { + if (match.mask->src) { + if (match.mask->src == cpu_to_be32(0xffffffff)) { field_flags |= I40E_CLOUD_FIELD_IIP; } else { dev_err(&pf->pdev->dev, "Bad ip src mask %pI4b\n", - &mask->src); + &match.mask->src); return I40E_ERR_CONFIG; } } @@ -7328,70 +7296,60 @@ static int i40e_parse_cls_flower(struct i40e_vsi *vsi, dev_err(&pf->pdev->dev, "Tenant id not allowed for ip filter\n"); return I40E_ERR_CONFIG; } - filter->dst_ipv4 = key->dst; - filter->src_ipv4 = key->src; + filter->dst_ipv4 = match.key->dst; + filter->src_ipv4 = match.key->src; } if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - struct flow_dissector_key_ipv6_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->key); - struct flow_dissector_key_ipv6_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->mask); + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(rule, &match); /* src and dest IPV6 address should not be LOOPBACK * (0:0:0:0:0:0:0:1), which can be represented as ::1 */ - if (ipv6_addr_loopback(&key->dst) || - ipv6_addr_loopback(&key->src)) { + if (ipv6_addr_loopback(&match.key->dst) || + ipv6_addr_loopback(&match.key->src)) { dev_err(&pf->pdev->dev, "Bad ipv6, addr is LOOPBACK\n"); return I40E_ERR_CONFIG; } - if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src)) + if (!ipv6_addr_any(&match.mask->dst) || + !ipv6_addr_any(&match.mask->src)) field_flags |= I40E_CLOUD_FIELD_IIP; - memcpy(&filter->src_ipv6, &key->src.s6_addr32, + memcpy(&filter->src_ipv6, &match.key->src.s6_addr32, sizeof(filter->src_ipv6)); - memcpy(&filter->dst_ipv6, &key->dst.s6_addr32, + memcpy(&filter->dst_ipv6, &match.key->dst.s6_addr32, sizeof(filter->dst_ipv6)); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { - struct flow_dissector_key_ports *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->key); - struct flow_dissector_key_ports *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; - if (mask->src) { - if (mask->src == cpu_to_be16(0xffff)) { + flow_rule_match_ports(rule, &match); + if (match.mask->src) { + if (match.mask->src == cpu_to_be16(0xffff)) { field_flags |= I40E_CLOUD_FIELD_IIP; } else { dev_err(&pf->pdev->dev, "Bad src port mask 0x%04x\n", - be16_to_cpu(mask->src)); + be16_to_cpu(match.mask->src)); return I40E_ERR_CONFIG; } } - if (mask->dst) { - if (mask->dst == cpu_to_be16(0xffff)) { + if (match.mask->dst) { + if (match.mask->dst == cpu_to_be16(0xffff)) { field_flags |= I40E_CLOUD_FIELD_IIP; } else { dev_err(&pf->pdev->dev, "Bad dst port mask 0x%04x\n", - be16_to_cpu(mask->dst)); + be16_to_cpu(match.mask->dst)); return I40E_ERR_CONFIG; } } - filter->dst_port = key->dst; - filter->src_port = key->src; + filter->dst_port = match.key->dst; + filter->src_port = match.key->src; switch (filter->ip_proto) { case IPPROTO_TCP: diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 9f2b7b7adf6b..4569d69a2b55 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2439,6 +2439,8 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, struct tc_cls_flower_offload *f, struct iavf_cloud_filter *filter) { + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_dissector *dissector = rule->match.dissector; u16 n_proto_mask = 0; u16 n_proto_key = 0; u8 field_flags = 0; @@ -2447,7 +2449,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, int i = 0; struct virtchnl_filter *vf = &filter->f; - if (f->dissector->used_keys & + if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | @@ -2457,32 +2459,24 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, BIT(FLOW_DISSECTOR_KEY_PORTS) | BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) { dev_err(&adapter->pdev->dev, "Unsupported key used: 0x%x\n", - f->dissector->used_keys); + dissector->used_keys); return -EOPNOTSUPP; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_dissector_key_keyid *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid match; - if (mask->keyid != 0) + flow_rule_match_enc_keyid(rule, &match); + if (match.mask->keyid != 0) field_flags |= IAVF_CLOUD_FIELD_TEN_ID; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->key); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; - struct flow_dissector_key_basic *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->mask); - n_proto_key = ntohs(key->n_proto); - n_proto_mask = ntohs(mask->n_proto); + flow_rule_match_basic(rule, &match); + n_proto_key = ntohs(match.key->n_proto); + n_proto_mask = ntohs(match.mask->n_proto); if (n_proto_key == ETH_P_ALL) { n_proto_key = 0; @@ -2496,122 +2490,103 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, vf->flow_type = VIRTCHNL_TCP_V6_FLOW; } - if (key->ip_proto != IPPROTO_TCP) { + if (match.key->ip_proto != IPPROTO_TCP) { dev_info(&adapter->pdev->dev, "Only TCP transport is supported\n"); return -EINVAL; } } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { - struct flow_dissector_key_eth_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->key); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + + flow_rule_match_eth_addrs(rule, &match); - struct flow_dissector_key_eth_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->mask); /* use is_broadcast and is_zero to check for all 0xf or 0 */ - if (!is_zero_ether_addr(mask->dst)) { - if (is_broadcast_ether_addr(mask->dst)) { + if (!is_zero_ether_addr(match.mask->dst)) { + if (is_broadcast_ether_addr(match.mask->dst)) { field_flags |= IAVF_CLOUD_FIELD_OMAC; } else { dev_err(&adapter->pdev->dev, "Bad ether dest mask %pM\n", - mask->dst); + match.mask->dst); return I40E_ERR_CONFIG; } } - if (!is_zero_ether_addr(mask->src)) { - if (is_broadcast_ether_addr(mask->src)) { + if (!is_zero_ether_addr(match.mask->src)) { + if (is_broadcast_ether_addr(match.mask->src)) { field_flags |= IAVF_CLOUD_FIELD_IMAC; } else { dev_err(&adapter->pdev->dev, "Bad ether src mask %pM\n", - mask->src); + match.mask->src); return I40E_ERR_CONFIG; } } - if (!is_zero_ether_addr(key->dst)) - if (is_valid_ether_addr(key->dst) || - is_multicast_ether_addr(key->dst)) { + if (!is_zero_ether_addr(match.key->dst)) + if (is_valid_ether_addr(match.key->dst) || + is_multicast_ether_addr(match.key->dst)) { /* set the mask if a valid dst_mac address */ for (i = 0; i < ETH_ALEN; i++) vf->mask.tcp_spec.dst_mac[i] |= 0xff; ether_addr_copy(vf->data.tcp_spec.dst_mac, - key->dst); + match.key->dst); } - if (!is_zero_ether_addr(key->src)) - if (is_valid_ether_addr(key->src) || - is_multicast_ether_addr(key->src)) { + if (!is_zero_ether_addr(match.key->src)) + if (is_valid_ether_addr(match.key->src) || + is_multicast_ether_addr(match.key->src)) { /* set the mask if a valid dst_mac address */ for (i = 0; i < ETH_ALEN; i++) vf->mask.tcp_spec.src_mac[i] |= 0xff; ether_addr_copy(vf->data.tcp_spec.src_mac, - key->src); + match.key->src); } } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { - struct flow_dissector_key_vlan *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->key); - struct flow_dissector_key_vlan *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; - if (mask->vlan_id) { - if (mask->vlan_id == VLAN_VID_MASK) { + flow_rule_match_vlan(rule, &match); + if (match.mask->vlan_id) { + if (match.mask->vlan_id == VLAN_VID_MASK) { field_flags |= IAVF_CLOUD_FIELD_IVLAN; } else { dev_err(&adapter->pdev->dev, "Bad vlan mask %u\n", - mask->vlan_id); + match.mask->vlan_id); return I40E_ERR_CONFIG; } } vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff); - vf->data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id); + vf->data.tcp_spec.vlan_id = cpu_to_be16(match.key->vlan_id); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { - struct flow_dissector_key_control *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CONTROL, - f->key); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; - addr_type = key->addr_type; + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; } if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - struct flow_dissector_key_ipv4_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->key); - struct flow_dissector_key_ipv4_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->mask); + struct flow_match_ipv4_addrs match; - if (mask->dst) { - if (mask->dst == cpu_to_be32(0xffffffff)) { + flow_rule_match_ipv4_addrs(rule, &match); + if (match.mask->dst) { + if (match.mask->dst == cpu_to_be32(0xffffffff)) { field_flags |= IAVF_CLOUD_FIELD_IIP; } else { dev_err(&adapter->pdev->dev, "Bad ip dst mask 0x%08x\n", - be32_to_cpu(mask->dst)); + be32_to_cpu(match.mask->dst)); return I40E_ERR_CONFIG; } } - if (mask->src) { - if (mask->src == cpu_to_be32(0xffffffff)) { + if (match.mask->src) { + if (match.mask->src == cpu_to_be32(0xffffffff)) { field_flags |= IAVF_CLOUD_FIELD_IIP; } else { dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n", - be32_to_cpu(mask->dst)); + be32_to_cpu(match.mask->dst)); return I40E_ERR_CONFIG; } } @@ -2620,28 +2595,23 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, dev_info(&adapter->pdev->dev, "Tenant id not allowed for ip filter\n"); return I40E_ERR_CONFIG; } - if (key->dst) { + if (match.key->dst) { vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff); - vf->data.tcp_spec.dst_ip[0] = key->dst; + vf->data.tcp_spec.dst_ip[0] = match.key->dst; } - if (key->src) { + if (match.key->src) { vf->mask.tcp_spec.src_ip[0] |= cpu_to_be32(0xffffffff); - vf->data.tcp_spec.src_ip[0] = key->src; + vf->data.tcp_spec.src_ip[0] = match.key->src; } } if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - struct flow_dissector_key_ipv6_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->key); - struct flow_dissector_key_ipv6_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->mask); + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(rule, &match); /* validate mask, make sure it is not IPV6_ADDR_ANY */ - if (ipv6_addr_any(&mask->dst)) { + if (ipv6_addr_any(&match.mask->dst)) { dev_err(&adapter->pdev->dev, "Bad ipv6 dst mask 0x%02x\n", IPV6_ADDR_ANY); return I40E_ERR_CONFIG; @@ -2650,61 +2620,56 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, /* src and dest IPv6 address should not be LOOPBACK * (0:0:0:0:0:0:0:1) which can be represented as ::1 */ - if (ipv6_addr_loopback(&key->dst) || - ipv6_addr_loopback(&key->src)) { + if (ipv6_addr_loopback(&match.key->dst) || + ipv6_addr_loopback(&match.key->src)) { dev_err(&adapter->pdev->dev, "ipv6 addr should not be loopback\n"); return I40E_ERR_CONFIG; } - if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src)) + if (!ipv6_addr_any(&match.mask->dst) || + !ipv6_addr_any(&match.mask->src)) field_flags |= IAVF_CLOUD_FIELD_IIP; for (i = 0; i < 4; i++) vf->mask.tcp_spec.dst_ip[i] |= cpu_to_be32(0xffffffff); - memcpy(&vf->data.tcp_spec.dst_ip, &key->dst.s6_addr32, + memcpy(&vf->data.tcp_spec.dst_ip, &match.key->dst.s6_addr32, sizeof(vf->data.tcp_spec.dst_ip)); for (i = 0; i < 4; i++) vf->mask.tcp_spec.src_ip[i] |= cpu_to_be32(0xffffffff); - memcpy(&vf->data.tcp_spec.src_ip, &key->src.s6_addr32, + memcpy(&vf->data.tcp_spec.src_ip, &match.key->src.s6_addr32, sizeof(vf->data.tcp_spec.src_ip)); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { - struct flow_dissector_key_ports *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->key); - struct flow_dissector_key_ports *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; - if (mask->src) { - if (mask->src == cpu_to_be16(0xffff)) { + flow_rule_match_ports(rule, &match); + if (match.mask->src) { + if (match.mask->src == cpu_to_be16(0xffff)) { field_flags |= IAVF_CLOUD_FIELD_IIP; } else { dev_err(&adapter->pdev->dev, "Bad src port mask %u\n", - be16_to_cpu(mask->src)); + be16_to_cpu(match.mask->src)); return I40E_ERR_CONFIG; } } - if (mask->dst) { - if (mask->dst == cpu_to_be16(0xffff)) { + if (match.mask->dst) { + if (match.mask->dst == cpu_to_be16(0xffff)) { field_flags |= IAVF_CLOUD_FIELD_IIP; } else { dev_err(&adapter->pdev->dev, "Bad dst port mask %u\n", - be16_to_cpu(mask->dst)); + be16_to_cpu(match.mask->dst)); return I40E_ERR_CONFIG; } } - if (key->dst) { + if (match.key->dst) { vf->mask.tcp_spec.dst_port |= cpu_to_be16(0xffff); - vf->data.tcp_spec.dst_port = key->dst; + vf->data.tcp_spec.dst_port = match.key->dst; } - if (key->src) { + if (match.key->src) { vf->mask.tcp_spec.src_port |= cpu_to_be16(0xffff); - vf->data.tcp_spec.src_port = key->src; + vf->data.tcp_spec.src_port = match.key->src; } } vf->field_flags = field_flags; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index d35cc9697e2d..6d812e96572d 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2581,9 +2581,11 @@ static int igb_parse_cls_flower(struct igb_adapter *adapter, int traffic_class, struct igb_nfc_filter *input) { + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_dissector *dissector = rule->match.dissector; struct netlink_ext_ack *extack = f->common.extack; - if (f->dissector->used_keys & + if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | @@ -2593,78 +2595,60 @@ static int igb_parse_cls_flower(struct igb_adapter *adapter, return -EOPNOTSUPP; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { - struct flow_dissector_key_eth_addrs *key, *mask; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; - key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->key); - mask = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->mask); - - if (!is_zero_ether_addr(mask->dst)) { - if (!is_broadcast_ether_addr(mask->dst)) { + flow_rule_match_eth_addrs(rule, &match); + if (!is_zero_ether_addr(match.mask->dst)) { + if (!is_broadcast_ether_addr(match.mask->dst)) { NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for destination MAC address"); return -EINVAL; } input->filter.match_flags |= IGB_FILTER_FLAG_DST_MAC_ADDR; - ether_addr_copy(input->filter.dst_addr, key->dst); + ether_addr_copy(input->filter.dst_addr, match.key->dst); } - if (!is_zero_ether_addr(mask->src)) { - if (!is_broadcast_ether_addr(mask->src)) { + if (!is_zero_ether_addr(match.mask->src)) { + if (!is_broadcast_ether_addr(match.mask->src)) { NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for source MAC address"); return -EINVAL; } input->filter.match_flags |= IGB_FILTER_FLAG_SRC_MAC_ADDR; - ether_addr_copy(input->filter.src_addr, key->src); + ether_addr_copy(input->filter.src_addr, match.key->src); } } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key, *mask; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; - key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->key); - mask = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->mask); - - if (mask->n_proto) { - if (mask->n_proto != ETHER_TYPE_FULL_MASK) { + flow_rule_match_basic(rule, &match); + if (match.mask->n_proto) { + if (match.mask->n_proto != ETHER_TYPE_FULL_MASK) { NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for EtherType filter"); return -EINVAL; } input->filter.match_flags |= IGB_FILTER_FLAG_ETHER_TYPE; - input->filter.etype = key->n_proto; + input->filter.etype = match.key->n_proto; } } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { - struct flow_dissector_key_vlan *key, *mask; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; - key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->key); - mask = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->mask); - - if (mask->vlan_priority) { - if (mask->vlan_priority != VLAN_PRIO_FULL_MASK) { + flow_rule_match_vlan(rule, &match); + if (match.mask->vlan_priority) { + if (match.mask->vlan_priority != VLAN_PRIO_FULL_MASK) { NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for VLAN priority"); return -EINVAL; } input->filter.match_flags |= IGB_FILTER_FLAG_VLAN_TCI; - input->filter.vlan_tci = key->vlan_priority; + input->filter.vlan_tci = match.key->vlan_priority; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 046948ead152..47bb4eb894c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -496,25 +496,21 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, void *headers_c, void *headers_v) { + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; - struct flow_dissector_key_ports *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_PORTS, - f->key); - struct flow_dissector_key_ports *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_PORTS, - f->mask); void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct flow_match_ports enc_ports; + + flow_rule_match_enc_ports(rule, &enc_ports); /* Full udp dst port must be given */ - if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS) || - memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) { + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) || + memchr_inv(&enc_ports.mask->dst, 0xff, sizeof(enc_ports.mask->dst))) { NL_SET_ERR_MSG_MOD(extack, "VXLAN decap filter must include enc_dst_port condition"); netdev_warn(priv->netdev, @@ -523,12 +519,12 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, } /* udp dst port must be knonwn as a VXLAN port */ - if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst))) { + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(enc_ports.key->dst))) { NL_SET_ERR_MSG_MOD(extack, "Matched UDP port is not registered as a VXLAN port"); netdev_warn(priv->netdev, "UDP port %d is not registered as a VXLAN port\n", - be16_to_cpu(key->dst)); + be16_to_cpu(enc_ports.key->dst)); return -EOPNOTSUPP; } @@ -536,26 +532,26 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, ntohs(mask->dst)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(key->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, + ntohs(enc_ports.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, + ntohs(enc_ports.key->dst)); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(mask->src)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, ntohs(key->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, + ntohs(enc_ports.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, + ntohs(enc_ports.key->src)); /* match on VNI */ - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_dissector_key_keyid *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->key); - struct flow_dissector_key_keyid *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid enc_keyid; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, - be32_to_cpu(mask->keyid)); + be32_to_cpu(enc_keyid.mask->keyid)); MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, - be32_to_cpu(key->keyid)); + be32_to_cpu(enc_keyid.key->keyid)); } return 0; } @@ -570,6 +566,7 @@ static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv, misc_parameters); void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); if (!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) { NL_SET_ERR_MSG_MOD(f->common.extack, @@ -587,21 +584,14 @@ static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv, MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB); /* gre key */ - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_dissector_key_keyid *mask = NULL; - struct flow_dissector_key_keyid *key = NULL; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid enc_keyid; - mask = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->mask); + flow_rule_match_enc_keyid(rule, &enc_keyid); MLX5_SET(fte_match_set_misc, misc_c, - gre_key.key, be32_to_cpu(mask->keyid)); - - key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->key); + gre_key.key, be32_to_cpu(enc_keyid.mask->keyid)); MLX5_SET(fte_match_set_misc, misc_v, - gre_key.key, be32_to_cpu(key->keyid)); + gre_key.key, be32_to_cpu(enc_keyid.key->keyid)); } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 74159d39dd66..cd289ce0582d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1309,12 +1309,9 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, outer_headers); void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); - - struct flow_dissector_key_control *enc_control = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_CONTROL, - f->key); - int err = 0; + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_match_control enc_control; + int err; err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, headers_c, headers_v); @@ -1324,79 +1321,70 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, return err; } - if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - struct flow_dissector_key_ipv4_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, - f->key); - struct flow_dissector_key_ipv4_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, - f->mask); + flow_rule_match_enc_control(rule, &enc_control); + + if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_enc_ipv4_addrs(rule, &match); MLX5_SET(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(mask->src)); + ntohl(match.mask->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(key->src)); + ntohl(match.key->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(mask->dst)); + ntohl(match.mask->dst)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(key->dst)); + ntohl(match.key->dst)); MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); - } else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - struct flow_dissector_key_ipv6_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, - f->key); - struct flow_dissector_key_ipv6_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, - f->mask); + } else if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + flow_rule_match_enc_ipv6_addrs(rule, &match); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv6_layout.ipv6), - &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv6_layout.ipv6), - &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) { - struct flow_dissector_key_ip *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IP, - f->key); - struct flow_dissector_key_ip *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IP, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3); + flow_rule_match_enc_ip(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, + match.mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, + match.key->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, + match.mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, + match.key->ttl); - if (mask->ttl && + if (match.mask->ttl && !MLX5_CAP_ESW_FLOWTABLE_FDB (priv->mdev, ft_field_support.outer_ipv4_ttl)) { @@ -1437,12 +1425,14 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, misc_parameters); void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_dissector *dissector = rule->match.dissector; u16 addr_type = 0; u8 ip_proto = 0; *match_level = MLX5_MATCH_NONE; - if (f->dissector->used_keys & + if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | @@ -1461,20 +1451,18 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, BIT(FLOW_DISSECTOR_KEY_ENC_IP))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n", - f->dissector->used_keys); + dissector->used_keys); return -EOPNOTSUPP; } - if ((dissector_uses_key(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || - dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) || - dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) && - dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { - struct flow_dissector_key_control *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_CONTROL, - f->key); - switch (key->addr_type) { + if ((flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) && + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_enc_control(rule, &match); + switch (match.key->addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: case FLOW_DISSECTOR_KEY_IPV6_ADDRS: if (parse_tunnel_attr(priv, spec, f, filter_dev)) @@ -1493,35 +1481,27 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, inner_headers); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->key); - struct flow_dissector_key_basic *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->mask); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, - ntohs(mask->n_proto)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ntohs(key->n_proto)); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; - if (mask->n_proto) + flow_rule_match_basic(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, + ntohs(match.mask->n_proto)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ntohs(match.key->n_proto)); + + if (match.mask->n_proto) *match_level = MLX5_MATCH_L2; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { - struct flow_dissector_key_vlan *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->key); - struct flow_dissector_key_vlan *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->mask); - if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) { - if (key->vlan_tpid == htons(ETH_P_8021AD)) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); + if (match.mask->vlan_id || + match.mask->vlan_priority || + match.mask->vlan_tpid) { + if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, @@ -1533,11 +1513,15 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, cvlan_tag, 1); } - MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, + match.mask->vlan_id); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, + match.key->vlan_id); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, + match.mask->vlan_priority); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, + match.key->vlan_priority); *match_level = MLX5_MATCH_L2; } @@ -1547,17 +1531,14 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, *match_level = MLX5_MATCH_L2; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CVLAN)) { - struct flow_dissector_key_vlan *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CVLAN, - f->key); - struct flow_dissector_key_vlan *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CVLAN, - f->mask); - if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) { - if (key->vlan_tpid == htons(ETH_P_8021AD)) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); + if (match.mask->vlan_id || + match.mask->vlan_priority || + match.mask->vlan_tpid) { + if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { MLX5_SET(fte_match_set_misc, misc_c, outer_second_svlan_tag, 1); MLX5_SET(fte_match_set_misc, misc_v, @@ -1570,69 +1551,58 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, } MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid, - mask->vlan_id); + match.mask->vlan_id); MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid, - key->vlan_id); + match.key->vlan_id); MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio, - mask->vlan_priority); + match.mask->vlan_priority); MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio, - key->vlan_priority); + match.key->vlan_priority); *match_level = MLX5_MATCH_L2; } } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { - struct flow_dissector_key_eth_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->key); - struct flow_dissector_key_eth_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + flow_rule_match_eth_addrs(rule, &match); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dmac_47_16), - mask->dst); + match.mask->dst); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dmac_47_16), - key->dst); + match.key->dst); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, smac_47_16), - mask->src); + match.mask->src); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, smac_47_16), - key->src); + match.key->src); - if (!is_zero_ether_addr(mask->src) || !is_zero_ether_addr(mask->dst)) + if (!is_zero_ether_addr(match.mask->src) || + !is_zero_ether_addr(match.mask->dst)) *match_level = MLX5_MATCH_L2; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { - struct flow_dissector_key_control *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CONTROL, - f->key); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; - struct flow_dissector_key_control *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CONTROL, - f->mask); - addr_type = key->addr_type; + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; /* the HW doesn't support frag first/later */ - if (mask->flags & FLOW_DIS_FIRST_FRAG) + if (match.mask->flags & FLOW_DIS_FIRST_FRAG) return -EOPNOTSUPP; - if (mask->flags & FLOW_DIS_IS_FRAGMENT) { + if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, - key->flags & FLOW_DIS_IS_FRAGMENT); + match.key->flags & FLOW_DIS_IS_FRAGMENT); /* the HW doesn't need L3 inline to match on frag=no */ - if (!(key->flags & FLOW_DIS_IS_FRAGMENT)) + if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT)) *match_level = MLX5_MATCH_L2; /* *** L2 attributes parsing up to here *** */ else @@ -1640,102 +1610,85 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, } } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->key); - struct flow_dissector_key_basic *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->mask); - ip_proto = key->ip_proto; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + ip_proto = match.key->ip_proto; MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, - mask->ip_proto); + match.mask->ip_proto); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, - key->ip_proto); + match.key->ip_proto); - if (mask->ip_proto) + if (match.mask->ip_proto) *match_level = MLX5_MATCH_L3; } if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - struct flow_dissector_key_ipv4_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->key); - struct flow_dissector_key_ipv4_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->mask); + struct flow_match_ipv4_addrs match; + flow_rule_match_ipv4_addrs(rule, &match); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4), - &mask->src, sizeof(mask->src)); + &match.mask->src, sizeof(match.mask->src)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4), - &key->src, sizeof(key->src)); + &match.key->src, sizeof(match.key->src)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - &mask->dst, sizeof(mask->dst)); + &match.mask->dst, sizeof(match.mask->dst)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - &key->dst, sizeof(key->dst)); + &match.key->dst, sizeof(match.key->dst)); - if (mask->src || mask->dst) + if (match.mask->src || match.mask->dst) *match_level = MLX5_MATCH_L3; } if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - struct flow_dissector_key_ipv6_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->key); - struct flow_dissector_key_ipv6_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->mask); + struct flow_match_ipv6_addrs match; + flow_rule_match_ipv6_addrs(rule, &match); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv6_layout.ipv6), - &mask->src, sizeof(mask->src)); + &match.mask->src, sizeof(match.mask->src)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv6_layout.ipv6), - &key->src, sizeof(key->src)); + &match.key->src, sizeof(match.key->src)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &mask->dst, sizeof(mask->dst)); + &match.mask->dst, sizeof(match.mask->dst)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &key->dst, sizeof(key->dst)); + &match.key->dst, sizeof(match.key->dst)); - if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY || - ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY) + if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY || + ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY) *match_level = MLX5_MATCH_L3; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) { - struct flow_dissector_key_ip *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IP, - f->key); - struct flow_dissector_key_ip *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IP, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { + struct flow_match_ip match; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3); + flow_rule_match_ip(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, + match.mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, + match.key->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, + match.mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, + match.key->ttl); - if (mask->ttl && + if (match.mask->ttl && !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_ipv4_ttl)) { NL_SET_ERR_MSG_MOD(extack, @@ -1743,44 +1696,39 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (mask->tos || mask->ttl) + if (match.mask->tos || match.mask->ttl) *match_level = MLX5_MATCH_L3; } /* *** L3 attributes parsing up to here *** */ - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { - struct flow_dissector_key_ports *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->key); - struct flow_dissector_key_ports *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); switch (ip_proto) { case IPPROTO_TCP: MLX5_SET(fte_match_set_lyr_2_4, headers_c, - tcp_sport, ntohs(mask->src)); + tcp_sport, ntohs(match.mask->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, - tcp_sport, ntohs(key->src)); + tcp_sport, ntohs(match.key->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_c, - tcp_dport, ntohs(mask->dst)); + tcp_dport, ntohs(match.mask->dst)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, - tcp_dport, ntohs(key->dst)); + tcp_dport, ntohs(match.key->dst)); break; case IPPROTO_UDP: MLX5_SET(fte_match_set_lyr_2_4, headers_c, - udp_sport, ntohs(mask->src)); + udp_sport, ntohs(match.mask->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, - udp_sport, ntohs(key->src)); + udp_sport, ntohs(match.key->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_c, - udp_dport, ntohs(mask->dst)); + udp_dport, ntohs(match.mask->dst)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, - udp_dport, ntohs(key->dst)); + udp_dport, ntohs(match.key->dst)); break; default: NL_SET_ERR_MSG_MOD(extack, @@ -1790,26 +1738,20 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, return -EINVAL; } - if (mask->src || mask->dst) + if (match.mask->src || match.mask->dst) *match_level = MLX5_MATCH_L4; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) { - struct flow_dissector_key_tcp *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_TCP, - f->key); - struct flow_dissector_key_tcp *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_TCP, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_match_tcp match; + flow_rule_match_tcp(rule, &match); MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, - ntohs(mask->flags)); + ntohs(match.mask->flags)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, - ntohs(key->flags)); + ntohs(match.key->flags)); - if (mask->flags) + if (match.mask->flags) *match_level = MLX5_MATCH_L4; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index ff072358d950..a20379e29e02 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -113,59 +113,49 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei, struct tc_cls_flower_offload *f) { - struct flow_dissector_key_ipv4_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->key); - struct flow_dissector_key_ipv4_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->mask); + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(f->rule, &match); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31, - (char *) &key->src, - (char *) &mask->src, 4); + (char *) &match.key->src, + (char *) &match.mask->src, 4); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31, - (char *) &key->dst, - (char *) &mask->dst, 4); + (char *) &match.key->dst, + (char *) &match.mask->dst, 4); } static void mlxsw_sp_flower_parse_ipv6(struct mlxsw_sp_acl_rule_info *rulei, struct tc_cls_flower_offload *f) { - struct flow_dissector_key_ipv6_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->key); - struct flow_dissector_key_ipv6_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->mask); + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(f->rule, &match); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_96_127, - &key->src.s6_addr[0x0], - &mask->src.s6_addr[0x0], 4); + &match.key->src.s6_addr[0x0], + &match.mask->src.s6_addr[0x0], 4); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_64_95, - &key->src.s6_addr[0x4], - &mask->src.s6_addr[0x4], 4); + &match.key->src.s6_addr[0x4], + &match.mask->src.s6_addr[0x4], 4); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_32_63, - &key->src.s6_addr[0x8], - &mask->src.s6_addr[0x8], 4); + &match.key->src.s6_addr[0x8], + &match.mask->src.s6_addr[0x8], 4); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31, - &key->src.s6_addr[0xC], - &mask->src.s6_addr[0xC], 4); + &match.key->src.s6_addr[0xC], + &match.mask->src.s6_addr[0xC], 4); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_96_127, - &key->dst.s6_addr[0x0], - &mask->dst.s6_addr[0x0], 4); + &match.key->dst.s6_addr[0x0], + &match.mask->dst.s6_addr[0x0], 4); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_64_95, - &key->dst.s6_addr[0x4], - &mask->dst.s6_addr[0x4], 4); + &match.key->dst.s6_addr[0x4], + &match.mask->dst.s6_addr[0x4], 4); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_32_63, - &key->dst.s6_addr[0x8], - &mask->dst.s6_addr[0x8], 4); + &match.key->dst.s6_addr[0x8], + &match.mask->dst.s6_addr[0x8], 4); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31, - &key->dst.s6_addr[0xC], - &mask->dst.s6_addr[0xC], 4); + &match.key->dst.s6_addr[0xC], + &match.mask->dst.s6_addr[0xC], 4); } static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp, @@ -173,9 +163,10 @@ static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp, struct tc_cls_flower_offload *f, u8 ip_proto) { - struct flow_dissector_key_ports *key, *mask; + const struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_match_ports match; - if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) return 0; if (ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) { @@ -184,16 +175,13 @@ static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp, return -EINVAL; } - key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->key); - mask = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->mask); + flow_rule_match_ports(rule, &match); mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_DST_L4_PORT, - ntohs(key->dst), ntohs(mask->dst)); + ntohs(match.key->dst), + ntohs(match.mask->dst)); mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_SRC_L4_PORT, - ntohs(key->src), ntohs(mask->src)); + ntohs(match.key->src), + ntohs(match.mask->src)); return 0; } @@ -202,9 +190,10 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp, struct tc_cls_flower_offload *f, u8 ip_proto) { - struct flow_dissector_key_tcp *key, *mask; + const struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_match_tcp match; - if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) return 0; if (ip_proto != IPPROTO_TCP) { @@ -213,14 +202,11 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp, return -EINVAL; } - key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_TCP, - f->key); - mask = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_TCP, - f->mask); + flow_rule_match_tcp(rule, &match); + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_TCP_FLAGS, - ntohs(key->flags), ntohs(mask->flags)); + ntohs(match.key->flags), + ntohs(match.mask->flags)); return 0; } @@ -229,9 +215,10 @@ static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp, struct tc_cls_flower_offload *f, u16 n_proto) { - struct flow_dissector_key_ip *key, *mask; + const struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_match_ip match; - if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) return 0; if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6) { @@ -240,20 +227,18 @@ static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp, return -EINVAL; } - key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IP, - f->key); - mask = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IP, - f->mask); + flow_rule_match_ip(rule, &match); + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_TTL_, - key->ttl, mask->ttl); + match.key->ttl, match.mask->ttl); mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_ECN, - key->tos & 0x3, mask->tos & 0x3); + match.key->tos & 0x3, + match.mask->tos & 0x3); mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_DSCP, - key->tos >> 6, mask->tos >> 6); + match.key->tos >> 6, + match.mask->tos >> 6); return 0; } @@ -263,13 +248,15 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct tc_cls_flower_offload *f) { + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_dissector *dissector = rule->match.dissector; u16 n_proto_mask = 0; u16 n_proto_key = 0; u16 addr_type = 0; u8 ip_proto = 0; int err; - if (f->dissector->used_keys & + if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | @@ -286,25 +273,19 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_rulei_priority(rulei, f->common.prio); - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { - struct flow_dissector_key_control *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CONTROL, - f->key); - addr_type = key->addr_type; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->key); - struct flow_dissector_key_basic *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->mask); - n_proto_key = ntohs(key->n_proto); - n_proto_mask = ntohs(mask->n_proto); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + n_proto_key = ntohs(match.key->n_proto); + n_proto_mask = ntohs(match.mask->n_proto); if (n_proto_key == ETH_P_ALL) { n_proto_key = 0; @@ -314,60 +295,53 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, MLXSW_AFK_ELEMENT_ETHERTYPE, n_proto_key, n_proto_mask); - ip_proto = key->ip_proto; + ip_proto = match.key->ip_proto; mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_PROTO, - key->ip_proto, mask->ip_proto); + match.key->ip_proto, + match.mask->ip_proto); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { - struct flow_dissector_key_eth_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->key); - struct flow_dissector_key_eth_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + flow_rule_match_eth_addrs(rule, &match); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DMAC_32_47, - key->dst, mask->dst, 2); + match.key->dst, + match.mask->dst, 2); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DMAC_0_31, - key->dst + 2, mask->dst + 2, 4); + match.key->dst + 2, + match.mask->dst + 2, 4); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SMAC_32_47, - key->src, mask->src, 2); + match.key->src, + match.mask->src, 2); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SMAC_0_31, - key->src + 2, mask->src + 2, 4); + match.key->src + 2, + match.mask->src + 2, 4); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { - struct flow_dissector_key_vlan *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->key); - struct flow_dissector_key_vlan *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + flow_rule_match_vlan(rule, &match); if (mlxsw_sp_acl_block_is_egress_bound(block)) { NL_SET_ERR_MSG_MOD(f->common.extack, "vlan_id key is not supported on egress"); return -EOPNOTSUPP; } - if (mask->vlan_id != 0) + if (match.mask->vlan_id != 0) mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_VID, - key->vlan_id, - mask->vlan_id); - if (mask->vlan_priority != 0) + match.key->vlan_id, + match.mask->vlan_id); + if (match.mask->vlan_priority != 0) mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_PCP, - key->vlan_priority, - mask->vlan_priority); + match.key->vlan_priority, + match.mask->vlan_priority); } if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 8d54b36afee8..43192640bdd1 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -587,6 +587,7 @@ static int nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, char *nfp_action, int *a_len, u32 *csum_updated) { + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); struct nfp_fl_set_ipv6_addr set_ip6_dst, set_ip6_src; struct nfp_fl_set_ipv6_tc_hl_fl set_ip6_tc_hl_fl; struct nfp_fl_set_ip4_ttl_tos set_ip_ttl_tos; @@ -643,13 +644,11 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, return err; } - if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *basic; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; - basic = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_BASIC, - flow->key); - ip_proto = basic->ip_proto; + flow_rule_match_basic(rule, &match); + ip_proto = match.key->ip_proto; } if (set_eth.head.len_lw) { diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index c04a0d6b0184..1279fa5da9e1 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -8,31 +8,41 @@ #include "main.h" static void -nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *frame, - struct tc_cls_flower_offload *flow, u8 key_type, - bool mask_version) +nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, + struct tc_cls_flower_offload *flow, u8 key_type) { - struct fl_flow_key *target = mask_version ? flow->mask : flow->key; - struct flow_dissector_key_vlan *flow_vlan; + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); u16 tmp_tci; - memset(frame, 0, sizeof(struct nfp_flower_meta_tci)); - /* Populate the metadata frame. */ - frame->nfp_flow_key_layer = key_type; - frame->mask_id = ~0; + memset(ext, 0, sizeof(struct nfp_flower_meta_tci)); + memset(msk, 0, sizeof(struct nfp_flower_meta_tci)); - if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_VLAN)) { - flow_vlan = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_VLAN, - target); + /* Populate the metadata frame. */ + ext->nfp_flow_key_layer = key_type; + ext->mask_id = ~0; + + msk->nfp_flow_key_layer = key_type; + msk->mask_id = ~0; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); /* Populate the tci field. */ - if (flow_vlan->vlan_id || flow_vlan->vlan_priority) { + if (match.key->vlan_id || match.key->vlan_priority) { tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, - flow_vlan->vlan_priority) | + match.key->vlan_priority) | FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, - flow_vlan->vlan_id) | + match.key->vlan_id) | NFP_FLOWER_MASK_VLAN_CFI; - frame->tci = cpu_to_be16(tmp_tci); + ext->tci = cpu_to_be16(tmp_tci); + tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + match.mask->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + match.mask->vlan_id) | + NFP_FLOWER_MASK_VLAN_CFI; + msk->tci = cpu_to_be16(tmp_tci); } } } @@ -64,231 +74,244 @@ nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, } static void -nfp_flower_compile_mac(struct nfp_flower_mac_mpls *frame, - struct tc_cls_flower_offload *flow, - bool mask_version) +nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, + struct nfp_flower_mac_mpls *msk, + struct tc_cls_flower_offload *flow) { - struct fl_flow_key *target = mask_version ? flow->mask : flow->key; - struct flow_dissector_key_eth_addrs *addr; + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); - memset(frame, 0, sizeof(struct nfp_flower_mac_mpls)); + memset(ext, 0, sizeof(struct nfp_flower_mac_mpls)); + memset(msk, 0, sizeof(struct nfp_flower_mac_mpls)); - if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { - addr = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - target); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + + flow_rule_match_eth_addrs(rule, &match); /* Populate mac frame. */ - ether_addr_copy(frame->mac_dst, &addr->dst[0]); - ether_addr_copy(frame->mac_src, &addr->src[0]); + ether_addr_copy(ext->mac_dst, &match.key->dst[0]); + ether_addr_copy(ext->mac_src, &match.key->src[0]); + ether_addr_copy(msk->mac_dst, &match.mask->dst[0]); + ether_addr_copy(msk->mac_src, &match.mask->src[0]); } - if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_MPLS)) { - struct flow_dissector_key_mpls *mpls; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) { + struct flow_match_mpls match; u32 t_mpls; - mpls = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_MPLS, - target); - - t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, mpls->mpls_label) | - FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, mpls->mpls_tc) | - FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, mpls->mpls_bos) | + flow_rule_match_mpls(rule, &match); + t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, match.key->mpls_label) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, match.key->mpls_tc) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, match.key->mpls_bos) | NFP_FLOWER_MASK_MPLS_Q; - - frame->mpls_lse = cpu_to_be32(t_mpls); - } else if (dissector_uses_key(flow->dissector, - FLOW_DISSECTOR_KEY_BASIC)) { + ext->mpls_lse = cpu_to_be32(t_mpls); + t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, match.mask->mpls_label) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, match.mask->mpls_tc) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, match.mask->mpls_bos) | + NFP_FLOWER_MASK_MPLS_Q; + msk->mpls_lse = cpu_to_be32(t_mpls); + } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { /* Check for mpls ether type and set NFP_FLOWER_MASK_MPLS_Q * bit, which indicates an mpls ether type but without any * mpls fields. */ - struct flow_dissector_key_basic *key_basic; + struct flow_match_basic match; - key_basic = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_BASIC, - flow->key); - if (key_basic->n_proto == cpu_to_be16(ETH_P_MPLS_UC) || - key_basic->n_proto == cpu_to_be16(ETH_P_MPLS_MC)) - frame->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q); + flow_rule_match_basic(rule, &match); + if (match.key->n_proto == cpu_to_be16(ETH_P_MPLS_UC) || + match.key->n_proto == cpu_to_be16(ETH_P_MPLS_MC)) { + ext->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q); + msk->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q); + } } } static void -nfp_flower_compile_tport(struct nfp_flower_tp_ports *frame, - struct tc_cls_flower_offload *flow, - bool mask_version) +nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, + struct nfp_flower_tp_ports *msk, + struct tc_cls_flower_offload *flow) { - struct fl_flow_key *target = mask_version ? flow->mask : flow->key; - struct flow_dissector_key_ports *tp; + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); - memset(frame, 0, sizeof(struct nfp_flower_tp_ports)); + memset(ext, 0, sizeof(struct nfp_flower_tp_ports)); + memset(msk, 0, sizeof(struct nfp_flower_tp_ports)); - if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_PORTS)) { - tp = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_PORTS, - target); - frame->port_src = tp->src; - frame->port_dst = tp->dst; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); + ext->port_src = match.key->src; + ext->port_dst = match.key->dst; + msk->port_src = match.mask->src; + msk->port_dst = match.mask->dst; } } static void -nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *frame, - struct tc_cls_flower_offload *flow, - bool mask_version) +nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, + struct nfp_flower_ip_ext *msk, + struct tc_cls_flower_offload *flow) { - struct fl_flow_key *target = mask_version ? flow->mask : flow->key; + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); - if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *basic; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; - basic = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_BASIC, - target); - frame->proto = basic->ip_proto; + flow_rule_match_basic(rule, &match); + ext->proto = match.key->ip_proto; + msk->proto = match.mask->ip_proto; } - if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_IP)) { - struct flow_dissector_key_ip *flow_ip; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { + struct flow_match_ip match; - flow_ip = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_IP, - target); - frame->tos = flow_ip->tos; - frame->ttl = flow_ip->ttl; + flow_rule_match_ip(rule, &match); + ext->tos = match.key->tos; + ext->ttl = match.key->ttl; + msk->tos = match.mask->tos; + msk->ttl = match.mask->ttl; } - if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) { - struct flow_dissector_key_tcp *tcp; - u32 tcp_flags; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_match_tcp match; + u16 tcp_flags; - tcp = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_TCP, target); - tcp_flags = be16_to_cpu(tcp->flags); + flow_rule_match_tcp(rule, &match); + tcp_flags = be16_to_cpu(match.key->flags); - if (tcp_flags & TCPHDR_FIN) - frame->flags |= NFP_FL_TCP_FLAG_FIN; - if (tcp_flags & TCPHDR_SYN) - frame->flags |= NFP_FL_TCP_FLAG_SYN; - if (tcp_flags & TCPHDR_RST) - frame->flags |= NFP_FL_TCP_FLAG_RST; - if (tcp_flags & TCPHDR_PSH) - frame->flags |= NFP_FL_TCP_FLAG_PSH; - if (tcp_flags & TCPHDR_URG) - frame->flags |= NFP_FL_TCP_FLAG_URG; + if (tcp_flags & TCPHDR_FIN) { + ext->flags |= NFP_FL_TCP_FLAG_FIN; + msk->flags |= NFP_FL_TCP_FLAG_FIN; + } + if (tcp_flags & TCPHDR_SYN) { + ext->flags |= NFP_FL_TCP_FLAG_SYN; + msk->flags |= NFP_FL_TCP_FLAG_SYN; + } + if (tcp_flags & TCPHDR_RST) { + ext->flags |= NFP_FL_TCP_FLAG_RST; + msk->flags |= NFP_FL_TCP_FLAG_RST; + } + if (tcp_flags & TCPHDR_PSH) { + ext->flags |= NFP_FL_TCP_FLAG_PSH; + msk->flags |= NFP_FL_TCP_FLAG_PSH; + } + if (tcp_flags & TCPHDR_URG) { + ext->flags |= NFP_FL_TCP_FLAG_URG; + msk->flags |= NFP_FL_TCP_FLAG_URG; + } } - if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { - struct flow_dissector_key_control *key; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; - key = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_CONTROL, - target); - if (key->flags & FLOW_DIS_IS_FRAGMENT) - frame->flags |= NFP_FL_IP_FRAGMENTED; - if (key->flags & FLOW_DIS_FIRST_FRAG) - frame->flags |= NFP_FL_IP_FRAG_FIRST; + flow_rule_match_control(rule, &match); + if (match.key->flags & FLOW_DIS_IS_FRAGMENT) { + ext->flags |= NFP_FL_IP_FRAGMENTED; + msk->flags |= NFP_FL_IP_FRAGMENTED; + } + if (match.key->flags & FLOW_DIS_FIRST_FRAG) { + ext->flags |= NFP_FL_IP_FRAG_FIRST; + msk->flags |= NFP_FL_IP_FRAG_FIRST; + } } } static void -nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame, - struct tc_cls_flower_offload *flow, - bool mask_version) +nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, + struct nfp_flower_ipv4 *msk, + struct tc_cls_flower_offload *flow) { - struct fl_flow_key *target = mask_version ? flow->mask : flow->key; - struct flow_dissector_key_ipv4_addrs *addr; + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_match_ipv4_addrs match; - memset(frame, 0, sizeof(struct nfp_flower_ipv4)); + memset(ext, 0, sizeof(struct nfp_flower_ipv4)); + memset(msk, 0, sizeof(struct nfp_flower_ipv4)); - if (dissector_uses_key(flow->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { - addr = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - target); - frame->ipv4_src = addr->src; - frame->ipv4_dst = addr->dst; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { + flow_rule_match_ipv4_addrs(rule, &match); + ext->ipv4_src = match.key->src; + ext->ipv4_dst = match.key->dst; + msk->ipv4_src = match.mask->src; + msk->ipv4_dst = match.mask->dst; } - nfp_flower_compile_ip_ext(&frame->ip_ext, flow, mask_version); + nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); } static void -nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame, - struct tc_cls_flower_offload *flow, - bool mask_version) +nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, + struct nfp_flower_ipv6 *msk, + struct tc_cls_flower_offload *flow) { - struct fl_flow_key *target = mask_version ? flow->mask : flow->key; - struct flow_dissector_key_ipv6_addrs *addr; + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); - memset(frame, 0, sizeof(struct nfp_flower_ipv6)); + memset(ext, 0, sizeof(struct nfp_flower_ipv6)); + memset(msk, 0, sizeof(struct nfp_flower_ipv6)); - if (dissector_uses_key(flow->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { - addr = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - target); - frame->ipv6_src = addr->src; - frame->ipv6_dst = addr->dst; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(rule, &match); + ext->ipv6_src = match.key->src; + ext->ipv6_dst = match.key->dst; + msk->ipv6_src = match.mask->src; + msk->ipv6_dst = match.mask->dst; } - nfp_flower_compile_ip_ext(&frame->ip_ext, flow, mask_version); + nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); } static int -nfp_flower_compile_geneve_opt(void *key_buf, struct tc_cls_flower_offload *flow, - bool mask_version) +nfp_flower_compile_geneve_opt(void *ext, void *msk, + struct tc_cls_flower_offload *flow) { - struct fl_flow_key *target = mask_version ? flow->mask : flow->key; - struct flow_dissector_key_enc_opts *opts; + struct flow_match_enc_opts match; - opts = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_ENC_OPTS, - target); - memcpy(key_buf, opts->data, opts->len); + flow_rule_match_enc_opts(flow->rule, &match); + memcpy(ext, match.key->data, match.key->len); + memcpy(msk, match.mask->data, match.mask->len); return 0; } static void -nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame, - struct tc_cls_flower_offload *flow, - bool mask_version) +nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext, + struct nfp_flower_ipv4_udp_tun *msk, + struct tc_cls_flower_offload *flow) { - struct fl_flow_key *target = mask_version ? flow->mask : flow->key; - struct flow_dissector_key_ipv4_addrs *tun_ips; - struct flow_dissector_key_keyid *vni; - struct flow_dissector_key_ip *ip; + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); - memset(frame, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); + memset(ext, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); + memset(msk, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); - if (dissector_uses_key(flow->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID)) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid match; u32 temp_vni; - vni = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - target); - temp_vni = be32_to_cpu(vni->keyid) << NFP_FL_TUN_VNI_OFFSET; - frame->tun_id = cpu_to_be32(temp_vni); + flow_rule_match_enc_keyid(rule, &match); + temp_vni = be32_to_cpu(match.key->keyid) << NFP_FL_TUN_VNI_OFFSET; + ext->tun_id = cpu_to_be32(temp_vni); + temp_vni = be32_to_cpu(match.mask->keyid) << NFP_FL_TUN_VNI_OFFSET; + msk->tun_id = cpu_to_be32(temp_vni); } - if (dissector_uses_key(flow->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { - tun_ips = - skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, - target); - frame->ip_src = tun_ips->src; - frame->ip_dst = tun_ips->dst; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_enc_ipv4_addrs(rule, &match); + ext->ip_src = match.key->src; + ext->ip_dst = match.key->dst; + msk->ip_src = match.mask->src; + msk->ip_dst = match.mask->dst; } - if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) { - ip = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_ENC_IP, - target); - frame->tos = ip->tos; - frame->ttl = ip->ttl; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match; + + flow_rule_match_enc_ip(rule, &match); + ext->tos = match.key->tos; + ext->ttl = match.key->ttl; + msk->tos = match.mask->tos; + msk->ttl = match.mask->ttl; } } @@ -313,12 +336,9 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, ext = nfp_flow->unmasked_data; msk = nfp_flow->mask_data; - /* Populate Exact Metadata. */ nfp_flower_compile_meta_tci((struct nfp_flower_meta_tci *)ext, - flow, key_ls->key_layer, false); - /* Populate Mask Metadata. */ - nfp_flower_compile_meta_tci((struct nfp_flower_meta_tci *)msk, - flow, key_ls->key_layer, true); + (struct nfp_flower_meta_tci *)msk, + flow, key_ls->key_layer); ext += sizeof(struct nfp_flower_meta_tci); msk += sizeof(struct nfp_flower_meta_tci); @@ -348,45 +368,33 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, msk += sizeof(struct nfp_flower_in_port); if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) { - /* Populate Exact MAC Data. */ nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)ext, - flow, false); - /* Populate Mask MAC Data. */ - nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)msk, - flow, true); + (struct nfp_flower_mac_mpls *)msk, + flow); ext += sizeof(struct nfp_flower_mac_mpls); msk += sizeof(struct nfp_flower_mac_mpls); } if (NFP_FLOWER_LAYER_TP & key_ls->key_layer) { - /* Populate Exact TP Data. */ nfp_flower_compile_tport((struct nfp_flower_tp_ports *)ext, - flow, false); - /* Populate Mask TP Data. */ - nfp_flower_compile_tport((struct nfp_flower_tp_ports *)msk, - flow, true); + (struct nfp_flower_tp_ports *)msk, + flow); ext += sizeof(struct nfp_flower_tp_ports); msk += sizeof(struct nfp_flower_tp_ports); } if (NFP_FLOWER_LAYER_IPV4 & key_ls->key_layer) { - /* Populate Exact IPv4 Data. */ nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)ext, - flow, false); - /* Populate Mask IPv4 Data. */ - nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)msk, - flow, true); + (struct nfp_flower_ipv4 *)msk, + flow); ext += sizeof(struct nfp_flower_ipv4); msk += sizeof(struct nfp_flower_ipv4); } if (NFP_FLOWER_LAYER_IPV6 & key_ls->key_layer) { - /* Populate Exact IPv4 Data. */ nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)ext, - flow, false); - /* Populate Mask IPv4 Data. */ - nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)msk, - flow, true); + (struct nfp_flower_ipv6 *)msk, + flow); ext += sizeof(struct nfp_flower_ipv6); msk += sizeof(struct nfp_flower_ipv6); } @@ -395,10 +403,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE) { __be32 tun_dst; - /* Populate Exact VXLAN Data. */ - nfp_flower_compile_ipv4_udp_tun((void *)ext, flow, false); - /* Populate Mask VXLAN Data. */ - nfp_flower_compile_ipv4_udp_tun((void *)msk, flow, true); + nfp_flower_compile_ipv4_udp_tun((void *)ext, (void *)msk, flow); tun_dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ip_dst; ext += sizeof(struct nfp_flower_ipv4_udp_tun); msk += sizeof(struct nfp_flower_ipv4_udp_tun); @@ -410,11 +415,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, nfp_tunnel_add_ipv4_off(app, tun_dst); if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { - err = nfp_flower_compile_geneve_opt(ext, flow, false); - if (err) - return err; - - err = nfp_flower_compile_geneve_opt(msk, flow, true); + err = nfp_flower_compile_geneve_opt(ext, msk, flow); if (err) return err; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 2cdbf29ecbe7..74f7ff292052 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -102,23 +102,22 @@ nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow, static bool nfp_flower_check_higher_than_mac(struct tc_cls_flower_offload *f) { - return dissector_uses_key(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS) || - dissector_uses_key(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS) || - dissector_uses_key(f->dissector, - FLOW_DISSECTOR_KEY_PORTS) || - dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ICMP); + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + + return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP); } static int -nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts, +nfp_flower_calc_opt_layer(struct flow_match_enc_opts *enc_opts, u32 *key_layer_two, int *key_size) { - if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY) + if (enc_opts->key->len > NFP_FL_MAX_GENEVE_OPT_KEY) return -EOPNOTSUPP; - if (enc_opts->len > 0) { + if (enc_opts->key->len > 0) { *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE_OP; *key_size += sizeof(struct nfp_flower_geneve_options); } @@ -133,20 +132,21 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, struct tc_cls_flower_offload *flow, enum nfp_flower_tun_type *tun_type) { - struct flow_dissector_key_basic *mask_basic = NULL; - struct flow_dissector_key_basic *key_basic = NULL; + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_dissector *dissector = rule->match.dissector; + struct flow_match_basic basic = { NULL, NULL}; struct nfp_flower_priv *priv = app->priv; u32 key_layer_two; u8 key_layer; int key_size; int err; - if (flow->dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR) + if (dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR) return -EOPNOTSUPP; /* If any tun dissector is used then the required set must be used. */ - if (flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR && - (flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) + if (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR && + (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) return -EOPNOTSUPP; @@ -155,76 +155,53 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, key_size = sizeof(struct nfp_flower_meta_tci) + sizeof(struct nfp_flower_in_port); - if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS) || - dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_MPLS)) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) { key_layer |= NFP_FLOWER_LAYER_MAC; key_size += sizeof(struct nfp_flower_mac_mpls); } - if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_VLAN)) { - struct flow_dissector_key_vlan *flow_vlan; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan vlan; - flow_vlan = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_VLAN, - flow->mask); + flow_rule_match_vlan(rule, &vlan); if (!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_PCP) && - flow_vlan->vlan_priority) + vlan.key->vlan_priority) return -EOPNOTSUPP; } - if (dissector_uses_key(flow->dissector, - FLOW_DISSECTOR_KEY_ENC_CONTROL)) { - struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL; - struct flow_dissector_key_ports *mask_enc_ports = NULL; - struct flow_dissector_key_enc_opts *enc_op = NULL; - struct flow_dissector_key_ports *enc_ports = NULL; - struct flow_dissector_key_control *mask_enc_ctl = - skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_ENC_CONTROL, - flow->mask); - struct flow_dissector_key_control *enc_ctl = - skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_ENC_CONTROL, - flow->key); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_match_enc_opts enc_op = { NULL, NULL }; + struct flow_match_ipv4_addrs ipv4_addrs; + struct flow_match_control enc_ctl; + struct flow_match_ports enc_ports; - if (mask_enc_ctl->addr_type != 0xffff || - enc_ctl->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) + flow_rule_match_enc_control(rule, &enc_ctl); + + if (enc_ctl.mask->addr_type != 0xffff || + enc_ctl.key->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) return -EOPNOTSUPP; /* These fields are already verified as used. */ - mask_ipv4 = - skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, - flow->mask); - if (mask_ipv4->dst != cpu_to_be32(~0)) + flow_rule_match_enc_ipv4_addrs(rule, &ipv4_addrs); + if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) return -EOPNOTSUPP; - mask_enc_ports = - skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_ENC_PORTS, - flow->mask); - enc_ports = - skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_ENC_PORTS, - flow->key); - if (mask_enc_ports->dst != cpu_to_be16(~0)) + flow_rule_match_enc_ports(rule, &enc_ports); + if (enc_ports.mask->dst != cpu_to_be16(~0)) return -EOPNOTSUPP; - if (dissector_uses_key(flow->dissector, - FLOW_DISSECTOR_KEY_ENC_OPTS)) { - enc_op = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_ENC_OPTS, - flow->key); - } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) + flow_rule_match_enc_opts(rule, &enc_op); - switch (enc_ports->dst) { + switch (enc_ports.key->dst) { case htons(NFP_FL_VXLAN_PORT): *tun_type = NFP_FL_TUNNEL_VXLAN; key_layer |= NFP_FLOWER_LAYER_VXLAN; key_size += sizeof(struct nfp_flower_ipv4_udp_tun); - if (enc_op) + if (enc_op.key) return -EOPNOTSUPP; break; case htons(NFP_FL_GENEVE_PORT): @@ -236,11 +213,11 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, key_layer_two |= NFP_FLOWER_LAYER2_GENEVE; key_size += sizeof(struct nfp_flower_ipv4_udp_tun); - if (!enc_op) + if (!enc_op.key) break; if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT)) return -EOPNOTSUPP; - err = nfp_flower_calc_opt_layer(enc_op, &key_layer_two, + err = nfp_flower_calc_opt_layer(&enc_op, &key_layer_two, &key_size); if (err) return err; @@ -254,19 +231,12 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, return -EOPNOTSUPP; } - if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - mask_basic = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_BASIC, - flow->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) + flow_rule_match_basic(rule, &basic); - key_basic = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_BASIC, - flow->key); - } - - if (mask_basic && mask_basic->n_proto) { + if (basic.mask && basic.mask->n_proto) { /* Ethernet type is present in the key. */ - switch (key_basic->n_proto) { + switch (basic.key->n_proto) { case cpu_to_be16(ETH_P_IP): key_layer |= NFP_FLOWER_LAYER_IPV4; key_size += sizeof(struct nfp_flower_ipv4); @@ -305,9 +275,9 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, } } - if (mask_basic && mask_basic->ip_proto) { + if (basic.mask && basic.mask->ip_proto) { /* Ethernet type is present in the key. */ - switch (key_basic->ip_proto) { + switch (basic.key->ip_proto) { case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_SCTP: @@ -324,14 +294,12 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, } } - if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) { - struct flow_dissector_key_tcp *tcp; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_match_tcp tcp; u32 tcp_flags; - tcp = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_TCP, - flow->key); - tcp_flags = be16_to_cpu(tcp->flags); + flow_rule_match_tcp(rule, &tcp); + tcp_flags = be16_to_cpu(tcp.key->flags); if (tcp_flags & ~NFP_FLOWER_SUPPORTED_TCPFLAGS) return -EOPNOTSUPP; @@ -347,12 +315,12 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, * space, thus we need to ensure we include a IPv4/IPv6 key * layer if we have not done so already. */ - if (!key_basic) + if (!basic.key) return -EOPNOTSUPP; if (!(key_layer & NFP_FLOWER_LAYER_IPV4) && !(key_layer & NFP_FLOWER_LAYER_IPV6)) { - switch (key_basic->n_proto) { + switch (basic.key->n_proto) { case cpu_to_be16(ETH_P_IP): key_layer |= NFP_FLOWER_LAYER_IPV4; key_size += sizeof(struct nfp_flower_ipv4); @@ -369,14 +337,11 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, } } - if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { - struct flow_dissector_key_control *key_ctl; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control ctl; - key_ctl = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_CONTROL, - flow->key); - - if (key_ctl->flags & ~NFP_FLOWER_SUPPORTED_CTLFLAGS) + flow_rule_match_control(rule, &ctl); + if (ctl.key->flags & ~NFP_FLOWER_SUPPORTED_CTLFLAGS) return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index b16ce7d93caf..81d5b9304229 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -2033,24 +2033,20 @@ qede_tc_parse_ports(struct qede_dev *edev, struct tc_cls_flower_offload *f, struct qede_arfs_tuple *t) { - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { - struct flow_dissector_key_ports *key, *mask; + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); - key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->key); - mask = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; - if ((key->src && mask->src != U16_MAX) || - (key->dst && mask->dst != U16_MAX)) { + flow_rule_match_ports(rule, &match); + if ((match.key->src && match.mask->src != U16_MAX) || + (match.key->dst && match.mask->dst != U16_MAX)) { DP_NOTICE(edev, "Do not support ports masks\n"); return -EINVAL; } - t->src_port = key->src; - t->dst_port = key->dst; + t->src_port = match.key->src; + t->dst_port = match.key->dst; } return 0; @@ -2061,32 +2057,27 @@ qede_tc_parse_v6_common(struct qede_dev *edev, struct tc_cls_flower_offload *f, struct qede_arfs_tuple *t) { + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); struct in6_addr zero_addr, addr; memset(&zero_addr, 0, sizeof(addr)); memset(&addr, 0xff, sizeof(addr)); - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { - struct flow_dissector_key_ipv6_addrs *key, *mask; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { + struct flow_match_ipv6_addrs match; - key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->key); - mask = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->mask); - - if ((memcmp(&key->src, &zero_addr, sizeof(addr)) && - memcmp(&mask->src, &addr, sizeof(addr))) || - (memcmp(&key->dst, &zero_addr, sizeof(addr)) && - memcmp(&mask->dst, &addr, sizeof(addr)))) { + flow_rule_match_ipv6_addrs(rule, &match); + if ((memcmp(&match.key->src, &zero_addr, sizeof(addr)) && + memcmp(&match.mask->src, &addr, sizeof(addr))) || + (memcmp(&match.key->dst, &zero_addr, sizeof(addr)) && + memcmp(&match.mask->dst, &addr, sizeof(addr)))) { DP_NOTICE(edev, "Do not support IPv6 address prefix/mask\n"); return -EINVAL; } - memcpy(&t->src_ipv6, &key->src, sizeof(addr)); - memcpy(&t->dst_ipv6, &key->dst, sizeof(addr)); + memcpy(&t->src_ipv6, &match.key->src, sizeof(addr)); + memcpy(&t->dst_ipv6, &match.key->dst, sizeof(addr)); } if (qede_tc_parse_ports(edev, f, t)) @@ -2100,24 +2091,20 @@ qede_tc_parse_v4_common(struct qede_dev *edev, struct tc_cls_flower_offload *f, struct qede_arfs_tuple *t) { - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { - struct flow_dissector_key_ipv4_addrs *key, *mask; + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); - key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->key); - mask = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { + struct flow_match_ipv4_addrs match; - if ((key->src && mask->src != U32_MAX) || - (key->dst && mask->dst != U32_MAX)) { + flow_rule_match_ipv4_addrs(rule, &match); + if ((match.key->src && match.mask->src != U32_MAX) || + (match.key->dst && match.mask->dst != U32_MAX)) { DP_NOTICE(edev, "Do not support ipv4 prefix/masks\n"); return -EINVAL; } - t->src_ipv4 = key->src; - t->dst_ipv4 = key->dst; + t->src_ipv4 = match.key->src; + t->dst_ipv4 = match.key->dst; } if (qede_tc_parse_ports(edev, f, t)) @@ -2175,19 +2162,21 @@ qede_parse_flower_attr(struct qede_dev *edev, __be16 proto, struct tc_cls_flower_offload *f, struct qede_arfs_tuple *tuple) { + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_dissector *dissector = rule->match.dissector; int rc = -EINVAL; u8 ip_proto = 0; memset(tuple, 0, sizeof(*tuple)); - if (f->dissector->used_keys & + if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | BIT(FLOW_DISSECTOR_KEY_PORTS))) { DP_NOTICE(edev, "Unsupported key set:0x%x\n", - f->dissector->used_keys); + dissector->used_keys); return -EOPNOTSUPP; } @@ -2197,13 +2186,11 @@ qede_parse_flower_attr(struct qede_dev *edev, __be16 proto, return -EPROTONOSUPPORT; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; - key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->key); - ip_proto = key->ip_proto; + flow_rule_match_basic(rule, &match); + ip_proto = match.key->ip_proto; } if (ip_proto == IPPROTO_TCP && proto == htons(ETH_P_IP)) diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h new file mode 100644 index 000000000000..461c66595763 --- /dev/null +++ b/include/net/flow_offload.h @@ -0,0 +1,115 @@ +#ifndef _NET_FLOW_OFFLOAD_H +#define _NET_FLOW_OFFLOAD_H + +#include + +struct flow_match { + struct flow_dissector *dissector; + void *mask; + void *key; +}; + +struct flow_match_basic { + struct flow_dissector_key_basic *key, *mask; +}; + +struct flow_match_control { + struct flow_dissector_key_control *key, *mask; +}; + +struct flow_match_eth_addrs { + struct flow_dissector_key_eth_addrs *key, *mask; +}; + +struct flow_match_vlan { + struct flow_dissector_key_vlan *key, *mask; +}; + +struct flow_match_ipv4_addrs { + struct flow_dissector_key_ipv4_addrs *key, *mask; +}; + +struct flow_match_ipv6_addrs { + struct flow_dissector_key_ipv6_addrs *key, *mask; +}; + +struct flow_match_ip { + struct flow_dissector_key_ip *key, *mask; +}; + +struct flow_match_ports { + struct flow_dissector_key_ports *key, *mask; +}; + +struct flow_match_icmp { + struct flow_dissector_key_icmp *key, *mask; +}; + +struct flow_match_tcp { + struct flow_dissector_key_tcp *key, *mask; +}; + +struct flow_match_mpls { + struct flow_dissector_key_mpls *key, *mask; +}; + +struct flow_match_enc_keyid { + struct flow_dissector_key_keyid *key, *mask; +}; + +struct flow_match_enc_opts { + struct flow_dissector_key_enc_opts *key, *mask; +}; + +struct flow_rule; + +void flow_rule_match_basic(const struct flow_rule *rule, + struct flow_match_basic *out); +void flow_rule_match_control(const struct flow_rule *rule, + struct flow_match_control *out); +void flow_rule_match_eth_addrs(const struct flow_rule *rule, + struct flow_match_eth_addrs *out); +void flow_rule_match_vlan(const struct flow_rule *rule, + struct flow_match_vlan *out); +void flow_rule_match_ipv4_addrs(const struct flow_rule *rule, + struct flow_match_ipv4_addrs *out); +void flow_rule_match_ipv6_addrs(const struct flow_rule *rule, + struct flow_match_ipv6_addrs *out); +void flow_rule_match_ip(const struct flow_rule *rule, + struct flow_match_ip *out); +void flow_rule_match_ports(const struct flow_rule *rule, + struct flow_match_ports *out); +void flow_rule_match_tcp(const struct flow_rule *rule, + struct flow_match_tcp *out); +void flow_rule_match_icmp(const struct flow_rule *rule, + struct flow_match_icmp *out); +void flow_rule_match_mpls(const struct flow_rule *rule, + struct flow_match_mpls *out); +void flow_rule_match_enc_control(const struct flow_rule *rule, + struct flow_match_control *out); +void flow_rule_match_enc_ipv4_addrs(const struct flow_rule *rule, + struct flow_match_ipv4_addrs *out); +void flow_rule_match_enc_ipv6_addrs(const struct flow_rule *rule, + struct flow_match_ipv6_addrs *out); +void flow_rule_match_enc_ip(const struct flow_rule *rule, + struct flow_match_ip *out); +void flow_rule_match_enc_ports(const struct flow_rule *rule, + struct flow_match_ports *out); +void flow_rule_match_enc_keyid(const struct flow_rule *rule, + struct flow_match_enc_keyid *out); +void flow_rule_match_enc_opts(const struct flow_rule *rule, + struct flow_match_enc_opts *out); + +struct flow_rule { + struct flow_match match; +}; + +struct flow_rule *flow_rule_alloc(void); + +static inline bool flow_rule_match_key(const struct flow_rule *rule, + enum flow_dissector_key_id key) +{ + return dissector_uses_key(rule->match.dissector, key); +} + +#endif /* _NET_FLOW_OFFLOAD_H */ diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 40965fbbcd31..04b64523cc32 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -6,6 +6,7 @@ #include #include #include +#include /* TC action not accessible from user space */ #define TC_ACT_REINSERT (TC_ACT_VALUE_MAX + 1) @@ -760,13 +761,17 @@ struct tc_cls_flower_offload { struct tc_cls_common_offload common; enum tc_fl_command command; unsigned long cookie; - struct flow_dissector *dissector; - struct fl_flow_key *mask; - struct fl_flow_key *key; + struct flow_rule *rule; struct tcf_exts *exts; u32 classid; }; +static inline struct flow_rule * +tc_cls_flower_offload_flow_rule(struct tc_cls_flower_offload *tc_flow_cmd) +{ + return tc_flow_cmd->rule; +} + enum tc_matchall_command { TC_CLSMATCHALL_REPLACE, TC_CLSMATCHALL_DESTROY, diff --git a/net/core/Makefile b/net/core/Makefile index fccd31e0e7f7..f97d6254e564 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ - fib_notifier.o xdp.o + fib_notifier.o xdp.o flow_offload.o obj-y += net-sysfs.o obj-$(CONFIG_PAGE_POOL) += page_pool.o diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c new file mode 100644 index 000000000000..2fbf6903d2f6 --- /dev/null +++ b/net/core/flow_offload.c @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include + +struct flow_rule *flow_rule_alloc(void) +{ + return kzalloc(sizeof(struct flow_rule), GFP_KERNEL); +} +EXPORT_SYMBOL(flow_rule_alloc); + +#define FLOW_DISSECTOR_MATCH(__rule, __type, __out) \ + const struct flow_match *__m = &(__rule)->match; \ + struct flow_dissector *__d = (__m)->dissector; \ + \ + (__out)->key = skb_flow_dissector_target(__d, __type, (__m)->key); \ + (__out)->mask = skb_flow_dissector_target(__d, __type, (__m)->mask); \ + +void flow_rule_match_basic(const struct flow_rule *rule, + struct flow_match_basic *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_BASIC, out); +} +EXPORT_SYMBOL(flow_rule_match_basic); + +void flow_rule_match_control(const struct flow_rule *rule, + struct flow_match_control *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_CONTROL, out); +} +EXPORT_SYMBOL(flow_rule_match_control); + +void flow_rule_match_eth_addrs(const struct flow_rule *rule, + struct flow_match_eth_addrs *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS, out); +} +EXPORT_SYMBOL(flow_rule_match_eth_addrs); + +void flow_rule_match_vlan(const struct flow_rule *rule, + struct flow_match_vlan *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_VLAN, out); +} +EXPORT_SYMBOL(flow_rule_match_vlan); + +void flow_rule_match_ipv4_addrs(const struct flow_rule *rule, + struct flow_match_ipv4_addrs *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS, out); +} +EXPORT_SYMBOL(flow_rule_match_ipv4_addrs); + +void flow_rule_match_ipv6_addrs(const struct flow_rule *rule, + struct flow_match_ipv6_addrs *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS, out); +} +EXPORT_SYMBOL(flow_rule_match_ipv6_addrs); + +void flow_rule_match_ip(const struct flow_rule *rule, + struct flow_match_ip *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IP, out); +} +EXPORT_SYMBOL(flow_rule_match_ip); + +void flow_rule_match_ports(const struct flow_rule *rule, + struct flow_match_ports *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_PORTS, out); +} +EXPORT_SYMBOL(flow_rule_match_ports); + +void flow_rule_match_tcp(const struct flow_rule *rule, + struct flow_match_tcp *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_TCP, out); +} +EXPORT_SYMBOL(flow_rule_match_tcp); + +void flow_rule_match_icmp(const struct flow_rule *rule, + struct flow_match_icmp *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ICMP, out); +} +EXPORT_SYMBOL(flow_rule_match_icmp); + +void flow_rule_match_mpls(const struct flow_rule *rule, + struct flow_match_mpls *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_MPLS, out); +} +EXPORT_SYMBOL(flow_rule_match_mpls); + +void flow_rule_match_enc_control(const struct flow_rule *rule, + struct flow_match_control *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL, out); +} +EXPORT_SYMBOL(flow_rule_match_enc_control); + +void flow_rule_match_enc_ipv4_addrs(const struct flow_rule *rule, + struct flow_match_ipv4_addrs *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, out); +} +EXPORT_SYMBOL(flow_rule_match_enc_ipv4_addrs); + +void flow_rule_match_enc_ipv6_addrs(const struct flow_rule *rule, + struct flow_match_ipv6_addrs *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, out); +} +EXPORT_SYMBOL(flow_rule_match_enc_ipv6_addrs); + +void flow_rule_match_enc_ip(const struct flow_rule *rule, + struct flow_match_ip *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_IP, out); +} +EXPORT_SYMBOL(flow_rule_match_enc_ip); + +void flow_rule_match_enc_ports(const struct flow_rule *rule, + struct flow_match_ports *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_PORTS, out); +} +EXPORT_SYMBOL(flow_rule_match_enc_ports); + +void flow_rule_match_enc_keyid(const struct flow_rule *rule, + struct flow_match_enc_keyid *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_KEYID, out); +} +EXPORT_SYMBOL(flow_rule_match_enc_keyid); + +void flow_rule_match_enc_opts(const struct flow_rule *rule, + struct flow_match_enc_opts *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_OPTS, out); +} +EXPORT_SYMBOL(flow_rule_match_enc_opts); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index f6aa57fbbbaf..aaffea0b66e9 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -381,16 +381,22 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, bool skip_sw = tc_skip_sw(f->flags); int err; + cls_flower.rule = flow_rule_alloc(); + if (!cls_flower.rule) + return -ENOMEM; + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = TC_CLSFLOWER_REPLACE; cls_flower.cookie = (unsigned long) f; - cls_flower.dissector = &f->mask->dissector; - cls_flower.mask = &f->mask->key; - cls_flower.key = &f->mkey; + cls_flower.rule->match.dissector = &f->mask->dissector; + cls_flower.rule->match.mask = &f->mask->key; + cls_flower.rule->match.key = &f->mkey; cls_flower.exts = &f->exts; cls_flower.classid = f->res.classid; err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); + kfree(cls_flower.rule); + if (err < 0) { fl_hw_destroy_filter(tp, f, NULL); return err; @@ -1463,18 +1469,24 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, if (tc_skip_hw(f->flags)) continue; + cls_flower.rule = flow_rule_alloc(); + if (!cls_flower.rule) + return -ENOMEM; + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = add ? TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY; cls_flower.cookie = (unsigned long)f; - cls_flower.dissector = &mask->dissector; - cls_flower.mask = &mask->key; - cls_flower.key = &f->mkey; + cls_flower.rule->match.dissector = &mask->dissector; + cls_flower.rule->match.mask = &mask->key; + cls_flower.rule->match.key = &f->mkey; cls_flower.exts = &f->exts; cls_flower.classid = f->res.classid; err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv); + kfree(cls_flower.rule); + if (err) { if (add && tc_skip_sw(f->flags)) return err; @@ -1489,25 +1501,32 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, return 0; } -static void fl_hw_create_tmplt(struct tcf_chain *chain, - struct fl_flow_tmplt *tmplt) +static int fl_hw_create_tmplt(struct tcf_chain *chain, + struct fl_flow_tmplt *tmplt) { struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = chain->block; struct tcf_exts dummy_exts = { 0, }; + cls_flower.rule = flow_rule_alloc(); + if (!cls_flower.rule) + return -ENOMEM; + cls_flower.common.chain_index = chain->index; cls_flower.command = TC_CLSFLOWER_TMPLT_CREATE; cls_flower.cookie = (unsigned long) tmplt; - cls_flower.dissector = &tmplt->dissector; - cls_flower.mask = &tmplt->mask; - cls_flower.key = &tmplt->dummy_key; + cls_flower.rule->match.dissector = &tmplt->dissector; + cls_flower.rule->match.mask = &tmplt->mask; + cls_flower.rule->match.key = &tmplt->dummy_key; cls_flower.exts = &dummy_exts; /* We don't care if driver (any of them) fails to handle this * call. It serves just as a hint for it. */ tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + kfree(cls_flower.rule); + + return 0; } static void fl_hw_destroy_tmplt(struct tcf_chain *chain, @@ -1551,12 +1570,14 @@ static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain, err = fl_set_key(net, tb, &tmplt->dummy_key, &tmplt->mask, extack); if (err) goto errout_tmplt; - kfree(tb); fl_init_dissector(&tmplt->dissector, &tmplt->mask); - fl_hw_create_tmplt(chain, tmplt); + err = fl_hw_create_tmplt(chain, tmplt); + if (err) + goto errout_tmplt; + kfree(tb); return tmplt; errout_tmplt: From c500c86b0c75da167b59ee82f78e394fd10cb792 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 2 Feb 2019 12:50:44 +0100 Subject: [PATCH 0678/1436] net/mlx5e: support for two independent packet edit actions This patch adds pedit_headers_action structure to store the result of parsing tc pedit actions. Then, it calls alloc_tc_pedit_action() to populate the mlx5e hardware intermediate representation once all actions have been parsed. This patch comes in preparation for the new flow_action infrastructure, where each packet mangling comes in an separated action, ie. not packed as in tc pedit. Signed-off-by: Pablo Neira Ayuso Acked-by: Jiri Pirko Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 81 ++++++++++++++----- 1 file changed, 59 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cd289ce0582d..1c8e8da166bd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1804,6 +1804,12 @@ struct pedit_headers { struct udphdr udp; }; +struct pedit_headers_action { + struct pedit_headers vals; + struct pedit_headers masks; + u32 pedits; +}; + static int pedit_header_offsets[] = { [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), @@ -1815,16 +1821,15 @@ static int pedit_header_offsets[] = { #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype]) static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, - struct pedit_headers *masks, - struct pedit_headers *vals) + struct pedit_headers_action *hdrs) { u32 *curr_pmask, *curr_pval; if (hdr_type >= __PEDIT_HDR_TYPE_MAX) goto out_err; - curr_pmask = (u32 *)(pedit_header(masks, hdr_type) + offset); - curr_pval = (u32 *)(pedit_header(vals, hdr_type) + offset); + curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset); + curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset); if (*curr_pmask & mask) /* disallow acting twice on the same location */ goto out_err; @@ -1880,8 +1885,7 @@ static struct mlx5_fields fields[] = { * max from the SW pedit action. On success, it says how many HW actions were * actually parsed. */ -static int offload_pedit_fields(struct pedit_headers *masks, - struct pedit_headers *vals, +static int offload_pedit_fields(struct pedit_headers_action *hdrs, struct mlx5e_tc_flow_parse_attr *parse_attr, struct netlink_ext_ack *extack) { @@ -1896,10 +1900,10 @@ static int offload_pedit_fields(struct pedit_headers *masks, __be16 mask_be16; void *action; - set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET]; - add_masks = &masks[TCA_PEDIT_KEY_EX_CMD_ADD]; - set_vals = &vals[TCA_PEDIT_KEY_EX_CMD_SET]; - add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD]; + set_masks = &hdrs[TCA_PEDIT_KEY_EX_CMD_SET].masks; + add_masks = &hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].masks; + set_vals = &hdrs[TCA_PEDIT_KEY_EX_CMD_SET].vals; + add_vals = &hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].vals; action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); action = parse_attr->mod_hdr_actions; @@ -1995,12 +1999,14 @@ static int offload_pedit_fields(struct pedit_headers *masks, } static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, - const struct tc_action *a, int namespace, + struct pedit_headers_action *hdrs, + int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr) { int nkeys, action_size, max_actions; - nkeys = tcf_pedit_nkeys(a); + nkeys = hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits + + hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits; action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */ @@ -2024,18 +2030,15 @@ static const struct pedit_headers zero_masks = {}; static int parse_tc_pedit_action(struct mlx5e_priv *priv, const struct tc_action *a, int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, struct netlink_ext_ack *extack) { - struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks; int nkeys, i, err = -EOPNOTSUPP; u32 mask, val, offset; u8 cmd, htype; nkeys = tcf_pedit_nkeys(a); - memset(masks, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX); - memset(vals, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX); - for (i = 0; i < nkeys; i++) { htype = tcf_pedit_htype(a, i); cmd = tcf_pedit_cmd(a, i); @@ -2056,21 +2059,37 @@ static int parse_tc_pedit_action(struct mlx5e_priv *priv, val = tcf_pedit_val(a, i); offset = tcf_pedit_offset(a, i); - err = set_pedit_val(htype, ~mask, val, offset, &masks[cmd], &vals[cmd]); + err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]); if (err) goto out_err; + + hdrs[cmd].pedits++; } - err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr); + return 0; +out_err: + return err; +} + +static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack) +{ + struct pedit_headers *cmd_masks; + int err; + u8 cmd; + + err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr); if (err) goto out_err; - err = offload_pedit_fields(masks, vals, parse_attr, extack); + err = offload_pedit_fields(hdrs, parse_attr, extack); if (err < 0) goto out_dealloc_parsed_actions; for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) { - cmd_masks = &masks[cmd]; + cmd_masks = &hdrs[cmd].masks; if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) { NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field"); @@ -2211,6 +2230,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct pedit_headers_action hdrs[__PEDIT_CMD_MAX] = {}; struct mlx5_nic_flow_attr *attr = flow->nic_attr; const struct tc_action *a; u32 action = 0; @@ -2232,7 +2252,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (is_tcf_pedit(a)) { err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL, - parse_attr, extack); + parse_attr, hdrs, extack); if (err) return err; @@ -2286,6 +2306,14 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } + if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || + hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) { + err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL, + parse_attr, hdrs, extack); + if (err) + return err; + } + attr->action = action; if (!actions_match_supported(priv, exts, parse_attr, flow, extack)) return -EOPNOTSUPP; @@ -2446,6 +2474,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct pedit_headers_action hdrs[__PEDIT_CMD_MAX] = {}; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -2470,7 +2499,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (is_tcf_pedit(a)) { err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB, - parse_attr, extack); + parse_attr, hdrs, extack); if (err) return err; @@ -2605,6 +2634,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } + if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || + hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) { + err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL, + parse_attr, hdrs, extack); + if (err) + return err; + } + attr->action = action; if (!actions_match_supported(priv, exts, parse_attr, flow, extack)) return -EOPNOTSUPP; From e3ab786b42535da00c731c3585165e88bf35ab09 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 2 Feb 2019 12:50:45 +0100 Subject: [PATCH 0679/1436] flow_offload: add flow action infrastructure This new infrastructure defines the nic actions that you can perform from existing network drivers. This infrastructure allows us to avoid a direct dependency with the native software TC action representation. Signed-off-by: Pablo Neira Ayuso Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/flow_offload.h | 73 ++++++++++++++++++++++++++++++++++++-- include/net/pkt_cls.h | 1 + net/core/flow_offload.c | 14 ++++++-- net/sched/cls_api.c | 17 +++++++++ net/sched/cls_flower.c | 7 ++-- 5 files changed, 104 insertions(+), 8 deletions(-) diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 461c66595763..dabc819b6cc9 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -100,11 +100,78 @@ void flow_rule_match_enc_keyid(const struct flow_rule *rule, void flow_rule_match_enc_opts(const struct flow_rule *rule, struct flow_match_enc_opts *out); -struct flow_rule { - struct flow_match match; +enum flow_action_id { + FLOW_ACTION_ACCEPT = 0, + FLOW_ACTION_DROP, + FLOW_ACTION_TRAP, + FLOW_ACTION_GOTO, + FLOW_ACTION_REDIRECT, + FLOW_ACTION_MIRRED, + FLOW_ACTION_VLAN_PUSH, + FLOW_ACTION_VLAN_POP, + FLOW_ACTION_VLAN_MANGLE, + FLOW_ACTION_TUNNEL_ENCAP, + FLOW_ACTION_TUNNEL_DECAP, + FLOW_ACTION_MANGLE, + FLOW_ACTION_ADD, + FLOW_ACTION_CSUM, + FLOW_ACTION_MARK, }; -struct flow_rule *flow_rule_alloc(void); +/* This is mirroring enum pedit_header_type definition for easy mapping between + * tc pedit action. Legacy TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK is mapped to + * FLOW_ACT_MANGLE_UNSPEC, which is supported by no driver. + */ +enum flow_action_mangle_base { + FLOW_ACT_MANGLE_UNSPEC = 0, + FLOW_ACT_MANGLE_HDR_TYPE_ETH, + FLOW_ACT_MANGLE_HDR_TYPE_IP4, + FLOW_ACT_MANGLE_HDR_TYPE_IP6, + FLOW_ACT_MANGLE_HDR_TYPE_TCP, + FLOW_ACT_MANGLE_HDR_TYPE_UDP, +}; + +struct flow_action_entry { + enum flow_action_id id; + union { + u32 chain_index; /* FLOW_ACTION_GOTO */ + struct net_device *dev; /* FLOW_ACTION_REDIRECT */ + struct { /* FLOW_ACTION_VLAN */ + u16 vid; + __be16 proto; + u8 prio; + } vlan; + struct { /* FLOW_ACTION_PACKET_EDIT */ + enum flow_action_mangle_base htype; + u32 offset; + u32 mask; + u32 val; + } mangle; + const struct ip_tunnel_info *tunnel; /* FLOW_ACTION_TUNNEL_ENCAP */ + u32 csum_flags; /* FLOW_ACTION_CSUM */ + u32 mark; /* FLOW_ACTION_MARK */ + }; +}; + +struct flow_action { + unsigned int num_entries; + struct flow_action_entry entries[0]; +}; + +static inline bool flow_action_has_entries(const struct flow_action *action) +{ + return action->num_entries; +} + +#define flow_action_for_each(__i, __act, __actions) \ + for (__i = 0, __act = &(__actions)->entries[0]; __i < (__actions)->num_entries; __act = &(__actions)->entries[__i++]) + +struct flow_rule { + struct flow_match match; + struct flow_action action; +}; + +struct flow_rule *flow_rule_alloc(unsigned int num_actions); static inline bool flow_rule_match_key(const struct flow_rule *rule, enum flow_dissector_key_id key) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 04b64523cc32..74a7582ad47c 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -622,6 +622,7 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop); +unsigned int tcf_exts_num_actions(struct tcf_exts *exts); enum tc_block_command { TC_BLOCK_BIND, diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 2fbf6903d2f6..c3a00eac4804 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -3,9 +3,19 @@ #include #include -struct flow_rule *flow_rule_alloc(void) +struct flow_rule *flow_rule_alloc(unsigned int num_actions) { - return kzalloc(sizeof(struct flow_rule), GFP_KERNEL); + struct flow_rule *rule; + + rule = kzalloc(sizeof(struct flow_rule) + + sizeof(struct flow_action_entry) * num_actions, + GFP_KERNEL); + if (!rule) + return NULL; + + rule->action.num_entries = num_actions; + + return rule; } EXPORT_SYMBOL(flow_rule_alloc); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e2b5cb2eb34e..57713c63ac56 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -31,6 +31,7 @@ #include #include #include +#include extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; @@ -2515,6 +2516,22 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, } EXPORT_SYMBOL(tc_setup_cb_call); +unsigned int tcf_exts_num_actions(struct tcf_exts *exts) +{ + unsigned int num_acts = 0; + struct tc_action *act; + int i; + + tcf_exts_for_each_action(i, act, exts) { + if (is_tcf_pedit(act)) + num_acts += tcf_pedit_nkeys(act); + else + num_acts++; + } + return num_acts; +} +EXPORT_SYMBOL(tcf_exts_num_actions); + static __net_init int tcf_net_init(struct net *net) { struct tcf_net *tn = net_generic(net, tcf_net_id); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index aaffea0b66e9..0062c9133a22 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -381,7 +381,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, bool skip_sw = tc_skip_sw(f->flags); int err; - cls_flower.rule = flow_rule_alloc(); + cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts)); if (!cls_flower.rule) return -ENOMEM; @@ -1469,7 +1469,8 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, if (tc_skip_hw(f->flags)) continue; - cls_flower.rule = flow_rule_alloc(); + cls_flower.rule = + flow_rule_alloc(tcf_exts_num_actions(&f->exts)); if (!cls_flower.rule) return -ENOMEM; @@ -1508,7 +1509,7 @@ static int fl_hw_create_tmplt(struct tcf_chain *chain, struct tcf_block *block = chain->block; struct tcf_exts dummy_exts = { 0, }; - cls_flower.rule = flow_rule_alloc(); + cls_flower.rule = flow_rule_alloc(0); if (!cls_flower.rule) return -ENOMEM; From 3a7b68617de749d930b5503c3c5f30a17524f0bd Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 2 Feb 2019 12:50:46 +0100 Subject: [PATCH 0680/1436] cls_api: add translator to flow_action representation This patch implements a new function to translate from native TC action to the new flow_action representation. Moreover, this patch also updates cls_flower to use this new function. Signed-off-by: Pablo Neira Ayuso Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 + net/sched/cls_api.c | 99 ++++++++++++++++++++++++++++++++++++++++++ net/sched/cls_flower.c | 14 ++++++ 3 files changed, 115 insertions(+) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 74a7582ad47c..c470c100b926 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -620,6 +620,8 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } #endif /* CONFIG_NET_CLS_IND */ +int tc_setup_flow_action(struct flow_action *flow_action, + const struct tcf_exts *exts); int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop); unsigned int tcf_exts_num_actions(struct tcf_exts *exts); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 57713c63ac56..02cf6d2fa0e1 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -32,6 +32,13 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; @@ -2516,6 +2523,98 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, } EXPORT_SYMBOL(tc_setup_cb_call); +int tc_setup_flow_action(struct flow_action *flow_action, + const struct tcf_exts *exts) +{ + const struct tc_action *act; + int i, j, k; + + if (!exts) + return 0; + + j = 0; + tcf_exts_for_each_action(i, act, exts) { + struct flow_action_entry *entry; + + entry = &flow_action->entries[j]; + if (is_tcf_gact_ok(act)) { + entry->id = FLOW_ACTION_ACCEPT; + } else if (is_tcf_gact_shot(act)) { + entry->id = FLOW_ACTION_DROP; + } else if (is_tcf_gact_trap(act)) { + entry->id = FLOW_ACTION_TRAP; + } else if (is_tcf_gact_goto_chain(act)) { + entry->id = FLOW_ACTION_GOTO; + entry->chain_index = tcf_gact_goto_chain_index(act); + } else if (is_tcf_mirred_egress_redirect(act)) { + entry->id = FLOW_ACTION_REDIRECT; + entry->dev = tcf_mirred_dev(act); + } else if (is_tcf_mirred_egress_mirror(act)) { + entry->id = FLOW_ACTION_MIRRED; + entry->dev = tcf_mirred_dev(act); + } else if (is_tcf_vlan(act)) { + switch (tcf_vlan_action(act)) { + case TCA_VLAN_ACT_PUSH: + entry->id = FLOW_ACTION_VLAN_PUSH; + entry->vlan.vid = tcf_vlan_push_vid(act); + entry->vlan.proto = tcf_vlan_push_proto(act); + entry->vlan.prio = tcf_vlan_push_prio(act); + break; + case TCA_VLAN_ACT_POP: + entry->id = FLOW_ACTION_VLAN_POP; + break; + case TCA_VLAN_ACT_MODIFY: + entry->id = FLOW_ACTION_VLAN_MANGLE; + entry->vlan.vid = tcf_vlan_push_vid(act); + entry->vlan.proto = tcf_vlan_push_proto(act); + entry->vlan.prio = tcf_vlan_push_prio(act); + break; + default: + goto err_out; + } + } else if (is_tcf_tunnel_set(act)) { + entry->id = FLOW_ACTION_TUNNEL_ENCAP; + entry->tunnel = tcf_tunnel_info(act); + } else if (is_tcf_tunnel_release(act)) { + entry->id = FLOW_ACTION_TUNNEL_DECAP; + entry->tunnel = tcf_tunnel_info(act); + } else if (is_tcf_pedit(act)) { + for (k = 0; k < tcf_pedit_nkeys(act); k++) { + switch (tcf_pedit_cmd(act, k)) { + case TCA_PEDIT_KEY_EX_CMD_SET: + entry->id = FLOW_ACTION_MANGLE; + break; + case TCA_PEDIT_KEY_EX_CMD_ADD: + entry->id = FLOW_ACTION_ADD; + break; + default: + goto err_out; + } + entry->mangle.htype = tcf_pedit_htype(act, k); + entry->mangle.mask = tcf_pedit_mask(act, k); + entry->mangle.val = tcf_pedit_val(act, k); + entry->mangle.offset = tcf_pedit_offset(act, k); + entry = &flow_action->entries[++j]; + } + } else if (is_tcf_csum(act)) { + entry->id = FLOW_ACTION_CSUM; + entry->csum_flags = tcf_csum_update_flags(act); + } else if (is_tcf_skbedit_mark(act)) { + entry->id = FLOW_ACTION_MARK; + entry->mark = tcf_skbedit_mark(act); + } else { + goto err_out; + } + + if (!is_tcf_pedit(act)) + j++; + } + return 0; +err_out: + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(tc_setup_flow_action); + unsigned int tcf_exts_num_actions(struct tcf_exts *exts) { unsigned int num_acts = 0; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 0062c9133a22..48c54ef52a98 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -394,6 +394,12 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, cls_flower.exts = &f->exts; cls_flower.classid = f->res.classid; + err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); + if (err) { + kfree(cls_flower.rule); + return err; + } + err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); kfree(cls_flower.rule); @@ -1483,6 +1489,14 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, cls_flower.rule->match.mask = &mask->key; cls_flower.rule->match.key = &f->mkey; cls_flower.exts = &f->exts; + + err = tc_setup_flow_action(&cls_flower.rule->action, + &f->exts); + if (err) { + kfree(cls_flower.rule); + return err; + } + cls_flower.classid = f->res.classid; err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv); From 3b1903ef97c080a80ead3a6a2305f55108e08269 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 2 Feb 2019 12:50:47 +0100 Subject: [PATCH 0681/1436] flow_offload: add statistics retrieval infrastructure and use it This patch provides the flow_stats structure that acts as container for tc_cls_flower_offload, then we can use to restore the statistics on the existing TC actions. Hence, tcf_exts_stats_update() is not used from drivers anymore. Signed-off-by: Pablo Neira Ayuso Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 4 ++-- .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- .../net/ethernet/mellanox/mlxsw/spectrum_flower.c | 2 +- .../net/ethernet/netronome/nfp/flower/offload.c | 5 ++--- include/net/flow_offload.h | 14 ++++++++++++++ include/net/pkt_cls.h | 1 + net/sched/cls_flower.c | 4 ++++ 8 files changed, 28 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 90a2170c5138..6a87434b7173 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -1396,8 +1396,8 @@ static int bnxt_tc_get_flow_stats(struct bnxt *bp, lastused = flow->lastused; spin_unlock(&flow->stats_lock); - tcf_exts_stats_update(tc_flow_cmd->exts, stats.bytes, stats.packets, - lastused); + flow_stats_update(&tc_flow_cmd->stats, stats.bytes, stats.packets, + lastused); return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 39c5af5dad3d..8a2d66ee1d7b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -807,9 +807,9 @@ int cxgb4_tc_flower_stats(struct net_device *dev, if (ofld_stats->packet_count != packets) { if (ofld_stats->prev_packet_count != packets) ofld_stats->last_used = jiffies; - tcf_exts_stats_update(cls->exts, bytes - ofld_stats->byte_count, - packets - ofld_stats->packet_count, - ofld_stats->last_used); + flow_stats_update(&cls->stats, bytes - ofld_stats->byte_count, + packets - ofld_stats->packet_count, + ofld_stats->last_used); ofld_stats->packet_count = packets; ofld_stats->byte_count = bytes; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 1c8e8da166bd..c4c6bbcafc68 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3071,7 +3071,7 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); out: - tcf_exts_stats_update(f->exts, bytes, packets, lastuse); + flow_stats_update(&f->stats, bytes, packets, lastuse); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index a20379e29e02..c090ecb62041 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -460,7 +460,7 @@ int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp, if (err) goto err_rule_get_stats; - tcf_exts_stats_update(f->exts, bytes, packets, lastuse); + flow_stats_update(&f->stats, bytes, packets, lastuse); mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); return 0; diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 74f7ff292052..fe1469d201af 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -554,9 +554,8 @@ nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev, ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id); spin_lock_bh(&priv->stats_lock); - tcf_exts_stats_update(flow->exts, priv->stats[ctx_id].bytes, - priv->stats[ctx_id].pkts, - priv->stats[ctx_id].used); + flow_stats_update(&flow->stats, priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, priv->stats[ctx_id].used); priv->stats[ctx_id].pkts = 0; priv->stats[ctx_id].bytes = 0; diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index dabc819b6cc9..f9ce39992dbd 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -179,4 +179,18 @@ static inline bool flow_rule_match_key(const struct flow_rule *rule, return dissector_uses_key(rule->match.dissector, key); } +struct flow_stats { + u64 pkts; + u64 bytes; + u64 lastused; +}; + +static inline void flow_stats_update(struct flow_stats *flow_stats, + u64 bytes, u64 pkts, u64 lastused) +{ + flow_stats->pkts = pkts; + flow_stats->bytes = bytes; + flow_stats->lastused = lastused; +} + #endif /* _NET_FLOW_OFFLOAD_H */ diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index c470c100b926..bea1b1c3ea9b 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -765,6 +765,7 @@ struct tc_cls_flower_offload { enum tc_fl_command command; unsigned long cookie; struct flow_rule *rule; + struct flow_stats stats; struct tcf_exts *exts; u32 classid; }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 48c54ef52a98..8ec85056aa0d 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -429,6 +429,10 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) cls_flower.classid = f->res.classid; tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + + tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes, + cls_flower.stats.pkts, + cls_flower.stats.lastused); } static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, From 738678817573ce45698e1bb13222f2e53622c555 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 2 Feb 2019 12:50:48 +0100 Subject: [PATCH 0682/1436] drivers: net: use flow action infrastructure This patch updates drivers to use the new flow action infrastructure. Signed-off-by: Pablo Neira Ayuso Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 74 +++-- .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 254 +++++++++-------- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 269 ++++++++---------- .../ethernet/mellanox/mlxsw/spectrum_acl.c | 2 +- .../ethernet/mellanox/mlxsw/spectrum_flower.c | 54 ++-- .../ethernet/netronome/nfp/flower/action.c | 187 ++++++------ .../net/ethernet/qlogic/qede/qede_filter.c | 12 +- 7 files changed, 422 insertions(+), 430 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 6a87434b7173..7a1fdad1aea6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -61,9 +61,9 @@ static u16 bnxt_flow_get_dst_fid(struct bnxt *pf_bp, struct net_device *dev) static int bnxt_tc_parse_redir(struct bnxt *bp, struct bnxt_tc_actions *actions, - const struct tc_action *tc_act) + const struct flow_action_entry *act) { - struct net_device *dev = tcf_mirred_dev(tc_act); + struct net_device *dev = act->dev; if (!dev) { netdev_info(bp->dev, "no dev in mirred action"); @@ -77,16 +77,16 @@ static int bnxt_tc_parse_redir(struct bnxt *bp, static int bnxt_tc_parse_vlan(struct bnxt *bp, struct bnxt_tc_actions *actions, - const struct tc_action *tc_act) + const struct flow_action_entry *act) { - switch (tcf_vlan_action(tc_act)) { - case TCA_VLAN_ACT_POP: + switch (act->id) { + case FLOW_ACTION_VLAN_POP: actions->flags |= BNXT_TC_ACTION_FLAG_POP_VLAN; break; - case TCA_VLAN_ACT_PUSH: + case FLOW_ACTION_VLAN_PUSH: actions->flags |= BNXT_TC_ACTION_FLAG_PUSH_VLAN; - actions->push_vlan_tci = htons(tcf_vlan_push_vid(tc_act)); - actions->push_vlan_tpid = tcf_vlan_push_proto(tc_act); + actions->push_vlan_tci = htons(act->vlan.vid); + actions->push_vlan_tpid = act->vlan.proto; break; default: return -EOPNOTSUPP; @@ -96,10 +96,10 @@ static int bnxt_tc_parse_vlan(struct bnxt *bp, static int bnxt_tc_parse_tunnel_set(struct bnxt *bp, struct bnxt_tc_actions *actions, - const struct tc_action *tc_act) + const struct flow_action_entry *act) { - struct ip_tunnel_info *tun_info = tcf_tunnel_info(tc_act); - struct ip_tunnel_key *tun_key = &tun_info->key; + const struct ip_tunnel_info *tun_info = act->tunnel; + const struct ip_tunnel_key *tun_key = &tun_info->key; if (ip_tunnel_info_af(tun_info) != AF_INET) { netdev_info(bp->dev, "only IPv4 tunnel-encap is supported"); @@ -113,51 +113,43 @@ static int bnxt_tc_parse_tunnel_set(struct bnxt *bp, static int bnxt_tc_parse_actions(struct bnxt *bp, struct bnxt_tc_actions *actions, - struct tcf_exts *tc_exts) + struct flow_action *flow_action) { - const struct tc_action *tc_act; + struct flow_action_entry *act; int i, rc; - if (!tcf_exts_has_actions(tc_exts)) { + if (!flow_action_has_entries(flow_action)) { netdev_info(bp->dev, "no actions"); return -EINVAL; } - tcf_exts_for_each_action(i, tc_act, tc_exts) { - /* Drop action */ - if (is_tcf_gact_shot(tc_act)) { + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_DROP: actions->flags |= BNXT_TC_ACTION_FLAG_DROP; return 0; /* don't bother with other actions */ - } - - /* Redirect action */ - if (is_tcf_mirred_egress_redirect(tc_act)) { - rc = bnxt_tc_parse_redir(bp, actions, tc_act); + case FLOW_ACTION_REDIRECT: + rc = bnxt_tc_parse_redir(bp, actions, act); if (rc) return rc; - continue; - } - - /* Push/pop VLAN */ - if (is_tcf_vlan(tc_act)) { - rc = bnxt_tc_parse_vlan(bp, actions, tc_act); + break; + case FLOW_ACTION_VLAN_POP: + case FLOW_ACTION_VLAN_PUSH: + case FLOW_ACTION_VLAN_MANGLE: + rc = bnxt_tc_parse_vlan(bp, actions, act); if (rc) return rc; - continue; - } - - /* Tunnel encap */ - if (is_tcf_tunnel_set(tc_act)) { - rc = bnxt_tc_parse_tunnel_set(bp, actions, tc_act); + break; + case FLOW_ACTION_TUNNEL_ENCAP: + rc = bnxt_tc_parse_tunnel_set(bp, actions, act); if (rc) return rc; - continue; - } - - /* Tunnel decap */ - if (is_tcf_tunnel_release(tc_act)) { + break; + case FLOW_ACTION_TUNNEL_DECAP: actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_DECAP; - continue; + break; + default: + break; } } @@ -308,7 +300,7 @@ static int bnxt_tc_parse_flow(struct bnxt *bp, flow->tun_mask.tp_src = match.mask->src; } - return bnxt_tc_parse_actions(bp, &flow->actions, tc_flow_cmd->exts); + return bnxt_tc_parse_actions(bp, &flow->actions, &rule->action); } static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 8a2d66ee1d7b..82a8d1970060 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -292,7 +292,7 @@ static void process_pedit_field(struct ch_filter_specification *fs, u32 val, u32 mask, u32 offset, u8 htype) { switch (htype) { - case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: + case FLOW_ACT_MANGLE_HDR_TYPE_ETH: switch (offset) { case PEDIT_ETH_DMAC_31_0: fs->newdmac = 1; @@ -310,7 +310,7 @@ static void process_pedit_field(struct ch_filter_specification *fs, u32 val, offload_pedit(fs, val, mask, ETH_SMAC_47_16); } break; - case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: switch (offset) { case PEDIT_IP4_SRC: offload_pedit(fs, val, mask, IP4_SRC); @@ -320,7 +320,7 @@ static void process_pedit_field(struct ch_filter_specification *fs, u32 val, } fs->nat_mode = NAT_MODE_ALL; break; - case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: + case FLOW_ACT_MANGLE_HDR_TYPE_IP6: switch (offset) { case PEDIT_IP6_SRC_31_0: offload_pedit(fs, val, mask, IP6_SRC_31_0); @@ -348,7 +348,7 @@ static void process_pedit_field(struct ch_filter_specification *fs, u32 val, } fs->nat_mode = NAT_MODE_ALL; break; - case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: switch (offset) { case PEDIT_TCP_SPORT_DPORT: if (~mask & PEDIT_TCP_UDP_SPORT_MASK) @@ -361,7 +361,7 @@ static void process_pedit_field(struct ch_filter_specification *fs, u32 val, } fs->nat_mode = NAT_MODE_ALL; break; - case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: switch (offset) { case PEDIT_UDP_SPORT_DPORT: if (~mask & PEDIT_TCP_UDP_SPORT_MASK) @@ -380,56 +380,63 @@ static void cxgb4_process_flow_actions(struct net_device *in, struct tc_cls_flower_offload *cls, struct ch_filter_specification *fs) { - const struct tc_action *a; + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(cls); + struct flow_action_entry *act; int i; - tcf_exts_for_each_action(i, a, cls->exts) { - if (is_tcf_gact_ok(a)) { + flow_action_for_each(i, act, &rule->action) { + switch (act->id) { + case FLOW_ACTION_ACCEPT: fs->action = FILTER_PASS; - } else if (is_tcf_gact_shot(a)) { + break; + case FLOW_ACTION_DROP: fs->action = FILTER_DROP; - } else if (is_tcf_mirred_egress_redirect(a)) { - struct net_device *out = tcf_mirred_dev(a); + break; + case FLOW_ACTION_REDIRECT: { + struct net_device *out = act->dev; struct port_info *pi = netdev_priv(out); fs->action = FILTER_SWITCH; fs->eport = pi->port_id; - } else if (is_tcf_vlan(a)) { - u32 vlan_action = tcf_vlan_action(a); - u8 prio = tcf_vlan_push_prio(a); - u16 vid = tcf_vlan_push_vid(a); + } + break; + case FLOW_ACTION_VLAN_POP: + case FLOW_ACTION_VLAN_PUSH: + case FLOW_ACTION_VLAN_MANGLE: { + u8 prio = act->vlan.prio; + u16 vid = act->vlan.vid; u16 vlan_tci = (prio << VLAN_PRIO_SHIFT) | vid; - - switch (vlan_action) { - case TCA_VLAN_ACT_POP: + switch (act->id) { + case FLOW_ACTION_VLAN_POP: fs->newvlan |= VLAN_REMOVE; break; - case TCA_VLAN_ACT_PUSH: + case FLOW_ACTION_VLAN_PUSH: fs->newvlan |= VLAN_INSERT; fs->vlan = vlan_tci; break; - case TCA_VLAN_ACT_MODIFY: + case FLOW_ACTION_VLAN_MANGLE: fs->newvlan |= VLAN_REWRITE; fs->vlan = vlan_tci; break; default: break; } - } else if (is_tcf_pedit(a)) { + } + break; + case FLOW_ACTION_MANGLE: { u32 mask, val, offset; - int nkeys, i; u8 htype; - nkeys = tcf_pedit_nkeys(a); - for (i = 0; i < nkeys; i++) { - htype = tcf_pedit_htype(a, i); - mask = tcf_pedit_mask(a, i); - val = tcf_pedit_val(a, i); - offset = tcf_pedit_offset(a, i); + htype = act->mangle.htype; + mask = act->mangle.mask; + val = act->mangle.val; + offset = act->mangle.offset; - process_pedit_field(fs, val, mask, offset, - htype); + process_pedit_field(fs, val, mask, offset, htype); } + break; + default: + break; } } } @@ -448,101 +455,89 @@ static bool valid_l4_mask(u32 mask) } static bool valid_pedit_action(struct net_device *dev, - const struct tc_action *a) + const struct flow_action_entry *act) { u32 mask, offset; - u8 cmd, htype; - int nkeys, i; + u8 htype; - nkeys = tcf_pedit_nkeys(a); - for (i = 0; i < nkeys; i++) { - htype = tcf_pedit_htype(a, i); - cmd = tcf_pedit_cmd(a, i); - mask = tcf_pedit_mask(a, i); - offset = tcf_pedit_offset(a, i); + htype = act->mangle.htype; + mask = act->mangle.mask; + offset = act->mangle.offset; - if (cmd != TCA_PEDIT_KEY_EX_CMD_SET) { - netdev_err(dev, "%s: Unsupported pedit cmd\n", + switch (htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_ETH: + switch (offset) { + case PEDIT_ETH_DMAC_31_0: + case PEDIT_ETH_DMAC_47_32_SMAC_15_0: + case PEDIT_ETH_SMAC_47_16: + break; + default: + netdev_err(dev, "%s: Unsupported pedit field\n", __func__); return false; } - - switch (htype) { - case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: - switch (offset) { - case PEDIT_ETH_DMAC_31_0: - case PEDIT_ETH_DMAC_47_32_SMAC_15_0: - case PEDIT_ETH_SMAC_47_16: - break; - default: - netdev_err(dev, "%s: Unsupported pedit field\n", - __func__); - return false; - } + break; + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + switch (offset) { + case PEDIT_IP4_SRC: + case PEDIT_IP4_DST: break; - case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: - switch (offset) { - case PEDIT_IP4_SRC: - case PEDIT_IP4_DST: - break; - default: - netdev_err(dev, "%s: Unsupported pedit field\n", - __func__); - return false; - } + default: + netdev_err(dev, "%s: Unsupported pedit field\n", + __func__); + return false; + } + break; + case FLOW_ACT_MANGLE_HDR_TYPE_IP6: + switch (offset) { + case PEDIT_IP6_SRC_31_0: + case PEDIT_IP6_SRC_63_32: + case PEDIT_IP6_SRC_95_64: + case PEDIT_IP6_SRC_127_96: + case PEDIT_IP6_DST_31_0: + case PEDIT_IP6_DST_63_32: + case PEDIT_IP6_DST_95_64: + case PEDIT_IP6_DST_127_96: break; - case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: - switch (offset) { - case PEDIT_IP6_SRC_31_0: - case PEDIT_IP6_SRC_63_32: - case PEDIT_IP6_SRC_95_64: - case PEDIT_IP6_SRC_127_96: - case PEDIT_IP6_DST_31_0: - case PEDIT_IP6_DST_63_32: - case PEDIT_IP6_DST_95_64: - case PEDIT_IP6_DST_127_96: - break; - default: - netdev_err(dev, "%s: Unsupported pedit field\n", - __func__); - return false; - } - break; - case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: - switch (offset) { - case PEDIT_TCP_SPORT_DPORT: - if (!valid_l4_mask(~mask)) { - netdev_err(dev, "%s: Unsupported mask for TCP L4 ports\n", - __func__); - return false; - } - break; - default: - netdev_err(dev, "%s: Unsupported pedit field\n", - __func__); - return false; - } - break; - case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: - switch (offset) { - case PEDIT_UDP_SPORT_DPORT: - if (!valid_l4_mask(~mask)) { - netdev_err(dev, "%s: Unsupported mask for UDP L4 ports\n", - __func__); - return false; - } - break; - default: - netdev_err(dev, "%s: Unsupported pedit field\n", + default: + netdev_err(dev, "%s: Unsupported pedit field\n", + __func__); + return false; + } + break; + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: + switch (offset) { + case PEDIT_TCP_SPORT_DPORT: + if (!valid_l4_mask(~mask)) { + netdev_err(dev, "%s: Unsupported mask for TCP L4 ports\n", __func__); return false; } break; default: - netdev_err(dev, "%s: Unsupported pedit type\n", + netdev_err(dev, "%s: Unsupported pedit field\n", __func__); return false; } + break; + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: + switch (offset) { + case PEDIT_UDP_SPORT_DPORT: + if (!valid_l4_mask(~mask)) { + netdev_err(dev, "%s: Unsupported mask for UDP L4 ports\n", + __func__); + return false; + } + break; + default: + netdev_err(dev, "%s: Unsupported pedit field\n", + __func__); + return false; + } + break; + default: + netdev_err(dev, "%s: Unsupported pedit type\n", __func__); + return false; } return true; } @@ -550,24 +545,26 @@ static bool valid_pedit_action(struct net_device *dev, static int cxgb4_validate_flow_actions(struct net_device *dev, struct tc_cls_flower_offload *cls) { - const struct tc_action *a; + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(cls); + struct flow_action_entry *act; bool act_redir = false; bool act_pedit = false; bool act_vlan = false; int i; - tcf_exts_for_each_action(i, a, cls->exts) { - if (is_tcf_gact_ok(a)) { + flow_action_for_each(i, act, &rule->action) { + switch (act->id) { + case FLOW_ACTION_ACCEPT: + case FLOW_ACTION_DROP: /* Do nothing */ - } else if (is_tcf_gact_shot(a)) { - /* Do nothing */ - } else if (is_tcf_mirred_egress_redirect(a)) { + break; + case FLOW_ACTION_REDIRECT: { struct adapter *adap = netdev2adap(dev); struct net_device *n_dev, *target_dev; unsigned int i; bool found = false; - target_dev = tcf_mirred_dev(a); + target_dev = act->dev; for_each_port(adap, i) { n_dev = adap->port[i]; if (target_dev == n_dev) { @@ -585,15 +582,18 @@ static int cxgb4_validate_flow_actions(struct net_device *dev, return -EINVAL; } act_redir = true; - } else if (is_tcf_vlan(a)) { - u16 proto = be16_to_cpu(tcf_vlan_push_proto(a)); - u32 vlan_action = tcf_vlan_action(a); + } + break; + case FLOW_ACTION_VLAN_POP: + case FLOW_ACTION_VLAN_PUSH: + case FLOW_ACTION_VLAN_MANGLE: { + u16 proto = be16_to_cpu(act->vlan.proto); - switch (vlan_action) { - case TCA_VLAN_ACT_POP: + switch (act->id) { + case FLOW_ACTION_VLAN_POP: break; - case TCA_VLAN_ACT_PUSH: - case TCA_VLAN_ACT_MODIFY: + case FLOW_ACTION_VLAN_PUSH: + case FLOW_ACTION_VLAN_MANGLE: if (proto != ETH_P_8021Q) { netdev_err(dev, "%s: Unsupported vlan proto\n", __func__); @@ -606,13 +606,17 @@ static int cxgb4_validate_flow_actions(struct net_device *dev, return -EOPNOTSUPP; } act_vlan = true; - } else if (is_tcf_pedit(a)) { - bool pedit_valid = valid_pedit_action(dev, a); + } + break; + case FLOW_ACTION_MANGLE: { + bool pedit_valid = valid_pedit_action(dev, act); if (!pedit_valid) return -EOPNOTSUPP; act_pedit = true; - } else { + } + break; + default: netdev_err(dev, "%s: Unsupported action\n", __func__); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index c4c6bbcafc68..83522c926d7c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1811,11 +1811,11 @@ struct pedit_headers_action { }; static int pedit_header_offsets[] = { - [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), - [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), - [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), - [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), - [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), + [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), + [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), + [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), + [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), + [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), }; #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype]) @@ -1825,7 +1825,7 @@ static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, { u32 *curr_pmask, *curr_pval; - if (hdr_type >= __PEDIT_HDR_TYPE_MAX) + if (hdr_type >= 2) goto out_err; curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset); @@ -1900,10 +1900,10 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, __be16 mask_be16; void *action; - set_masks = &hdrs[TCA_PEDIT_KEY_EX_CMD_SET].masks; - add_masks = &hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].masks; - set_vals = &hdrs[TCA_PEDIT_KEY_EX_CMD_SET].vals; - add_vals = &hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].vals; + set_masks = &hdrs[0].masks; + add_masks = &hdrs[1].masks; + set_vals = &hdrs[0].vals; + add_vals = &hdrs[1].vals; action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); action = parse_attr->mod_hdr_actions; @@ -2028,44 +2028,34 @@ static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, static const struct pedit_headers zero_masks = {}; static int parse_tc_pedit_action(struct mlx5e_priv *priv, - const struct tc_action *a, int namespace, + const struct flow_action_entry *act, int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr, struct pedit_headers_action *hdrs, struct netlink_ext_ack *extack) { - int nkeys, i, err = -EOPNOTSUPP; + u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1; + int err = -EOPNOTSUPP; u32 mask, val, offset; - u8 cmd, htype; + u8 htype; - nkeys = tcf_pedit_nkeys(a); + htype = act->mangle.htype; + err = -EOPNOTSUPP; /* can't be all optimistic */ - for (i = 0; i < nkeys; i++) { - htype = tcf_pedit_htype(a, i); - cmd = tcf_pedit_cmd(a, i); - err = -EOPNOTSUPP; /* can't be all optimistic */ - - if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) { - NL_SET_ERR_MSG_MOD(extack, - "legacy pedit isn't offloaded"); - goto out_err; - } - - if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) { - NL_SET_ERR_MSG_MOD(extack, "pedit cmd isn't offloaded"); - goto out_err; - } - - mask = tcf_pedit_mask(a, i); - val = tcf_pedit_val(a, i); - offset = tcf_pedit_offset(a, i); - - err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]); - if (err) - goto out_err; - - hdrs[cmd].pedits++; + if (htype == FLOW_ACT_MANGLE_UNSPEC) { + NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded"); + goto out_err; } + mask = act->mangle.mask; + val = act->mangle.val; + offset = act->mangle.offset; + + err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]); + if (err) + goto out_err; + + hdrs[cmd].pedits++; + return 0; out_err: return err; @@ -2139,15 +2129,15 @@ static bool csum_offload_supported(struct mlx5e_priv *priv, } static bool modify_header_match_supported(struct mlx5_flow_spec *spec, - struct tcf_exts *exts, + struct flow_action *flow_action, struct netlink_ext_ack *extack) { - const struct tc_action *a; + const struct flow_action_entry *act; bool modify_ip_header; u8 htype, ip_proto; void *headers_v; u16 ethertype; - int nkeys, i; + int i; headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); @@ -2157,20 +2147,16 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, goto out_ok; modify_ip_header = false; - tcf_exts_for_each_action(i, a, exts) { - int k; - - if (!is_tcf_pedit(a)) + flow_action_for_each(i, act, flow_action) { + if (act->id != FLOW_ACTION_MANGLE && + act->id != FLOW_ACTION_ADD) continue; - nkeys = tcf_pedit_nkeys(a); - for (k = 0; k < nkeys; k++) { - htype = tcf_pedit_htype(a, k); - if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 || - htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) { - modify_ip_header = true; - break; - } + htype = act->mangle.htype; + if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4 || + htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { + modify_ip_header = true; + break; } } @@ -2188,7 +2174,7 @@ out_ok: } static bool actions_match_supported(struct mlx5e_priv *priv, - struct tcf_exts *exts, + struct flow_action *flow_action, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) @@ -2205,7 +2191,8 @@ static bool actions_match_supported(struct mlx5e_priv *priv, return false; if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - return modify_header_match_supported(&parse_attr->spec, exts, + return modify_header_match_supported(&parse_attr->spec, + flow_action, extack); return true; @@ -2225,53 +2212,50 @@ static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) return (fsystem_guid == psystem_guid); } -static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, +static int parse_tc_nic_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { - struct pedit_headers_action hdrs[__PEDIT_CMD_MAX] = {}; struct mlx5_nic_flow_attr *attr = flow->nic_attr; - const struct tc_action *a; + struct pedit_headers_action hdrs[2] = {}; + const struct flow_action_entry *act; u32 action = 0; int err, i; - if (!tcf_exts_has_actions(exts)) + if (!flow_action_has_entries(flow_action)) return -EINVAL; attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; - tcf_exts_for_each_action(i, a, exts) { - if (is_tcf_gact_shot(a)) { + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_DROP: action |= MLX5_FLOW_CONTEXT_ACTION_DROP; if (MLX5_CAP_FLOWTABLE(priv->mdev, flow_table_properties_nic_receive.flow_counter)) action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; - continue; - } - - if (is_tcf_pedit(a)) { - err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL, + break; + case FLOW_ACTION_MANGLE: + case FLOW_ACTION_ADD: + err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL, parse_attr, hdrs, extack); if (err) return err; action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - continue; - } - - if (is_tcf_csum(a)) { + break; + case FLOW_ACTION_CSUM: if (csum_offload_supported(priv, action, - tcf_csum_update_flags(a), + act->csum_flags, extack)) - continue; + break; return -EOPNOTSUPP; - } - - if (is_tcf_mirred_egress_redirect(a)) { - struct net_device *peer_dev = tcf_mirred_dev(a); + case FLOW_ACTION_REDIRECT: { + struct net_device *peer_dev = act->dev; if (priv->netdev->netdev_ops == peer_dev->netdev_ops && same_hw_devs(priv, netdev_priv(peer_dev))) { @@ -2286,11 +2270,10 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, peer_dev->name); return -EINVAL; } - continue; - } - - if (is_tcf_skbedit_mark(a)) { - u32 mark = tcf_skbedit_mark(a); + } + break; + case FLOW_ACTION_MARK: { + u32 mark = act->mark; if (mark & ~MLX5E_TC_FLOW_ID_MASK) { NL_SET_ERR_MSG_MOD(extack, @@ -2300,10 +2283,11 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, attr->flow_tag = mark; action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - continue; + } + break; + default: + return -EINVAL; } - - return -EINVAL; } if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || @@ -2315,7 +2299,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, } attr->action = action; - if (!actions_match_supported(priv, exts, parse_attr, flow, extack)) + if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) return -EOPNOTSUPP; return 0; @@ -2420,7 +2404,7 @@ out_err: } static int parse_tc_vlan_action(struct mlx5e_priv *priv, - const struct tc_action *a, + const struct flow_action_entry *act, struct mlx5_esw_flow_attr *attr, u32 *action) { @@ -2429,7 +2413,8 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, if (vlan_idx >= MLX5_FS_VLAN_DEPTH) return -EOPNOTSUPP; - if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) { + switch (act->id) { + case FLOW_ACTION_VLAN_POP: if (vlan_idx) { if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, MLX5_FS_VLAN_DEPTH)) @@ -2439,10 +2424,11 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, } else { *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; } - } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) { - attr->vlan_vid[vlan_idx] = tcf_vlan_push_vid(a); - attr->vlan_prio[vlan_idx] = tcf_vlan_push_prio(a); - attr->vlan_proto[vlan_idx] = tcf_vlan_push_proto(a); + break; + case FLOW_ACTION_VLAN_PUSH: + attr->vlan_vid[vlan_idx] = act->vlan.vid; + attr->vlan_prio[vlan_idx] = act->vlan.prio; + attr->vlan_proto[vlan_idx] = act->vlan.proto; if (!attr->vlan_proto[vlan_idx]) attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q); @@ -2454,13 +2440,15 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; } else { if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) && - (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) || - tcf_vlan_push_prio(a))) + (act->vlan.proto != htons(ETH_P_8021Q) || + act->vlan.prio)) return -EOPNOTSUPP; *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; } - } else { /* action is TCA_VLAN_ACT_MODIFY */ + break; + default: + /* action is FLOW_ACT_VLAN_MANGLE */ return -EOPNOTSUPP; } @@ -2469,59 +2457,56 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, return 0; } -static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, +static int parse_tc_fdb_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { - struct pedit_headers_action hdrs[__PEDIT_CMD_MAX] = {}; + struct pedit_headers_action hdrs[2] = {}; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct ip_tunnel_info *info = NULL; - const struct tc_action *a; + const struct ip_tunnel_info *info = NULL; + const struct flow_action_entry *act; bool encap = false; u32 action = 0; int err, i; - if (!tcf_exts_has_actions(exts)) + if (!flow_action_has_entries(flow_action)) return -EINVAL; attr->in_rep = rpriv->rep; attr->in_mdev = priv->mdev; - tcf_exts_for_each_action(i, a, exts) { - if (is_tcf_gact_shot(a)) { + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_DROP: action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; - continue; - } - - if (is_tcf_pedit(a)) { - err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB, + break; + case FLOW_ACTION_MANGLE: + case FLOW_ACTION_ADD: + err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB, parse_attr, hdrs, extack); if (err) return err; action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; attr->split_count = attr->out_count; - continue; - } - - if (is_tcf_csum(a)) { + break; + case FLOW_ACTION_CSUM: if (csum_offload_supported(priv, action, - tcf_csum_update_flags(a), - extack)) - continue; + act->csum_flags, extack)) + break; return -EOPNOTSUPP; - } - - if (is_tcf_mirred_egress_redirect(a) || is_tcf_mirred_egress_mirror(a)) { + case FLOW_ACTION_REDIRECT: + case FLOW_ACTION_MIRRED: { struct mlx5e_priv *out_priv; struct net_device *out_dev; - out_dev = tcf_mirred_dev(a); + out_dev = act->dev; if (!out_dev) { /* out_dev is NULL when filters with * non-existing mirred device are replayed to @@ -2586,35 +2571,29 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, priv->netdev->name, out_dev->name); return -EINVAL; } - continue; - } - - if (is_tcf_tunnel_set(a)) { - info = tcf_tunnel_info(a); + } + break; + case FLOW_ACTION_TUNNEL_ENCAP: + info = act->tunnel; if (info) encap = true; else return -EOPNOTSUPP; - continue; - } - - if (is_tcf_vlan(a)) { - err = parse_tc_vlan_action(priv, a, attr, &action); + break; + case FLOW_ACTION_VLAN_PUSH: + case FLOW_ACTION_VLAN_POP: + err = parse_tc_vlan_action(priv, act, attr, &action); if (err) return err; attr->split_count = attr->out_count; - continue; - } - - if (is_tcf_tunnel_release(a)) { + break; + case FLOW_ACTION_TUNNEL_DECAP: action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; - continue; - } - - if (is_tcf_gact_goto_chain(a)) { - u32 dest_chain = tcf_gact_goto_chain_index(a); + break; + case FLOW_ACTION_GOTO: { + u32 dest_chain = act->chain_index; u32 max_chain = mlx5_eswitch_get_chain_range(esw); if (dest_chain <= attr->chain) { @@ -2627,11 +2606,11 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, } action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; attr->dest_chain = dest_chain; - - continue; + break; + } + default: + return -EINVAL; } - - return -EINVAL; } if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || @@ -2643,7 +2622,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, } attr->action = action; - if (!actions_match_supported(priv, exts, parse_attr, flow, extack)) + if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) return -EOPNOTSUPP; if (attr->dest_chain) { @@ -2754,6 +2733,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_eswitch_rep *in_rep, struct mlx5_core_dev *in_mdev) { + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; @@ -2775,7 +2755,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, flow->esw_attr->chain = f->common.chain_index; flow->esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16; - err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow, extack); + err = parse_tc_fdb_actions(priv, &rule->action, parse_attr, flow, extack); if (err) goto err_free; @@ -2891,6 +2871,7 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; @@ -2913,7 +2894,7 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, if (err) goto err_free; - err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow, extack); + err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack); if (err) goto err_free; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index a69e3462b65e..2649d128a57c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -588,7 +588,7 @@ int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp, { u8 ethertype; - if (action == TCA_VLAN_ACT_MODIFY) { + if (action == FLOW_ACTION_VLAN_MANGLE) { switch (proto) { case ETH_P_8021Q: ethertype = 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index c090ecb62041..9af9f5c1b25c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -17,13 +17,13 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, struct mlxsw_sp_acl_rule_info *rulei, - struct tcf_exts *exts, + struct flow_action *flow_action, struct netlink_ext_ack *extack) { - const struct tc_action *a; + const struct flow_action_entry *act; int err, i; - if (!tcf_exts_has_actions(exts)) + if (!flow_action_has_entries(flow_action)) return 0; /* Count action is inserted first */ @@ -31,27 +31,31 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, if (err) return err; - tcf_exts_for_each_action(i, a, exts) { - if (is_tcf_gact_ok(a)) { + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_ACCEPT: err = mlxsw_sp_acl_rulei_act_terminate(rulei); if (err) { NL_SET_ERR_MSG_MOD(extack, "Cannot append terminate action"); return err; } - } else if (is_tcf_gact_shot(a)) { + break; + case FLOW_ACTION_DROP: err = mlxsw_sp_acl_rulei_act_drop(rulei); if (err) { NL_SET_ERR_MSG_MOD(extack, "Cannot append drop action"); return err; } - } else if (is_tcf_gact_trap(a)) { + break; + case FLOW_ACTION_TRAP: err = mlxsw_sp_acl_rulei_act_trap(rulei); if (err) { NL_SET_ERR_MSG_MOD(extack, "Cannot append trap action"); return err; } - } else if (is_tcf_gact_goto_chain(a)) { - u32 chain_index = tcf_gact_goto_chain_index(a); + break; + case FLOW_ACTION_GOTO: { + u32 chain_index = act->chain_index; struct mlxsw_sp_acl_ruleset *ruleset; u16 group_id; @@ -67,7 +71,9 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, NL_SET_ERR_MSG_MOD(extack, "Cannot append jump action"); return err; } - } else if (is_tcf_mirred_egress_redirect(a)) { + } + break; + case FLOW_ACTION_REDIRECT: { struct net_device *out_dev; struct mlxsw_sp_fid *fid; u16 fid_index; @@ -79,29 +85,34 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, if (err) return err; - out_dev = tcf_mirred_dev(a); + out_dev = act->dev; err = mlxsw_sp_acl_rulei_act_fwd(mlxsw_sp, rulei, out_dev, extack); if (err) return err; - } else if (is_tcf_mirred_egress_mirror(a)) { - struct net_device *out_dev = tcf_mirred_dev(a); + } + break; + case FLOW_ACTION_MIRRED: { + struct net_device *out_dev = act->dev; err = mlxsw_sp_acl_rulei_act_mirror(mlxsw_sp, rulei, block, out_dev, extack); if (err) return err; - } else if (is_tcf_vlan(a)) { - u16 proto = be16_to_cpu(tcf_vlan_push_proto(a)); - u32 action = tcf_vlan_action(a); - u8 prio = tcf_vlan_push_prio(a); - u16 vid = tcf_vlan_push_vid(a); + } + break; + case FLOW_ACTION_VLAN_PUSH: + case FLOW_ACTION_VLAN_POP: { + u16 proto = be16_to_cpu(act->vlan.proto); + u8 prio = act->vlan.prio; + u16 vid = act->vlan.vid; return mlxsw_sp_acl_rulei_act_vlan(mlxsw_sp, rulei, - action, vid, + act->id, vid, proto, prio, extack); - } else { + } + default: NL_SET_ERR_MSG_MOD(extack, "Unsupported action"); dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n"); return -EOPNOTSUPP; @@ -361,7 +372,8 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, if (err) return err; - return mlxsw_sp_flower_parse_actions(mlxsw_sp, block, rulei, f->exts, + return mlxsw_sp_flower_parse_actions(mlxsw_sp, block, rulei, + &f->rule->action, f->common.extack); } diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 43192640bdd1..6b1d56b021ac 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -37,7 +37,7 @@ static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan) static void nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, - const struct tc_action *action) + const struct flow_action_entry *act) { size_t act_size = sizeof(struct nfp_fl_push_vlan); u16 tmp_push_vlan_tci; @@ -45,17 +45,17 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, push_vlan->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_VLAN; push_vlan->head.len_lw = act_size >> NFP_FL_LW_SIZ; push_vlan->reserved = 0; - push_vlan->vlan_tpid = tcf_vlan_push_proto(action); + push_vlan->vlan_tpid = act->vlan.proto; tmp_push_vlan_tci = - FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, tcf_vlan_push_prio(action)) | - FIELD_PREP(NFP_FL_PUSH_VLAN_VID, tcf_vlan_push_vid(action)) | + FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, act->vlan.prio) | + FIELD_PREP(NFP_FL_PUSH_VLAN_VID, act->vlan.vid) | NFP_FL_PUSH_VLAN_CFI; push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci); } static int -nfp_fl_pre_lag(struct nfp_app *app, const struct tc_action *action, +nfp_fl_pre_lag(struct nfp_app *app, const struct flow_action_entry *act, struct nfp_fl_payload *nfp_flow, int act_len) { size_t act_size = sizeof(struct nfp_fl_pre_lag); @@ -63,7 +63,7 @@ nfp_fl_pre_lag(struct nfp_app *app, const struct tc_action *action, struct net_device *out_dev; int err; - out_dev = tcf_mirred_dev(action); + out_dev = act->dev; if (!out_dev || !netif_is_lag_master(out_dev)) return 0; @@ -92,7 +92,8 @@ nfp_fl_pre_lag(struct nfp_app *app, const struct tc_action *action, static int nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, - const struct tc_action *action, struct nfp_fl_payload *nfp_flow, + const struct flow_action_entry *act, + struct nfp_fl_payload *nfp_flow, bool last, struct net_device *in_dev, enum nfp_flower_tun_type tun_type, int *tun_out_cnt) { @@ -104,7 +105,7 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, output->head.jump_id = NFP_FL_ACTION_OPCODE_OUTPUT; output->head.len_lw = act_size >> NFP_FL_LW_SIZ; - out_dev = tcf_mirred_dev(action); + out_dev = act->dev; if (!out_dev) return -EOPNOTSUPP; @@ -155,9 +156,9 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, static enum nfp_flower_tun_type nfp_fl_get_tun_from_act_l4_port(struct nfp_app *app, - const struct tc_action *action) + const struct flow_action_entry *act) { - struct ip_tunnel_info *tun = tcf_tunnel_info(action); + const struct ip_tunnel_info *tun = act->tunnel; struct nfp_flower_priv *priv = app->priv; switch (tun->key.tp_dst) { @@ -195,9 +196,9 @@ static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len) static int nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, - const struct tc_action *action) + const struct flow_action_entry *act) { - struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action); + struct ip_tunnel_info *ip_tun = (struct ip_tunnel_info *)act->tunnel; int opt_len, opt_cnt, act_start, tot_push_len; u8 *src = ip_tunnel_info_opts(ip_tun); @@ -259,13 +260,13 @@ nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, static int nfp_fl_set_ipv4_udp_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_udp_tun *set_tun, - const struct tc_action *action, + const struct flow_action_entry *act, struct nfp_fl_pre_tunnel *pre_tun, enum nfp_flower_tun_type tun_type, struct net_device *netdev) { size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun); - struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action); + const struct ip_tunnel_info *ip_tun = act->tunnel; struct nfp_flower_priv *priv = app->priv; u32 tmp_set_ip_tun_type_index = 0; /* Currently support one pre-tunnel so index is always 0. */ @@ -345,7 +346,7 @@ static void nfp_fl_set_helper32(u32 value, u32 mask, u8 *p_exact, u8 *p_mask) } static int -nfp_fl_set_eth(const struct tc_action *action, int idx, u32 off, +nfp_fl_set_eth(const struct flow_action_entry *act, int idx, u32 off, struct nfp_fl_set_eth *set_eth) { u32 exact, mask; @@ -353,8 +354,8 @@ nfp_fl_set_eth(const struct tc_action *action, int idx, u32 off, if (off + 4 > ETH_ALEN * 2) return -EOPNOTSUPP; - mask = ~tcf_pedit_mask(action, idx); - exact = tcf_pedit_val(action, idx); + mask = ~act->mangle.mask; + exact = act->mangle.val; if (exact & ~mask) return -EOPNOTSUPP; @@ -376,7 +377,7 @@ struct ipv4_ttl_word { }; static int -nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off, +nfp_fl_set_ip4(const struct flow_action_entry *act, int idx, u32 off, struct nfp_fl_set_ip4_addrs *set_ip_addr, struct nfp_fl_set_ip4_ttl_tos *set_ip_ttl_tos) { @@ -387,8 +388,8 @@ nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off, __be32 exact, mask; /* We are expecting tcf_pedit to return a big endian value */ - mask = (__force __be32)~tcf_pedit_mask(action, idx); - exact = (__force __be32)tcf_pedit_val(action, idx); + mask = (__force __be32)~act->mangle.mask; + exact = (__force __be32)act->mangle.val; if (exact & ~mask) return -EOPNOTSUPP; @@ -505,7 +506,7 @@ nfp_fl_set_ip6_hop_limit_flow_label(u32 off, __be32 exact, __be32 mask, } static int -nfp_fl_set_ip6(const struct tc_action *action, int idx, u32 off, +nfp_fl_set_ip6(const struct flow_action_entry *act, int idx, u32 off, struct nfp_fl_set_ipv6_addr *ip_dst, struct nfp_fl_set_ipv6_addr *ip_src, struct nfp_fl_set_ipv6_tc_hl_fl *ip_hl_fl) @@ -515,8 +516,8 @@ nfp_fl_set_ip6(const struct tc_action *action, int idx, u32 off, u8 word; /* We are expecting tcf_pedit to return a big endian value */ - mask = (__force __be32)~tcf_pedit_mask(action, idx); - exact = (__force __be32)tcf_pedit_val(action, idx); + mask = (__force __be32)~act->mangle.mask; + exact = (__force __be32)act->mangle.val; if (exact & ~mask) return -EOPNOTSUPP; @@ -541,7 +542,7 @@ nfp_fl_set_ip6(const struct tc_action *action, int idx, u32 off, } static int -nfp_fl_set_tport(const struct tc_action *action, int idx, u32 off, +nfp_fl_set_tport(const struct flow_action_entry *act, int idx, u32 off, struct nfp_fl_set_tport *set_tport, int opcode) { u32 exact, mask; @@ -549,8 +550,8 @@ nfp_fl_set_tport(const struct tc_action *action, int idx, u32 off, if (off) return -EOPNOTSUPP; - mask = ~tcf_pedit_mask(action, idx); - exact = tcf_pedit_val(action, idx); + mask = ~act->mangle.mask; + exact = act->mangle.val; if (exact & ~mask) return -EOPNOTSUPP; @@ -584,7 +585,8 @@ static u32 nfp_fl_csum_l4_to_flag(u8 ip_proto) } static int -nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, +nfp_fl_pedit(const struct flow_action_entry *act, + struct tc_cls_flower_offload *flow, char *nfp_action, int *a_len, u32 *csum_updated) { struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); @@ -592,13 +594,13 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, struct nfp_fl_set_ipv6_tc_hl_fl set_ip6_tc_hl_fl; struct nfp_fl_set_ip4_ttl_tos set_ip_ttl_tos; struct nfp_fl_set_ip4_addrs set_ip_addr; + enum flow_action_mangle_base htype; struct nfp_fl_set_tport set_tport; struct nfp_fl_set_eth set_eth; - enum pedit_header_type htype; - int idx, nkeys, err; size_t act_size = 0; - u32 offset, cmd; u8 ip_proto = 0; + int idx, err; + u32 offset; memset(&set_ip6_tc_hl_fl, 0, sizeof(set_ip6_tc_hl_fl)); memset(&set_ip_ttl_tos, 0, sizeof(set_ip_ttl_tos)); @@ -607,42 +609,35 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, memset(&set_ip_addr, 0, sizeof(set_ip_addr)); memset(&set_tport, 0, sizeof(set_tport)); memset(&set_eth, 0, sizeof(set_eth)); - nkeys = tcf_pedit_nkeys(action); - for (idx = 0; idx < nkeys; idx++) { - cmd = tcf_pedit_cmd(action, idx); - htype = tcf_pedit_htype(action, idx); - offset = tcf_pedit_offset(action, idx); + htype = act->mangle.htype; + offset = act->mangle.offset; - if (cmd != TCA_PEDIT_KEY_EX_CMD_SET) - return -EOPNOTSUPP; - - switch (htype) { - case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: - err = nfp_fl_set_eth(action, idx, offset, &set_eth); - break; - case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: - err = nfp_fl_set_ip4(action, idx, offset, &set_ip_addr, - &set_ip_ttl_tos); - break; - case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: - err = nfp_fl_set_ip6(action, idx, offset, &set_ip6_dst, - &set_ip6_src, &set_ip6_tc_hl_fl); - break; - case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: - err = nfp_fl_set_tport(action, idx, offset, &set_tport, - NFP_FL_ACTION_OPCODE_SET_TCP); - break; - case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: - err = nfp_fl_set_tport(action, idx, offset, &set_tport, - NFP_FL_ACTION_OPCODE_SET_UDP); - break; - default: - return -EOPNOTSUPP; - } - if (err) - return err; + switch (htype) { + case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: + err = nfp_fl_set_eth(act, idx, offset, &set_eth); + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: + err = nfp_fl_set_ip4(act, idx, offset, &set_ip_addr, + &set_ip_ttl_tos); + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: + err = nfp_fl_set_ip6(act, idx, offset, &set_ip6_dst, + &set_ip6_src, &set_ip6_tc_hl_fl); + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: + err = nfp_fl_set_tport(act, idx, offset, &set_tport, + NFP_FL_ACTION_OPCODE_SET_TCP); + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: + err = nfp_fl_set_tport(act, idx, offset, &set_tport, + NFP_FL_ACTION_OPCODE_SET_UDP); + break; + default: + return -EOPNOTSUPP; } + if (err) + return err; if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { struct flow_match_basic match; @@ -732,7 +727,7 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, } static int -nfp_flower_output_action(struct nfp_app *app, const struct tc_action *a, +nfp_flower_output_action(struct nfp_app *app, const struct flow_action_entry *act, struct nfp_fl_payload *nfp_fl, int *a_len, struct net_device *netdev, bool last, enum nfp_flower_tun_type *tun_type, int *tun_out_cnt, @@ -752,7 +747,7 @@ nfp_flower_output_action(struct nfp_app *app, const struct tc_action *a, return -EOPNOTSUPP; output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_output(app, output, a, nfp_fl, last, netdev, *tun_type, + err = nfp_fl_output(app, output, act, nfp_fl, last, netdev, *tun_type, tun_out_cnt); if (err) return err; @@ -763,7 +758,7 @@ nfp_flower_output_action(struct nfp_app *app, const struct tc_action *a, /* nfp_fl_pre_lag returns -err or size of prelag action added. * This will be 0 if it is not egressing to a lag dev. */ - prelag_size = nfp_fl_pre_lag(app, a, nfp_fl, *a_len); + prelag_size = nfp_fl_pre_lag(app, act, nfp_fl, *a_len); if (prelag_size < 0) return prelag_size; else if (prelag_size > 0 && (!last || *out_cnt)) @@ -777,7 +772,7 @@ nfp_flower_output_action(struct nfp_app *app, const struct tc_action *a, } static int -nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a, +nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, struct tc_cls_flower_offload *flow, struct nfp_fl_payload *nfp_fl, int *a_len, struct net_device *netdev, @@ -790,23 +785,25 @@ nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a, struct nfp_fl_pop_vlan *pop_v; int err; - if (is_tcf_gact_shot(a)) { + switch (act->id) { + case FLOW_ACTION_DROP: nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_DROP); - } else if (is_tcf_mirred_egress_redirect(a)) { - err = nfp_flower_output_action(app, a, nfp_fl, a_len, netdev, + break; + case FLOW_ACTION_REDIRECT: + err = nfp_flower_output_action(app, act, nfp_fl, a_len, netdev, true, tun_type, tun_out_cnt, out_cnt, csum_updated); if (err) return err; - - } else if (is_tcf_mirred_egress_mirror(a)) { - err = nfp_flower_output_action(app, a, nfp_fl, a_len, netdev, + break; + case FLOW_ACTION_MIRRED: + err = nfp_flower_output_action(app, act, nfp_fl, a_len, netdev, false, tun_type, tun_out_cnt, out_cnt, csum_updated); if (err) return err; - - } else if (is_tcf_vlan(a) && tcf_vlan_action(a) == TCA_VLAN_ACT_POP) { + break; + case FLOW_ACTION_VLAN_POP: if (*a_len + sizeof(struct nfp_fl_pop_vlan) > NFP_FL_MAX_A_SIZ) return -EOPNOTSUPP; @@ -815,19 +812,21 @@ nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a, nfp_fl_pop_vlan(pop_v); *a_len += sizeof(struct nfp_fl_pop_vlan); - } else if (is_tcf_vlan(a) && tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) { + break; + case FLOW_ACTION_VLAN_PUSH: if (*a_len + sizeof(struct nfp_fl_push_vlan) > NFP_FL_MAX_A_SIZ) return -EOPNOTSUPP; psh_v = (struct nfp_fl_push_vlan *)&nfp_fl->action_data[*a_len]; nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); - nfp_fl_push_vlan(psh_v, a); + nfp_fl_push_vlan(psh_v, act); *a_len += sizeof(struct nfp_fl_push_vlan); - } else if (is_tcf_tunnel_set(a)) { - struct ip_tunnel_info *ip_tun = tcf_tunnel_info(a); + break; + case FLOW_ACTION_TUNNEL_ENCAP: { + const struct ip_tunnel_info *ip_tun = act->tunnel; - *tun_type = nfp_fl_get_tun_from_act_l4_port(app, a); + *tun_type = nfp_fl_get_tun_from_act_l4_port(app, act); if (*tun_type == NFP_FL_TUNNEL_NONE) return -EOPNOTSUPP; @@ -846,32 +845,36 @@ nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a, nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); *a_len += sizeof(struct nfp_fl_pre_tunnel); - err = nfp_fl_push_geneve_options(nfp_fl, a_len, a); + err = nfp_fl_push_geneve_options(nfp_fl, a_len, act); if (err) return err; set_tun = (void *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_set_ipv4_udp_tun(app, set_tun, a, pre_tun, + err = nfp_fl_set_ipv4_udp_tun(app, set_tun, act, pre_tun, *tun_type, netdev); if (err) return err; *a_len += sizeof(struct nfp_fl_set_ipv4_udp_tun); - } else if (is_tcf_tunnel_release(a)) { + } + break; + case FLOW_ACTION_TUNNEL_DECAP: /* Tunnel decap is handled by default so accept action. */ return 0; - } else if (is_tcf_pedit(a)) { - if (nfp_fl_pedit(a, flow, &nfp_fl->action_data[*a_len], + case FLOW_ACTION_MANGLE: + if (nfp_fl_pedit(act, flow, &nfp_fl->action_data[*a_len], a_len, csum_updated)) return -EOPNOTSUPP; - } else if (is_tcf_csum(a)) { + break; + case FLOW_ACTION_CSUM: /* csum action requests recalc of something we have not fixed */ - if (tcf_csum_update_flags(a) & ~*csum_updated) + if (act->csum_flags & ~*csum_updated) return -EOPNOTSUPP; /* If we will correctly fix the csum we can remove it from the * csum update list. Which will later be used to check support. */ - *csum_updated &= ~tcf_csum_update_flags(a); - } else { + *csum_updated &= ~act->csum_flags; + break; + default: /* Currently we do not handle any other actions. */ return -EOPNOTSUPP; } @@ -886,7 +889,7 @@ int nfp_flower_compile_action(struct nfp_app *app, { int act_len, act_cnt, err, tun_out_cnt, out_cnt, i; enum nfp_flower_tun_type tun_type; - const struct tc_action *a; + struct flow_action_entry *act; u32 csum_updated = 0; memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ); @@ -897,8 +900,8 @@ int nfp_flower_compile_action(struct nfp_app *app, tun_out_cnt = 0; out_cnt = 0; - tcf_exts_for_each_action(i, a, flow->exts) { - err = nfp_flower_loop_action(app, a, flow, nfp_flow, &act_len, + flow_action_for_each(i, act, &flow->rule->action) { + err = nfp_flower_loop_action(app, act, flow, nfp_flow, &act_len, netdev, &tun_type, &tun_out_cnt, &out_cnt, &csum_updated); if (err) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 81d5b9304229..833c9ec58a6e 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -2004,21 +2004,21 @@ unlock: } static int qede_parse_actions(struct qede_dev *edev, - struct tcf_exts *exts) + struct flow_action *flow_action) { + const struct flow_action_entry *act; int rc = -EINVAL, num_act = 0, i; - const struct tc_action *a; bool is_drop = false; - if (!tcf_exts_has_actions(exts)) { + if (!flow_action_has_entries(flow_action)) { DP_NOTICE(edev, "No tc actions received\n"); return rc; } - tcf_exts_for_each_action(i, a, exts) { + flow_action_for_each(i, act, flow_action) { num_act++; - if (is_tcf_gact_shot(a)) + if (act->id == FLOW_ACTION_DROP) is_drop = true; } @@ -2235,7 +2235,7 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, } /* parse tc actions and get the vf_id */ - if (qede_parse_actions(edev, f->exts)) + if (qede_parse_actions(edev, &f->rule->action)) goto unlock; if (qede_flow_find_fltr(edev, &t)) { From 2cd173e6d542f8e96369608db849b71135d65afb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 2 Feb 2019 12:50:49 +0100 Subject: [PATCH 0683/1436] cls_flower: don't expose TC actions to drivers anymore Now that drivers have been converted to use the flow action infrastructure, remove this field from the tc_cls_flower_offload structure. Signed-off-by: Pablo Neira Ayuso Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 1 - net/sched/cls_flower.c | 5 ----- 2 files changed, 6 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index bea1b1c3ea9b..cb8be396a11f 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -766,7 +766,6 @@ struct tc_cls_flower_offload { unsigned long cookie; struct flow_rule *rule; struct flow_stats stats; - struct tcf_exts *exts; u32 classid; }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 8ec85056aa0d..c5d1db3a3db7 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -391,7 +391,6 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, cls_flower.rule->match.dissector = &f->mask->dissector; cls_flower.rule->match.mask = &f->mask->key; cls_flower.rule->match.key = &f->mkey; - cls_flower.exts = &f->exts; cls_flower.classid = f->res.classid; err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); @@ -425,7 +424,6 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL); cls_flower.command = TC_CLSFLOWER_STATS; cls_flower.cookie = (unsigned long) f; - cls_flower.exts = &f->exts; cls_flower.classid = f->res.classid; tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); @@ -1492,7 +1490,6 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, cls_flower.rule->match.dissector = &mask->dissector; cls_flower.rule->match.mask = &mask->key; cls_flower.rule->match.key = &f->mkey; - cls_flower.exts = &f->exts; err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); @@ -1525,7 +1522,6 @@ static int fl_hw_create_tmplt(struct tcf_chain *chain, { struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = chain->block; - struct tcf_exts dummy_exts = { 0, }; cls_flower.rule = flow_rule_alloc(0); if (!cls_flower.rule) @@ -1537,7 +1533,6 @@ static int fl_hw_create_tmplt(struct tcf_chain *chain, cls_flower.rule->match.dissector = &tmplt->dissector; cls_flower.rule->match.mask = &tmplt->mask; cls_flower.rule->match.key = &tmplt->dummy_key; - cls_flower.exts = &dummy_exts; /* We don't care if driver (any of them) fails to handle this * call. It serves just as a hint for it. From 8bec2833fb28d7c8fcc25127190abbe3eb0ca260 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 2 Feb 2019 12:50:50 +0100 Subject: [PATCH 0684/1436] flow_offload: add wake-up-on-lan and queue to flow_action These actions need to be added to support the ethtool_rx_flow interface. The queue action includes a field to specify the RSS context, that is set via FLOW_RSS flow type flag and the rss_context field in struct ethtool_rxnfc, plus the corresponding queue index. FLOW_RSS implies that rss_context is non-zero, therefore, queue.ctx == 0 means that FLOW_RSS was not set. Also add a field to store the vf index which is stored in the ethtool_rxnfc ring_cookie field. Signed-off-by: Pablo Neira Ayuso Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/flow_offload.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index f9ce39992dbd..23166caa0da5 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -116,6 +116,8 @@ enum flow_action_id { FLOW_ACTION_ADD, FLOW_ACTION_CSUM, FLOW_ACTION_MARK, + FLOW_ACTION_WAKE, + FLOW_ACTION_QUEUE, }; /* This is mirroring enum pedit_header_type definition for easy mapping between @@ -150,6 +152,11 @@ struct flow_action_entry { const struct ip_tunnel_info *tunnel; /* FLOW_ACTION_TUNNEL_ENCAP */ u32 csum_flags; /* FLOW_ACTION_CSUM */ u32 mark; /* FLOW_ACTION_MARK */ + struct { /* FLOW_ACTION_QUEUE */ + u32 ctx; + u32 index; + u8 vf; + } queue; }; }; From eca4205f9ec3bea2d5aad0493c19f5d2675a20fc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 2 Feb 2019 12:50:51 +0100 Subject: [PATCH 0685/1436] ethtool: add ethtool_rx_flow_spec to flow_rule structure translator This patch adds a function to translate the ethtool_rx_flow_spec structure to the flow_rule representation. This allows us to reuse code from the driver side given that both flower and ethtool_rx_flow interfaces use the same representation. This patch also includes support for the flow type flags FLOW_EXT, FLOW_MAC_EXT and FLOW_RSS. The ethtool_rx_flow_spec_input wrapper structure is used to convey the rss_context field, that is away from the ethtool_rx_flow_spec structure, and the ethtool_rx_flow_spec structure. Signed-off-by: Pablo Neira Ayuso Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/ethtool.h | 15 +++ net/core/ethtool.c | 241 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 256 insertions(+) diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index afd9596ce636..19a8de5326fb 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -400,4 +400,19 @@ struct ethtool_ops { void (*get_ethtool_phy_stats)(struct net_device *, struct ethtool_stats *, u64 *); }; + +struct ethtool_rx_flow_rule { + struct flow_rule *rule; + unsigned long priv[0]; +}; + +struct ethtool_rx_flow_spec_input { + const struct ethtool_rx_flow_spec *fs; + u32 rss_ctx; +}; + +struct ethtool_rx_flow_rule * +ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input); +void ethtool_rx_flow_rule_destroy(struct ethtool_rx_flow_rule *rule); + #endif /* _LINUX_ETHTOOL_H */ diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 45c0a6e3d6ad..0fbf39239b29 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -29,6 +29,7 @@ #include #include #include +#include /* * Some useful ethtool_ops methods that're device independent. @@ -2820,3 +2821,243 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) return rc; } + +struct ethtool_rx_flow_key { + struct flow_dissector_key_basic basic; + union { + struct flow_dissector_key_ipv4_addrs ipv4; + struct flow_dissector_key_ipv6_addrs ipv6; + }; + struct flow_dissector_key_ports tp; + struct flow_dissector_key_ip ip; + struct flow_dissector_key_vlan vlan; + struct flow_dissector_key_eth_addrs eth_addrs; +} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ + +struct ethtool_rx_flow_match { + struct flow_dissector dissector; + struct ethtool_rx_flow_key key; + struct ethtool_rx_flow_key mask; +}; + +struct ethtool_rx_flow_rule * +ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input) +{ + const struct ethtool_rx_flow_spec *fs = input->fs; + static struct in6_addr zero_addr = {}; + struct ethtool_rx_flow_match *match; + struct ethtool_rx_flow_rule *flow; + struct flow_action_entry *act; + + flow = kzalloc(sizeof(struct ethtool_rx_flow_rule) + + sizeof(struct ethtool_rx_flow_match), GFP_KERNEL); + if (!flow) + return ERR_PTR(-ENOMEM); + + /* ethtool_rx supports only one single action per rule. */ + flow->rule = flow_rule_alloc(1); + if (!flow->rule) { + kfree(flow); + return ERR_PTR(-ENOMEM); + } + + match = (struct ethtool_rx_flow_match *)flow->priv; + flow->rule->match.dissector = &match->dissector; + flow->rule->match.mask = &match->mask; + flow->rule->match.key = &match->key; + + match->mask.basic.n_proto = htons(0xffff); + + switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: { + const struct ethtool_tcpip4_spec *v4_spec, *v4_m_spec; + + match->key.basic.n_proto = htons(ETH_P_IP); + + v4_spec = &fs->h_u.tcp_ip4_spec; + v4_m_spec = &fs->m_u.tcp_ip4_spec; + + if (v4_m_spec->ip4src) { + match->key.ipv4.src = v4_spec->ip4src; + match->mask.ipv4.src = v4_m_spec->ip4src; + } + if (v4_m_spec->ip4dst) { + match->key.ipv4.dst = v4_spec->ip4dst; + match->mask.ipv4.dst = v4_m_spec->ip4dst; + } + if (v4_m_spec->ip4src || + v4_m_spec->ip4dst) { + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS); + match->dissector.offset[FLOW_DISSECTOR_KEY_IPV4_ADDRS] = + offsetof(struct ethtool_rx_flow_key, ipv4); + } + if (v4_m_spec->psrc) { + match->key.tp.src = v4_spec->psrc; + match->mask.tp.src = v4_m_spec->psrc; + } + if (v4_m_spec->pdst) { + match->key.tp.dst = v4_spec->pdst; + match->mask.tp.dst = v4_m_spec->pdst; + } + if (v4_m_spec->psrc || + v4_m_spec->pdst) { + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_PORTS); + match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] = + offsetof(struct ethtool_rx_flow_key, tp); + } + if (v4_m_spec->tos) { + match->key.ip.tos = v4_spec->tos; + match->mask.ip.tos = v4_m_spec->tos; + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_IP); + match->dissector.offset[FLOW_DISSECTOR_KEY_IP] = + offsetof(struct ethtool_rx_flow_key, ip); + } + } + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: { + const struct ethtool_tcpip6_spec *v6_spec, *v6_m_spec; + + match->key.basic.n_proto = htons(ETH_P_IPV6); + + v6_spec = &fs->h_u.tcp_ip6_spec; + v6_m_spec = &fs->m_u.tcp_ip6_spec; + if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) { + memcpy(&match->key.ipv6.src, v6_spec->ip6src, + sizeof(match->key.ipv6.src)); + memcpy(&match->mask.ipv6.src, v6_m_spec->ip6src, + sizeof(match->mask.ipv6.src)); + } + if (memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) { + memcpy(&match->key.ipv6.dst, v6_spec->ip6dst, + sizeof(match->key.ipv6.dst)); + memcpy(&match->mask.ipv6.dst, v6_m_spec->ip6dst, + sizeof(match->mask.ipv6.dst)); + } + if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr)) || + memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) { + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS); + match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] = + offsetof(struct ethtool_rx_flow_key, ipv6); + } + if (v6_m_spec->psrc) { + match->key.tp.src = v6_spec->psrc; + match->mask.tp.src = v6_m_spec->psrc; + } + if (v6_m_spec->pdst) { + match->key.tp.dst = v6_spec->pdst; + match->mask.tp.dst = v6_m_spec->pdst; + } + if (v6_m_spec->psrc || + v6_m_spec->pdst) { + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_PORTS); + match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] = + offsetof(struct ethtool_rx_flow_key, tp); + } + if (v6_m_spec->tclass) { + match->key.ip.tos = v6_spec->tclass; + match->mask.ip.tos = v6_m_spec->tclass; + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_IP); + match->dissector.offset[FLOW_DISSECTOR_KEY_IP] = + offsetof(struct ethtool_rx_flow_key, ip); + } + } + break; + default: + ethtool_rx_flow_rule_destroy(flow); + return ERR_PTR(-EINVAL); + } + + switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + match->key.basic.ip_proto = IPPROTO_TCP; + break; + case UDP_V4_FLOW: + case UDP_V6_FLOW: + match->key.basic.ip_proto = IPPROTO_UDP; + break; + } + match->mask.basic.ip_proto = 0xff; + + match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_BASIC); + match->dissector.offset[FLOW_DISSECTOR_KEY_BASIC] = + offsetof(struct ethtool_rx_flow_key, basic); + + if (fs->flow_type & FLOW_EXT) { + const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext; + const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext; + + if (ext_m_spec->vlan_etype && + ext_m_spec->vlan_tci) { + match->key.vlan.vlan_tpid = ext_h_spec->vlan_etype; + match->mask.vlan.vlan_tpid = ext_m_spec->vlan_etype; + + match->key.vlan.vlan_id = + ntohs(ext_h_spec->vlan_tci) & 0x0fff; + match->mask.vlan.vlan_id = + ntohs(ext_m_spec->vlan_tci) & 0x0fff; + + match->key.vlan.vlan_priority = + (ntohs(ext_h_spec->vlan_tci) & 0xe000) >> 13; + match->mask.vlan.vlan_priority = + (ntohs(ext_m_spec->vlan_tci) & 0xe000) >> 13; + + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_VLAN); + match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] = + offsetof(struct ethtool_rx_flow_key, vlan); + } + } + if (fs->flow_type & FLOW_MAC_EXT) { + const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext; + const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext; + + if (ext_m_spec->h_dest) { + memcpy(match->key.eth_addrs.dst, ext_h_spec->h_dest, + ETH_ALEN); + memcpy(match->mask.eth_addrs.dst, ext_m_spec->h_dest, + ETH_ALEN); + + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS); + match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] = + offsetof(struct ethtool_rx_flow_key, eth_addrs); + } + } + + act = &flow->rule->action.entries[0]; + switch (fs->ring_cookie) { + case RX_CLS_FLOW_DISC: + act->id = FLOW_ACTION_DROP; + break; + case RX_CLS_FLOW_WAKE: + act->id = FLOW_ACTION_WAKE; + break; + default: + act->id = FLOW_ACTION_QUEUE; + if (fs->flow_type & FLOW_RSS) + act->queue.ctx = input->rss_ctx; + + act->queue.vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie); + act->queue.index = ethtool_get_flow_spec_ring(fs->ring_cookie); + break; + } + + return flow; +} +EXPORT_SYMBOL(ethtool_rx_flow_rule_create); + +void ethtool_rx_flow_rule_destroy(struct ethtool_rx_flow_rule *flow) +{ + kfree(flow->rule); + kfree(flow); +} +EXPORT_SYMBOL(ethtool_rx_flow_rule_destroy); From e4f7ef54cbd8c8fd808e176c7feb8b614f0034fe Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 2 Feb 2019 12:50:52 +0100 Subject: [PATCH 0686/1436] dsa: bcm_sf2: use flow_rule infrastructure Update this driver to use the flow_rule infrastructure, hence we can use the same code to populate hardware IR from ethtool_rx_flow and the cls_flower interfaces. Signed-off-by: Pablo Neira Ayuso Acked-by: Jiri Pirko Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2_cfp.c | 102 ++++++++++++++++++++++------------ 1 file changed, 67 insertions(+), 35 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index e14663ab6dbc..6d8059dc77b7 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "bcm_sf2.h" #include "bcm_sf2_regs.h" @@ -257,7 +258,8 @@ static int bcm_sf2_cfp_act_pol_set(struct bcm_sf2_priv *priv, } static void bcm_sf2_cfp_slice_ipv4(struct bcm_sf2_priv *priv, - struct ethtool_tcpip4_spec *v4_spec, + struct flow_dissector_key_ipv4_addrs *addrs, + struct flow_dissector_key_ports *ports, unsigned int slice_num, bool mask) { @@ -278,7 +280,7 @@ static void bcm_sf2_cfp_slice_ipv4(struct bcm_sf2_priv *priv, * UDF_n_A6 [23:8] * UDF_n_A5 [7:0] */ - reg = be16_to_cpu(v4_spec->pdst) >> 8; + reg = be16_to_cpu(ports->dst) >> 8; if (mask) offset = CORE_CFP_MASK_PORT(3); else @@ -289,9 +291,9 @@ static void bcm_sf2_cfp_slice_ipv4(struct bcm_sf2_priv *priv, * UDF_n_A4 [23:8] * UDF_n_A3 [7:0] */ - reg = (be16_to_cpu(v4_spec->pdst) & 0xff) << 24 | - (u32)be16_to_cpu(v4_spec->psrc) << 8 | - (be32_to_cpu(v4_spec->ip4dst) & 0x0000ff00) >> 8; + reg = (be16_to_cpu(ports->dst) & 0xff) << 24 | + (u32)be16_to_cpu(ports->src) << 8 | + (be32_to_cpu(addrs->dst) & 0x0000ff00) >> 8; if (mask) offset = CORE_CFP_MASK_PORT(2); else @@ -302,9 +304,9 @@ static void bcm_sf2_cfp_slice_ipv4(struct bcm_sf2_priv *priv, * UDF_n_A2 [23:8] * UDF_n_A1 [7:0] */ - reg = (u32)(be32_to_cpu(v4_spec->ip4dst) & 0xff) << 24 | - (u32)(be32_to_cpu(v4_spec->ip4dst) >> 16) << 8 | - (be32_to_cpu(v4_spec->ip4src) & 0x0000ff00) >> 8; + reg = (u32)(be32_to_cpu(addrs->dst) & 0xff) << 24 | + (u32)(be32_to_cpu(addrs->dst) >> 16) << 8 | + (be32_to_cpu(addrs->src) & 0x0000ff00) >> 8; if (mask) offset = CORE_CFP_MASK_PORT(1); else @@ -317,8 +319,8 @@ static void bcm_sf2_cfp_slice_ipv4(struct bcm_sf2_priv *priv, * Slice ID [3:2] * Slice valid [1:0] */ - reg = (u32)(be32_to_cpu(v4_spec->ip4src) & 0xff) << 24 | - (u32)(be32_to_cpu(v4_spec->ip4src) >> 16) << 8 | + reg = (u32)(be32_to_cpu(addrs->src) & 0xff) << 24 | + (u32)(be32_to_cpu(addrs->src) >> 16) << 8 | SLICE_NUM(slice_num) | SLICE_VALID; if (mask) offset = CORE_CFP_MASK_PORT(0); @@ -332,9 +334,13 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, unsigned int queue_num, struct ethtool_rx_flow_spec *fs) { - struct ethtool_tcpip4_spec *v4_spec, *v4_m_spec; + struct ethtool_rx_flow_spec_input input = {}; const struct cfp_udf_layout *layout; unsigned int slice_num, rule_index; + struct ethtool_rx_flow_rule *flow; + struct flow_match_ipv4_addrs ipv4; + struct flow_match_ports ports; + struct flow_match_ip ip; u8 ip_proto, ip_frag; u8 num_udf; u32 reg; @@ -343,13 +349,9 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, switch (fs->flow_type & ~FLOW_EXT) { case TCP_V4_FLOW: ip_proto = IPPROTO_TCP; - v4_spec = &fs->h_u.tcp_ip4_spec; - v4_m_spec = &fs->m_u.tcp_ip4_spec; break; case UDP_V4_FLOW: ip_proto = IPPROTO_UDP; - v4_spec = &fs->h_u.udp_ip4_spec; - v4_m_spec = &fs->m_u.udp_ip4_spec; break; default: return -EINVAL; @@ -367,11 +369,22 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, if (rule_index > bcm_sf2_cfp_rule_size(priv)) return -ENOSPC; + input.fs = fs; + flow = ethtool_rx_flow_rule_create(&input); + if (IS_ERR(flow)) + return PTR_ERR(flow); + + flow_rule_match_ipv4_addrs(flow->rule, &ipv4); + flow_rule_match_ports(flow->rule, &ports); + flow_rule_match_ip(flow->rule, &ip); + layout = &udf_tcpip4_layout; /* We only use one UDF slice for now */ slice_num = bcm_sf2_get_slice_number(layout, 0); - if (slice_num == UDF_NUM_SLICES) - return -EINVAL; + if (slice_num == UDF_NUM_SLICES) { + ret = -EINVAL; + goto out_err_flow_rule; + } num_udf = bcm_sf2_get_num_udf_slices(layout->udfs[slice_num].slices); @@ -398,7 +411,7 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, * Reserved [1] * UDF_Valid[8] [0] */ - core_writel(priv, v4_spec->tos << IPTOS_SHIFT | + core_writel(priv, ip.key->tos << IPTOS_SHIFT | ip_proto << IPPROTO_SHIFT | ip_frag << IP_FRAG_SHIFT | udf_upper_bits(num_udf), CORE_CFP_DATA_PORT(6)); @@ -417,8 +430,8 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, core_writel(priv, udf_lower_bits(num_udf) << 24, CORE_CFP_MASK_PORT(5)); /* Program the match and the mask */ - bcm_sf2_cfp_slice_ipv4(priv, v4_spec, slice_num, false); - bcm_sf2_cfp_slice_ipv4(priv, v4_m_spec, SLICE_NUM_MASK, true); + bcm_sf2_cfp_slice_ipv4(priv, ipv4.key, ports.key, slice_num, false); + bcm_sf2_cfp_slice_ipv4(priv, ipv4.mask, ports.mask, SLICE_NUM_MASK, true); /* Insert into TCAM now */ bcm_sf2_cfp_rule_addr_set(priv, rule_index); @@ -426,14 +439,14 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, ret = bcm_sf2_cfp_op(priv, OP_SEL_WRITE | TCAM_SEL); if (ret) { pr_err("TCAM entry at addr %d failed\n", rule_index); - return ret; + goto out_err_flow_rule; } /* Insert into Action and policer RAMs now */ ret = bcm_sf2_cfp_act_pol_set(priv, rule_index, port_num, queue_num, true); if (ret) - return ret; + goto out_err_flow_rule; /* Turn on CFP for this rule now */ reg = core_readl(priv, CORE_CFP_CTL_REG); @@ -446,6 +459,10 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, fs->location = rule_index; return 0; + +out_err_flow_rule: + ethtool_rx_flow_rule_destroy(flow); + return ret; } static void bcm_sf2_cfp_slice_ipv6(struct bcm_sf2_priv *priv, @@ -582,8 +599,12 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, struct ethtool_rx_flow_spec *fs) { struct ethtool_tcpip6_spec *v6_spec, *v6_m_spec; + struct ethtool_rx_flow_spec_input input = {}; unsigned int slice_num, rule_index[2]; const struct cfp_udf_layout *layout; + struct ethtool_rx_flow_rule *flow; + struct flow_match_ipv6_addrs ipv6; + struct flow_match_ports ports; u8 ip_proto, ip_frag; int ret = 0; u8 num_udf; @@ -645,6 +666,15 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, goto out_err; } + input.fs = fs; + flow = ethtool_rx_flow_rule_create(&input); + if (IS_ERR(flow)) { + ret = PTR_ERR(flow); + goto out_err; + } + flow_rule_match_ipv6_addrs(flow->rule, &ipv6); + flow_rule_match_ports(flow->rule, &ports); + /* Apply the UDF layout for this filter */ bcm_sf2_cfp_udf_set(priv, layout, slice_num); @@ -688,10 +718,10 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, core_writel(priv, udf_lower_bits(num_udf) << 24, CORE_CFP_MASK_PORT(5)); /* Slice the IPv6 source address and port */ - bcm_sf2_cfp_slice_ipv6(priv, v6_spec->ip6src, v6_spec->psrc, - slice_num, false); - bcm_sf2_cfp_slice_ipv6(priv, v6_m_spec->ip6src, v6_m_spec->psrc, - SLICE_NUM_MASK, true); + bcm_sf2_cfp_slice_ipv6(priv, ipv6.key->src.in6_u.u6_addr32, + ports.key->src, slice_num, false); + bcm_sf2_cfp_slice_ipv6(priv, ipv6.mask->src.in6_u.u6_addr32, + ports.mask->src, SLICE_NUM_MASK, true); /* Insert into TCAM now because we need to insert a second rule */ bcm_sf2_cfp_rule_addr_set(priv, rule_index[0]); @@ -699,20 +729,20 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, ret = bcm_sf2_cfp_op(priv, OP_SEL_WRITE | TCAM_SEL); if (ret) { pr_err("TCAM entry at addr %d failed\n", rule_index[0]); - goto out_err; + goto out_err_flow_rule; } /* Insert into Action and policer RAMs now */ ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[0], port_num, queue_num, false); if (ret) - goto out_err; + goto out_err_flow_rule; /* Now deal with the second slice to chain this rule */ slice_num = bcm_sf2_get_slice_number(layout, slice_num + 1); if (slice_num == UDF_NUM_SLICES) { ret = -EINVAL; - goto out_err; + goto out_err_flow_rule; } num_udf = bcm_sf2_get_num_udf_slices(layout->udfs[slice_num].slices); @@ -748,10 +778,10 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, /* Mask all */ core_writel(priv, 0, CORE_CFP_MASK_PORT(5)); - bcm_sf2_cfp_slice_ipv6(priv, v6_spec->ip6dst, v6_spec->pdst, slice_num, - false); - bcm_sf2_cfp_slice_ipv6(priv, v6_m_spec->ip6dst, v6_m_spec->pdst, - SLICE_NUM_MASK, true); + bcm_sf2_cfp_slice_ipv6(priv, ipv6.key->dst.in6_u.u6_addr32, + ports.key->dst, slice_num, false); + bcm_sf2_cfp_slice_ipv6(priv, ipv6.mask->dst.in6_u.u6_addr32, + ports.key->dst, SLICE_NUM_MASK, true); /* Insert into TCAM now */ bcm_sf2_cfp_rule_addr_set(priv, rule_index[1]); @@ -759,7 +789,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, ret = bcm_sf2_cfp_op(priv, OP_SEL_WRITE | TCAM_SEL); if (ret) { pr_err("TCAM entry at addr %d failed\n", rule_index[1]); - goto out_err; + goto out_err_flow_rule; } /* Insert into Action and policer RAMs now, set chain ID to @@ -768,7 +798,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[1], port_num, queue_num, true); if (ret) - goto out_err; + goto out_err_flow_rule; /* Turn on CFP for this rule now */ reg = core_readl(priv, CORE_CFP_CTL_REG); @@ -784,6 +814,8 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, return ret; +out_err_flow_rule: + ethtool_rx_flow_rule_destroy(flow); out_err: clear_bit(rule_index[1], priv->cfp.used); return ret; From 5bdf4120ae9b661b017266beeabdf4b8814b97fe Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 2 Feb 2019 12:50:53 +0100 Subject: [PATCH 0687/1436] qede: place ethtool_rx_flow_spec after code after TC flower codebase This is a preparation patch to reuse the existing TC flower codebase from ethtool_rx_flow_spec. This patch is merely moving the core ethtool_rx_flow_spec parser after tc flower offload driver code so we can skip a few forward function declarations in the follow up patch. Signed-off-by: Pablo Neira Ayuso Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/qlogic/qede/qede_filter.c | 264 +++++++++--------- 1 file changed, 132 insertions(+), 132 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 833c9ec58a6e..ed77950f6cf9 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1791,72 +1791,6 @@ static int qede_flow_spec_to_tuple_udpv6(struct qede_dev *edev, return 0; } -static int qede_flow_spec_to_tuple(struct qede_dev *edev, - struct qede_arfs_tuple *t, - struct ethtool_rx_flow_spec *fs) -{ - memset(t, 0, sizeof(*t)); - - if (qede_flow_spec_validate_unused(edev, fs)) - return -EOPNOTSUPP; - - switch ((fs->flow_type & ~FLOW_EXT)) { - case TCP_V4_FLOW: - return qede_flow_spec_to_tuple_tcpv4(edev, t, fs); - case UDP_V4_FLOW: - return qede_flow_spec_to_tuple_udpv4(edev, t, fs); - case TCP_V6_FLOW: - return qede_flow_spec_to_tuple_tcpv6(edev, t, fs); - case UDP_V6_FLOW: - return qede_flow_spec_to_tuple_udpv6(edev, t, fs); - default: - DP_VERBOSE(edev, NETIF_MSG_IFUP, - "Can't support flow of type %08x\n", fs->flow_type); - return -EOPNOTSUPP; - } - - return 0; -} - -static int qede_flow_spec_validate(struct qede_dev *edev, - struct ethtool_rx_flow_spec *fs, - struct qede_arfs_tuple *t) -{ - if (fs->location >= QEDE_RFS_MAX_FLTR) { - DP_INFO(edev, "Location out-of-bounds\n"); - return -EINVAL; - } - - /* Check location isn't already in use */ - if (test_bit(fs->location, edev->arfs->arfs_fltr_bmap)) { - DP_INFO(edev, "Location already in use\n"); - return -EINVAL; - } - - /* Check if the filtering-mode could support the filter */ - if (edev->arfs->filter_count && - edev->arfs->mode != t->mode) { - DP_INFO(edev, - "flow_spec would require filtering mode %08x, but %08x is configured\n", - t->mode, edev->arfs->filter_count); - return -EINVAL; - } - - /* If drop requested then no need to validate other data */ - if (fs->ring_cookie == RX_CLS_FLOW_DISC) - return 0; - - if (ethtool_get_flow_spec_ring_vf(fs->ring_cookie)) - return 0; - - if (fs->ring_cookie >= QEDE_RSS_COUNT(edev)) { - DP_INFO(edev, "Queue out-of-bounds\n"); - return -EINVAL; - } - - return 0; -} - /* Must be called while qede lock is held */ static struct qede_arfs_fltr_node * qede_flow_find_fltr(struct qede_dev *edev, struct qede_arfs_tuple *t) @@ -1896,72 +1830,6 @@ static void qede_flow_set_destination(struct qede_dev *edev, "Configuring N-tuple for VF 0x%02x\n", n->vfid - 1); } -int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info) -{ - struct ethtool_rx_flow_spec *fsp = &info->fs; - struct qede_arfs_fltr_node *n; - struct qede_arfs_tuple t; - int min_hlen, rc; - - __qede_lock(edev); - - if (!edev->arfs) { - rc = -EPERM; - goto unlock; - } - - /* Translate the flow specification into something fittign our DB */ - rc = qede_flow_spec_to_tuple(edev, &t, fsp); - if (rc) - goto unlock; - - /* Make sure location is valid and filter isn't already set */ - rc = qede_flow_spec_validate(edev, fsp, &t); - if (rc) - goto unlock; - - if (qede_flow_find_fltr(edev, &t)) { - rc = -EINVAL; - goto unlock; - } - - n = kzalloc(sizeof(*n), GFP_KERNEL); - if (!n) { - rc = -ENOMEM; - goto unlock; - } - - min_hlen = qede_flow_get_min_header_size(&t); - n->data = kzalloc(min_hlen, GFP_KERNEL); - if (!n->data) { - kfree(n); - rc = -ENOMEM; - goto unlock; - } - - n->sw_id = fsp->location; - set_bit(n->sw_id, edev->arfs->arfs_fltr_bmap); - n->buf_len = min_hlen; - - memcpy(&n->tuple, &t, sizeof(n->tuple)); - - qede_flow_set_destination(edev, n, fsp); - - /* Build a minimal header according to the flow */ - n->tuple.build_hdr(&n->tuple, n->data); - - rc = qede_enqueue_fltr_and_config_searcher(edev, n, 0); - if (rc) - goto unlock; - - qede_configure_arfs_fltr(edev, n, n->rxq_id, true); - rc = qede_poll_arfs_filter_config(edev, n); -unlock: - __qede_unlock(edev); - - return rc; -} - int qede_delete_flow_filter(struct qede_dev *edev, u64 cookie) { struct qede_arfs_fltr_node *fltr = NULL; @@ -2277,3 +2145,135 @@ unlock: __qede_unlock(edev); return rc; } + +static int qede_flow_spec_validate(struct qede_dev *edev, + struct ethtool_rx_flow_spec *fs, + struct qede_arfs_tuple *t) +{ + if (fs->location >= QEDE_RFS_MAX_FLTR) { + DP_INFO(edev, "Location out-of-bounds\n"); + return -EINVAL; + } + + /* Check location isn't already in use */ + if (test_bit(fs->location, edev->arfs->arfs_fltr_bmap)) { + DP_INFO(edev, "Location already in use\n"); + return -EINVAL; + } + + /* Check if the filtering-mode could support the filter */ + if (edev->arfs->filter_count && + edev->arfs->mode != t->mode) { + DP_INFO(edev, + "flow_spec would require filtering mode %08x, but %08x is configured\n", + t->mode, edev->arfs->filter_count); + return -EINVAL; + } + + /* If drop requested then no need to validate other data */ + if (fs->ring_cookie == RX_CLS_FLOW_DISC) + return 0; + + if (ethtool_get_flow_spec_ring_vf(fs->ring_cookie)) + return 0; + + if (fs->ring_cookie >= QEDE_RSS_COUNT(edev)) { + DP_INFO(edev, "Queue out-of-bounds\n"); + return -EINVAL; + } + + return 0; +} + +static int qede_flow_spec_to_tuple(struct qede_dev *edev, + struct qede_arfs_tuple *t, + struct ethtool_rx_flow_spec *fs) +{ + memset(t, 0, sizeof(*t)); + + if (qede_flow_spec_validate_unused(edev, fs)) + return -EOPNOTSUPP; + + switch ((fs->flow_type & ~FLOW_EXT)) { + case TCP_V4_FLOW: + return qede_flow_spec_to_tuple_tcpv4(edev, t, fs); + case UDP_V4_FLOW: + return qede_flow_spec_to_tuple_udpv4(edev, t, fs); + case TCP_V6_FLOW: + return qede_flow_spec_to_tuple_tcpv6(edev, t, fs); + case UDP_V6_FLOW: + return qede_flow_spec_to_tuple_udpv6(edev, t, fs); + default: + DP_VERBOSE(edev, NETIF_MSG_IFUP, + "Can't support flow of type %08x\n", fs->flow_type); + return -EOPNOTSUPP; + } + + return 0; +} + +int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info) +{ + struct ethtool_rx_flow_spec *fsp = &info->fs; + struct qede_arfs_fltr_node *n; + struct qede_arfs_tuple t; + int min_hlen, rc; + + __qede_lock(edev); + + if (!edev->arfs) { + rc = -EPERM; + goto unlock; + } + + /* Translate the flow specification into something fittign our DB */ + rc = qede_flow_spec_to_tuple(edev, &t, fsp); + if (rc) + goto unlock; + + /* Make sure location is valid and filter isn't already set */ + rc = qede_flow_spec_validate(edev, fsp, &t); + if (rc) + goto unlock; + + if (qede_flow_find_fltr(edev, &t)) { + rc = -EINVAL; + goto unlock; + } + + n = kzalloc(sizeof(*n), GFP_KERNEL); + if (!n) { + rc = -ENOMEM; + goto unlock; + } + + min_hlen = qede_flow_get_min_header_size(&t); + n->data = kzalloc(min_hlen, GFP_KERNEL); + if (!n->data) { + kfree(n); + rc = -ENOMEM; + goto unlock; + } + + n->sw_id = fsp->location; + set_bit(n->sw_id, edev->arfs->arfs_fltr_bmap); + n->buf_len = min_hlen; + + memcpy(&n->tuple, &t, sizeof(n->tuple)); + + qede_flow_set_destination(edev, n, fsp); + + /* Build a minimal header according to the flow */ + n->tuple.build_hdr(&n->tuple, n->data); + + rc = qede_enqueue_fltr_and_config_searcher(edev, n, 0); + if (rc) + goto unlock; + + qede_configure_arfs_fltr(edev, n, n->rxq_id, true); + rc = qede_poll_arfs_filter_config(edev, n); +unlock: + __qede_unlock(edev); + + return rc; +} From 37c5d3efd7f801091d1470d7682be474e0ea2bd0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 2 Feb 2019 12:50:54 +0100 Subject: [PATCH 0688/1436] qede: use ethtool_rx_flow_rule() to remove duplicated parser code The qede driver supports for ethtool_rx_flow_spec and flower, both codebases look very similar. This patch uses the ethtool_rx_flow_rule() infrastructure to remove the duplicated ethtool_rx_flow_spec parser and consolidate ACL offload support around the flow_rule infrastructure. Furthermore, more code can be consolidated by merging qede_add_cls_rule() and qede_add_tc_flower_fltr(), these two functions also look very similar. This driver currently provides simple ACL support, such as 5-tuple matching, drop policy and queue to CPU. Drivers that support more features can benefit from this infrastructure to save even more redundant codebase. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- .../net/ethernet/qlogic/qede/qede_filter.c | 279 +++++------------- 1 file changed, 76 insertions(+), 203 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index ed77950f6cf9..add922b93d2c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1665,132 +1665,6 @@ static int qede_set_v6_tuple_to_profile(struct qede_dev *edev, return 0; } -static int qede_flow_spec_to_tuple_ipv4_common(struct qede_dev *edev, - struct qede_arfs_tuple *t, - struct ethtool_rx_flow_spec *fs) -{ - if ((fs->h_u.tcp_ip4_spec.ip4src & - fs->m_u.tcp_ip4_spec.ip4src) != fs->h_u.tcp_ip4_spec.ip4src) { - DP_INFO(edev, "Don't support IP-masks\n"); - return -EOPNOTSUPP; - } - - if ((fs->h_u.tcp_ip4_spec.ip4dst & - fs->m_u.tcp_ip4_spec.ip4dst) != fs->h_u.tcp_ip4_spec.ip4dst) { - DP_INFO(edev, "Don't support IP-masks\n"); - return -EOPNOTSUPP; - } - - if ((fs->h_u.tcp_ip4_spec.psrc & - fs->m_u.tcp_ip4_spec.psrc) != fs->h_u.tcp_ip4_spec.psrc) { - DP_INFO(edev, "Don't support port-masks\n"); - return -EOPNOTSUPP; - } - - if ((fs->h_u.tcp_ip4_spec.pdst & - fs->m_u.tcp_ip4_spec.pdst) != fs->h_u.tcp_ip4_spec.pdst) { - DP_INFO(edev, "Don't support port-masks\n"); - return -EOPNOTSUPP; - } - - if (fs->h_u.tcp_ip4_spec.tos) { - DP_INFO(edev, "Don't support tos\n"); - return -EOPNOTSUPP; - } - - t->eth_proto = htons(ETH_P_IP); - t->src_ipv4 = fs->h_u.tcp_ip4_spec.ip4src; - t->dst_ipv4 = fs->h_u.tcp_ip4_spec.ip4dst; - t->src_port = fs->h_u.tcp_ip4_spec.psrc; - t->dst_port = fs->h_u.tcp_ip4_spec.pdst; - - return qede_set_v4_tuple_to_profile(edev, t); -} - -static int qede_flow_spec_to_tuple_tcpv4(struct qede_dev *edev, - struct qede_arfs_tuple *t, - struct ethtool_rx_flow_spec *fs) -{ - t->ip_proto = IPPROTO_TCP; - - if (qede_flow_spec_to_tuple_ipv4_common(edev, t, fs)) - return -EINVAL; - - return 0; -} - -static int qede_flow_spec_to_tuple_udpv4(struct qede_dev *edev, - struct qede_arfs_tuple *t, - struct ethtool_rx_flow_spec *fs) -{ - t->ip_proto = IPPROTO_UDP; - - if (qede_flow_spec_to_tuple_ipv4_common(edev, t, fs)) - return -EINVAL; - - return 0; -} - -static int qede_flow_spec_to_tuple_ipv6_common(struct qede_dev *edev, - struct qede_arfs_tuple *t, - struct ethtool_rx_flow_spec *fs) -{ - struct in6_addr zero_addr; - - memset(&zero_addr, 0, sizeof(zero_addr)); - - if ((fs->h_u.tcp_ip6_spec.psrc & - fs->m_u.tcp_ip6_spec.psrc) != fs->h_u.tcp_ip6_spec.psrc) { - DP_INFO(edev, "Don't support port-masks\n"); - return -EOPNOTSUPP; - } - - if ((fs->h_u.tcp_ip6_spec.pdst & - fs->m_u.tcp_ip6_spec.pdst) != fs->h_u.tcp_ip6_spec.pdst) { - DP_INFO(edev, "Don't support port-masks\n"); - return -EOPNOTSUPP; - } - - if (fs->h_u.tcp_ip6_spec.tclass) { - DP_INFO(edev, "Don't support tclass\n"); - return -EOPNOTSUPP; - } - - t->eth_proto = htons(ETH_P_IPV6); - memcpy(&t->src_ipv6, &fs->h_u.tcp_ip6_spec.ip6src, - sizeof(struct in6_addr)); - memcpy(&t->dst_ipv6, &fs->h_u.tcp_ip6_spec.ip6dst, - sizeof(struct in6_addr)); - t->src_port = fs->h_u.tcp_ip6_spec.psrc; - t->dst_port = fs->h_u.tcp_ip6_spec.pdst; - - return qede_set_v6_tuple_to_profile(edev, t, &zero_addr); -} - -static int qede_flow_spec_to_tuple_tcpv6(struct qede_dev *edev, - struct qede_arfs_tuple *t, - struct ethtool_rx_flow_spec *fs) -{ - t->ip_proto = IPPROTO_TCP; - - if (qede_flow_spec_to_tuple_ipv6_common(edev, t, fs)) - return -EINVAL; - - return 0; -} - -static int qede_flow_spec_to_tuple_udpv6(struct qede_dev *edev, - struct qede_arfs_tuple *t, - struct ethtool_rx_flow_spec *fs) -{ - t->ip_proto = IPPROTO_UDP; - - if (qede_flow_spec_to_tuple_ipv6_common(edev, t, fs)) - return -EINVAL; - - return 0; -} - /* Must be called while qede lock is held */ static struct qede_arfs_fltr_node * qede_flow_find_fltr(struct qede_dev *edev, struct qede_arfs_tuple *t) @@ -1875,34 +1749,38 @@ static int qede_parse_actions(struct qede_dev *edev, struct flow_action *flow_action) { const struct flow_action_entry *act; - int rc = -EINVAL, num_act = 0, i; - bool is_drop = false; + int i; if (!flow_action_has_entries(flow_action)) { - DP_NOTICE(edev, "No tc actions received\n"); - return rc; + DP_NOTICE(edev, "No actions received\n"); + return -EINVAL; } flow_action_for_each(i, act, flow_action) { - num_act++; + switch (act->id) { + case FLOW_ACTION_DROP: + break; + case FLOW_ACTION_QUEUE: + if (act->queue.vf) + break; - if (act->id == FLOW_ACTION_DROP) - is_drop = true; + if (act->queue.index >= QEDE_RSS_COUNT(edev)) { + DP_INFO(edev, "Queue out-of-bounds\n"); + return -EINVAL; + } + break; + default: + return -EINVAL; + } } - if (num_act == 1 && is_drop) - return 0; - - return rc; + return 0; } static int -qede_tc_parse_ports(struct qede_dev *edev, - struct tc_cls_flower_offload *f, - struct qede_arfs_tuple *t) +qede_flow_parse_ports(struct qede_dev *edev, struct flow_rule *rule, + struct qede_arfs_tuple *t) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { struct flow_match_ports match; @@ -1921,11 +1799,9 @@ qede_tc_parse_ports(struct qede_dev *edev, } static int -qede_tc_parse_v6_common(struct qede_dev *edev, - struct tc_cls_flower_offload *f, - struct qede_arfs_tuple *t) +qede_flow_parse_v6_common(struct qede_dev *edev, struct flow_rule *rule, + struct qede_arfs_tuple *t) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); struct in6_addr zero_addr, addr; memset(&zero_addr, 0, sizeof(addr)); @@ -1948,19 +1824,16 @@ qede_tc_parse_v6_common(struct qede_dev *edev, memcpy(&t->dst_ipv6, &match.key->dst, sizeof(addr)); } - if (qede_tc_parse_ports(edev, f, t)) + if (qede_flow_parse_ports(edev, rule, t)) return -EINVAL; return qede_set_v6_tuple_to_profile(edev, t, &zero_addr); } static int -qede_tc_parse_v4_common(struct qede_dev *edev, - struct tc_cls_flower_offload *f, +qede_flow_parse_v4_common(struct qede_dev *edev, struct flow_rule *rule, struct qede_arfs_tuple *t) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { struct flow_match_ipv4_addrs match; @@ -1975,62 +1848,56 @@ qede_tc_parse_v4_common(struct qede_dev *edev, t->dst_ipv4 = match.key->dst; } - if (qede_tc_parse_ports(edev, f, t)) + if (qede_flow_parse_ports(edev, rule, t)) return -EINVAL; return qede_set_v4_tuple_to_profile(edev, t); } static int -qede_tc_parse_tcp_v6(struct qede_dev *edev, - struct tc_cls_flower_offload *f, +qede_flow_parse_tcp_v6(struct qede_dev *edev, struct flow_rule *rule, struct qede_arfs_tuple *tuple) { tuple->ip_proto = IPPROTO_TCP; tuple->eth_proto = htons(ETH_P_IPV6); - return qede_tc_parse_v6_common(edev, f, tuple); + return qede_flow_parse_v6_common(edev, rule, tuple); } static int -qede_tc_parse_tcp_v4(struct qede_dev *edev, - struct tc_cls_flower_offload *f, +qede_flow_parse_tcp_v4(struct qede_dev *edev, struct flow_rule *rule, struct qede_arfs_tuple *tuple) { tuple->ip_proto = IPPROTO_TCP; tuple->eth_proto = htons(ETH_P_IP); - return qede_tc_parse_v4_common(edev, f, tuple); + return qede_flow_parse_v4_common(edev, rule, tuple); } static int -qede_tc_parse_udp_v6(struct qede_dev *edev, - struct tc_cls_flower_offload *f, +qede_flow_parse_udp_v6(struct qede_dev *edev, struct flow_rule *rule, struct qede_arfs_tuple *tuple) { tuple->ip_proto = IPPROTO_UDP; tuple->eth_proto = htons(ETH_P_IPV6); - return qede_tc_parse_v6_common(edev, f, tuple); + return qede_flow_parse_v6_common(edev, rule, tuple); } static int -qede_tc_parse_udp_v4(struct qede_dev *edev, - struct tc_cls_flower_offload *f, +qede_flow_parse_udp_v4(struct qede_dev *edev, struct flow_rule *rule, struct qede_arfs_tuple *tuple) { tuple->ip_proto = IPPROTO_UDP; tuple->eth_proto = htons(ETH_P_IP); - return qede_tc_parse_v4_common(edev, f, tuple); + return qede_flow_parse_v4_common(edev, rule, tuple); } static int -qede_parse_flower_attr(struct qede_dev *edev, __be16 proto, - struct tc_cls_flower_offload *f, - struct qede_arfs_tuple *tuple) +qede_parse_flow_attr(struct qede_dev *edev, __be16 proto, + struct flow_rule *rule, struct qede_arfs_tuple *tuple) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; int rc = -EINVAL; u8 ip_proto = 0; @@ -2062,15 +1929,15 @@ qede_parse_flower_attr(struct qede_dev *edev, __be16 proto, } if (ip_proto == IPPROTO_TCP && proto == htons(ETH_P_IP)) - rc = qede_tc_parse_tcp_v4(edev, f, tuple); + rc = qede_flow_parse_tcp_v4(edev, rule, tuple); else if (ip_proto == IPPROTO_TCP && proto == htons(ETH_P_IPV6)) - rc = qede_tc_parse_tcp_v6(edev, f, tuple); + rc = qede_flow_parse_tcp_v6(edev, rule, tuple); else if (ip_proto == IPPROTO_UDP && proto == htons(ETH_P_IP)) - rc = qede_tc_parse_udp_v4(edev, f, tuple); + rc = qede_flow_parse_udp_v4(edev, rule, tuple); else if (ip_proto == IPPROTO_UDP && proto == htons(ETH_P_IPV6)) - rc = qede_tc_parse_udp_v6(edev, f, tuple); + rc = qede_flow_parse_udp_v6(edev, rule, tuple); else - DP_NOTICE(edev, "Invalid tc protocol request\n"); + DP_NOTICE(edev, "Invalid protocol request\n"); return rc; } @@ -2090,7 +1957,7 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, } /* parse flower attribute and prepare filter */ - if (qede_parse_flower_attr(edev, proto, f, &t)) + if (qede_parse_flow_attr(edev, proto, f->rule, &t)) goto unlock; /* Validate profile mode and number of filters */ @@ -2147,16 +2014,17 @@ unlock: } static int qede_flow_spec_validate(struct qede_dev *edev, - struct ethtool_rx_flow_spec *fs, - struct qede_arfs_tuple *t) + struct flow_action *flow_action, + struct qede_arfs_tuple *t, + __u32 location) { - if (fs->location >= QEDE_RFS_MAX_FLTR) { + if (location >= QEDE_RFS_MAX_FLTR) { DP_INFO(edev, "Location out-of-bounds\n"); return -EINVAL; } /* Check location isn't already in use */ - if (test_bit(fs->location, edev->arfs->arfs_fltr_bmap)) { + if (test_bit(location, edev->arfs->arfs_fltr_bmap)) { DP_INFO(edev, "Location already in use\n"); return -EINVAL; } @@ -2170,46 +2038,56 @@ static int qede_flow_spec_validate(struct qede_dev *edev, return -EINVAL; } - /* If drop requested then no need to validate other data */ - if (fs->ring_cookie == RX_CLS_FLOW_DISC) - return 0; - - if (ethtool_get_flow_spec_ring_vf(fs->ring_cookie)) - return 0; - - if (fs->ring_cookie >= QEDE_RSS_COUNT(edev)) { - DP_INFO(edev, "Queue out-of-bounds\n"); + if (qede_parse_actions(edev, flow_action)) return -EINVAL; - } return 0; } -static int qede_flow_spec_to_tuple(struct qede_dev *edev, - struct qede_arfs_tuple *t, - struct ethtool_rx_flow_spec *fs) +static int qede_flow_spec_to_rule(struct qede_dev *edev, + struct qede_arfs_tuple *t, + struct ethtool_rx_flow_spec *fs) { - memset(t, 0, sizeof(*t)); + struct ethtool_rx_flow_spec_input input = {}; + struct ethtool_rx_flow_rule *flow; + __be16 proto; + int err = 0; if (qede_flow_spec_validate_unused(edev, fs)) return -EOPNOTSUPP; switch ((fs->flow_type & ~FLOW_EXT)) { case TCP_V4_FLOW: - return qede_flow_spec_to_tuple_tcpv4(edev, t, fs); case UDP_V4_FLOW: - return qede_flow_spec_to_tuple_udpv4(edev, t, fs); + proto = htons(ETH_P_IP); + break; case TCP_V6_FLOW: - return qede_flow_spec_to_tuple_tcpv6(edev, t, fs); case UDP_V6_FLOW: - return qede_flow_spec_to_tuple_udpv6(edev, t, fs); + proto = htons(ETH_P_IPV6); + break; default: DP_VERBOSE(edev, NETIF_MSG_IFUP, "Can't support flow of type %08x\n", fs->flow_type); return -EOPNOTSUPP; } - return 0; + input.fs = fs; + flow = ethtool_rx_flow_rule_create(&input); + if (IS_ERR(flow)) + return PTR_ERR(flow); + + if (qede_parse_flow_attr(edev, proto, flow->rule, t)) { + err = -EINVAL; + goto err_out; + } + + /* Make sure location is valid and filter isn't already set */ + err = qede_flow_spec_validate(edev, &flow->rule->action, t, + fs->location); +err_out: + ethtool_rx_flow_rule_destroy(flow); + return err; + } int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info) @@ -2227,12 +2105,7 @@ int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info) } /* Translate the flow specification into something fittign our DB */ - rc = qede_flow_spec_to_tuple(edev, &t, fsp); - if (rc) - goto unlock; - - /* Make sure location is valid and filter isn't already set */ - rc = qede_flow_spec_validate(edev, fsp, &t); + rc = qede_flow_spec_to_rule(edev, &t, fsp); if (rc) goto unlock; From 5f320f09b3d93200eb61eb9a26e1d2b3078eb712 Mon Sep 17 00:00:00 2001 From: Martin Weinelt Date: Wed, 6 Feb 2019 15:39:14 +0100 Subject: [PATCH 0689/1436] batman-adv: fix memory leak in in batadv_dat_put_dhcp batadv_dat_put_dhcp is creating a new ARP packet via batadv_dat_arp_create_reply and tries to forward it via batadv_dat_send_data to different peers in the DHT. The original skb is not consumed by batadv_dat_send_data and thus has to be consumed by the caller. Fixes: b61ec31c8575 ("batman-adv: Snoop DHCPACKs for DAT") Signed-off-by: Martin Weinelt [sven@narfation.org: add commit message] Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/distributed-arp-table.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 899ab051ffce..310a4f353008 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -1711,6 +1711,8 @@ static void batadv_dat_put_dhcp(struct batadv_priv *bat_priv, u8 *chaddr, batadv_dat_send_data(bat_priv, skb, yiaddr, vid, BATADV_P_DAT_DHT_PUT); batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT); + consume_skb(skb); + batadv_dbg(BATADV_DBG_DAT, bat_priv, "Snooped from outgoing DHCPACK (server address): %pI4, %pM (vid: %i)\n", &ip_dst, hw_dst, batadv_print_vid(vid)); From 27c874867c4e92278cc78f3c27574fb92d7f1578 Mon Sep 17 00:00:00 2001 From: Ioana Ciocoi Radulescu Date: Mon, 4 Feb 2019 17:00:35 +0000 Subject: [PATCH 0690/1436] dpaa2-eth: Use a single page per Rx buffer Instead of allocating page fragments via the network stack, use the page allocator directly. For now, we consume one page for each Rx buffer. With the new memory model we are free to consider adding more XDP support. Performance decreases slightly in some IP forwarding cases. No visible effect on termination traffic. The driver memory footprint increases as a result of this change, but it is still small enough to not really matter. Another side effect is that now Rx buffer alignment requirements are naturally satisfied without any additional actions needed. Remove alignment related code, except in the buffer layout information conveyed to MC, as hardware still needs to know the alignment value we guarantee. Signed-off-by: Ioana Ciornei Signed-off-by: Ioana Radulescu Signed-off-by: David S. Miller --- .../net/ethernet/freescale/dpaa2/dpaa2-eth.c | 61 ++++++++++--------- .../net/ethernet/freescale/dpaa2/dpaa2-eth.h | 21 ++----- 2 files changed, 38 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 04925c731f0b..6e58de628e5e 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -86,16 +86,16 @@ static void free_rx_fd(struct dpaa2_eth_priv *priv, for (i = 1; i < DPAA2_ETH_MAX_SG_ENTRIES; i++) { addr = dpaa2_sg_get_addr(&sgt[i]); sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr); - dma_unmap_single(dev, addr, DPAA2_ETH_RX_BUF_SIZE, - DMA_BIDIRECTIONAL); + dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE, + DMA_BIDIRECTIONAL); - skb_free_frag(sg_vaddr); + free_pages((unsigned long)sg_vaddr, 0); if (dpaa2_sg_is_final(&sgt[i])) break; } free_buf: - skb_free_frag(vaddr); + free_pages((unsigned long)vaddr, 0); } /* Build a linear skb based on a single-buffer frame descriptor */ @@ -109,7 +109,7 @@ static struct sk_buff *build_linear_skb(struct dpaa2_eth_channel *ch, ch->buf_count--; - skb = build_skb(fd_vaddr, DPAA2_ETH_SKB_SIZE); + skb = build_skb(fd_vaddr, DPAA2_ETH_RX_BUF_RAW_SIZE); if (unlikely(!skb)) return NULL; @@ -144,19 +144,19 @@ static struct sk_buff *build_frag_skb(struct dpaa2_eth_priv *priv, /* Get the address and length from the S/G entry */ sg_addr = dpaa2_sg_get_addr(sge); sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, sg_addr); - dma_unmap_single(dev, sg_addr, DPAA2_ETH_RX_BUF_SIZE, - DMA_BIDIRECTIONAL); + dma_unmap_page(dev, sg_addr, DPAA2_ETH_RX_BUF_SIZE, + DMA_BIDIRECTIONAL); sg_length = dpaa2_sg_get_len(sge); if (i == 0) { /* We build the skb around the first data buffer */ - skb = build_skb(sg_vaddr, DPAA2_ETH_SKB_SIZE); + skb = build_skb(sg_vaddr, DPAA2_ETH_RX_BUF_RAW_SIZE); if (unlikely(!skb)) { /* Free the first SG entry now, since we already * unmapped it and obtained the virtual address */ - skb_free_frag(sg_vaddr); + free_pages((unsigned long)sg_vaddr, 0); /* We still need to subtract the buffers used * by this FD from our software counter @@ -211,9 +211,9 @@ static void free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array, int count) for (i = 0; i < count; i++) { vaddr = dpaa2_iova_to_virt(priv->iommu_domain, buf_array[i]); - dma_unmap_single(dev, buf_array[i], DPAA2_ETH_RX_BUF_SIZE, - DMA_BIDIRECTIONAL); - skb_free_frag(vaddr); + dma_unmap_page(dev, buf_array[i], DPAA2_ETH_RX_BUF_SIZE, + DMA_BIDIRECTIONAL); + free_pages((unsigned long)vaddr, 0); } } @@ -378,16 +378,16 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, return; } - dma_unmap_single(dev, addr, DPAA2_ETH_RX_BUF_SIZE, - DMA_BIDIRECTIONAL); + dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE, + DMA_BIDIRECTIONAL); skb = build_linear_skb(ch, fd, vaddr); } else if (fd_format == dpaa2_fd_sg) { WARN_ON(priv->xdp_prog); - dma_unmap_single(dev, addr, DPAA2_ETH_RX_BUF_SIZE, - DMA_BIDIRECTIONAL); + dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE, + DMA_BIDIRECTIONAL); skb = build_frag_skb(priv, ch, buf_data); - skb_free_frag(vaddr); + free_pages((unsigned long)vaddr, 0); percpu_extras->rx_sg_frames++; percpu_extras->rx_sg_bytes += dpaa2_fd_get_len(fd); } else { @@ -903,7 +903,7 @@ static int add_bufs(struct dpaa2_eth_priv *priv, { struct device *dev = priv->net_dev->dev.parent; u64 buf_array[DPAA2_ETH_BUFS_PER_CMD]; - void *buf; + struct page *page; dma_addr_t addr; int i, err; @@ -911,14 +911,16 @@ static int add_bufs(struct dpaa2_eth_priv *priv, /* Allocate buffer visible to WRIOP + skb shared info + * alignment padding */ - buf = napi_alloc_frag(dpaa2_eth_buf_raw_size(priv)); - if (unlikely(!buf)) + /* allocate one page for each Rx buffer. WRIOP sees + * the entire page except for a tailroom reserved for + * skb shared info + */ + page = dev_alloc_pages(0); + if (!page) goto err_alloc; - buf = PTR_ALIGN(buf, priv->rx_buf_align); - - addr = dma_map_single(dev, buf, DPAA2_ETH_RX_BUF_SIZE, - DMA_BIDIRECTIONAL); + addr = dma_map_page(dev, page, 0, DPAA2_ETH_RX_BUF_SIZE, + DMA_BIDIRECTIONAL); if (unlikely(dma_mapping_error(dev, addr))) goto err_map; @@ -926,7 +928,7 @@ static int add_bufs(struct dpaa2_eth_priv *priv, /* tracing point */ trace_dpaa2_eth_buf_seed(priv->net_dev, - buf, dpaa2_eth_buf_raw_size(priv), + page, DPAA2_ETH_RX_BUF_RAW_SIZE, addr, DPAA2_ETH_RX_BUF_SIZE, bpid); } @@ -948,7 +950,7 @@ release_bufs: return i; err_map: - skb_free_frag(buf); + __free_pages(page, 0); err_alloc: /* If we managed to allocate at least some buffers, * release them to hardware @@ -2134,6 +2136,7 @@ static int set_buffer_layout(struct dpaa2_eth_priv *priv) { struct device *dev = priv->net_dev->dev.parent; struct dpni_buffer_layout buf_layout = {0}; + u16 rx_buf_align; int err; /* We need to check for WRIOP version 1.0.0, but depending on the MC @@ -2142,9 +2145,9 @@ static int set_buffer_layout(struct dpaa2_eth_priv *priv) */ if (priv->dpni_attrs.wriop_version == DPAA2_WRIOP_VERSION(0, 0, 0) || priv->dpni_attrs.wriop_version == DPAA2_WRIOP_VERSION(1, 0, 0)) - priv->rx_buf_align = DPAA2_ETH_RX_BUF_ALIGN_REV1; + rx_buf_align = DPAA2_ETH_RX_BUF_ALIGN_REV1; else - priv->rx_buf_align = DPAA2_ETH_RX_BUF_ALIGN; + rx_buf_align = DPAA2_ETH_RX_BUF_ALIGN; /* tx buffer */ buf_layout.private_data_size = DPAA2_ETH_SWA_SIZE; @@ -2184,7 +2187,7 @@ static int set_buffer_layout(struct dpaa2_eth_priv *priv) /* rx buffer */ buf_layout.pass_frame_status = true; buf_layout.pass_parser_result = true; - buf_layout.data_align = priv->rx_buf_align; + buf_layout.data_align = rx_buf_align; buf_layout.data_head_room = dpaa2_eth_rx_head_room(priv); buf_layout.private_data_size = 0; buf_layout.options = DPNI_BUF_LAYOUT_OPT_PARSER_RESULT | diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index 31fe486ec25f..da3d039cc9bf 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -63,9 +63,11 @@ /* Hardware requires alignment for ingress/egress buffer addresses */ #define DPAA2_ETH_TX_BUF_ALIGN 64 -#define DPAA2_ETH_RX_BUF_SIZE 2048 -#define DPAA2_ETH_SKB_SIZE \ - (DPAA2_ETH_RX_BUF_SIZE + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) +#define DPAA2_ETH_RX_BUF_RAW_SIZE PAGE_SIZE +#define DPAA2_ETH_RX_BUF_TAILROOM \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +#define DPAA2_ETH_RX_BUF_SIZE \ + (DPAA2_ETH_RX_BUF_RAW_SIZE - DPAA2_ETH_RX_BUF_TAILROOM) /* Hardware annotation area in RX/TX buffers */ #define DPAA2_ETH_RX_HWA_SIZE 64 @@ -343,7 +345,6 @@ struct dpaa2_eth_priv { bool rx_tstamp; /* Rx timestamping enabled */ u16 tx_qdid; - u16 rx_buf_align; struct fsl_mc_io *mc_io; /* Cores which have an affine DPIO/DPCON. * This is the cpu set on which Rx and Tx conf frames are processed @@ -418,15 +419,6 @@ enum dpaa2_eth_rx_dist { DPAA2_ETH_RX_DIST_CLS }; -/* Hardware only sees DPAA2_ETH_RX_BUF_SIZE, but the skb built around - * the buffer also needs space for its shared info struct, and we need - * to allocate enough to accommodate hardware alignment restrictions - */ -static inline unsigned int dpaa2_eth_buf_raw_size(struct dpaa2_eth_priv *priv) -{ - return DPAA2_ETH_SKB_SIZE + priv->rx_buf_align; -} - static inline unsigned int dpaa2_eth_needed_headroom(struct dpaa2_eth_priv *priv, struct sk_buff *skb) @@ -451,8 +443,7 @@ unsigned int dpaa2_eth_needed_headroom(struct dpaa2_eth_priv *priv, */ static inline unsigned int dpaa2_eth_rx_head_room(struct dpaa2_eth_priv *priv) { - return priv->tx_data_offset + DPAA2_ETH_TX_BUF_ALIGN - - DPAA2_ETH_RX_HWA_SIZE; + return priv->tx_data_offset - DPAA2_ETH_RX_HWA_SIZE; } int dpaa2_eth_set_hash(struct net_device *net_dev, u64 flags); From 0723a3aec0ade2d4223800ad26323a9c5369c0a0 Mon Sep 17 00:00:00 2001 From: Ioana Ciocoi Radulescu Date: Mon, 4 Feb 2019 17:00:35 +0000 Subject: [PATCH 0691/1436] dpaa2-eth: Use napi_consume_skb() While in NAPI context, free skbs by calling napi_consume_skb() instead of dev_kfree_skb(), to take advantage of the bulk freeing mechanism. Signed-off-by: Ioana Radulescu Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 6e58de628e5e..3bc54374968a 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -657,7 +657,7 @@ static int build_single_fd(struct dpaa2_eth_priv *priv, * dpaa2_eth_tx(). */ static void free_tx_fd(const struct dpaa2_eth_priv *priv, - const struct dpaa2_fd *fd) + const struct dpaa2_fd *fd, bool in_napi) { struct device *dev = priv->net_dev->dev.parent; dma_addr_t fd_addr; @@ -712,7 +712,7 @@ static void free_tx_fd(const struct dpaa2_eth_priv *priv, skb_free_frag(skbh); /* Move on with skb release */ - dev_kfree_skb(skb); + napi_consume_skb(skb, in_napi); } static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) @@ -795,7 +795,7 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) if (unlikely(err < 0)) { percpu_stats->tx_errors++; /* Clean up everything, including freeing the skb */ - free_tx_fd(priv, &fd); + free_tx_fd(priv, &fd, false); } else { fd_len = dpaa2_fd_get_len(&fd); percpu_stats->tx_packets++; @@ -837,7 +837,7 @@ static void dpaa2_eth_tx_conf(struct dpaa2_eth_priv *priv, /* Check frame errors in the FD field */ fd_errors = dpaa2_fd_get_ctrl(fd) & DPAA2_FD_TX_ERR_MASK; - free_tx_fd(priv, fd); + free_tx_fd(priv, fd, true); if (likely(!fd_errors)) return; From 1fa0f68c9255092c862f9280072ab0b947773b02 Mon Sep 17 00:00:00 2001 From: Ioana Ciocoi Radulescu Date: Mon, 4 Feb 2019 17:00:36 +0000 Subject: [PATCH 0692/1436] dpaa2-eth: Use FQ-based DPIO enqueue API Starting with MC10.14.0, dpaa2_io_service_enqueue_fq() API is functional. Since there are a number of cases where it offers better performance compared to the currently used enqueue function, switch to it for firmware versions that support it. Signed-off-by: Ioana Radulescu Signed-off-by: David S. Miller --- .../net/ethernet/freescale/dpaa2/dpaa2-eth.c | 41 ++++++++++++++++--- .../net/ethernet/freescale/dpaa2/dpaa2-eth.h | 4 ++ 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 3bc54374968a..87777b09f5e0 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -264,9 +264,7 @@ static int xdp_enqueue(struct dpaa2_eth_priv *priv, struct dpaa2_fd *fd, fq = &priv->fq[queue_id]; for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) { - err = dpaa2_io_service_enqueue_qd(fq->channel->dpio, - priv->tx_qdid, 0, - fq->tx_qdbin, fd); + err = priv->enqueue(priv, fq, fd, 0); if (err != -EBUSY) break; } @@ -785,9 +783,7 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) queue_mapping = skb_get_queue_mapping(skb); fq = &priv->fq[queue_mapping]; for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) { - err = dpaa2_io_service_enqueue_qd(fq->channel->dpio, - priv->tx_qdid, 0, - fq->tx_qdbin, &fd); + err = priv->enqueue(priv, fq, &fd, 0); if (err != -EBUSY) break; } @@ -2205,6 +2201,36 @@ static int set_buffer_layout(struct dpaa2_eth_priv *priv) return 0; } +#define DPNI_ENQUEUE_FQID_VER_MAJOR 7 +#define DPNI_ENQUEUE_FQID_VER_MINOR 9 + +static inline int dpaa2_eth_enqueue_qd(struct dpaa2_eth_priv *priv, + struct dpaa2_eth_fq *fq, + struct dpaa2_fd *fd, u8 prio) +{ + return dpaa2_io_service_enqueue_qd(fq->channel->dpio, + priv->tx_qdid, prio, + fq->tx_qdbin, fd); +} + +static inline int dpaa2_eth_enqueue_fq(struct dpaa2_eth_priv *priv, + struct dpaa2_eth_fq *fq, + struct dpaa2_fd *fd, + u8 prio __always_unused) +{ + return dpaa2_io_service_enqueue_fq(fq->channel->dpio, + fq->tx_fqid, fd); +} + +static void set_enqueue_mode(struct dpaa2_eth_priv *priv) +{ + if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_ENQUEUE_FQID_VER_MAJOR, + DPNI_ENQUEUE_FQID_VER_MINOR) < 0) + priv->enqueue = dpaa2_eth_enqueue_qd; + else + priv->enqueue = dpaa2_eth_enqueue_fq; +} + /* Configure the DPNI object this interface is associated with */ static int setup_dpni(struct fsl_mc_device *ls_dev) { @@ -2258,6 +2284,8 @@ static int setup_dpni(struct fsl_mc_device *ls_dev) if (err) goto close; + set_enqueue_mode(priv); + priv->cls_rules = devm_kzalloc(dev, sizeof(struct dpaa2_eth_cls_rule) * dpaa2_eth_fs_count(priv), GFP_KERNEL); if (!priv->cls_rules) @@ -2342,6 +2370,7 @@ static int setup_tx_flow(struct dpaa2_eth_priv *priv, } fq->tx_qdbin = qid.qdbin; + fq->tx_fqid = qid.fqid; err = dpni_get_queue(priv->mc_io, 0, priv->mc_token, DPNI_QUEUE_TX_CONFIRM, 0, fq->flowid, diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index da3d039cc9bf..3e29dece51e4 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -276,6 +276,7 @@ struct dpaa2_eth_priv; struct dpaa2_eth_fq { u32 fqid; u32 tx_qdbin; + u32 tx_fqid; u16 flowid; int target_cpu; u32 dq_frames; @@ -328,6 +329,9 @@ struct dpaa2_eth_priv { u8 num_fqs; struct dpaa2_eth_fq fq[DPAA2_ETH_MAX_QUEUES]; + int (*enqueue)(struct dpaa2_eth_priv *priv, + struct dpaa2_eth_fq *fq, + struct dpaa2_fd *fd, u8 prio); u8 num_channels; struct dpaa2_eth_channel *channel[DPAA2_ETH_MAX_DPCONS]; From 20fb0572826b8dca465ad97b0b7eddd78bafb7ae Mon Sep 17 00:00:00 2001 From: Ioana Ciocoi Radulescu Date: Mon, 4 Feb 2019 17:00:37 +0000 Subject: [PATCH 0693/1436] dpaa2-eth: Update buffer pool refill threshold Add more buffers to the Rx buffer pool as soon as 7 of them get consumed, instead of waiting for their number to drop below a fixed threshold. 7 is the number of buffers that can be released in the pool via a single DPIO command. Signed-off-by: Ioana Radulescu Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index 3e29dece51e4..9510928b7cca 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -53,7 +53,8 @@ */ #define DPAA2_ETH_MAX_FRAMES_PER_QUEUE (DPAA2_ETH_TAILDROP_THRESH / 64) #define DPAA2_ETH_NUM_BUFS (DPAA2_ETH_MAX_FRAMES_PER_QUEUE + 256) -#define DPAA2_ETH_REFILL_THRESH DPAA2_ETH_MAX_FRAMES_PER_QUEUE +#define DPAA2_ETH_REFILL_THRESH \ + (DPAA2_ETH_NUM_BUFS - DPAA2_ETH_BUFS_PER_CMD) /* Maximum number of buffers that can be acquired/released through a single * QBMan command From 6dce3c20ac429e7a651d728e375853370c796e8d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Feb 2019 08:36:06 -0800 Subject: [PATCH 0694/1436] rxrpc: bad unlock balance in rxrpc_recvmsg When either "goto wait_interrupted;" or "goto wait_error;" paths are taken, socket lock has already been released. This patch fixes following syzbot splat : WARNING: bad unlock balance detected! 5.0.0-rc4+ #59 Not tainted ------------------------------------- syz-executor223/8256 is trying to release lock (sk_lock-AF_RXRPC) at: [] rxrpc_recvmsg+0x6d3/0x3099 net/rxrpc/recvmsg.c:598 but there are no more locks to release! other info that might help us debug this: 1 lock held by syz-executor223/8256: #0: 00000000fa9ed0f4 (slock-AF_RXRPC){+...}, at: spin_lock_bh include/linux/spinlock.h:334 [inline] #0: 00000000fa9ed0f4 (slock-AF_RXRPC){+...}, at: release_sock+0x20/0x1c0 net/core/sock.c:2798 stack backtrace: CPU: 1 PID: 8256 Comm: syz-executor223 Not tainted 5.0.0-rc4+ #59 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_unlock_imbalance_bug kernel/locking/lockdep.c:3391 [inline] print_unlock_imbalance_bug.cold+0x114/0x123 kernel/locking/lockdep.c:3368 __lock_release kernel/locking/lockdep.c:3601 [inline] lock_release+0x67e/0xa00 kernel/locking/lockdep.c:3860 sock_release_ownership include/net/sock.h:1471 [inline] release_sock+0x183/0x1c0 net/core/sock.c:2808 rxrpc_recvmsg+0x6d3/0x3099 net/rxrpc/recvmsg.c:598 sock_recvmsg_nosec net/socket.c:794 [inline] sock_recvmsg net/socket.c:801 [inline] sock_recvmsg+0xd0/0x110 net/socket.c:797 __sys_recvfrom+0x1ff/0x350 net/socket.c:1845 __do_sys_recvfrom net/socket.c:1863 [inline] __se_sys_recvfrom net/socket.c:1859 [inline] __x64_sys_recvfrom+0xe1/0x1a0 net/socket.c:1859 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x446379 Code: e8 2c b3 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b 09 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fe5da89fd98 EFLAGS: 00000246 ORIG_RAX: 000000000000002d RAX: ffffffffffffffda RBX: 00000000006dbc28 RCX: 0000000000446379 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 00000000006dbc20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006dbc2c R13: 0000000000000000 R14: 0000000000000000 R15: 20c49ba5e353f7cf Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: Eric Dumazet Cc: David Howells Reported-by: syzbot Signed-off-by: David S. Miller --- net/rxrpc/recvmsg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index eaf19ebaa964..3f7bb11f3290 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -596,6 +596,7 @@ error_requeue_call: } error_no_call: release_sock(&rx->sk); +error_trace: trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); return ret; @@ -604,7 +605,7 @@ wait_interrupted: wait_error: finish_wait(sk_sleep(&rx->sk), &wait); call = NULL; - goto error_no_call; + goto error_trace; } /** From 57186a5f43d093e769076a4f1bf043351d8668a0 Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Mon, 4 Feb 2019 18:47:45 +0000 Subject: [PATCH 0695/1436] devlink: add hardware errors tracing facility Define a tracepoint and allow user to trace messages in case of an hardware error code for hardware associated with devlink instance. Signed-off-by: Nir Dotan Acked-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/trace/events/devlink.h | 33 +++++++++++++++++++++++++++++++++ net/core/devlink.c | 1 + 2 files changed, 34 insertions(+) diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h index 44acfbca1266..40705364a50f 100644 --- a/include/trace/events/devlink.h +++ b/include/trace/events/devlink.h @@ -46,6 +46,35 @@ TRACE_EVENT(devlink_hwmsg, (int) __entry->len, __get_dynamic_array(buf), __entry->len) ); +/* + * Tracepoint for devlink hardware error: + */ +TRACE_EVENT(devlink_hwerr, + TP_PROTO(const struct devlink *devlink, int err, const char *msg), + + TP_ARGS(devlink, err, msg), + + TP_STRUCT__entry( + __string(bus_name, devlink->dev->bus->name) + __string(dev_name, dev_name(devlink->dev)) + __string(driver_name, devlink->dev->driver->name) + __field(int, err) + __string(msg, msg) + ), + + TP_fast_assign( + __assign_str(bus_name, devlink->dev->bus->name); + __assign_str(dev_name, dev_name(devlink->dev)); + __assign_str(driver_name, devlink->dev->driver->name); + __entry->err = err; + __assign_str(msg, msg); + ), + + TP_printk("bus_name=%s dev_name=%s driver_name=%s err=%d %s", + __get_str(bus_name), __get_str(dev_name), + __get_str(driver_name), __entry->err, __get_str(msg)) +); + #endif /* _TRACE_DEVLINK_H */ /* This part must be outside protection */ @@ -64,6 +93,10 @@ static inline void trace_devlink_hwmsg(const struct devlink *devlink, { } +static inline void trace_devlink_hwerr(const struct devlink *devlink, + int err, const char *msg) +{ +} #endif /* _TRACE_DEVLINK_H */ #endif diff --git a/net/core/devlink.c b/net/core/devlink.c index 52bf27491fb8..cd0d393bc62d 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -81,6 +81,7 @@ struct devlink_dpipe_header devlink_dpipe_header_ipv6 = { EXPORT_SYMBOL(devlink_dpipe_header_ipv6); EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg); +EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr); static LIST_HEAD(devlink_list); From d32d02a5489b327e700deac32ecab4669bff1109 Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Mon, 4 Feb 2019 18:47:46 +0000 Subject: [PATCH 0696/1436] mlxsw: core: Trace EMAD errors Trace EMAD errors returned from HW. Signed-off-by: Nir Dotan Acked-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index ddedf8ab5b64..4f6fa515394e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1460,13 +1460,17 @@ static int mlxsw_reg_trans_wait(struct mlxsw_reg_trans *trans) if (trans->retries) dev_warn(mlxsw_core->bus_info->dev, "EMAD retries (%d/%d) (tid=%llx)\n", trans->retries, MLXSW_EMAD_MAX_RETRY, trans->tid); - if (err) + if (err) { dev_err(mlxsw_core->bus_info->dev, "EMAD reg access failed (tid=%llx,reg_id=%x(%s),type=%s,status=%x(%s))\n", trans->tid, trans->reg->id, mlxsw_reg_id_str(trans->reg->id), mlxsw_core_reg_access_type_str(trans->type), trans->emad_status, mlxsw_emad_op_tlv_status_str(trans->emad_status)); + trace_devlink_hwerr(priv_to_devlink(mlxsw_core), + trans->emad_status, + mlxsw_emad_op_tlv_status_str(trans->emad_status)); + } list_del(&trans->bulk_list); kfree_rcu(trans, rcu); From 90cc55f067f6ca0e64e5e52883ece47d8af7b67b Mon Sep 17 00:00:00 2001 From: Jonathan Bakker Date: Wed, 6 Feb 2019 10:45:37 -0800 Subject: [PATCH 0697/1436] Input: bma150 - register input device after setting private data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we introduce a race condition where userspace can request input before we're ready leading to null pointer dereference such as input: bma150 as /devices/platform/i2c-gpio-2/i2c-5/5-0038/input/input3 Unable to handle kernel NULL pointer dereference at virtual address 00000018 pgd = (ptrval) [00000018] *pgd=55dac831, *pte=00000000, *ppte=00000000 Internal error: Oops: 17 [#1] PREEMPT ARM Modules linked in: bma150 input_polldev [last unloaded: bma150] CPU: 0 PID: 2870 Comm: accelerometer Not tainted 5.0.0-rc3-dirty #46 Hardware name: Samsung S5PC110/S5PV210-based board PC is at input_event+0x8/0x60 LR is at bma150_report_xyz+0x9c/0xe0 [bma150] pc : [<80450f70>] lr : [<7f0a614c>] psr: 800d0013 sp : a4c1fd78 ip : 00000081 fp : 00020000 r10: 00000000 r9 : a5e2944c r8 : a7455000 r7 : 00000016 r6 : 00000101 r5 : a7617940 r4 : 80909048 r3 : fffffff2 r2 : 00000000 r1 : 00000003 r0 : 00000000 Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: 54e34019 DAC: 00000051 Process accelerometer (pid: 2870, stack limit = 0x(ptrval)) Stackck: (0xa4c1fd78 to 0xa4c20000) fd60: fffffff3 fc813f6c fd80: 40410581 d7530ce3 a5e2817c a7617f00 a5e29404 a5e2817c 00000000 7f008324 fda0: a5e28000 8044f59c a5fdd9d0 a5e2945c a46a4a00 a5e29668 a7455000 80454f10 fdc0: 80909048 a5e29668 a5fdd9d0 a46a4a00 806316d0 00000000 a46a4a00 801df5f0 fde0: 00000000 d7530ce3 a4c1fec0 a46a4a00 00000000 a5fdd9d0 a46a4a08 801df53c fe00: 00000000 801d74bc a4c1fec0 00000000 a4c1ff70 00000000 a7038da8 00000000 fe20: a46a4a00 801e91fc a411bbe0 801f2e88 00000004 00000000 80909048 00000041 fe40: 00000000 00020000 00000000 dead4ead a6a88da0 00000000 ffffe000 806fcae8 fe60: a4c1fec8 00000000 80909048 00000002 a5fdd9d0 a7660110 a411bab0 00000001 fe80: dead4ead ffffffff ffffffff a4c1fe8c a4c1fe8c d7530ce3 20000013 80909048 fea0: 80909048 a4c1ff70 00000001 fffff000 a4c1e000 00000005 00026038 801eabd8 fec0: a7660110 a411bab0 b9394901 00000006 a696201b 76fb3000 00000000 a7039720 fee0: a5fdd9d0 00000101 00000002 00000096 00000000 00000000 00000000 a4c1ff00 ff00: a6b310f4 805cb174 a6b310f4 00000010 00000fe0 00000010 a4c1e000 d7530ce3 ff20: 00000003 a5f41400 a5f41424 00000000 a6962000 00000000 00000003 00000002 ff40: ffffff9c 000a0000 80909048 d7530ce3 a6962000 00000003 80909048 ffffff9c ff60: a6962000 801d890c 00000000 00000000 00020000 a7590000 00000004 00000100 ff80: 00000001 d7530ce3 000288b8 00026320 000288b8 00000005 80101204 a4c1e000 ffa0: 00000005 80101000 000288b8 00026320 000288b8 000a0000 00000000 00000000 ffc0: 000288b8 00026320 000288b8 00000005 7eef3bac 000264e8 00028ad8 00026038 ffe0: 00000005 7eef3300 76f76e91 76f78546 800d0030 000288b8 00000000 00000000 [<80450f70>] (input_event) from [] (0xa5e2817c) Code: e1a08148 eaffffa8 e351001f 812fff1e (e590c018) ---[ end trace 1c691ee85f2ff243 ]--- Signed-off-by: Jonathan Bakker Signed-off-by: Paweł Chmiel Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/misc/bma150.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/input/misc/bma150.c b/drivers/input/misc/bma150.c index 1efcfdf9f8a8..dd9dd4e40827 100644 --- a/drivers/input/misc/bma150.c +++ b/drivers/input/misc/bma150.c @@ -481,13 +481,14 @@ static int bma150_register_input_device(struct bma150_data *bma150) idev->close = bma150_irq_close; input_set_drvdata(idev, bma150); + bma150->input = idev; + error = input_register_device(idev); if (error) { input_free_device(idev); return error; } - bma150->input = idev; return 0; } @@ -510,15 +511,15 @@ static int bma150_register_polled_device(struct bma150_data *bma150) bma150_init_input_device(bma150, ipoll_dev->input); + bma150->input_polled = ipoll_dev; + bma150->input = ipoll_dev->input; + error = input_register_polled_device(ipoll_dev); if (error) { input_free_polled_device(ipoll_dev); return error; } - bma150->input_polled = ipoll_dev; - bma150->input = ipoll_dev->input; - return 0; } From dd957493baa586f1431490f97f9c7c45eaf8ab10 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 3 Feb 2019 10:02:07 +0100 Subject: [PATCH 0698/1436] libata: Add NOLPM quirk for SAMSUNG MZ7TE512HMHP-000L1 SSD We've received a bugreport that using LPM with a SAMSUNG MZ7TE512HMHP-000L1 SSD leads to system instability, we already have a quirk for the MZ7TD256HAFV-000L9, which is also a Samsun EVO 840 / PM851 OEM model, so it seems some of these models have a LPM issue. This commits adds a NOLPM quirk for the model string from the new bugeport, to avoid the reported stability issues. Cc: stable@vger.kernel.org BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1571330 Signed-off-by: Hans de Goede Signed-off-by: Jens Axboe --- drivers/ata/libata-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index b8c3f9e6af89..adf28788cab5 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4554,6 +4554,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, }, { "SAMSUNG SSD PM830 mSATA *", "CXM13D1Q", ATA_HORKAGE_NOLPM, }, { "SAMSUNG MZ7TD256HAFV-000L9", NULL, ATA_HORKAGE_NOLPM, }, + { "SAMSUNG MZ7TE512HMHP-000L1", "EXT06L0Q", ATA_HORKAGE_NOLPM, }, /* devices that don't properly handle queued TRIM commands */ { "Micron_M500IT_*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | From 30363d6506d0d202bb14f4dac36d9b4b0714ad8d Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Fri, 1 Feb 2019 11:26:02 +0800 Subject: [PATCH 0699/1436] m68k: set proper major_num when specifying module param major_num When calling register_blkdev() with specified major device number, the return code is 0 on success. So it seems not correct direct assign return code to variable major_num in this case. Tested-by: Michael Schmitz Reviewed-by: Geert Uytterhoeven Signed-off-by: Chengguang Xu Signed-off-by: Jens Axboe --- arch/m68k/emu/nfblock.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index 38049357d6d3..40712e49381b 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -155,18 +155,22 @@ out: static int __init nfhd_init(void) { u32 blocks, bsize; + int ret; int i; nfhd_id = nf_get_id("XHDI"); if (!nfhd_id) return -ENODEV; - major_num = register_blkdev(major_num, "nfhd"); - if (major_num <= 0) { + ret = register_blkdev(major_num, "nfhd"); + if (ret < 0) { pr_warn("nfhd: unable to get major number\n"); - return major_num; + return ret; } + if (!major_num) + major_num = ret; + for (i = NFHD_DEV_OFFSET; i < 24; i++) { if (nfhd_get_capacity(i, 0, &blocks, &bsize)) continue; From 2b1f86bf741a3220b4471fef197631d6e2264983 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 6 Feb 2019 10:54:54 +0000 Subject: [PATCH 0700/1436] MAINTAINERS: add maintainer for SFF/SFP/SFP+ support Add maintainer entry for SFF/SFP/SFP+ support. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 479b03221a4c..39168d401b26 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13708,6 +13708,15 @@ L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/sfc/ +SFF/SFP/SFP+ MODULE SUPPORT +M: Russell King +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/phy/phylink.c +F: drivers/net/phy/sfp* +F: include/linux/phylink.h +F: include/linux/sfp.h + SGI GRU DRIVER M: Dimitri Sivanich S: Maintained From 43636c804df0126da669c261fc820fb22f62bfc2 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 21 Jan 2019 22:49:37 +0900 Subject: [PATCH 0701/1436] fs: ratelimit __find_get_block_slow() failure message. When something let __find_get_block_slow() hit all_mapped path, it calls printk() for 100+ times per a second. But there is no need to print same message with such high frequency; it is just asking for stall warning, or at least bloating log files. [ 399.866302][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8 [ 399.873324][T15342] b_state=0x00000029, b_size=512 [ 399.878403][T15342] device loop0 blocksize: 4096 [ 399.883296][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8 [ 399.890400][T15342] b_state=0x00000029, b_size=512 [ 399.895595][T15342] device loop0 blocksize: 4096 [ 399.900556][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8 [ 399.907471][T15342] b_state=0x00000029, b_size=512 [ 399.912506][T15342] device loop0 blocksize: 4096 This patch reduces frequency to up to once per a second, in addition to concatenating three lines into one. [ 399.866302][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8, b_state=0x00000029, b_size=512, device loop0 blocksize: 4096 Signed-off-by: Tetsuo Handa Reviewed-by: Jan Kara Cc: Dmitry Vyukov Signed-off-by: Jens Axboe --- fs/buffer.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 52d024bfdbc1..48318fb74938 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -200,6 +200,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) struct buffer_head *head; struct page *page; int all_mapped = 1; + static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1); index = block >> (PAGE_SHIFT - bd_inode->i_blkbits); page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED); @@ -227,15 +228,15 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) * file io on the block device and getblk. It gets dealt with * elsewhere, don't buffer_error if we had some unmapped buffers */ - if (all_mapped) { - printk("__find_get_block_slow() failed. " - "block=%llu, b_blocknr=%llu\n", - (unsigned long long)block, - (unsigned long long)bh->b_blocknr); - printk("b_state=0x%08lx, b_size=%zu\n", - bh->b_state, bh->b_size); - printk("device %pg blocksize: %d\n", bdev, - 1 << bd_inode->i_blkbits); + ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE); + if (all_mapped && __ratelimit(&last_warned)) { + printk("__find_get_block_slow() failed. block=%llu, " + "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, " + "device %pg blocksize: %d\n", + (unsigned long long)block, + (unsigned long long)bh->b_blocknr, + bh->b_state, bh->b_size, bdev, + 1 << bd_inode->i_blkbits); } out_unlock: spin_unlock(&bd_mapping->private_lock); From e3fdc89ca47ef34dfb6fd5101fec084c3dba5486 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 21 Jan 2019 15:58:38 -0500 Subject: [PATCH 0702/1436] nfsd: Fix error return values for nfsd4_clone_file_range() If the parameter 'count' is non-zero, nfsd4_clone_file_range() will currently clobber all errors returned by vfs_clone_file_range() and replace them with EINVAL. Fixes: 42ec3d4c0218 ("vfs: make remap_file_range functions take and...") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.20+ Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 9824e32b2f23..7dc98e14655d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -557,9 +557,11 @@ __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, loff_t cloned; cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0); + if (cloned < 0) + return nfserrno(cloned); if (count && cloned != count) - cloned = -EINVAL; - return nfserrno(cloned < 0 ? cloned : 0); + return nfserrno(-EINVAL); + return 0; } ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, From e248aa7be86e8179f20ac0931774ecd746f3f5bf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 25 Jan 2019 16:54:54 -0500 Subject: [PATCH 0703/1436] svcrdma: Remove max_sge check at connect time Two and a half years ago, the client was changed to use gathered Send for larger inline messages, in commit 655fec6987b ("xprtrdma: Use gathered Send for large inline messages"). Several fixes were required because there are a few in-kernel device drivers whose max_sge is 3, and these were broken by the change. Apparently my memory is going, because some time later, I submitted commit 25fd86eca11c ("svcrdma: Don't overrun the SGE array in svc_rdma_send_ctxt"), and after that, commit f3c1fd0ee294 ("svcrdma: Reduce max_send_sges"). These too incorrectly assumed in-kernel device drivers would have more than a few Send SGEs available. The fix for the server side is not the same. This is because the fundamental problem on the server is that, whether or not the client has provisioned a chunk for the RPC reply, the server must squeeze even the most complex RPC replies into a single RDMA Send. Failing in the send path because of Send SGE exhaustion should never be an option. Therefore, instead of failing when the send path runs out of SGEs, switch to using a bounce buffer mechanism to handle RPC replies that are too complex for the device to send directly. That allows us to remove the max_sge check to enable drivers with small max_sge to work again. Reported-by: Don Dutile Fixes: 25fd86eca11c ("svcrdma: Don't overrun the SGE array in ...") Cc: stable@vger.kernel.org Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 105 +++++++++++++++++++++-- net/sunrpc/xprtrdma/svc_rdma_transport.c | 9 +- 2 files changed, 102 insertions(+), 12 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index cf51b8f9b15f..1f200119268c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -537,6 +537,99 @@ void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma, DMA_TO_DEVICE); } +/* If the xdr_buf has more elements than the device can + * transmit in a single RDMA Send, then the reply will + * have to be copied into a bounce buffer. + */ +static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma, + struct xdr_buf *xdr, + __be32 *wr_lst) +{ + int elements; + + /* xdr->head */ + elements = 1; + + /* xdr->pages */ + if (!wr_lst) { + unsigned int remaining; + unsigned long pageoff; + + pageoff = xdr->page_base & ~PAGE_MASK; + remaining = xdr->page_len; + while (remaining) { + ++elements; + remaining -= min_t(u32, PAGE_SIZE - pageoff, + remaining); + pageoff = 0; + } + } + + /* xdr->tail */ + if (xdr->tail[0].iov_len) + ++elements; + + /* assume 1 SGE is needed for the transport header */ + return elements >= rdma->sc_max_send_sges; +} + +/* The device is not capable of sending the reply directly. + * Assemble the elements of @xdr into the transport header + * buffer. + */ +static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt, + struct xdr_buf *xdr, __be32 *wr_lst) +{ + unsigned char *dst, *tailbase; + unsigned int taillen; + + dst = ctxt->sc_xprt_buf; + dst += ctxt->sc_sges[0].length; + + memcpy(dst, xdr->head[0].iov_base, xdr->head[0].iov_len); + dst += xdr->head[0].iov_len; + + tailbase = xdr->tail[0].iov_base; + taillen = xdr->tail[0].iov_len; + if (wr_lst) { + u32 xdrpad; + + xdrpad = xdr_padsize(xdr->page_len); + if (taillen && xdrpad) { + tailbase += xdrpad; + taillen -= xdrpad; + } + } else { + unsigned int len, remaining; + unsigned long pageoff; + struct page **ppages; + + ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); + pageoff = xdr->page_base & ~PAGE_MASK; + remaining = xdr->page_len; + while (remaining) { + len = min_t(u32, PAGE_SIZE - pageoff, remaining); + + memcpy(dst, page_address(*ppages), len); + remaining -= len; + dst += len; + pageoff = 0; + } + } + + if (taillen) + memcpy(dst, tailbase, taillen); + + ctxt->sc_sges[0].length += xdr->len; + ib_dma_sync_single_for_device(rdma->sc_pd->device, + ctxt->sc_sges[0].addr, + ctxt->sc_sges[0].length, + DMA_TO_DEVICE); + + return 0; +} + /* svc_rdma_map_reply_msg - Map the buffer holding RPC message * @rdma: controlling transport * @ctxt: send_ctxt for the Send WR @@ -559,8 +652,10 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, u32 xdr_pad; int ret; - if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges) - return -EIO; + if (svc_rdma_pull_up_needed(rdma, xdr, wr_lst)) + return svc_rdma_pull_up_reply_msg(rdma, ctxt, xdr, wr_lst); + + ++ctxt->sc_cur_sge_no; ret = svc_rdma_dma_map_buf(rdma, ctxt, xdr->head[0].iov_base, xdr->head[0].iov_len); @@ -591,8 +686,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, while (remaining) { len = min_t(u32, PAGE_SIZE - page_off, remaining); - if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges) - return -EIO; + ++ctxt->sc_cur_sge_no; ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++, page_off, len); if (ret < 0) @@ -606,8 +700,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, len = xdr->tail[0].iov_len; tail: if (len) { - if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges) - return -EIO; + ++ctxt->sc_cur_sge_no; ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len); if (ret < 0) return ret; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 924c17d46903..57f86c63a463 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -419,12 +419,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) /* Transport header, head iovec, tail iovec */ newxprt->sc_max_send_sges = 3; /* Add one SGE per page list entry */ - newxprt->sc_max_send_sges += svcrdma_max_req_size / PAGE_SIZE; - if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge) { - pr_err("svcrdma: too few Send SGEs available (%d needed)\n", - newxprt->sc_max_send_sges); - goto errout; - } + newxprt->sc_max_send_sges += (svcrdma_max_req_size / PAGE_SIZE) + 1; + if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge) + newxprt->sc_max_send_sges = dev->attrs.max_send_sge; newxprt->sc_max_req_size = svcrdma_max_req_size; newxprt->sc_max_requests = svcrdma_max_requests; newxprt->sc_max_bc_requests = svcrdma_max_bc_requests; From 04c7788c2bb3542e5c8c9d7175193e4d925edf20 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 6 Feb 2019 13:30:17 +0100 Subject: [PATCH 0704/1436] r8169: Load MAC address from device tree if present If the system was booted using a device tree and if the device tree contains a MAC address, use it instead of reading one from the EEPROM. This is useful in situations where the EEPROM isn't properly programmed or where the firmware wants to override the existing MAC address. Signed-off-by: Thierry Reding Reviewed-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 36 ++++++++++++++++++---------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index e8a112149a62..501891be7c56 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -7110,6 +7110,21 @@ static int rtl_alloc_irq(struct rtl8169_private *tp) return pci_alloc_irq_vectors(tp->pci_dev, 1, 1, flags); } +static void rtl_read_mac_address(struct rtl8169_private *tp, + u8 mac_addr[ETH_ALEN]) +{ + /* Get MAC address */ + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_35 ... RTL_GIGA_MAC_VER_38: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: + *(u32 *)&mac_addr[0] = rtl_eri_read(tp, 0xe0, ERIAR_EXGMAC); + *(u16 *)&mac_addr[4] = rtl_eri_read(tp, 0xe4, ERIAR_EXGMAC); + break; + default: + break; + } +} + DECLARE_RTL_COND(rtl_link_list_ready_cond) { return RTL_R8(tp, MCU) & LINK_LIST_RDY; @@ -7301,6 +7316,7 @@ static int rtl_get_ether_clk(struct rtl8169_private *tp) static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data; + u8 mac_addr[ETH_ALEN] __aligned(4) = {}; struct rtl8169_private *tp; struct net_device *dev; int chipset, region, i; @@ -7403,20 +7419,14 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) u64_stats_init(&tp->rx_stats.syncp); u64_stats_init(&tp->tx_stats.syncp); - /* Get MAC address */ - switch (tp->mac_version) { - u8 mac_addr[ETH_ALEN] __aligned(4); - case RTL_GIGA_MAC_VER_35 ... RTL_GIGA_MAC_VER_38: - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: - *(u32 *)&mac_addr[0] = rtl_eri_read(tp, 0xe0, ERIAR_EXGMAC); - *(u16 *)&mac_addr[4] = rtl_eri_read(tp, 0xe4, ERIAR_EXGMAC); + /* get MAC address */ + rc = eth_platform_get_mac_address(&pdev->dev, mac_addr); + if (rc) + rtl_read_mac_address(tp, mac_addr); + + if (is_valid_ether_addr(mac_addr)) + rtl_rar_set(tp, mac_addr); - if (is_valid_ether_addr(mac_addr)) - rtl_rar_set(tp, mac_addr); - break; - default: - break; - } for (i = 0; i < ETH_ALEN; i++) dev->dev_addr[i] = RTL_R8(tp, MAC0 + i); From deedf1feb255c7a390309f615e50de37cb82fb61 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 6 Feb 2019 13:30:18 +0100 Subject: [PATCH 0705/1436] r8169: Avoid pointer aliasing Read MAC address 32-bit at a time and manually extract the individual bytes. This avoids pointer aliasing and gives the compiler a better chance of optimizing the operation. Suggested-by: Andrew Lunn Signed-off-by: Thierry Reding Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 501891be7c56..1dd72137fd53 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -7113,12 +7113,21 @@ static int rtl_alloc_irq(struct rtl8169_private *tp) static void rtl_read_mac_address(struct rtl8169_private *tp, u8 mac_addr[ETH_ALEN]) { + u32 value; + /* Get MAC address */ switch (tp->mac_version) { case RTL_GIGA_MAC_VER_35 ... RTL_GIGA_MAC_VER_38: case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: - *(u32 *)&mac_addr[0] = rtl_eri_read(tp, 0xe0, ERIAR_EXGMAC); - *(u16 *)&mac_addr[4] = rtl_eri_read(tp, 0xe4, ERIAR_EXGMAC); + value = rtl_eri_read(tp, 0xe0, ERIAR_EXGMAC); + mac_addr[0] = (value >> 0) & 0xff; + mac_addr[1] = (value >> 8) & 0xff; + mac_addr[2] = (value >> 16) & 0xff; + mac_addr[3] = (value >> 24) & 0xff; + + value = rtl_eri_read(tp, 0xe4, ERIAR_EXGMAC); + mac_addr[4] = (value >> 0) & 0xff; + mac_addr[5] = (value >> 8) & 0xff; break; default: break; @@ -7316,7 +7325,8 @@ static int rtl_get_ether_clk(struct rtl8169_private *tp) static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data; - u8 mac_addr[ETH_ALEN] __aligned(4) = {}; + /* align to u16 for is_valid_ether_addr() */ + u8 mac_addr[ETH_ALEN] __aligned(2) = {}; struct rtl8169_private *tp; struct net_device *dev; int chipset, region, i; From 249f62b6edad70d5eba43caf09526ee035336e0a Mon Sep 17 00:00:00 2001 From: Vishal Kulkarni Date: Wed, 6 Feb 2019 18:27:13 +0530 Subject: [PATCH 0706/1436] cxgb4: Add new T6 PCI device ids 0x608b Signed-off-by: Vishal Kulkarni Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index bf7325f6d553..0c5373462ced 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -218,6 +218,7 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x6088), /* Custom T62100-CR */ CH_PCI_ID_TABLE_FENTRY(0x6089), /* Custom T62100-KR */ CH_PCI_ID_TABLE_FENTRY(0x608a), /* Custom T62100-CR */ + CH_PCI_ID_TABLE_FENTRY(0x608b), /* Custom T6225-CR */ CH_PCI_DEVICE_ID_TABLE_DEFINE_END; #endif /* __T4_PCI_ID_TBL_H__ */ From 47b98039fb6e1b3fc7b04a86a9c8ed5bbf604103 Mon Sep 17 00:00:00 2001 From: Vishal Kulkarni Date: Wed, 6 Feb 2019 18:31:36 +0530 Subject: [PATCH 0707/1436] cxgb4: Update 1.22.9.0 as the latest firmware supported. Change t4fw_version.h to update latest firmware version number to 1.22.9.0. Signed-off-by: Vishal Kulkarni Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h index a844296135b4..9125ddd89dd1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h @@ -36,8 +36,8 @@ #define __T4FW_VERSION_H__ #define T4FW_VERSION_MAJOR 0x01 -#define T4FW_VERSION_MINOR 0x14 -#define T4FW_VERSION_MICRO 0x08 +#define T4FW_VERSION_MINOR 0x16 +#define T4FW_VERSION_MICRO 0x09 #define T4FW_VERSION_BUILD 0x00 #define T4FW_MIN_VERSION_MAJOR 0x01 @@ -45,8 +45,8 @@ #define T4FW_MIN_VERSION_MICRO 0x00 #define T5FW_VERSION_MAJOR 0x01 -#define T5FW_VERSION_MINOR 0x14 -#define T5FW_VERSION_MICRO 0x08 +#define T5FW_VERSION_MINOR 0x16 +#define T5FW_VERSION_MICRO 0x09 #define T5FW_VERSION_BUILD 0x00 #define T5FW_MIN_VERSION_MAJOR 0x00 @@ -54,8 +54,8 @@ #define T5FW_MIN_VERSION_MICRO 0x00 #define T6FW_VERSION_MAJOR 0x01 -#define T6FW_VERSION_MINOR 0x14 -#define T6FW_VERSION_MICRO 0x08 +#define T6FW_VERSION_MINOR 0x16 +#define T6FW_VERSION_MICRO 0x09 #define T6FW_VERSION_BUILD 0x00 #define T6FW_MIN_VERSION_MAJOR 0x00 From 00670cb8a73b10b10d3c40f045c15411715e4465 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 6 Feb 2019 18:35:15 +0300 Subject: [PATCH 0708/1436] net: dsa: Fix NULL checking in dsa_slave_set_eee() This function can't succeed if dp->pl is NULL. It will Oops inside the call to return phylink_ethtool_get_eee(dp->pl, e); Fixes: 1be52e97ed3e ("dsa: slave: eee: Allow ports to use phylink") Signed-off-by: Dan Carpenter Reviewed-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- net/dsa/slave.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index b5e44825d173..a1c9fe155057 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -642,7 +642,7 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) int ret; /* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev && !dp->pl) + if (!dev->phydev || !dp->pl) return -ENODEV; if (!ds->ops->set_mac_eee) @@ -662,7 +662,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) int ret; /* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev && !dp->pl) + if (!dev->phydev || !dp->pl) return -ENODEV; if (!ds->ops->get_mac_eee) From d6abc5969463359c366d459247b90366fcd6f5c5 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Feb 2019 09:45:35 -0800 Subject: [PATCH 0709/1436] net: Introduce ndo_get_port_parent_id() In preparation for getting rid of switchdev_ops, create a dedicated NDO operation for getting the port's parent identifier. There are essentially two classes of drivers that need to implement getting the port's parent ID which are VF/PF drivers with a built-in switch, and pure switchdev drivers such as mlxsw, ocelot, dsa etc. We introduce a helper function: dev_get_port_parent_id() which supports recursion into the lower devices to obtain the first port's parent ID. Convert the bridge, core and ipv4 multicast routing code to check for such ndo_get_port_parent_id() and call the helper function when valid before falling back to switchdev_port_attr_get(). This will allow us to convert all relevant drivers in one go instead of having to implement both switchdev_port_attr_get() and ndo_get_port_parent_id() operations, then get rid of switchdev_port_attr_get(). Acked-by: Jiri Pirko Signed-off-by: Florian Fainelli Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 +++++++ net/bridge/br_switchdev.c | 9 +++++-- net/core/dev.c | 57 +++++++++++++++++++++++++++++++++++++++ net/core/net-sysfs.c | 7 ++++- net/core/rtnetlink.c | 6 ++++- net/ipv4/ipmr.c | 8 +++++- 6 files changed, 91 insertions(+), 5 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ba57d0ba425e..1d95e634f3fe 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1188,6 +1188,10 @@ struct dev_ifalias { * not implement this, it is assumed that the hw is not able to have * multiple net devices on single physical port. * + * int (*ndo_get_port_parent_id)(struct net_device *dev, + * struct netdev_phys_item_id *ppid) + * Called to get the parent ID of the physical port of this device. + * * void (*ndo_udp_tunnel_add)(struct net_device *dev, * struct udp_tunnel_info *ti); * Called by UDP tunnel to notify a driver about the UDP port and socket @@ -1412,6 +1416,8 @@ struct net_device_ops { bool new_carrier); int (*ndo_get_phys_port_id)(struct net_device *dev, struct netdev_phys_item_id *ppid); + int (*ndo_get_port_parent_id)(struct net_device *dev, + struct netdev_phys_item_id *ppid); int (*ndo_get_phys_port_name)(struct net_device *dev, char *name, size_t len); void (*ndo_udp_tunnel_add)(struct net_device *dev, @@ -3651,6 +3657,9 @@ int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid); int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); +int dev_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid, bool recurse); +bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); int dev_change_proto_down(struct net_device *dev, bool proto_down); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 4d2b9eb7604a..06b0ae44585f 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -14,7 +14,8 @@ static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) /* dev is yet to be added to the port list. */ list_for_each_entry(p, &br->port_list, list) { - if (switchdev_port_same_parent_id(dev, p->dev)) + if (netdev_port_same_parent_id(dev, p->dev) || + switchdev_port_same_parent_id(dev, p->dev)) return p->offload_fwd_mark; } @@ -23,6 +24,7 @@ static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) int nbp_switchdev_mark_set(struct net_bridge_port *p) { + const struct net_device_ops *ops = p->dev->netdev_ops; struct switchdev_attr attr = { .orig_dev = p->dev, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, @@ -31,7 +33,10 @@ int nbp_switchdev_mark_set(struct net_bridge_port *p) ASSERT_RTNL(); - err = switchdev_port_attr_get(p->dev, &attr); + if (ops->ndo_get_port_parent_id) + err = dev_get_port_parent_id(p->dev, &attr.u.ppid, true); + else + err = switchdev_port_attr_get(p->dev, &attr); if (err) { if (err == -EOPNOTSUPP) return 0; diff --git a/net/core/dev.c b/net/core/dev.c index bfa4be42afff..8c6d5cf8a308 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7877,6 +7877,63 @@ int dev_get_phys_port_name(struct net_device *dev, } EXPORT_SYMBOL(dev_get_phys_port_name); +/** + * dev_get_port_parent_id - Get the device's port parent identifier + * @dev: network device + * @ppid: pointer to a storage for the port's parent identifier + * @recurse: allow/disallow recursion to lower devices + * + * Get the devices's port parent identifier + */ +int dev_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid, + bool recurse) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct netdev_phys_item_id first = { }; + struct net_device *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (ops->ndo_get_port_parent_id) + return ops->ndo_get_port_parent_id(dev, ppid); + + if (!recurse) + return err; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = dev_get_port_parent_id(lower_dev, ppid, recurse); + if (err) + break; + if (!first.id_len) + first = *ppid; + else if (memcmp(&first, ppid, sizeof(*ppid))) + return -ENODATA; + } + + return err; +} +EXPORT_SYMBOL(dev_get_port_parent_id); + +/** + * netdev_port_same_parent_id - Indicate if two network devices have + * the same port parent identifier + * @a: first network device + * @b: second network device + */ +bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b) +{ + struct netdev_phys_item_id a_id = { }; + struct netdev_phys_item_id b_id = { }; + + if (dev_get_port_parent_id(a, &a_id, true) || + dev_get_port_parent_id(b, &b_id, true)) + return false; + + return netdev_phys_item_id_same(&a_id, &b_id); +} +EXPORT_SYMBOL(netdev_port_same_parent_id); + /** * dev_change_proto_down - update protocol port state information * @dev: device diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index ff9fd2bb4ce4..4eace9f1dcf9 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -495,6 +495,7 @@ static ssize_t phys_switch_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); + const struct net_device_ops *ops = netdev->netdev_ops; ssize_t ret = -EINVAL; if (!rtnl_trylock()) @@ -507,7 +508,11 @@ static ssize_t phys_switch_id_show(struct device *dev, .flags = SWITCHDEV_F_NO_RECURSE, }; - ret = switchdev_port_attr_get(netdev, &attr); + if (ops->ndo_get_port_parent_id) + ret = dev_get_port_parent_id(netdev, &attr.u.ppid, + false); + else + ret = switchdev_port_attr_get(netdev, &attr); if (!ret) ret = sprintf(buf, "%*phN\n", attr.u.ppid.id_len, attr.u.ppid.id); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f5a98082ac7a..90dd02c1f561 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1146,6 +1146,7 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev) static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { + const struct net_device_ops *ops = dev->netdev_ops; int err; struct switchdev_attr attr = { .orig_dev = dev, @@ -1153,7 +1154,10 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) .flags = SWITCHDEV_F_NO_RECURSE, }; - err = switchdev_port_attr_get(dev, &attr); + if (ops->ndo_get_port_parent_id) + err = dev_get_port_parent_id(dev, &attr.u.ppid, false); + else + err = switchdev_port_attr_get(dev, &attr); if (err) { if (err == -EOPNOTSUPP) return 0; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index fb99002c3d4e..c71bcc42d66d 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -837,6 +837,7 @@ static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache, static int vif_add(struct net *net, struct mr_table *mrt, struct vifctl *vifc, int mrtsock) { + const struct net_device_ops *ops; int vifi = vifc->vifc_vifi; struct switchdev_attr attr = { .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, @@ -920,7 +921,12 @@ static int vif_add(struct net *net, struct mr_table *mrt, (VIFF_TUNNEL | VIFF_REGISTER)); attr.orig_dev = dev; - if (!switchdev_port_attr_get(dev, &attr)) { + ops = dev->netdev_ops; + if (ops->ndo_get_port_parent_id && + !dev_get_port_parent_id(dev, &attr.u.ppid, true)) { + memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len); + v->dev_parent_id.id_len = attr.u.ppid.id_len; + } else if (!switchdev_port_attr_get(dev, &attr)) { memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len); v->dev_parent_id.id_len = attr.u.ppid.id_len; } else { From 52d5254a2d045bba2a744042319c64e1fe41b5c8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Feb 2019 09:45:36 -0800 Subject: [PATCH 0710/1436] bnxt: Implement ndo_get_port_parent_id() BNXT only supports SWITCHDEV_ATTR_ID_PORT_PARENT_ID, which makes it a great candidate to be converted to use the ndo_get_port_parent_id() NDO instead of implementing switchdev_port_attr_get(). The conversion is straight forward here since the PF and VF code use the same getter. Since bnxt makes uses of switchdev_port_same_parent_id() convert it to use netdev_port_same_parent_id(). Acked-by: Jiri Pirko Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 28 ++++++------------- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 4 +-- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c | 12 +++----- 4 files changed, 15 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6a512871176b..1c2987c3d708 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9981,8 +9981,11 @@ static int bnxt_get_phys_port_name(struct net_device *dev, char *buf, return 0; } -int bnxt_port_attr_get(struct bnxt *bp, struct switchdev_attr *attr) +int bnxt_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) { + struct bnxt *bp = netdev_priv(dev); + if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) return -EOPNOTSUPP; @@ -9990,27 +9993,12 @@ int bnxt_port_attr_get(struct bnxt *bp, struct switchdev_attr *attr) if (!BNXT_PF(bp)) return -EOPNOTSUPP; - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(bp->switch_id); - memcpy(attr->u.ppid.id, bp->switch_id, attr->u.ppid.id_len); - break; - default: - return -EOPNOTSUPP; - } + ppid->id_len = sizeof(bp->switch_id); + memcpy(ppid->id, bp->switch_id, ppid->id_len); + return 0; } -static int bnxt_swdev_port_attr_get(struct net_device *dev, - struct switchdev_attr *attr) -{ - return bnxt_port_attr_get(netdev_priv(dev), attr); -} - -static const struct switchdev_ops bnxt_switchdev_ops = { - .switchdev_port_attr_get = bnxt_swdev_port_attr_get -}; - static const struct net_device_ops bnxt_netdev_ops = { .ndo_open = bnxt_open, .ndo_start_xmit = bnxt_start_xmit, @@ -10042,6 +10030,7 @@ static const struct net_device_ops bnxt_netdev_ops = { .ndo_bpf = bnxt_xdp, .ndo_bridge_getlink = bnxt_bridge_getlink, .ndo_bridge_setlink = bnxt_bridge_setlink, + .ndo_get_port_parent_id = bnxt_get_port_parent_id, .ndo_get_phys_port_name = bnxt_get_phys_port_name }; @@ -10400,7 +10389,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->netdev_ops = &bnxt_netdev_ops; dev->watchdog_timeo = BNXT_TX_TIMEOUT; dev->ethtool_ops = &bnxt_ethtool_ops; - SWITCHDEV_SET_OPS(dev, &bnxt_switchdev_ops); pci_set_drvdata(pdev, dev); rc = bnxt_alloc_hwrm_resources(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 5c886a700bcc..17554d4be651 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -1795,7 +1794,8 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, int bnxt_setup_mq_tc(struct net_device *dev, u8 tc); int bnxt_get_max_rings(struct bnxt *, int *, int *, bool); int bnxt_restore_pf_fw_resources(struct bnxt *bp); -int bnxt_port_attr_get(struct bnxt *bp, struct switchdev_attr *attr); +int bnxt_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid); void bnxt_dim_work(struct work_struct *work); int bnxt_hwrm_set_ring_coal(struct bnxt *bp, struct bnxt_napi *bnapi); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 7a1fdad1aea6..61a3457ed71b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -45,7 +45,7 @@ static u16 bnxt_flow_get_dst_fid(struct bnxt *pf_bp, struct net_device *dev) struct bnxt *bp; /* check if dev belongs to the same switch */ - if (!switchdev_port_same_parent_id(pf_bp->dev, dev)) { + if (!netdev_port_same_parent_id(pf_bp->dev, dev)) { netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch", dev->ifindex); return BNXT_FID_INVALID; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index 9a25c05aa571..2bdd2da9aac7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -237,21 +237,17 @@ static void bnxt_vf_rep_get_drvinfo(struct net_device *dev, strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version)); } -static int bnxt_vf_rep_port_attr_get(struct net_device *dev, - struct switchdev_attr *attr) +static int bnxt_vf_rep_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) { struct bnxt_vf_rep *vf_rep = netdev_priv(dev); /* as only PORT_PARENT_ID is supported currently use common code * between PF and VF-rep for now. */ - return bnxt_port_attr_get(vf_rep->bp, attr); + return bnxt_get_port_parent_id(vf_rep->bp->dev, ppid); } -static const struct switchdev_ops bnxt_vf_rep_switchdev_ops = { - .switchdev_port_attr_get = bnxt_vf_rep_port_attr_get -}; - static const struct ethtool_ops bnxt_vf_rep_ethtool_ops = { .get_drvinfo = bnxt_vf_rep_get_drvinfo }; @@ -262,6 +258,7 @@ static const struct net_device_ops bnxt_vf_rep_netdev_ops = { .ndo_start_xmit = bnxt_vf_rep_xmit, .ndo_get_stats64 = bnxt_vf_rep_get_stats64, .ndo_setup_tc = bnxt_vf_rep_setup_tc, + .ndo_get_port_parent_id = bnxt_vf_rep_get_port_parent_id, .ndo_get_phys_port_name = bnxt_vf_rep_get_phys_port_name }; @@ -392,7 +389,6 @@ static void bnxt_vf_rep_netdev_init(struct bnxt *bp, struct bnxt_vf_rep *vf_rep, dev->netdev_ops = &bnxt_vf_rep_netdev_ops; dev->ethtool_ops = &bnxt_vf_rep_ethtool_ops; - SWITCHDEV_SET_OPS(dev, &bnxt_vf_rep_switchdev_ops); /* Just inherit all the featues of the parent PF as the VF-R * uses the RX/TX rings of the parent PF */ From c4bf24508e1f51cdcd5b810077e3cbcf21f4302c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Feb 2019 09:45:37 -0800 Subject: [PATCH 0711/1436] liquidio: Implement ndo_get_port_parent_id() Liquidio only supports SWITCHDEV_ATTR_ID_PORT_PARENT_ID, which makes it a great candidate to be converted to use the ndo_get_port_parent_id() NDO instead of implementing switchdev_port_attr_get(). Acked-by: Jiri Pirko Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- .../net/ethernet/cavium/liquidio/lio_main.c | 22 ++++------------ .../net/ethernet/cavium/liquidio/lio_vf_rep.c | 25 ++++++------------- 2 files changed, 12 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 3d24133e5e49..e97e6754ee09 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -21,7 +21,6 @@ #include #include #include -#include #include "liquidio_common.h" #include "octeon_droq.h" #include "octeon_iq.h" @@ -3184,7 +3183,8 @@ static const struct devlink_ops liquidio_devlink_ops = { }; static int -lio_pf_switchdev_attr_get(struct net_device *dev, struct switchdev_attr *attr) +liquidio_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) { struct lio *lio = GET_LIO(dev); struct octeon_device *oct = lio->oct_dev; @@ -3192,24 +3192,12 @@ lio_pf_switchdev_attr_get(struct net_device *dev, struct switchdev_attr *attr) if (oct->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) return -EOPNOTSUPP; - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = ETH_ALEN; - ether_addr_copy(attr->u.ppid.id, - (void *)&lio->linfo.hw_addr + 2); - break; - - default: - return -EOPNOTSUPP; - } + ppid->id_len = ETH_ALEN; + ether_addr_copy(ppid->id, (void *)&lio->linfo.hw_addr + 2); return 0; } -static const struct switchdev_ops lio_pf_switchdev_ops = { - .switchdev_port_attr_get = lio_pf_switchdev_attr_get, -}; - static int liquidio_get_vf_stats(struct net_device *netdev, int vfidx, struct ifla_vf_stats *vf_stats) { @@ -3259,6 +3247,7 @@ static const struct net_device_ops lionetdevops = { .ndo_set_vf_trust = liquidio_set_vf_trust, .ndo_set_vf_link_state = liquidio_set_vf_link_state, .ndo_get_vf_stats = liquidio_get_vf_stats, + .ndo_get_port_parent_id = liquidio_get_port_parent_id, }; /** \brief Entry point for the liquidio module @@ -3534,7 +3523,6 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) * netdev tasks. */ netdev->netdev_ops = &lionetdevops; - SWITCHDEV_SET_OPS(netdev, &lio_pf_switchdev_ops); retval = netif_set_real_num_rx_queues(netdev, num_oqueues); if (retval) { diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c index de61060721c4..f3f2e71431ac 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c @@ -25,7 +25,6 @@ #include "octeon_nic.h" #include "octeon_main.h" #include "octeon_network.h" -#include #include "lio_vf_rep.h" static int lio_vf_rep_open(struct net_device *ndev); @@ -38,6 +37,8 @@ static int lio_vf_rep_phys_port_name(struct net_device *dev, static void lio_vf_rep_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats64); static int lio_vf_rep_change_mtu(struct net_device *ndev, int new_mtu); +static int lio_vf_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid); static const struct net_device_ops lio_vf_rep_ndev_ops = { .ndo_open = lio_vf_rep_open, @@ -47,6 +48,7 @@ static const struct net_device_ops lio_vf_rep_ndev_ops = { .ndo_get_phys_port_name = lio_vf_rep_phys_port_name, .ndo_get_stats64 = lio_vf_rep_get_stats64, .ndo_change_mtu = lio_vf_rep_change_mtu, + .ndo_get_port_parent_id = lio_vf_get_port_parent_id, }; static int @@ -443,31 +445,19 @@ xmit_failed: return NETDEV_TX_OK; } -static int -lio_vf_rep_attr_get(struct net_device *dev, struct switchdev_attr *attr) +static int lio_vf_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) { struct lio_vf_rep_desc *vf_rep = netdev_priv(dev); struct net_device *parent_ndev = vf_rep->parent_ndev; struct lio *lio = GET_LIO(parent_ndev); - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = ETH_ALEN; - ether_addr_copy(attr->u.ppid.id, - (void *)&lio->linfo.hw_addr + 2); - break; - - default: - return -EOPNOTSUPP; - } + ppid->id_len = ETH_ALEN; + ether_addr_copy(ppid->id, (void *)&lio->linfo.hw_addr + 2); return 0; } -static const struct switchdev_ops lio_vf_rep_switchdev_ops = { - .switchdev_port_attr_get = lio_vf_rep_attr_get, -}; - static void lio_vf_rep_fetch_stats(struct work_struct *work) { @@ -524,7 +514,6 @@ lio_vf_rep_create(struct octeon_device *oct) ndev->min_mtu = LIO_MIN_MTU_SIZE; ndev->max_mtu = LIO_MAX_MTU_SIZE; ndev->netdev_ops = &lio_vf_rep_ndev_ops; - SWITCHDEV_SET_OPS(ndev, &lio_vf_rep_switchdev_ops); vf_rep = netdev_priv(ndev); memset(vf_rep, 0, sizeof(*vf_rep)); From 6dcfa2343802b1550530f3ba284cb0179cf2efc8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Feb 2019 09:45:38 -0800 Subject: [PATCH 0712/1436] net/mlx5e: Implement ndo_get_port_parent_id() mlx5e only supports SWITCHDEV_ATTR_ID_PORT_PARENT_ID, which makes it a great candidate to be converted to use the ndo_get_port_parent_id() NDO instead of implementing switchdev_port_attr_get(). Since mlx5e makes use of switchdev_port_parent_id() convert it to use netdev_port_same_parent_id(). Acked-by: Jiri Pirko Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 31 +++++++------------ .../net/ethernet/mellanox/mlx5/core/en_tc.c | 5 ++- 3 files changed, 14 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 47bb4eb894c2..9e71f4d41b82 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -25,7 +25,7 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, /* if the egress device isn't on the same HW e-switch or * it's a LAG device, use the uplink */ - if (!switchdev_port_same_parent_id(priv->netdev, dev) || + if (!netdev_port_same_parent_id(priv->netdev, dev) || dst_is_lag_dev) { *route_dev = uplink_dev; *out_dev = *route_dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 5d2e0c2f6624..0b1988b330f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -381,7 +381,8 @@ static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { .set_pauseparam = mlx5e_uplink_rep_set_pauseparam, }; -static int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) +static int mlx5e_rep_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -398,20 +399,14 @@ static int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) uplink_priv = netdev_priv(uplink_dev); } - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = ETH_ALEN; - if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) { - ether_addr_copy(attr->u.ppid.id, uplink_upper->dev_addr); - } else { - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; + ppid->id_len = ETH_ALEN; + if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) { + ether_addr_copy(ppid->id, uplink_upper->dev_addr); + } else { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; - ether_addr_copy(attr->u.ppid.id, rep->hw_id); - } - break; - default: - return -EOPNOTSUPP; + ether_addr_copy(ppid->id, rep->hw_id); } return 0; @@ -1284,10 +1279,6 @@ static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan return 0; } -static const struct switchdev_ops mlx5e_rep_switchdev_ops = { - .switchdev_port_attr_get = mlx5e_attr_get, -}; - static const struct net_device_ops mlx5e_netdev_ops_vf_rep = { .ndo_open = mlx5e_vf_rep_open, .ndo_stop = mlx5e_vf_rep_close, @@ -1298,6 +1289,7 @@ static const struct net_device_ops mlx5e_netdev_ops_vf_rep = { .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, .ndo_change_mtu = mlx5e_vf_rep_change_mtu, + .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id, }; static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { @@ -1319,6 +1311,7 @@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_get_vf_stats = mlx5e_get_vf_stats, .ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan, + .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id, }; bool mlx5e_eswitch_rep(struct net_device *netdev) @@ -1393,8 +1386,6 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->watchdog_timeo = 15 * HZ; - netdev->switchdev_ops = &mlx5e_rep_switchdev_ops; - netdev->features |= NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL; netdev->hw_features |= NETIF_F_HW_TC; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 83522c926d7c..85c5dd7fc2c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include @@ -2525,8 +2524,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - if (switchdev_port_same_parent_id(priv->netdev, - out_dev) || + if (netdev_port_same_parent_id(priv->netdev, + out_dev) || is_merged_eswitch_dev(priv, out_dev)) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); From 25ba8605147689462c750eb9f2e53f6b88bd953a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Feb 2019 09:45:39 -0800 Subject: [PATCH 0713/1436] mlxsw: Implement ndo_get_port_parent_id() mlxsw implements SWITCHDEV_ATTR_ID_PORT_PARENT_ID and we want to get rid of switchdev_ops eventually, ease that migration by implementing a ndo_get_port_parent_id() function which returns what switchdev_port_attr_get() would do. Acked-by: Jiri Pirko Signed-off-by: Florian Fainelli Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 13 +++++++ .../mellanox/mlxsw/spectrum_switchdev.c | 5 --- .../net/ethernet/mellanox/mlxsw/switchx2.c | 37 +++++++------------ 3 files changed, 26 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index a88169738b4a..8dd808b7f931 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1700,6 +1700,18 @@ static int mlxsw_sp_set_features(struct net_device *dev, mlxsw_sp_feature_hw_tc); } +static int mlxsw_sp_port_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + ppid->id_len = sizeof(mlxsw_sp->base_mac); + memcpy(&ppid->id, &mlxsw_sp->base_mac, ppid->id_len); + + return 0; +} + static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_open = mlxsw_sp_port_open, .ndo_stop = mlxsw_sp_port_stop, @@ -1715,6 +1727,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, .ndo_get_phys_port_name = mlxsw_sp_port_get_phys_port_name, .ndo_set_features = mlxsw_sp_set_features, + .ndo_get_port_parent_id = mlxsw_sp_port_get_port_parent_id, }; static void mlxsw_sp_port_get_drvinfo(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index a4a9fe992193..95e37de3e48f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -451,11 +451,6 @@ static int mlxsw_sp_port_attr_get(struct net_device *dev, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(mlxsw_sp->base_mac); - memcpy(&attr->u.ppid.id, &mlxsw_sp->base_mac, - attr->u.ppid.id_len); - break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: mlxsw_sp_port_bridge_flags_get(mlxsw_sp->bridge, attr->orig_dev, &attr->u.brport_flags); diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 2d4f213e154d..533fe6235b7c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -11,7 +11,6 @@ #include #include #include -#include #include "pci.h" #include "core.h" @@ -390,6 +389,18 @@ static int mlxsw_sx_port_get_phys_port_name(struct net_device *dev, char *name, name, len); } +static int mlxsw_sx_port_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) +{ + struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); + struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; + + ppid->id_len = sizeof(mlxsw_sx->hw_id); + memcpy(&ppid->id, &mlxsw_sx->hw_id, ppid->id_len); + + return 0; +} + static const struct net_device_ops mlxsw_sx_port_netdev_ops = { .ndo_open = mlxsw_sx_port_open, .ndo_stop = mlxsw_sx_port_stop, @@ -397,6 +408,7 @@ static const struct net_device_ops mlxsw_sx_port_netdev_ops = { .ndo_change_mtu = mlxsw_sx_port_change_mtu, .ndo_get_stats64 = mlxsw_sx_port_get_stats64, .ndo_get_phys_port_name = mlxsw_sx_port_get_phys_port_name, + .ndo_get_port_parent_id = mlxsw_sx_port_get_port_parent_id, }; static void mlxsw_sx_port_get_drvinfo(struct net_device *dev, @@ -901,28 +913,6 @@ static const struct ethtool_ops mlxsw_sx_port_ethtool_ops = { .set_link_ksettings = mlxsw_sx_port_set_link_ksettings, }; -static int mlxsw_sx_port_attr_get(struct net_device *dev, - struct switchdev_attr *attr) -{ - struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); - struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; - - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(mlxsw_sx->hw_id); - memcpy(&attr->u.ppid.id, &mlxsw_sx->hw_id, attr->u.ppid.id_len); - break; - default: - return -EOPNOTSUPP; - } - - return 0; -} - -static const struct switchdev_ops mlxsw_sx_port_switchdev_ops = { - .switchdev_port_attr_get = mlxsw_sx_port_attr_get, -}; - static int mlxsw_sx_hw_id_get(struct mlxsw_sx *mlxsw_sx) { char spad_pl[MLXSW_REG_SPAD_LEN] = {0}; @@ -1034,7 +1024,6 @@ static int __mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port, dev->netdev_ops = &mlxsw_sx_port_netdev_ops; dev->ethtool_ops = &mlxsw_sx_port_ethtool_ops; - dev->switchdev_ops = &mlxsw_sx_port_switchdev_ops; err = mlxsw_sx_port_dev_addr_get(mlxsw_sx_port); if (err) { From 751302c35ea0ae5aac9918e85199eb9f56a5afac Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Feb 2019 09:45:40 -0800 Subject: [PATCH 0714/1436] mscc: ocelot: Implement ndo_get_port_parent_id() Ocelot only supports SWITCHDEV_ATTR_ID_PORT_PARENT_ID as a valid switchdev attribute getter, convert it to use ndo_get_port_parent_id() and get rid of the switchdev_ops::switchdev_port_attr_get altogether. Acked-by: Jiri Pirko Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 33 ++++++++++++------------------ 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index c6a575eb0ff5..195306d05bcd 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -916,6 +916,18 @@ static int ocelot_set_features(struct net_device *dev, return 0; } +static int ocelot_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) +{ + struct ocelot_port *ocelot_port = netdev_priv(dev); + struct ocelot *ocelot = ocelot_port->ocelot; + + ppid->id_len = sizeof(ocelot->base_mac); + memcpy(&ppid->id, &ocelot->base_mac, ppid->id_len); + + return 0; +} + static const struct net_device_ops ocelot_port_netdev_ops = { .ndo_open = ocelot_port_open, .ndo_stop = ocelot_port_stop, @@ -930,6 +942,7 @@ static const struct net_device_ops ocelot_port_netdev_ops = { .ndo_vlan_rx_add_vid = ocelot_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ocelot_vlan_rx_kill_vid, .ndo_set_features = ocelot_set_features, + .ndo_get_port_parent_id = ocelot_get_port_parent_id, }; static void ocelot_get_strings(struct net_device *netdev, u32 sset, u8 *data) @@ -1013,25 +1026,6 @@ static const struct ethtool_ops ocelot_ethtool_ops = { .set_link_ksettings = phy_ethtool_set_link_ksettings, }; -static int ocelot_port_attr_get(struct net_device *dev, - struct switchdev_attr *attr) -{ - struct ocelot_port *ocelot_port = netdev_priv(dev); - struct ocelot *ocelot = ocelot_port->ocelot; - - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(ocelot->base_mac); - memcpy(&attr->u.ppid.id, &ocelot->base_mac, - attr->u.ppid.id_len); - break; - default: - return -EOPNOTSUPP; - } - - return 0; -} - static int ocelot_port_attr_stp_state_set(struct ocelot_port *ocelot_port, struct switchdev_trans *trans, u8 state) @@ -1331,7 +1325,6 @@ static int ocelot_port_obj_del(struct net_device *dev, } static const struct switchdev_ops ocelot_port_switchdev_ops = { - .switchdev_port_attr_get = ocelot_port_attr_get, .switchdev_port_attr_set = ocelot_port_attr_set, }; From a5084bb71fa4cc2a70a3a5775ccd0447dbcb32f3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Feb 2019 09:45:41 -0800 Subject: [PATCH 0715/1436] nfp: Implement ndo_get_port_parent_id() NFP only supports SWITCHDEV_ATTR_ID_PORT_PARENT_ID, which makes it a great candidate to be converted to use the ndo_get_port_parent_id() NDO instead of implementing switchdev_port_attr_get(). Since NFP uses switchdev_port_same_parent_id() convert it to use netdev_port_same_parent_id(). Acked-by: Jiri Pirko Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- .../ethernet/netronome/nfp/flower/action.c | 3 +-- .../ethernet/netronome/nfp/nfp_net_common.c | 4 +--- .../net/ethernet/netronome/nfp/nfp_net_repr.c | 4 +--- drivers/net/ethernet/netronome/nfp/nfp_port.c | 22 +++++-------------- drivers/net/ethernet/netronome/nfp/nfp_port.h | 4 +++- 5 files changed, 11 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 6b1d56b021ac..583e97c99e68 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -138,7 +137,7 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, if (nfp_netdev_is_nfp_repr(in_dev)) { /* Confirm ingress and egress are on same device. */ - if (!switchdev_port_same_parent_id(in_dev, out_dev)) + if (!netdev_port_same_parent_id(in_dev, out_dev)) return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 7d2d4241498f..776f6c07701b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -36,7 +36,6 @@ #include #include -#include #include #include "nfpcore/nfp_nsp.h" @@ -3531,6 +3530,7 @@ const struct net_device_ops nfp_net_netdev_ops = { .ndo_udp_tunnel_add = nfp_net_add_vxlan_port, .ndo_udp_tunnel_del = nfp_net_del_vxlan_port, .ndo_bpf = nfp_net_xdp, + .ndo_get_port_parent_id = nfp_port_get_port_parent_id, }; /** @@ -3815,8 +3815,6 @@ static void nfp_net_netdev_init(struct nfp_net *nn) netdev->netdev_ops = &nfp_net_netdev_ops; netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000); - SWITCHDEV_SET_OPS(netdev, &nfp_port_switchdev_ops); - /* MTU range: 68 - hw-specific max */ netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = nn->max_mtu; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index 69d7aebda09b..62839807e21e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -5,7 +5,6 @@ #include #include #include -#include #include "nfpcore/nfp_cpp.h" #include "nfpcore/nfp_nsp.h" @@ -273,6 +272,7 @@ const struct net_device_ops nfp_repr_netdev_ops = { .ndo_fix_features = nfp_repr_fix_features, .ndo_set_features = nfp_port_set_features, .ndo_set_mac_address = eth_mac_addr, + .ndo_get_port_parent_id = nfp_port_get_port_parent_id, }; void @@ -336,8 +336,6 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, netdev->max_mtu = pf_netdev->max_mtu; - SWITCHDEV_SET_OPS(netdev, &nfp_port_switchdev_ops); - /* Set features the lower device can support with representors */ if (repr_cap & NFP_NET_CFG_CTRL_LIVE_ADDR) netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.c b/drivers/net/ethernet/netronome/nfp/nfp_port.c index 86bc149ca231..7e90880fa46b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_port.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_port.c @@ -31,34 +31,22 @@ struct nfp_port *nfp_port_from_netdev(struct net_device *netdev) return NULL; } -static int -nfp_port_attr_get(struct net_device *netdev, struct switchdev_attr *attr) +int nfp_port_get_port_parent_id(struct net_device *netdev, + struct netdev_phys_item_id *ppid) { struct nfp_port *port; + const u8 *serial; port = nfp_port_from_netdev(netdev); if (!port) return -EOPNOTSUPP; - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: { - const u8 *serial; - /* N.B: attr->u.ppid.id is binary data */ - attr->u.ppid.id_len = nfp_cpp_serial(port->app->cpp, &serial); - memcpy(&attr->u.ppid.id, serial, attr->u.ppid.id_len); - break; - } - default: - return -EOPNOTSUPP; - } + ppid->id_len = nfp_cpp_serial(port->app->cpp, &serial); + memcpy(&ppid->id, serial, ppid->id_len); return 0; } -const struct switchdev_ops nfp_port_switchdev_ops = { - .switchdev_port_attr_get = nfp_port_attr_get, -}; - int nfp_port_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data) { diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h index b2479a2a49e5..90ae053f5c07 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_port.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h @@ -7,6 +7,7 @@ #include struct net_device; +struct netdev_phys_item_id; struct nfp_app; struct nfp_pf; struct nfp_port; @@ -90,7 +91,6 @@ struct nfp_port { }; extern const struct ethtool_ops nfp_port_ethtool_ops; -extern const struct switchdev_ops nfp_port_switchdev_ops; __printf(2, 3) u8 *nfp_pr_et(u8 *data, const char *fmt, ...); @@ -106,6 +106,8 @@ int nfp_port_set_features(struct net_device *netdev, netdev_features_t features); struct nfp_port *nfp_port_from_netdev(struct net_device *netdev); +int nfp_port_get_port_parent_id(struct net_device *netdev, + struct netdev_phys_item_id *ppid); struct nfp_port * nfp_port_from_id(struct nfp_pf *pf, enum nfp_port_type type, unsigned int id); struct nfp_eth_table_port *__nfp_port_get_eth_port(struct nfp_port *port); From 7026b8a6fbe1061863a25495d4dc1fa115c8708e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Feb 2019 09:45:42 -0800 Subject: [PATCH 0716/1436] rocker: Implement ndo_get_port_parent_id() mlxsw implements SWITCHDEV_ATTR_ID_PORT_PARENT_ID and we want to get rid of switchdev_ops eventually, ease that migration by implementing a ndo_get_port_parent_id() function which returns what switchdev_port_attr_get() would do. Acked-by: Jiri Pirko Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker_main.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 62a205eba9f7..66f72f8c46e5 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2026,6 +2026,18 @@ static void rocker_port_neigh_destroy(struct net_device *dev, err); } +static int rocker_port_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) +{ + const struct rocker_port *rocker_port = netdev_priv(dev); + const struct rocker *rocker = rocker_port->rocker; + + ppid->id_len = sizeof(rocker->hw.id); + memcpy(&ppid->id, &rocker->hw.id, ppid->id_len); + + return 0; +} + static const struct net_device_ops rocker_port_netdev_ops = { .ndo_open = rocker_port_open, .ndo_stop = rocker_port_stop, @@ -2035,6 +2047,7 @@ static const struct net_device_ops rocker_port_netdev_ops = { .ndo_get_phys_port_name = rocker_port_get_phys_port_name, .ndo_change_proto_down = rocker_port_change_proto_down, .ndo_neigh_destroy = rocker_port_neigh_destroy, + .ndo_get_port_parent_id = rocker_port_get_port_parent_id, }; /******************** @@ -2045,14 +2058,9 @@ static int rocker_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) { const struct rocker_port *rocker_port = netdev_priv(dev); - const struct rocker *rocker = rocker_port->rocker; int err = 0; switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(rocker->hw.id); - memcpy(&attr->u.ppid.id, &rocker->hw.id, attr->u.ppid.id_len); - break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: err = rocker_world_port_attr_bridge_flags_get(rocker_port, &attr->u.brport_flags); From e58df56c8564b701491236381fc1e75202952736 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Feb 2019 09:45:43 -0800 Subject: [PATCH 0717/1436] netdevsim: Implement ndo_get_port_parent_id() netdevsim only supports SWITCHDEV_ATTR_ID_PORT_PARENT_ID, which makes it a great candidate to be converted to use the ndo_get_port_parent_id() NDO instead of implementing switchdev_port_attr_get(). Acked-by: Jiri Pirko Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/netdevsim/netdev.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 8d8e2b3f263e..75a50b59cb8f 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -22,7 +22,6 @@ #include #include #include -#include #include "netdevsim.h" @@ -148,26 +147,16 @@ static struct device_type nsim_dev_type = { .release = nsim_dev_release, }; -static int -nsim_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) +static int nsim_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) { struct netdevsim *ns = netdev_priv(dev); - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(ns->sdev->switch_id); - memcpy(&attr->u.ppid.id, &ns->sdev->switch_id, - attr->u.ppid.id_len); - return 0; - default: - return -EOPNOTSUPP; - } + ppid->id_len = sizeof(ns->sdev->switch_id); + memcpy(&ppid->id, &ns->sdev->switch_id, ppid->id_len); + return 0; } -static const struct switchdev_ops nsim_switchdev_ops = { - .switchdev_port_attr_get = nsim_port_attr_get, -}; - static int nsim_init(struct net_device *dev) { char sdev_ddir_name[10], sdev_link_name[32]; @@ -214,7 +203,6 @@ static int nsim_init(struct net_device *dev) goto err_bpf_uninit; SET_NETDEV_DEV(dev, &ns->dev); - SWITCHDEV_SET_OPS(dev, &nsim_switchdev_ops); err = nsim_devlink_setup(ns); if (err) @@ -493,6 +481,7 @@ static const struct net_device_ops nsim_netdev_ops = { .ndo_setup_tc = nsim_setup_tc, .ndo_set_features = nsim_set_features, .ndo_bpf = nsim_bpf, + .ndo_get_port_parent_id = nsim_get_port_parent_id, }; static void nsim_setup(struct net_device *dev) From 7870a7bd570bc1bd43622c7c6828931d3ea98b8c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Feb 2019 09:45:44 -0800 Subject: [PATCH 0718/1436] staging: fsl-dpaa2: ethsw: Implement ndo_get_port_parent_id() ethsw implements SWITCHDEV_ATTR_ID_PORT_PARENT_ID and we want to get rid of switchdev_ops eventually, ease that migration by implementing a ndo_get_port_parent_id() function which returns what switchdev_port_attr_get() would do. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c index daabaceeea52..e559f4c25cf7 100644 --- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c +++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c @@ -505,6 +505,17 @@ static netdev_tx_t port_dropframe(struct sk_buff *skb, return NETDEV_TX_OK; } +static int swdev_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) +{ + struct ethsw_port_priv *port_priv = netdev_priv(dev); + + ppid->id_len = 1; + ppid->id[0] = port_priv->ethsw_data->dev_id; + + return 0; +} + static const struct net_device_ops ethsw_port_ops = { .ndo_open = port_open, .ndo_stop = port_stop, @@ -515,6 +526,7 @@ static const struct net_device_ops ethsw_port_ops = { .ndo_get_offload_stats = port_get_offload_stats, .ndo_start_xmit = port_dropframe, + .ndo_get_port_parent_id = swdev_get_port_parent_id, }; static void ethsw_links_state_update(struct ethsw_core *ethsw) @@ -634,10 +646,6 @@ static int swdev_port_attr_get(struct net_device *netdev, struct ethsw_port_priv *port_priv = netdev_priv(netdev); switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = 1; - attr->u.ppid.id[0] = port_priv->ethsw_data->dev_id; - break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: attr->u.brport_flags = (port_priv->ethsw_data->learning ? BR_LEARNING : 0) | From 929d6c145ec4cf11482519cfd0ebf17c2fce92db Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Feb 2019 09:45:45 -0800 Subject: [PATCH 0719/1436] net: dsa: Implement ndo_get_port_parent_id() DSA implements SWITCHDEV_ATTR_ID_PORT_PARENT_ID and we want to get rid of switchdev_ops eventually, ease that migration by implementing a ndo_get_port_parent_id() function which returns what switchdev_port_attr_get() would do. Acked-by: Jiri Pirko Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 91de3a663226..70395a0ae52e 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -362,18 +362,23 @@ static int dsa_slave_port_obj_del(struct net_device *dev, return err; } -static int dsa_slave_port_attr_get(struct net_device *dev, - struct switchdev_attr *attr) +static int dsa_slave_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) { struct dsa_port *dp = dsa_slave_to_port(dev); struct dsa_switch *ds = dp->ds; struct dsa_switch_tree *dst = ds->dst; + ppid->id_len = sizeof(dst->index); + memcpy(&ppid->id, &dst->index, ppid->id_len); + + return 0; +} + +static int dsa_slave_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr) +{ switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(dst->index); - memcpy(&attr->u.ppid.id, &dst->index, attr->u.ppid.id_len); - break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: attr->u.brport_flags_support = 0; break; @@ -1046,6 +1051,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_get_phys_port_name = dsa_slave_get_phys_port_name, .ndo_setup_tc = dsa_slave_setup_tc, .ndo_get_stats64 = dsa_slave_get_stats64, + .ndo_get_port_parent_id = dsa_slave_get_port_parent_id, }; static const struct switchdev_ops dsa_slave_switchdev_ops = { From bccb30254a4a02ee370dd23b2afbd25d7a78bc34 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Feb 2019 09:45:46 -0800 Subject: [PATCH 0720/1436] net: Get rid of SWITCHDEV_ATTR_ID_PORT_PARENT_ID Now that we have a dedicated NDO for getting a port's parent ID, get rid of SWITCHDEV_ATTR_ID_PORT_PARENT_ID and convert all callers to use the NDO exclusively. This is a preliminary change to getting rid of switchdev_ops eventually. Signed-off-by: Florian Fainelli Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/switchdev.h | 11 ----------- net/bridge/br_switchdev.c | 14 +++----------- net/core/net-sysfs.c | 17 +++-------------- net/core/rtnetlink.c | 16 +++------------- net/ipv4/ipmr.c | 19 +++++-------------- net/switchdev/switchdev.c | 20 -------------------- 6 files changed, 14 insertions(+), 83 deletions(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 63843ae5dc81..5e87b54c5dc5 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -43,7 +43,6 @@ static inline bool switchdev_trans_ph_commit(struct switchdev_trans *trans) enum switchdev_attr_id { SWITCHDEV_ATTR_ID_UNDEFINED, - SWITCHDEV_ATTR_ID_PORT_PARENT_ID, SWITCHDEV_ATTR_ID_PORT_STP_STATE, SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT, @@ -61,7 +60,6 @@ struct switchdev_attr { void *complete_priv; void (*complete)(struct net_device *dev, int err, void *priv); union { - struct netdev_phys_item_id ppid; /* PORT_PARENT_ID */ u8 stp_state; /* PORT_STP_STATE */ unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */ unsigned long brport_flags_support; /* PORT_BRIDGE_FLAGS_SUPPORT */ @@ -208,9 +206,6 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); -bool switchdev_port_same_parent_id(struct net_device *a, - struct net_device *b); - int switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, bool (*check_cb)(const struct net_device *dev), @@ -295,12 +290,6 @@ call_switchdev_blocking_notifiers(unsigned long val, return NOTIFY_DONE; } -static inline bool switchdev_port_same_parent_id(struct net_device *a, - struct net_device *b) -{ - return false; -} - static inline int switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 06b0ae44585f..db9e8ab96d48 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -14,8 +14,7 @@ static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) /* dev is yet to be added to the port list. */ list_for_each_entry(p, &br->port_list, list) { - if (netdev_port_same_parent_id(dev, p->dev) || - switchdev_port_same_parent_id(dev, p->dev)) + if (netdev_port_same_parent_id(dev, p->dev)) return p->offload_fwd_mark; } @@ -24,19 +23,12 @@ static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) int nbp_switchdev_mark_set(struct net_bridge_port *p) { - const struct net_device_ops *ops = p->dev->netdev_ops; - struct switchdev_attr attr = { - .orig_dev = p->dev, - .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - }; + struct netdev_phys_item_id ppid = { }; int err; ASSERT_RTNL(); - if (ops->ndo_get_port_parent_id) - err = dev_get_port_parent_id(p->dev, &attr.u.ppid, true); - else - err = switchdev_port_attr_get(p->dev, &attr); + err = dev_get_port_parent_id(p->dev, &ppid, true); if (err) { if (err == -EOPNOTSUPP) return 0; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 4eace9f1dcf9..7c5061123ead 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -495,27 +494,17 @@ static ssize_t phys_switch_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); - const struct net_device_ops *ops = netdev->netdev_ops; ssize_t ret = -EINVAL; if (!rtnl_trylock()) return restart_syscall(); if (dev_isalive(netdev)) { - struct switchdev_attr attr = { - .orig_dev = netdev, - .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - .flags = SWITCHDEV_F_NO_RECURSE, - }; + struct netdev_phys_item_id ppid = { }; - if (ops->ndo_get_port_parent_id) - ret = dev_get_port_parent_id(netdev, &attr.u.ppid, - false); - else - ret = switchdev_port_attr_get(netdev, &attr); + ret = dev_get_port_parent_id(netdev, &ppid, false); if (!ret) - ret = sprintf(buf, "%*phN\n", attr.u.ppid.id_len, - attr.u.ppid.id); + ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id); } rtnl_unlock(); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 90dd02c1f561..a51cab95ba64 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -46,7 +46,6 @@ #include #include -#include #include #include #include @@ -1146,26 +1145,17 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev) static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { - const struct net_device_ops *ops = dev->netdev_ops; + struct netdev_phys_item_id ppid = { }; int err; - struct switchdev_attr attr = { - .orig_dev = dev, - .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - .flags = SWITCHDEV_F_NO_RECURSE, - }; - if (ops->ndo_get_port_parent_id) - err = dev_get_port_parent_id(dev, &attr.u.ppid, false); - else - err = switchdev_port_attr_get(dev, &attr); + err = dev_get_port_parent_id(dev, &ppid, false); if (err) { if (err == -EOPNOTSUPP) return 0; return err; } - if (nla_put(skb, IFLA_PHYS_SWITCH_ID, attr.u.ppid.id_len, - attr.u.ppid.id)) + if (nla_put(skb, IFLA_PHYS_SWITCH_ID, ppid.id_len, ppid.id)) return -EMSGSIZE; return 0; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c71bcc42d66d..e536970557dd 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -67,7 +67,6 @@ #include #include #include -#include #include @@ -837,11 +836,8 @@ static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache, static int vif_add(struct net *net, struct mr_table *mrt, struct vifctl *vifc, int mrtsock) { - const struct net_device_ops *ops; + struct netdev_phys_item_id ppid = { }; int vifi = vifc->vifc_vifi; - struct switchdev_attr attr = { - .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - }; struct vif_device *v = &mrt->vif_table[vifi]; struct net_device *dev; struct in_device *in_dev; @@ -920,15 +916,10 @@ static int vif_add(struct net *net, struct mr_table *mrt, vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0), (VIFF_TUNNEL | VIFF_REGISTER)); - attr.orig_dev = dev; - ops = dev->netdev_ops; - if (ops->ndo_get_port_parent_id && - !dev_get_port_parent_id(dev, &attr.u.ppid, true)) { - memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len); - v->dev_parent_id.id_len = attr.u.ppid.id_len; - } else if (!switchdev_port_attr_get(dev, &attr)) { - memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len); - v->dev_parent_id.id_len = attr.u.ppid.id_len; + err = dev_get_port_parent_id(dev, &ppid, true); + if (err == 0) { + memcpy(v->dev_parent_id.id, ppid.id, ppid.id_len); + v->dev_parent_id.id_len = ppid.id_len; } else { v->dev_parent_id.id_len = 0; } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index cd78253de31d..7e1357db33d7 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -592,26 +592,6 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev, } EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers); -bool switchdev_port_same_parent_id(struct net_device *a, - struct net_device *b) -{ - struct switchdev_attr a_attr = { - .orig_dev = a, - .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - }; - struct switchdev_attr b_attr = { - .orig_dev = b, - .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - }; - - if (switchdev_port_attr_get(a, &a_attr) || - switchdev_port_attr_get(b, &b_attr)) - return false; - - return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); -} -EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id); - static int __switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, bool (*check_cb)(const struct net_device *dev), From 2810c3b252b902979fd7fe074123101cfc313fae Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Feb 2019 19:42:01 +0000 Subject: [PATCH 0721/1436] mlxsw: spectrum_router: Offload blackhole routes Create a new FIB entry type for blackhole routes and set it in case the type of the notified route is 'RTN_BLACKHOLE'. Program such routes with a discard action and mark them as offloaded since the device is dropping the packets instead of the kernel. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 27 +++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 230e1f6e192b..6754061d9b72 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -364,6 +364,7 @@ enum mlxsw_sp_fib_entry_type { MLXSW_SP_FIB_ENTRY_TYPE_REMOTE, MLXSW_SP_FIB_ENTRY_TYPE_LOCAL, MLXSW_SP_FIB_ENTRY_TYPE_TRAP, + MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE, /* This is a special case of local delivery, where a packet should be * decapsulated on reception. Note that there is no corresponding ENCAP, @@ -3928,6 +3929,7 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) return !!nh_group->adj_index_valid; case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: return !!nh_group->nh_rif; + case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE: case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP: return true; @@ -3963,6 +3965,7 @@ mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) int i; if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || + fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE || fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP || fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) { nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD; @@ -4004,7 +4007,8 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, common); - if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) { + if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || + fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) { list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD; return; @@ -4172,6 +4176,19 @@ static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } +static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + enum mlxsw_reg_ralue_trap_action trap_action; + char ralue_pl[MLXSW_REG_RALUE_LEN]; + + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR; + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + static int mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, @@ -4211,6 +4228,8 @@ static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_TRAP: return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE: + return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, fib_entry, op); @@ -4279,8 +4298,10 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, case RTN_BROADCAST: fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; return 0; + case RTN_BLACKHOLE: + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE; + return 0; case RTN_UNREACHABLE: /* fall through */ - case RTN_BLACKHOLE: /* fall through */ case RTN_PROHIBIT: /* Packets hitting these routes need to be trapped, but * can do so with a lower priority than packets directed @@ -5229,6 +5250,8 @@ static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, */ if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + else if (rt->fib6_type == RTN_BLACKHOLE) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE; else if (rt->fib6_flags & RTF_REJECT) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt)) From a98232a16497ee642d049c519e69285c0cfb61a0 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Feb 2019 19:42:03 +0000 Subject: [PATCH 0722/1436] selftests: mlxsw: Add a test for blackhole routes Use a simple topology consisting of two hosts directly connected to a router. Make sure IPv4/IPv6 ping works and then add blackhole routes. Test that ping fails and that the routes are marked as offloaded. Use a simple tc filter to test that packets were dropped by the ASIC and not trapped to the CPU. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/blackhole_routes.sh | 200 ++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh new file mode 100755 index 000000000000..5ba5bef44d5b --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh @@ -0,0 +1,200 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test that blackhole routes are marked as offloaded and that packets hitting +# them are dropped by the ASIC and not by the kernel. +# +# +---------------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/24 | +# | | 2001:db8:1::1/64 | +# | | | +# | | default via 192.0.2.2 | +# | | default via 2001:db8:1::2 | +# +----|----------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | + $rp1 | +# | 192.0.2.2/24 | +# | 2001:db8:1::2/64 | +# | | +# | 2001:db8:2::2/64 | +# | 198.51.100.2/24 | +# | + $rp2 | +# | | | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------+ +# | | default via 198.51.100.2 | +# | | default via 2001:db8:2::2 | +# | | | +# | | 2001:db8:2::1/64 | +# | | 198.51.100.1/24 | +# | + $h2 | +# | H2 (vrf) | +# +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + blackhole_ipv4 + blackhole_ipv6 +" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2 + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 + ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2 + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64 +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + + tc qdisc add dev $rp1 clsact + + __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 + __addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64 +} + +router_destroy() +{ + __addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64 + __addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64 + + tc qdisc del dev $rp1 clsact + + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +ping_ipv4() +{ + ping_test $h1 198.51.100.1 ": h1->h2" +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::1 ": h1->h2" +} + +blackhole_ipv4() +{ + # Transmit packets from H1 to H2 and make sure they are dropped by the + # ASIC and not by the kernel + RET=0 + + ip -4 route add blackhole 198.51.100.0/30 + tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ + skip_hw dst_ip 198.51.100.1 src_ip 192.0.2.1 ip_proto icmp \ + action pass + + ip -4 route show 198.51.100.0/30 | grep -q offload + check_err $? "route not marked as offloaded when should" + + ping_do $h1 198.51.100.1 + check_fail $? "ping passed when should not" + + tc_check_packets "dev $rp1 ingress" 101 0 + check_err $? "packets trapped and not dropped by ASIC" + + log_test "IPv4 blackhole route" + + tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower + ip -4 route del blackhole 198.51.100.0/30 +} + +blackhole_ipv6() +{ + RET=0 + + ip -6 route add blackhole 2001:db8:2::/120 + tc filter add dev $rp1 ingress protocol ipv6 pref 1 handle 101 flower \ + skip_hw dst_ip 2001:db8:2::1 src_ip 2001:db8:1::1 \ + ip_proto icmpv6 action pass + + ip -6 route show 2001:db8:2::/120 | grep -q offload + check_err $? "route not marked as offloaded when should" + + ping6_do $h1 2001:db8:2::1 + check_fail $? "ping passed when should not" + + tc_check_packets "dev $rp1 ingress" 101 0 + check_err $? "packets trapped and not dropped by ASIC" + + log_test "IPv6 blackhole route" + + tc filter del dev $rp1 ingress protocol ipv6 pref 1 handle 101 flower + ip -6 route del blackhole 2001:db8:2::/120 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + router_create +} + +cleanup() +{ + pre_cleanup + + router_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS From 645efa84f6c7566ea863ed37a8b3247247f72e02 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 5 Feb 2019 05:09:00 -0500 Subject: [PATCH 0723/1436] dm: add memory barrier before waitqueue_active Block core changes to switch bio-based IO accounting to be percpu had a side-effect of altering DM core to now rely on calling waitqueue_active (in both bio-based and request-based) to check if another task is in dm_wait_for_completion(). A memory barrier is needed before calling waitqueue_active(). DM core doesn't piggyback on a preceding memory barrier so it must explicitly use its own. For more details on why using waitqueue_active() without a preceding barrier is unsafe, please see the comment before the waitqueue_active() definition in include/linux/wait.h. Add the missing memory barrier by switching to using wq_has_sleeper(). Fixes: 6f75723190d8 ("dm: remove the pending IO accounting") Fixes: c4576aed8d85 ("dm: fix request-based dm's use of dm_wait_for_completion") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-rq.c | 2 +- drivers/md/dm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 4eb5f8c56535..a20531e5f3b4 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -131,7 +131,7 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig) static void rq_completed(struct mapped_device *md) { /* nudge anyone waiting on suspend queue */ - if (unlikely(waitqueue_active(&md->wait))) + if (unlikely(wq_has_sleeper(&md->wait))) wake_up(&md->wait); /* diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 2b53c3841b53..a0972a9301de 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -699,7 +699,7 @@ static void end_io_acct(struct dm_io *io) true, duration, &io->stats_aux); /* nudge anyone waiting on suspend queue */ - if (unlikely(waitqueue_active(&md->wait))) + if (unlikely(wq_has_sleeper(&md->wait))) wake_up(&md->wait); } From fa8db4948f5224dae33a0e783e7dec682e145f88 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 5 Feb 2019 17:07:58 -0500 Subject: [PATCH 0724/1436] dm: don't use bio_trim() afterall bio_trim() has an early return, which makes it _not_ idempotent, if the offset is 0 and the bio's bi_size already matches the requested size. Prior to DM, all users of bio_trim() were fine with this. But DM has exposed the fact that bio_trim()'s early return is incompatible with a cloned bio whose integrity payload must be trimmed via bio_integrity_trim(). Fix this by reverting DM back to doing the equivalent of bio_trim() but in an idempotent manner (so bio_integrity_trim is always performed). Follow-on work is needed to assess what benefit bio_trim()'s early return is providing to its existing callers. Reported-by: Milan Broz Fixes: 57c36519e4b94 ("dm: fix clone_bio() to trigger blk_recount_segments()") Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a0972a9301de..515e6af9bed2 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1336,7 +1336,11 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio, return r; } - bio_trim(clone, sector - clone->bi_iter.bi_sector, len); + bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector)); + clone->bi_iter.bi_size = to_bytes(len); + + if (bio_integrity(bio)) + bio_integrity_trim(clone); return 0; } From f57a98e1b71357713e44c57268a53d9c803f0626 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Wed, 6 Feb 2019 17:17:27 -0600 Subject: [PATCH 0725/1436] PCI: Work around Synopsys duplicate Device ID (HAPS USB3, NXP i.MX) There are at least four different parts with the same Vendor and Device ID ([16c3:abcd]): 1) Synopsys HAPS USB3 controller 2) Synopsys PCIe Root Port in Freescale/NXP i.MX6Q (reported by Lucas) 3) Synopsys PCIe Root Port in Freescale/NXP i.MX6QP (reported by Lukas) 4) Synopsys PCIe Root Port in Freescale/NXP i.MX7D (reported by Trent) The HAPS USB3 controller has a Class Code of PCI_CLASS_SERIAL_USB_XHCI, which means the XHCI driver would normally claim it. Previously, quirk_synopsys_haps() changed the Class Code of all [16c3:abcd] devices, including the Root Ports, to PCI_CLASS_SERIAL_USB_DEVICE to prevent the XHCI driver from claiming them so dwc3-haps can claim them instead. Changing the Class Code of the Root Ports prevents the PCI core from handling them as bridges, so devices below them don't work. Restrict the quirk so it only changes the Class Code for devices that start with the PCI_CLASS_SERIAL_USB_XHCI Class Code, leaving the Root Ports alone. Fixes: 03e6742584af ("PCI: Override Synopsys USB 3.x HAPS device class") Reported-by: Lukas F. Hartmann Reported-by: Trent Piepho Reported-by: Lucas Stach Signed-off-by: Thinh Nguyen [bhelgaas: changelog] Signed-off-by: Bjorn Helgaas --- drivers/pci/quirks.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index b0a413f3f7ca..e2a879e93d86 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -639,8 +639,9 @@ static void quirk_synopsys_haps(struct pci_dev *pdev) break; } } -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SYNOPSYS, PCI_ANY_ID, - quirk_synopsys_haps); +DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_SYNOPSYS, PCI_ANY_ID, + PCI_CLASS_SERIAL_USB_XHCI, 0, + quirk_synopsys_haps); /* * Let's make the southbridge information explicit instead of having to From 660492bcf4a7561b5fdc13be0ae0b0c0a8c120be Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Wed, 6 Feb 2019 14:43:42 -0800 Subject: [PATCH 0726/1436] qed: Fix EQ full firmware assert. When slowpath messages are sent with high rate, the resulting events can lead to a FW assert in case they are not handled fast enough (Event Queue Full assert). Attempt to send queued slowpath messages only after the newly evacuated entries in the EQ ring are indicated to FW. Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_sp.h | 1 + drivers/net/ethernet/qlogic/qed/qed_spq.c | 15 +++++++-------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index 4179c9013fc6..96ab77ae6af5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -382,6 +382,7 @@ void qed_consq_setup(struct qed_hwfn *p_hwfn); * @param p_hwfn */ void qed_consq_free(struct qed_hwfn *p_hwfn); +int qed_spq_pend_post(struct qed_hwfn *p_hwfn); /** * @file diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index eb88bbc6b193..ba64ff9bedbd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -397,6 +397,11 @@ int qed_eq_completion(struct qed_hwfn *p_hwfn, void *cookie) qed_eq_prod_update(p_hwfn, qed_chain_get_prod_idx(p_chain)); + /* Attempt to post pending requests */ + spin_lock_bh(&p_hwfn->p_spq->lock); + rc = qed_spq_pend_post(p_hwfn); + spin_unlock_bh(&p_hwfn->p_spq->lock); + return rc; } @@ -767,7 +772,7 @@ static int qed_spq_post_list(struct qed_hwfn *p_hwfn, return 0; } -static int qed_spq_pend_post(struct qed_hwfn *p_hwfn) +int qed_spq_pend_post(struct qed_hwfn *p_hwfn) { struct qed_spq *p_spq = p_hwfn->p_spq; struct qed_spq_entry *p_ent = NULL; @@ -905,7 +910,6 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, struct qed_spq_entry *p_ent = NULL; struct qed_spq_entry *tmp; struct qed_spq_entry *found = NULL; - int rc; if (!p_hwfn) return -EINVAL; @@ -963,12 +967,7 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, */ qed_spq_return_entry(p_hwfn, found); - /* Attempt to post pending requests */ - spin_lock_bh(&p_spq->lock); - rc = qed_spq_pend_post(p_hwfn); - spin_unlock_bh(&p_spq->lock); - - return rc; + return 0; } int qed_consq_alloc(struct qed_hwfn *p_hwfn) From ebd873a31cae778c2379959ab382b34ba3b73549 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 6 Feb 2019 14:43:43 -0800 Subject: [PATCH 0727/1436] qed: Assign UFP TC value to vlan priority in UFP mode. In the case of Unified Fabric Port (UFP) mode, switch provides the traffic class (TC) value to be used for the traffic. Configure hardware to use this TC value for vlan priority. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_sp_commands.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 888274fa208b..5a495fda9e9d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -604,6 +604,9 @@ int qed_sp_pf_update_stag(struct qed_hwfn *p_hwfn) p_ent->ramrod.pf_update.update_mf_vlan_flag = true; p_ent->ramrod.pf_update.mf_vlan = cpu_to_le16(p_hwfn->hw_info.ovlan); + if (test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits)) + p_ent->ramrod.pf_update.mf_vlan |= + cpu_to_le16(((u16)p_hwfn->ufp_info.tc << 13)); return qed_spq_post(p_hwfn, p_ent, NULL); } From fb1faab74ddef9ec2d841d54e5d0912a097b3abe Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 6 Feb 2019 14:43:44 -0800 Subject: [PATCH 0728/1436] qed: Consider TX tcs while deriving the max num_queues for PF. Max supported queues is derived incorrectly in the case of multi-CoS. Need to consider TCs while calculating num_queues for PF. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index e68ca83ae915..64ac95ca4df2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -2216,7 +2216,7 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, u16 num_queues = 0; /* Since the feature controls only queue-zones, - * make sure we have the contexts [rx, tx, xdp] to + * make sure we have the contexts [rx, xdp, tcs] to * match. */ for_each_hwfn(cdev, i) { @@ -2226,7 +2226,8 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, u16 cids; cids = hwfn->pf_params.eth_pf_params.num_cons; - num_queues += min_t(u16, l2_queues, cids / 3); + cids /= (2 + info->num_tc); + num_queues += min_t(u16, l2_queues, cids); } /* queues might theoretically be >256, but interrupts' From 0aa4febb420d91df5b56b1864a2465765da85f4b Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 6 Feb 2019 14:43:45 -0800 Subject: [PATCH 0729/1436] qede: Fix system crash on configuring channels. Under heavy traffic load, when changing number of channels via ethtool (ethtool -L) which will cause interface to be reloaded, it was observed that some packets gets transmitted on old TX channel/queue id which doesn't really exist after the channel configuration leads to system crash. Add a safeguard in the driver by validating queue id through ndo_select_queue() which is called before the ndo_start_xmit(). Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede.h | 3 +++ drivers/net/ethernet/qlogic/qede/qede_fp.c | 13 +++++++++++++ drivers/net/ethernet/qlogic/qede/qede_main.c | 3 +++ 3 files changed, 19 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 613249d1e967..cd404269de53 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -494,6 +494,9 @@ struct qede_reload_args { /* Datapath functions definition */ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev); +u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev, + select_queue_fallback_t fallback); netdev_features_t qede_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index bdf816fe5a16..31b046e24565 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -1695,6 +1695,19 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev) return NETDEV_TX_OK; } +u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev, + select_queue_fallback_t fallback) +{ + struct qede_dev *edev = netdev_priv(dev); + int total_txq; + + total_txq = QEDE_TSS_COUNT(edev) * edev->dev_info.num_tc; + + return QEDE_TSS_COUNT(edev) ? + fallback(dev, skb, NULL) % total_txq : 0; +} + /* 8B udp header + 8B base tunnel header + 32B option length */ #define QEDE_MAX_TUN_HDR_LEN 48 diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 5a74fcbdbc2b..9790f26d17c4 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -631,6 +631,7 @@ static const struct net_device_ops qede_netdev_ops = { .ndo_open = qede_open, .ndo_stop = qede_close, .ndo_start_xmit = qede_start_xmit, + .ndo_select_queue = qede_select_queue, .ndo_set_rx_mode = qede_set_rx_mode, .ndo_set_mac_address = qede_set_mac_addr, .ndo_validate_addr = eth_validate_addr, @@ -666,6 +667,7 @@ static const struct net_device_ops qede_netdev_vf_ops = { .ndo_open = qede_open, .ndo_stop = qede_close, .ndo_start_xmit = qede_start_xmit, + .ndo_select_queue = qede_select_queue, .ndo_set_rx_mode = qede_set_rx_mode, .ndo_set_mac_address = qede_set_mac_addr, .ndo_validate_addr = eth_validate_addr, @@ -684,6 +686,7 @@ static const struct net_device_ops qede_netdev_vf_xdp_ops = { .ndo_open = qede_open, .ndo_stop = qede_close, .ndo_start_xmit = qede_start_xmit, + .ndo_select_queue = qede_select_queue, .ndo_set_rx_mode = qede_set_rx_mode, .ndo_set_mac_address = qede_set_mac_addr, .ndo_validate_addr = eth_validate_addr, From 8c8502532f0064d0f04335bfe0f80d9431866a73 Mon Sep 17 00:00:00 2001 From: Rahul Verma Date: Wed, 6 Feb 2019 14:43:46 -0800 Subject: [PATCH 0730/1436] qed: Change verbosity for coalescing message. Fix unnecessary logging of message in an expected default case where coalescing value read (via ethtool -c) migh not be valid unless they are configured explicitly in the hardware using ethtool -C. Signed-off-by: Rahul Verma Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 64ac95ca4df2..58be1c4c6668 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -2871,7 +2871,8 @@ static int qed_get_coalesce(struct qed_dev *cdev, u16 *coal, void *handle) p_hwfn = p_cid->p_owner; rc = qed_get_queue_coalesce(p_hwfn, coal, handle); if (rc) - DP_NOTICE(p_hwfn, "Unable to read queue coalescing\n"); + DP_VERBOSE(cdev, QED_MSG_DEBUG, + "Unable to read queue coalescing\n"); return rc; } From 42dbcd6b25cd6fe8b47cf838f5da8178d1cd926c Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Wed, 6 Feb 2019 14:43:47 -0800 Subject: [PATCH 0731/1436] qed*: Advance drivers version to 8.37.0.20 Version update for qed/qede modules. Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 2 +- drivers/net/ethernet/qlogic/qede/qede.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 24a90163775e..2d8a77cc156b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -53,7 +53,7 @@ extern const struct qed_common_ops qed_common_ops_pass; #define QED_MAJOR_VERSION 8 -#define QED_MINOR_VERSION 33 +#define QED_MINOR_VERSION 37 #define QED_REVISION_VERSION 0 #define QED_ENGINEERING_VERSION 20 diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index cd404269de53..730997b13747 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -56,7 +56,7 @@ #include #define QEDE_MAJOR_VERSION 8 -#define QEDE_MINOR_VERSION 33 +#define QEDE_MINOR_VERSION 37 #define QEDE_REVISION_VERSION 0 #define QEDE_ENGINEERING_VERSION 20 #define DRV_MODULE_VERSION __stringify(QEDE_MAJOR_VERSION) "." \ From 6707f74be8621ae067d2cf1c4485900e2742c20f Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 28 Jan 2019 15:28:05 -0800 Subject: [PATCH 0732/1436] net/mlx5e: Update hw flows when encap source mac changed When we offload tc filters to hardware, hardware flows can be updated when mac of encap destination ip is changed. But we ignore one case, that the mac of local encap ip can be changed too, so we should also update them. To fix it, add route_dev in mlx5e_encap_entry struct to save the local encap netdevice, and when mac changed, kernel will flush all the neighbour on the netdevice and send NETEVENT_NEIGH_UPDATE event. The mlx5 driver will delete the flows and add them when neighbour available again. Fixes: 232c001398ae ("net/mlx5e: Add support to neighbour update flow") Cc: Hadar Hen Zion Signed-off-by: Tonghao Zhang Reviewed-by: Or Gerlitz Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 4 ++++ drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 1 + 3 files changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index a3750af074a4..f3c7ab6faea5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -256,6 +256,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, e->m_neigh.family = n->ops->family; memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); e->out_dev = out_dev; + e->route_dev = route_dev; /* It's important to add the neigh to the hash table before checking * the neigh validity state. So if we'll get a notification, in case the @@ -369,6 +370,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, e->m_neigh.family = n->ops->family; memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); e->out_dev = out_dev; + e->route_dev = route_dev; /* It's importent to add the neigh to the hash table before checking * the neigh validity state. So if we'll get a notification, in case the diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index f2573c2d2b5c..ef9e472daffb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -596,6 +596,10 @@ static void mlx5e_rep_update_flows(struct mlx5e_priv *priv, if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) { ether_addr_copy(e->h_dest, ha); ether_addr_copy(eth->h_dest, ha); + /* Update the encap source mac, in case that we delete + * the flows when encap source mac changed. + */ + ether_addr_copy(eth->h_source, e->route_dev->dev_addr); mlx5e_tc_encap_flows_add(priv, e); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index edd722824697..36eafc877e6b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -148,6 +148,7 @@ struct mlx5e_encap_entry { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ struct net_device *out_dev; + struct net_device *route_dev; int tunnel_type; int tunnel_hlen; int reformat_type; From 218d05ce326f9e1b40a56085431fa1068b43d5d9 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 28 Jan 2019 15:28:06 -0800 Subject: [PATCH 0733/1436] net/mlx5e: Don't overwrite pedit action when multiple pedit used In some case, we may use multiple pedit actions to modify packets. The command shown as below: the last pedit action is effective. $ tc filter add dev netdev_rep parent ffff: protocol ip prio 1 \ flower skip_sw ip_proto icmp dst_ip 3.3.3.3 \ action pedit ex munge ip dst set 192.168.1.100 pipe \ action pedit ex munge eth src set 00:00:00:00:00:01 pipe \ action pedit ex munge eth dst set 00:00:00:00:00:02 pipe \ action csum ip pipe \ action tunnel_key set src_ip 1.1.1.100 dst_ip 1.1.1.200 dst_port 4789 id 100 \ action mirred egress redirect dev vxlan0 To fix it, we add max_mod_hdr_actions to mlx5e_tc_flow_parse_attr struction, max_mod_hdr_actions will store the max pedit action number we support and num_mod_hdr_actions indicates how many pedit action we used, and store all pedit action to mod_hdr_actions. Fixes: d79b6df6b10a ("net/mlx5e: Add parsing of TC pedit actions to HW format") Cc: Or Gerlitz Signed-off-by: Tonghao Zhang Reviewed-by: Or Gerlitz Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 1c3c9fa26b55..b5c1b039375a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -128,6 +128,7 @@ struct mlx5e_tc_flow_parse_attr { struct net_device *filter_dev; struct mlx5_flow_spec spec; int num_mod_hdr_actions; + int max_mod_hdr_actions; void *mod_hdr_actions; int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; }; @@ -1936,9 +1937,9 @@ static struct mlx5_fields fields[] = { OFFLOAD(UDP_DPORT, 2, udp.dest, 0), }; -/* On input attr->num_mod_hdr_actions tells how many HW actions can be parsed at - * max from the SW pedit action. On success, it says how many HW actions were - * actually parsed. +/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at + * max from the SW pedit action. On success, attr->num_mod_hdr_actions + * says how many HW actions were actually parsed. */ static int offload_pedit_fields(struct pedit_headers *masks, struct pedit_headers *vals, @@ -1962,9 +1963,11 @@ static int offload_pedit_fields(struct pedit_headers *masks, add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD]; action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); - action = parse_attr->mod_hdr_actions; - max_actions = parse_attr->num_mod_hdr_actions; - nactions = 0; + action = parse_attr->mod_hdr_actions + + parse_attr->num_mod_hdr_actions * action_size; + + max_actions = parse_attr->max_mod_hdr_actions; + nactions = parse_attr->num_mod_hdr_actions; for (i = 0; i < ARRAY_SIZE(fields); i++) { f = &fields[i]; @@ -2075,7 +2078,7 @@ static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, if (!parse_attr->mod_hdr_actions) return -ENOMEM; - parse_attr->num_mod_hdr_actions = max_actions; + parse_attr->max_mod_hdr_actions = max_actions; return 0; } @@ -2121,9 +2124,11 @@ static int parse_tc_pedit_action(struct mlx5e_priv *priv, goto out_err; } - err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr); - if (err) - goto out_err; + if (!parse_attr->mod_hdr_actions) { + err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr); + if (err) + goto out_err; + } err = offload_pedit_fields(masks, vals, parse_attr, extack); if (err < 0) From bcd3d9d9246f35c62c5d652d20569a76dbdf4bd1 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 5 Feb 2019 12:07:28 +0100 Subject: [PATCH 0734/1436] net: dsa: mv88e6xxx: Prevent suspend to RAM On one hand, the mv88e6xxx driver has a work queue called in loop which will attempt register accesses after MDIO bus suspension, that entirely freezes the platform during suspend. On the other hand, the DSA core is not ready yet to support suspend to RAM operation because so far there is no way to recover reliably the switch configuration. To avoid the kernel to freeze when suspending with a switch driven by the mv88e6xxx driver, we choose to prevent the driver suspension and in the same way, the whole platform. Signed-off-by: Miquel Raynal Reviewed-by: Andrew Lunn Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 8dca2c949e73..669a7f13fdce 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -4764,6 +4764,21 @@ static const void *pdata_device_get_match_data(struct device *dev) return NULL; } +/* There is no suspend to RAM support at DSA level yet, the switch configuration + * would be lost after a power cycle so prevent it to be suspended. + */ +static int __maybe_unused mv88e6xxx_suspend(struct device *dev) +{ + return -EOPNOTSUPP; +} + +static int __maybe_unused mv88e6xxx_resume(struct device *dev) +{ + return 0; +} + +static SIMPLE_DEV_PM_OPS(mv88e6xxx_pm_ops, mv88e6xxx_suspend, mv88e6xxx_resume); + static int mv88e6xxx_probe(struct mdio_device *mdiodev) { struct dsa_mv88e6xxx_pdata *pdata = mdiodev->dev.platform_data; @@ -4948,6 +4963,7 @@ static struct mdio_driver mv88e6xxx_driver = { .mdiodrv.driver = { .name = "mv88e6085", .of_match_table = mv88e6xxx_of_match, + .pm = &mv88e6xxx_pm_ops, }, }; From 7d8e249f393a1aca30bde876e932401d2c78bc9f Mon Sep 17 00:00:00 2001 From: Ilias Apalodimas Date: Tue, 5 Feb 2019 14:15:20 +0200 Subject: [PATCH 0735/1436] net: stmmac: fix ptp timestamping on Rx on gmac4 The current driver only enables Pdelay_Req and Pdelay_Resp when HWTSTAMP_FILTER_PTP_V2_EVENT, HWTSTAMP_FILTER_PTP_V1_L4_EVENT or HWTSTAMP_FILTER_PTP_V2_L4_EVENT is requested. This results in ptp sync on slave mode to report 'received SYNC without timestamp' when using ptp4l. Although the hardware can support Sync, Pdelay_Req and Pdelay_resp by setting bit14 annd bits 17/16 to 01 this leaves Delay_Req timestamps out. Fix this by enabling all event and general messages timestamps. This includes SYNC, Follow_Up, Delay_Req, Delay_Resp, Pdelay_Req, Pdelay_Resp and Pdelay_Resp_Follow_Up messages. Signed-off-by: Ilias Apalodimas Acked-by: Jose Abreu Tested-by: Alexandre TORGUE Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 25 +++++++------------ .../net/ethernet/stmicro/stmmac/stmmac_ptp.h | 9 +++++-- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 517555bcfa44..effff171d250 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -597,12 +597,13 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: /* PTP v1, UDP, any kind of event packet */ config.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; - /* take time stamp for all event messages */ - if (xmac) - snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1; - else - snap_type_sel = PTP_TCR_SNAPTYPSEL_1; - + /* 'xmac' hardware can support Sync, Pdelay_Req and + * Pdelay_resp by setting bit14 and bits17/16 to 01 + * This leaves Delay_Req timestamps out. + * Enable all events *and* general purpose message + * timestamping + */ + snap_type_sel = PTP_TCR_SNAPTYPSEL_1; ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; break; @@ -633,10 +634,7 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; ptp_v2 = PTP_TCR_TSVER2ENA; /* take time stamp for all event messages */ - if (xmac) - snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1; - else - snap_type_sel = PTP_TCR_SNAPTYPSEL_1; + snap_type_sel = PTP_TCR_SNAPTYPSEL_1; ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; @@ -669,12 +667,7 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) /* PTP v2/802.AS1 any layer, any kind of event packet */ config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; ptp_v2 = PTP_TCR_TSVER2ENA; - /* take time stamp for all event messages */ - if (xmac) - snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1; - else - snap_type_sel = PTP_TCR_SNAPTYPSEL_1; - + snap_type_sel = PTP_TCR_SNAPTYPSEL_1; ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; ptp_over_ethernet = PTP_TCR_TSIPENA; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h index ecccf895fd7e..e852821289cf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h @@ -59,9 +59,14 @@ #define PTP_TCR_TSEVNTENA BIT(14) /* Enable Snapshot for Messages Relevant to Master */ #define PTP_TCR_TSMSTRENA BIT(15) -/* Select PTP packets for Taking Snapshots */ +/* Select PTP packets for Taking Snapshots + * On gmac4 specifically: + * Enable SYNC, Pdelay_Req, Pdelay_Resp when TSEVNTENA is enabled. + * or + * Enable SYNC, Follow_Up, Delay_Req, Delay_Resp, Pdelay_Req, Pdelay_Resp, + * Pdelay_Resp_Follow_Up if TSEVNTENA is disabled + */ #define PTP_TCR_SNAPTYPSEL_1 BIT(16) -#define PTP_GMAC4_TCR_SNAPTYPSEL_1 GENMASK(17, 16) /* Enable MAC address for PTP Frame Filtering */ #define PTP_TCR_TSENMACADDR BIT(18) From 3703a395012b509cb2fea50958cdbe4a6a895eed Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Wed, 6 Feb 2019 00:01:04 +0800 Subject: [PATCH 0736/1436] net: defxx: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in dfx_xmt_done() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Reviewed-by: Maciej W. Rozycki Signed-off-by: David S. Miller --- drivers/net/fddi/defxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c index 38ac8ef41f5f..56b7791911bf 100644 --- a/drivers/net/fddi/defxx.c +++ b/drivers/net/fddi/defxx.c @@ -3512,7 +3512,7 @@ static int dfx_xmt_done(DFX_board_t *bp) bp->descr_block_virt->xmt_data[comp].long_1, p_xmt_drv_descr->p_skb->len, DMA_TO_DEVICE); - dev_kfree_skb_irq(p_xmt_drv_descr->p_skb); + dev_consume_skb_irq(p_xmt_drv_descr->p_skb); /* * Move to start of next packet by updating completion index From 62d1a31cfb5d9291f4dd90c1b0ef5c22f3c235bf Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Wed, 6 Feb 2019 00:03:51 +0800 Subject: [PATCH 0737/1436] net: tulip: de2104x: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in de_tx() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/de2104x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index 13430f75496c..f1a2da15dd0a 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -585,7 +585,7 @@ static void de_tx (struct de_private *de) netif_dbg(de, tx_done, de->dev, "tx done, slot %d\n", tx_tail); } - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); } next: From 96d26503e1deba5d12e0e4b473336479cc0965af Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Wed, 6 Feb 2019 00:07:03 +0800 Subject: [PATCH 0738/1436] net: dscc4: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in dscc4_tx_irq() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/wan/dscc4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c index c0b0f525c87c..27decf8ae840 100644 --- a/drivers/net/wan/dscc4.c +++ b/drivers/net/wan/dscc4.c @@ -1575,7 +1575,7 @@ try: dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; } - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); dpriv->tx_skbuff[cur] = NULL; ++dpriv->tx_dirty; } else { From bb6a702c6d512b16a7c16a38468a14eda72f44b5 Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Wed, 6 Feb 2019 00:09:41 +0800 Subject: [PATCH 0739/1436] net: smsc: epic100: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in epic_tx() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/epic100.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index 15c62c160953..be47d864f8b9 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -1037,7 +1037,7 @@ static void epic_tx(struct net_device *dev, struct epic_private *ep) skb = ep->tx_skbuff[entry]; pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr, skb->len, PCI_DMA_TODEVICE); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); ep->tx_skbuff[entry] = NULL; } From c50e964b76903b03a1516a04463c2faf945ec140 Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Wed, 6 Feb 2019 00:12:04 +0800 Subject: [PATCH 0740/1436] net: fec_mpc52xx: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in mpc52xx_fec_tx_interrupt() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_mpc52xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index b90bab72efdb..c1968b3ecec8 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -369,7 +369,7 @@ static irqreturn_t mpc52xx_fec_tx_interrupt(int irq, void *dev_id) dma_unmap_single(dev->dev.parent, bd->skb_pa, skb->len, DMA_TO_DEVICE); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); } spin_unlock(&priv->lock); From 7c3850adbcccc2c6c9e7ab23a7dcbc4926ee5b96 Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Wed, 6 Feb 2019 00:14:51 +0800 Subject: [PATCH 0741/1436] net: fsl_ucc_hdlc: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in hdlc_tx_done() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/wan/fsl_ucc_hdlc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 66d889d54e58..a08f04c3f644 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -482,7 +482,7 @@ static int hdlc_tx_done(struct ucc_hdlc_private *priv) memset(priv->tx_buffer + (be32_to_cpu(bd->buf) - priv->dma_tx_addr), 0, skb->len); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); priv->tx_skbuff[priv->skb_dirtytx] = NULL; priv->skb_dirtytx = From 98fcd70b642dd843f80c92eeb6f8c07886876999 Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Wed, 6 Feb 2019 00:19:44 +0800 Subject: [PATCH 0742/1436] net: sun: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/cassini.c | 2 +- drivers/net/ethernet/sun/sunbmac.c | 2 +- drivers/net/ethernet/sun/sunhme.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 7ec4eb74fe21..6fc05c106afc 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -1898,7 +1898,7 @@ static inline void cas_tx_ringN(struct cas *cp, int ring, int limit) cp->net_stats[ring].tx_packets++; cp->net_stats[ring].tx_bytes += skb->len; spin_unlock(&cp->stat_lock[ring]); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); } cp->tx_old[ring] = entry; diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index 720b7ac77f3b..e9b757b03b56 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -781,7 +781,7 @@ static void bigmac_tx(struct bigmac *bp) DTX(("skb(%p) ", skb)); bp->tx_skbs[elem] = NULL; - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); elem = NEXT_TX(elem); } diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index ff641cf30a4e..d007dfeba5c3 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -1962,7 +1962,7 @@ static void happy_meal_tx(struct happy_meal *hp) this = &txbase[elem]; } - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); dev->stats.tx_packets++; } hp->tx_old = elem; From f4ad1a6154e305dbcde20395a6961658d1ce032b Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Wed, 6 Feb 2019 00:21:31 +0800 Subject: [PATCH 0743/1436] net: tehuti: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in bdx_tx_cleanup() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/tehuti/tehuti.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index dc966ddb6d81..b24c11187017 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -1739,7 +1739,7 @@ static void bdx_tx_cleanup(struct bdx_priv *priv) tx_level -= db->rptr->len; /* '-' koz len is negative */ /* now should come skb pointer - free it */ - dev_kfree_skb_irq(db->rptr->addr.skb); + dev_consume_skb_irq(db->rptr->addr.skb); bdx_tx_db_inc_rptr(db); } From e0cbbc613175e1c67bd82721230692047923d845 Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Wed, 6 Feb 2019 00:22:54 +0800 Subject: [PATCH 0744/1436] net: via-velocity: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in velocity_free_tx_buf() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/via/via-velocity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index 82412691ee66..27f6cf140845 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -1740,7 +1740,7 @@ static void velocity_free_tx_buf(struct velocity_info *vptr, dma_unmap_single(vptr->dev, tdinfo->skb_dma[i], le16_to_cpu(pktlen), DMA_TO_DEVICE); } - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); tdinfo->skb = NULL; } From d2901b0739a42a5669ae33356206591a6bd3a64c Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Wed, 6 Feb 2019 00:25:54 +0800 Subject: [PATCH 0745/1436] net: broadcom: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in sbdma_tx_process() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/sb1250-mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index 5db9f4158e62..134ae2862efa 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -1288,7 +1288,7 @@ static void sbdma_tx_process(struct sbmac_softc *sc, struct sbmacdma *d, * for transmits, we just free buffers. */ - dev_kfree_skb_irq(sb); + dev_consume_skb_irq(sb); /* * .. and advance to the next buffer. From a6e11f6bbd29a92d1357c2f3033a70fb1b767432 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 5 Feb 2019 20:41:37 +0100 Subject: [PATCH 0746/1436] net: phy: improve genphy_c45_read_link Let's make genphy_c45_read_link behave the same as genphy_update_link and set phydev->link in the function directly. This allows to simplify the callers. In addition don't check further devices once we detect that at least one device reports link as down. v2: - remove an unused variable Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 2 -- drivers/net/phy/phy-c45.c | 15 ++++++--------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 5531f9cabc63..296a537cdfcb 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -457,8 +457,6 @@ static int mv3310_read_status(struct phy_device *phydev) if (val < 0) return val; - phydev->link = val > 0 ? 1 : 0; - val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1); if (val < 0) return val; diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 519866679c91..c92d0fb7ec4f 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -121,7 +121,7 @@ EXPORT_SYMBOL_GPL(genphy_c45_aneg_done); * @mmd_mask: MMDs to read status from * * Read the link status from the specified MMDs, and if they all indicate - * that the link is up, return positive. If an error is encountered, + * that the link is up, set phydev->link to 1. If an error is encountered, * a negative errno will be returned, otherwise zero. */ int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask) @@ -129,7 +129,7 @@ int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask) int val, devad; bool link = true; - while (mmd_mask) { + while (mmd_mask && link) { devad = __ffs(mmd_mask); mmd_mask &= ~BIT(devad); @@ -145,7 +145,9 @@ int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask) link = false; } - return link; + phydev->link = link; + + return 0; } EXPORT_SYMBOL_GPL(genphy_c45_read_link); @@ -265,7 +267,6 @@ EXPORT_SYMBOL_GPL(gen10g_config_aneg); int gen10g_read_status(struct phy_device *phydev) { u32 mmd_mask = phydev->c45_ids.devices_in_package; - int ret; /* For now just lie and say it's 10G all the time */ phydev->speed = SPEED_10000; @@ -274,11 +275,7 @@ int gen10g_read_status(struct phy_device *phydev) /* Avoid reading the vendor MMDs */ mmd_mask &= ~(BIT(MDIO_MMD_VEND1) | BIT(MDIO_MMD_VEND2)); - ret = genphy_c45_read_link(phydev, mmd_mask); - - phydev->link = ret > 0 ? 1 : 0; - - return 0; + return genphy_c45_read_link(phydev, mmd_mask); } EXPORT_SYMBOL_GPL(gen10g_read_status); From e8c32c32b48c2e889704d8ca0872f92eb027838e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 5 Feb 2019 14:47:21 -0600 Subject: [PATCH 0747/1436] net: Don't default Cavium PTP driver to 'y' 8c56df372bc1 ("net: add support for Cavium PTP coprocessor") added the Cavium PTP coprocessor driver and enabled it by default. Remove the "default y" because the driver only applies to Cavium ThunderX processors. Fixes: 8c56df372bc1 ("net: add support for Cavium PTP coprocessor") Signed-off-by: Bjorn Helgaas Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig index 5f03199a3acf..05f4a3b21e29 100644 --- a/drivers/net/ethernet/cavium/Kconfig +++ b/drivers/net/ethernet/cavium/Kconfig @@ -54,7 +54,6 @@ config CAVIUM_PTP tristate "Cavium PTP coprocessor as PTP clock" depends on 64BIT && PCI imply PTP_1588_CLOCK - default y ---help--- This driver adds support for the Precision Time Protocol Clocks and Timestamping coprocessor (PTP) found on Cavium processors. From 22087d659212d2401f4caa602fd57160ff1e6222 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Tue, 5 Feb 2019 22:20:09 +0100 Subject: [PATCH 0748/1436] net: emac: remove IBM_EMAC_RX_SKB_HEADROOM The EMAC driver had a custom IBM_EMAC_RX_SKB_HEADROOM Kconfig option that reserved additional skb headroom for RX. This patch removes the option and migrates the code to use napi_alloc_skb() and netdev_alloc_skb_ip_align() in its place. Signed-off-by: Christian Lamparter Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/emac/Kconfig | 12 ----- drivers/net/ethernet/ibm/emac/core.c | 64 ++++++++++++++++++--------- drivers/net/ethernet/ibm/emac/core.h | 10 ++--- 3 files changed, 47 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/ibm/emac/Kconfig b/drivers/net/ethernet/ibm/emac/Kconfig index 90d49191beb3..eacf7e141fdc 100644 --- a/drivers/net/ethernet/ibm/emac/Kconfig +++ b/drivers/net/ethernet/ibm/emac/Kconfig @@ -28,18 +28,6 @@ config IBM_EMAC_RX_COPY_THRESHOLD depends on IBM_EMAC default "256" -config IBM_EMAC_RX_SKB_HEADROOM - int "Additional RX skb headroom (bytes)" - depends on IBM_EMAC - default "0" - help - Additional receive skb headroom. Note, that driver - will always reserve at least 2 bytes to make IP header - aligned, so usually there is no need to add any additional - headroom. - - If unsure, set to 0. - config IBM_EMAC_DEBUG bool "Debugging" depends on IBM_EMAC diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 209255495bc9..3c2a5759844a 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -1071,7 +1071,9 @@ static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu) /* Second pass, allocate new skbs */ for (i = 0; i < NUM_RX_BUFF; ++i) { - struct sk_buff *skb = alloc_skb(rx_skb_size, GFP_ATOMIC); + struct sk_buff *skb; + + skb = netdev_alloc_skb_ip_align(dev->ndev, rx_skb_size); if (!skb) { ret = -ENOMEM; goto oom; @@ -1080,10 +1082,10 @@ static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu) BUG_ON(!dev->rx_skb[i]); dev_kfree_skb(dev->rx_skb[i]); - skb_reserve(skb, EMAC_RX_SKB_HEADROOM + 2); dev->rx_desc[i].data_ptr = - dma_map_single(&dev->ofdev->dev, skb->data - 2, rx_sync_size, - DMA_FROM_DEVICE) + 2; + dma_map_single(&dev->ofdev->dev, skb->data - NET_IP_ALIGN, + rx_sync_size, DMA_FROM_DEVICE) + + NET_IP_ALIGN; dev->rx_skb[i] = skb; } skip: @@ -1174,20 +1176,18 @@ static void emac_clean_rx_ring(struct emac_instance *dev) } } -static inline int emac_alloc_rx_skb(struct emac_instance *dev, int slot, - gfp_t flags) +static int +__emac_prepare_rx_skb(struct sk_buff *skb, struct emac_instance *dev, int slot) { - struct sk_buff *skb = alloc_skb(dev->rx_skb_size, flags); if (unlikely(!skb)) return -ENOMEM; dev->rx_skb[slot] = skb; dev->rx_desc[slot].data_len = 0; - skb_reserve(skb, EMAC_RX_SKB_HEADROOM + 2); dev->rx_desc[slot].data_ptr = - dma_map_single(&dev->ofdev->dev, skb->data - 2, dev->rx_sync_size, - DMA_FROM_DEVICE) + 2; + dma_map_single(&dev->ofdev->dev, skb->data - NET_IP_ALIGN, + dev->rx_sync_size, DMA_FROM_DEVICE) + NET_IP_ALIGN; wmb(); dev->rx_desc[slot].ctrl = MAL_RX_CTRL_EMPTY | (slot == (NUM_RX_BUFF - 1) ? MAL_RX_CTRL_WRAP : 0); @@ -1195,6 +1195,27 @@ static inline int emac_alloc_rx_skb(struct emac_instance *dev, int slot, return 0; } +static int +emac_alloc_rx_skb(struct emac_instance *dev, int slot) +{ + struct sk_buff *skb; + + skb = __netdev_alloc_skb_ip_align(dev->ndev, dev->rx_skb_size, + GFP_KERNEL); + + return __emac_prepare_rx_skb(skb, dev, slot); +} + +static int +emac_alloc_rx_skb_napi(struct emac_instance *dev, int slot) +{ + struct sk_buff *skb; + + skb = napi_alloc_skb(&dev->mal->napi, dev->rx_skb_size); + + return __emac_prepare_rx_skb(skb, dev, slot); +} + static void emac_print_link_status(struct emac_instance *dev) { if (netif_carrier_ok(dev->ndev)) @@ -1225,7 +1246,7 @@ static int emac_open(struct net_device *ndev) /* Allocate RX ring */ for (i = 0; i < NUM_RX_BUFF; ++i) - if (emac_alloc_rx_skb(dev, i, GFP_KERNEL)) { + if (emac_alloc_rx_skb(dev, i)) { printk(KERN_ERR "%s: failed to allocate RX ring\n", ndev->name); goto oom; @@ -1660,8 +1681,9 @@ static inline void emac_recycle_rx_skb(struct emac_instance *dev, int slot, DBG2(dev, "recycle %d %d" NL, slot, len); if (len) - dma_map_single(&dev->ofdev->dev, skb->data - 2, - EMAC_DMA_ALIGN(len + 2), DMA_FROM_DEVICE); + dma_map_single(&dev->ofdev->dev, skb->data - NET_IP_ALIGN, + SKB_DATA_ALIGN(len + NET_IP_ALIGN), + DMA_FROM_DEVICE); dev->rx_desc[slot].data_len = 0; wmb(); @@ -1713,7 +1735,7 @@ static inline int emac_rx_sg_append(struct emac_instance *dev, int slot) int len = dev->rx_desc[slot].data_len; int tot_len = dev->rx_sg_skb->len + len; - if (unlikely(tot_len + 2 > dev->rx_skb_size)) { + if (unlikely(tot_len + NET_IP_ALIGN > dev->rx_skb_size)) { ++dev->estats.rx_dropped_mtu; dev_kfree_skb(dev->rx_sg_skb); dev->rx_sg_skb = NULL; @@ -1769,16 +1791,18 @@ static int emac_poll_rx(void *param, int budget) } if (len && len < EMAC_RX_COPY_THRESH) { - struct sk_buff *copy_skb = - alloc_skb(len + EMAC_RX_SKB_HEADROOM + 2, GFP_ATOMIC); + struct sk_buff *copy_skb; + + copy_skb = napi_alloc_skb(&dev->mal->napi, len); if (unlikely(!copy_skb)) goto oom; - skb_reserve(copy_skb, EMAC_RX_SKB_HEADROOM + 2); - memcpy(copy_skb->data - 2, skb->data - 2, len + 2); + memcpy(copy_skb->data - NET_IP_ALIGN, + skb->data - NET_IP_ALIGN, + len + NET_IP_ALIGN); emac_recycle_rx_skb(dev, slot, len); skb = copy_skb; - } else if (unlikely(emac_alloc_rx_skb(dev, slot, GFP_ATOMIC))) + } else if (unlikely(emac_alloc_rx_skb_napi(dev, slot))) goto oom; skb_put(skb, len); @@ -1799,7 +1823,7 @@ static int emac_poll_rx(void *param, int budget) sg: if (ctrl & MAL_RX_CTRL_FIRST) { BUG_ON(dev->rx_sg_skb); - if (unlikely(emac_alloc_rx_skb(dev, slot, GFP_ATOMIC))) { + if (unlikely(emac_alloc_rx_skb_napi(dev, slot))) { DBG(dev, "rx OOM %d" NL, slot); ++dev->estats.rx_dropped_oom; emac_recycle_rx_skb(dev, slot, 0); diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h index 84caa4a3fc52..187689cd8212 100644 --- a/drivers/net/ethernet/ibm/emac/core.h +++ b/drivers/net/ethernet/ibm/emac/core.h @@ -68,22 +68,18 @@ static inline int emac_rx_size(int mtu) return mal_rx_size(ETH_DATA_LEN + EMAC_MTU_OVERHEAD); } -#define EMAC_DMA_ALIGN(x) ALIGN((x), dma_get_cache_alignment()) - -#define EMAC_RX_SKB_HEADROOM \ - EMAC_DMA_ALIGN(CONFIG_IBM_EMAC_RX_SKB_HEADROOM) - /* Size of RX skb for the given MTU */ static inline int emac_rx_skb_size(int mtu) { int size = max(mtu + EMAC_MTU_OVERHEAD, emac_rx_size(mtu)); - return EMAC_DMA_ALIGN(size + 2) + EMAC_RX_SKB_HEADROOM; + + return SKB_DATA_ALIGN(size + NET_IP_ALIGN) + NET_SKB_PAD; } /* RX DMA sync size */ static inline int emac_rx_sync_size(int mtu) { - return EMAC_DMA_ALIGN(emac_rx_size(mtu) + 2); + return SKB_DATA_ALIGN(emac_rx_size(mtu) + NET_IP_ALIGN); } /* Driver statistcs is split into two parts to make it more cache friendly: From 614db26954ff08fa8e92b04100e31ebc04b817cf Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 29 Jan 2019 15:15:18 +0100 Subject: [PATCH 0749/1436] Revert "s390/pci: remove bit_lock usage in interrupt handler" This reverts commit 9594ca6b87d9f11e9f14ac31581e0e5d79a8e839. With the handle_simple_irq irq_flow_handler it must be ensured to not call generic_handle_irq with the same IRQ number on 2 CPUs at the same time (interrupts are floating on s390). Contrary to my initial investigation the irq_desc's lock usage in handle_simple_irq does not ensure this. Thus re-introduce the bit- lock usage in s390's pci handler. Reported-by: Ursula Braun Reported-by: Alexander Schmidt Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index a966d7bfac57..4266a4de3160 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -382,7 +382,9 @@ static void zpci_irq_handler(struct airq_struct *airq) if (ai == -1UL) break; inc_irq_stat(IRQIO_MSI); + airq_iv_lock(aibv, ai); generic_handle_irq(airq_iv_get_data(aibv, ai)); + airq_iv_unlock(aibv, ai); } } } @@ -408,7 +410,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) zdev->aisb = aisb; /* Create adapter interrupt vector */ - zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA); + zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK); if (!zdev->aibv) return -ENOMEM; From ce5cbf53496bfebe1b7478a532820373342e8302 Mon Sep 17 00:00:00 2001 From: Erik Schmauss Date: Tue, 5 Feb 2019 16:14:00 -0800 Subject: [PATCH 0750/1436] ACPI: Set debug output flags independent of ACPICA There was a divergence between Linux and ACPICA on the definition of ACPI_DEBUG_DEFAULT. This divergence was solved by taking ACPICA's definition in 4c1379d7bb42. After resolving the divergence, it was clear that Linux users wanted to use their old set of debug flags. This change fixes the divergence by setting these debug flags during acpi_early_init() rather than during global variable initialization in acpixf.h (owned by ACPICA). Fixes: 4c1379d7bb42 ("ACPICA: Debug output: Add option to display method/object evaluation") Reported-by: Michael J Ruhl Reported-by: Alex Gagniuc Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- drivers/acpi/bus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 5c093ce01bcd..147f6c7ea59c 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1029,6 +1029,9 @@ void __init acpi_early_init(void) acpi_permanent_mmap = true; + /* Initialize debug output. Linux does not use ACPICA defaults */ + acpi_dbg_level = ACPI_LV_INFO | ACPI_LV_REPAIR; + #ifdef CONFIG_X86 /* * If the machine falls into the DMI check table, From e761a927bc9a7ee6ceb7c4f63d5922dbced87f0d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 20 Dec 2018 12:44:05 +0100 Subject: [PATCH 0751/1436] KVM: arm/arm64: Reset the VCPU without preemption and vcpu state loaded We have two ways to reset a vcpu: - either through VCPU_INIT - or through a PSCI_ON call The first one is easy to reason about. The second one is implemented in a more bizarre way, as it is the vcpu that handles PSCI_ON that resets the vcpu that is being powered-on. As we need to turn the logic around and have the target vcpu to reset itself, we must take some preliminary steps. Resetting the VCPU state modifies the system register state in memory, but this may interact with vcpu_load/vcpu_put if running with preemption disabled, which in turn may lead to corrupted system register state. Address this by disabling preemption and doing put/load if required around the reset logic. Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm64/kvm/reset.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index b72a3dd56204..f21a2a575939 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -105,16 +105,33 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) * This function finds the right table above and sets the registers on * the virtual CPU struct to their architecturally defined reset * values. + * + * Note: This function can be called from two paths: The KVM_ARM_VCPU_INIT + * ioctl or as part of handling a request issued by another VCPU in the PSCI + * handling code. In the first case, the VCPU will not be loaded, and in the + * second case the VCPU will be loaded. Because this function operates purely + * on the memory-backed valus of system registers, we want to do a full put if + * we were loaded (handling a request) and load the values back at the end of + * the function. Otherwise we leave the state alone. In both cases, we + * disable preemption around the vcpu reset as we would otherwise race with + * preempt notifiers which also call put/load. */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { const struct kvm_regs *cpu_reset; + int ret = -EINVAL; + bool loaded; + + preempt_disable(); + loaded = (vcpu->cpu != -1); + if (loaded) + kvm_arch_vcpu_put(vcpu); switch (vcpu->arch.target) { default: if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { if (!cpu_has_32bit_el1()) - return -EINVAL; + goto out; cpu_reset = &default_regs_reset32; } else { cpu_reset = &default_regs_reset; @@ -137,7 +154,12 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG; /* Reset timer */ - return kvm_timer_vcpu_reset(vcpu); + ret = kvm_timer_vcpu_reset(vcpu); +out: + if (loaded) + kvm_arch_vcpu_load(vcpu, smp_processor_id()); + preempt_enable(); + return ret; } void kvm_set_ipa_limit(void) From 358b28f09f0ab074d781df72b8a671edb1547789 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 20 Dec 2018 11:36:07 +0000 Subject: [PATCH 0752/1436] arm/arm64: KVM: Allow a VCPU to fully reset itself The current kvm_psci_vcpu_on implementation will directly try to manipulate the state of the VCPU to reset it. However, since this is not done on the thread that runs the VCPU, we can end up in a strangely corrupted state when the source and target VCPUs are running at the same time. Fix this by factoring out all reset logic from the PSCI implementation and forwarding the required information along with a request to the target VCPU. Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 10 +++++++++ arch/arm/kvm/reset.c | 24 +++++++++++++++++++++ arch/arm64/include/asm/kvm_host.h | 11 ++++++++++ arch/arm64/kvm/reset.c | 24 +++++++++++++++++++++ virt/kvm/arm/arm.c | 10 +++++++++ virt/kvm/arm/psci.c | 36 ++++++++++++++----------------- 6 files changed, 95 insertions(+), 20 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index ca56537b61bc..50e89869178a 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -48,6 +48,7 @@ #define KVM_REQ_SLEEP \ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) +#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); @@ -147,6 +148,13 @@ struct kvm_cpu_context { typedef struct kvm_cpu_context kvm_cpu_context_t; +struct vcpu_reset_state { + unsigned long pc; + unsigned long r0; + bool be; + bool reset; +}; + struct kvm_vcpu_arch { struct kvm_cpu_context ctxt; @@ -186,6 +194,8 @@ struct kvm_vcpu_arch { /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; + struct vcpu_reset_state reset_state; + /* Detect first run of a vcpu */ bool has_run_once; }; diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index 5ed0c3ee33d6..e53327912adc 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -69,6 +70,29 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset CP15 registers */ kvm_reset_coprocs(vcpu); + /* + * Additional reset state handling that PSCI may have imposed on us. + * Must be done after all the sys_reg reset. + */ + if (READ_ONCE(vcpu->arch.reset_state.reset)) { + unsigned long target_pc = vcpu->arch.reset_state.pc; + + /* Gracefully handle Thumb2 entry point */ + if (target_pc & 1) { + target_pc &= ~1UL; + vcpu_set_thumb(vcpu); + } + + /* Propagate caller endianness */ + if (vcpu->arch.reset_state.be) + kvm_vcpu_set_be(vcpu); + + *vcpu_pc(vcpu) = target_pc; + vcpu_set_reg(vcpu, 0, vcpu->arch.reset_state.r0); + + vcpu->arch.reset_state.reset = false; + } + /* Reset arch_timer context */ return kvm_timer_vcpu_reset(vcpu); } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7732d0ba4e60..da3fc7324d68 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -48,6 +48,7 @@ #define KVM_REQ_SLEEP \ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) +#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); @@ -208,6 +209,13 @@ struct kvm_cpu_context { typedef struct kvm_cpu_context kvm_cpu_context_t; +struct vcpu_reset_state { + unsigned long pc; + unsigned long r0; + bool be; + bool reset; +}; + struct kvm_vcpu_arch { struct kvm_cpu_context ctxt; @@ -297,6 +305,9 @@ struct kvm_vcpu_arch { /* Virtual SError ESR to restore when HCR_EL2.VSE is set */ u64 vsesr_el2; + /* Additional reset state */ + struct vcpu_reset_state reset_state; + /* True when deferrable sysregs are loaded on the physical CPU, * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */ bool sysregs_loaded_on_cpu; diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f21a2a575939..f16a5f8ff2b4 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -32,6 +32,7 @@ #include #include #include +#include #include /* Maximum phys_shift supported for any VM on this host */ @@ -146,6 +147,29 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset system registers */ kvm_reset_sys_regs(vcpu); + /* + * Additional reset state handling that PSCI may have imposed on us. + * Must be done after all the sys_reg reset. + */ + if (vcpu->arch.reset_state.reset) { + unsigned long target_pc = vcpu->arch.reset_state.pc; + + /* Gracefully handle Thumb2 entry point */ + if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) { + target_pc &= ~1UL; + vcpu_set_thumb(vcpu); + } + + /* Propagate caller endianness */ + if (vcpu->arch.reset_state.be) + kvm_vcpu_set_be(vcpu); + + *vcpu_pc(vcpu) = target_pc; + vcpu_set_reg(vcpu, 0, vcpu->arch.reset_state.r0); + + vcpu->arch.reset_state.reset = false; + } + /* Reset PMU */ kvm_pmu_vcpu_reset(vcpu); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 9e350fd34504..9c486fad3f9f 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -626,6 +626,13 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu) /* Awaken to handle a signal, request we sleep again later. */ kvm_make_request(KVM_REQ_SLEEP, vcpu); } + + /* + * Make sure we will observe a potential reset request if we've + * observed a change to the power state. Pairs with the smp_wmb() in + * kvm_psci_vcpu_on(). + */ + smp_rmb(); } static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) @@ -639,6 +646,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) vcpu_req_sleep(vcpu); + if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) + kvm_reset_vcpu(vcpu); + /* * Clear IRQ_PENDING requests that were made to guarantee * that a VCPU sees new virtual interrupts. diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index 9b73d3ad918a..34d08ee63747 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -104,12 +104,10 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) { + struct vcpu_reset_state *reset_state; struct kvm *kvm = source_vcpu->kvm; struct kvm_vcpu *vcpu = NULL; - struct swait_queue_head *wq; unsigned long cpu_id; - unsigned long context_id; - phys_addr_t target_pc; cpu_id = smccc_get_arg1(source_vcpu) & MPIDR_HWID_BITMASK; if (vcpu_mode_is_32bit(source_vcpu)) @@ -130,32 +128,30 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) return PSCI_RET_INVALID_PARAMS; } - target_pc = smccc_get_arg2(source_vcpu); - context_id = smccc_get_arg3(source_vcpu); + reset_state = &vcpu->arch.reset_state; - kvm_reset_vcpu(vcpu); - - /* Gracefully handle Thumb2 entry point */ - if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) { - target_pc &= ~((phys_addr_t) 1); - vcpu_set_thumb(vcpu); - } + reset_state->pc = smccc_get_arg2(source_vcpu); /* Propagate caller endianness */ - if (kvm_vcpu_is_be(source_vcpu)) - kvm_vcpu_set_be(vcpu); + reset_state->be = kvm_vcpu_is_be(source_vcpu); - *vcpu_pc(vcpu) = target_pc; /* * NOTE: We always update r0 (or x0) because for PSCI v0.1 * the general puspose registers are undefined upon CPU_ON. */ - smccc_set_retval(vcpu, context_id, 0, 0, 0); - vcpu->arch.power_off = false; - smp_mb(); /* Make sure the above is visible */ + reset_state->r0 = smccc_get_arg3(source_vcpu); - wq = kvm_arch_vcpu_wq(vcpu); - swake_up_one(wq); + WRITE_ONCE(reset_state->reset, true); + kvm_make_request(KVM_REQ_VCPU_RESET, vcpu); + + /* + * Make sure the reset request is observed if the change to + * power_state is observed. + */ + smp_wmb(); + + vcpu->arch.power_off = false; + kvm_vcpu_wake_up(vcpu); return PSCI_RET_SUCCESS; } From 20589c8cc47dce5854c8bf1b44a9fc63d798d26d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 20 Dec 2018 13:07:40 +0000 Subject: [PATCH 0753/1436] arm/arm64: KVM: Don't panic on failure to properly reset system registers Failing to properly reset system registers is pretty bad. But not quite as bad as bringing the whole machine down... So warn loudly, but slightly more gracefully. Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall --- arch/arm/kvm/coproc.c | 4 ++-- arch/arm64/kvm/sys_regs.c | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 222c1635bc7a..e8bd288fd5be 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -1450,6 +1450,6 @@ void kvm_reset_coprocs(struct kvm_vcpu *vcpu) reset_coproc_regs(vcpu, table, num); for (num = 1; num < NR_CP15_REGS; num++) - if (vcpu_cp15(vcpu, num) == 0x42424242) - panic("Didn't reset vcpu_cp15(vcpu, %zi)", num); + WARN(vcpu_cp15(vcpu, num) == 0x42424242, + "Didn't reset vcpu_cp15(vcpu, %zi)", num); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 86096774abcd..c936aa40c3f4 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2608,7 +2608,9 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) table = get_target_table(vcpu->arch.target, true, &num); reset_sys_reg_descs(vcpu, table, num); - for (num = 1; num < NR_SYS_REGS; num++) - if (__vcpu_sys_reg(vcpu, num) == 0x4242424242424242) - panic("Didn't reset __vcpu_sys_reg(%zi)", num); + for (num = 1; num < NR_SYS_REGS; num++) { + if (WARN(__vcpu_sys_reg(vcpu, num) == 0x4242424242424242, + "Didn't reset __vcpu_sys_reg(%zi)\n", num)) + break; + } } From ab2d5eb03dbb7b37a1c6356686fb48626ab0c93e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 10 Jan 2019 15:33:52 +0100 Subject: [PATCH 0754/1436] KVM: arm/arm64: vgic: Always initialize the group of private IRQs We currently initialize the group of private IRQs during kvm_vgic_vcpu_init, and the value of the group depends on the GIC model we are emulating. However, CPUs created before creating (and initializing) the VGIC might end up with the wrong group if the VGIC is created as GICv3 later. Since we have no enforced ordering of creating the VGIC and creating VCPUs, we can end up with part the VCPUs being properly intialized and the remaining incorrectly initialized. That also means that we have no single place to do the per-cpu data structure initialization which depends on knowing the emulated GIC model (which is only the group field). This patch removes the incorrect comment from kvm_vgic_vcpu_init and initializes the group of all previously created VCPUs's private interrupts in vgic_init in addition to the existing initialization in kvm_vgic_vcpu_init. Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-init.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index dfbfcb1fe933..3bdb31eaed64 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -231,13 +231,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) irq->config = VGIC_CONFIG_LEVEL; } - /* - * GICv3 can only be created via the KVM_DEVICE_CREATE API and - * so we always know the emulation type at this point as it's - * either explicitly configured as GICv3, or explicitly - * configured as GICv2, or not configured yet which also - * implies GICv2. - */ if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) irq->group = 1; else @@ -281,7 +274,7 @@ int vgic_init(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; struct kvm_vcpu *vcpu; - int ret = 0, i; + int ret = 0, i, idx; if (vgic_initialized(kvm)) return 0; @@ -298,6 +291,19 @@ int vgic_init(struct kvm *kvm) if (ret) goto out; + /* Initialize groups on CPUs created before the VGIC type was known */ + kvm_for_each_vcpu(idx, vcpu, kvm) { + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { + struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) + irq->group = 1; + else + irq->group = 0; + } + } + if (vgic_has_its(kvm)) { ret = vgic_v4_init(kvm); if (ret) From 309a205688060fbb000e9402078cf53cebde0793 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 2 Feb 2019 12:14:03 +0000 Subject: [PATCH 0755/1436] arm: KVM: Add missing kvm_stage2_has_pmd() helper Fixup 32bit by providing the now required helper. Cc: Suzuki Poulose Signed-off-by: Marc Zyngier --- arch/arm/include/asm/stage2_pgtable.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h index c4b1d4fb1797..de2089501b8b 100644 --- a/arch/arm/include/asm/stage2_pgtable.h +++ b/arch/arm/include/asm/stage2_pgtable.h @@ -76,4 +76,9 @@ static inline bool kvm_stage2_has_pud(struct kvm *kvm) #define S2_PMD_MASK PMD_MASK #define S2_PMD_SIZE PMD_SIZE +static inline bool kvm_stage2_has_pmd(struct kvm *kvm) +{ + return true; +} + #endif /* __ARM_S2_PGTABLE_H_ */ From 280cebfd05c8e381a392c662006dfaa6377feefc Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 29 Jan 2019 19:12:17 +0000 Subject: [PATCH 0756/1436] KVM: arm64: Relax the restriction on using stage2 PUD huge mapping We restrict mapping the PUD huge pages in stage2 to only when the stage2 has 4 level page table, leaving the feature unused with the default IPA size. But we could use it even with a 3 level page table, i.e, when the PUD level is folded into PGD, just like the stage1. Relax the condition to allow using the PUD huge page mappings at stage2 when it is possible. Cc: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index fbdf3ac2f001..30251e288629 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1695,11 +1695,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vma_pagesize = vma_kernel_pagesize(vma); /* - * PUD level may not exist for a VM but PMD is guaranteed to - * exist. + * The stage2 has a minimum of 2 level table (For arm64 see + * kvm_arm_setup_stage2()). Hence, we are guaranteed that we can + * use PMD_SIZE huge mappings (even when the PMD is folded into PGD). + * As for PUD huge maps, we must make sure that we have at least + * 3 levels, i.e, PMD is not folded. */ if ((vma_pagesize == PMD_SIZE || - (vma_pagesize == PUD_SIZE && kvm_stage2_has_pud(kvm))) && + (vma_pagesize == PUD_SIZE && kvm_stage2_has_pmd(kvm))) && !force_pte) { gfn = (fault_ipa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT; } From 7d82602909ed9c73b34ad26f05d10db4850a4f8c Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 24 Jan 2019 16:32:54 +0000 Subject: [PATCH 0757/1436] KVM: arm64: Forbid kprobing of the VHE world-switch code On systems with VHE the kernel and KVM's world-switch code run at the same exception level. Code that is only used on a VHE system does not need to be annotated as __hyp_text as it can reside anywhere in the kernel text. __hyp_text was also used to prevent kprobes from patching breakpoint instructions into this region, as this code runs at a different exception level. While this is no longer true with VHE, KVM still switches VBAR_EL1, meaning a kprobe's breakpoint executed in the world-switch code will cause a hyp-panic. echo "p:weasel sysreg_save_guest_state_vhe" > /sys/kernel/debug/tracing/kprobe_events echo 1 > /sys/kernel/debug/tracing/events/kprobes/weasel/enable lkvm run -k /boot/Image --console serial -p "console=ttyS0 earlycon=uart,mmio,0x3f8" # lkvm run -k /boot/Image -m 384 -c 3 --name guest-1474 Info: Placing fdt at 0x8fe00000 - 0x8fffffff Info: virtio-mmio.devices=0x200@0x10000:36 Info: virtio-mmio.devices=0x200@0x10200:37 Info: virtio-mmio.devices=0x200@0x10400:38 [ 614.178186] Kernel panic - not syncing: HYP panic: [ 614.178186] PS:404003c9 PC:ffff0000100d70e0 ESR:f2000004 [ 614.178186] FAR:0000000080080000 HPFAR:0000000000800800 PAR:1d00007edbadc0de [ 614.178186] VCPU:00000000f8de32f1 [ 614.178383] CPU: 2 PID: 1482 Comm: kvm-vcpu-0 Not tainted 5.0.0-rc2 #10799 [ 614.178446] Call trace: [ 614.178480] dump_backtrace+0x0/0x148 [ 614.178567] show_stack+0x24/0x30 [ 614.178658] dump_stack+0x90/0xb4 [ 614.178710] panic+0x13c/0x2d8 [ 614.178793] hyp_panic+0xac/0xd8 [ 614.178880] kvm_vcpu_run_vhe+0x9c/0xe0 [ 614.178958] kvm_arch_vcpu_ioctl_run+0x454/0x798 [ 614.179038] kvm_vcpu_ioctl+0x360/0x898 [ 614.179087] do_vfs_ioctl+0xc4/0x858 [ 614.179174] ksys_ioctl+0x84/0xb8 [ 614.179261] __arm64_sys_ioctl+0x28/0x38 [ 614.179348] el0_svc_common+0x94/0x108 [ 614.179401] el0_svc_handler+0x38/0x78 [ 614.179487] el0_svc+0x8/0xc [ 614.179558] SMP: stopping secondary CPUs [ 614.179661] Kernel Offset: disabled [ 614.179695] CPU features: 0x003,2a80aa38 [ 614.179758] Memory Limit: none [ 614.179858] ---[ end Kernel panic - not syncing: HYP panic: [ 614.179858] PS:404003c9 PC:ffff0000100d70e0 ESR:f2000004 [ 614.179858] FAR:0000000080080000 HPFAR:0000000000800800 PAR:1d00007edbadc0de [ 614.179858] VCPU:00000000f8de32f1 ]--- Annotate the VHE world-switch functions that aren't marked __hyp_text using NOKPROBE_SYMBOL(). Signed-off-by: James Morse Fixes: 3f5c90b890ac ("KVM: arm64: Introduce VHE-specific kvm_vcpu_run") Acked-by: Masami Hiramatsu Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/switch.c | 5 +++++ arch/arm64/kvm/hyp/sysreg-sr.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index b0b1478094b4..421ebf6f7086 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -107,6 +108,7 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu) write_sysreg(kvm_get_hyp_vector(), vbar_el1); } +NOKPROBE_SYMBOL(activate_traps_vhe); static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) { @@ -154,6 +156,7 @@ static void deactivate_traps_vhe(void) write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); write_sysreg(vectors, vbar_el1); } +NOKPROBE_SYMBOL(deactivate_traps_vhe); static void __hyp_text __deactivate_traps_nvhe(void) { @@ -513,6 +516,7 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) return exit_code; } +NOKPROBE_SYMBOL(kvm_vcpu_run_vhe); /* Switch to the guest for legacy non-VHE systems */ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) @@ -620,6 +624,7 @@ static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par, read_sysreg_el2(esr), read_sysreg_el2(far), read_sysreg(hpfar_el2), par, vcpu); } +NOKPROBE_SYMBOL(__hyp_call_panic_vhe); void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) { diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 68d6f7c3b237..b426e2cf973c 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -98,12 +99,14 @@ void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt) { __sysreg_save_common_state(ctxt); } +NOKPROBE_SYMBOL(sysreg_save_host_state_vhe); void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt) { __sysreg_save_common_state(ctxt); __sysreg_save_el2_return_state(ctxt); } +NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe); static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) { @@ -188,12 +191,14 @@ void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt) { __sysreg_restore_common_state(ctxt); } +NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe); void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt) { __sysreg_restore_common_state(ctxt); __sysreg_restore_el2_return_state(ctxt); } +NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe); void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) { From 76379dfbfd7c8fd7dd29eea3f828cf85c884829e Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Mon, 4 Feb 2019 22:41:05 +0900 Subject: [PATCH 0758/1436] ASoC: rsnd: ssiu: correct shift bit for ssiu9 Currently "0xf << 36" is used to clear SSIU-9 internal buffer state, which overflows 32-bit value according to user reference manual, it is always bit4 ~ bit7 of SSI_SYS_STATUS[1,3,5,7] registers indicate SSIU-9's buffer state, so "0xf << 4" should be used. This patch fix incorrect shifting issue in SSIU-9 case Fixes: commit b7169ddea2f2 ("ASoC: rsnd: remove RSND_REG_ from rsnd_reg") Signed-off-by: Jiada Wang Acked-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/ssiu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c index c5934adcfd01..c74991dd18ab 100644 --- a/sound/soc/sh/rcar/ssiu.c +++ b/sound/soc/sh/rcar/ssiu.c @@ -79,7 +79,7 @@ static int rsnd_ssiu_init(struct rsnd_mod *mod, break; case 9: for (i = 0; i < 4; i++) - rsnd_mod_write(mod, SSI_SYS_STATUS((i * 2) + 1), 0xf << (id * 4)); + rsnd_mod_write(mod, SSI_SYS_STATUS((i * 2) + 1), 0xf << 4); break; } From 860b454c2c0cbda6892954f5cdbbb48931b3c8db Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Thu, 7 Feb 2019 15:20:41 +0100 Subject: [PATCH 0759/1436] ASoC: samsung: Prevent clk_get_rate() calls in atomic context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch moves clk_get_rate() call from trigger() to hw_params() callback to avoid calling sleeping clk API from atomic context and prevent deadlock as indicated below. Before this change clk_get_rate() was being called with same spinlock held as the one passed to the clk API when registering clocks exposed by the I2S driver. [ 82.109780] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:908 [ 82.117009] in_atomic(): 1, irqs_disabled(): 128, pid: 1554, name: speaker-test [ 82.124235] 3 locks held by speaker-test/1554: [ 82.128653] #0: cc8c5328 (snd_pcm_link_rwlock){...-}, at: snd_pcm_stream_lock_irq+0x20/0x38 [ 82.137058] #1: ec9eda17 (&(&substream->self_group.lock)->rlock){..-.}, at: snd_pcm_ioctl+0x900/0x1268 [ 82.146417] #2: 6ac279bf (&(&pri_dai->spinlock)->rlock){..-.}, at: i2s_trigger+0x64/0x6d4 [ 82.154650] irq event stamp: 8144 [ 82.157949] hardirqs last enabled at (8143): [] _raw_read_unlock_irq+0x24/0x5c [ 82.166089] hardirqs last disabled at (8144): [] _raw_read_lock_irq+0x18/0x58 [ 82.174063] softirqs last enabled at (8004): [] __do_softirq+0x3a4/0x66c [ 82.181688] softirqs last disabled at (7997): [] irq_exit+0x140/0x168 [ 82.188964] Preemption disabled at: [ 82.188967] [<00000000>] (null) [ 82.195728] CPU: 6 PID: 1554 Comm: speaker-test Not tainted 5.0.0-rc5-00192-ga6e6caca8f03 #191 [ 82.204302] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [ 82.210376] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 82.218084] [] (show_stack) from [] (dump_stack+0x90/0xc8) [ 82.225278] [] (dump_stack) from [] (___might_sleep+0x22c/0x2c8) [ 82.232990] [] (___might_sleep) from [] (__mutex_lock+0x28/0xa3c) [ 82.240788] [] (__mutex_lock) from [] (mutex_lock_nested+0x1c/0x24) [ 82.248763] [] (mutex_lock_nested) from [] (clk_prepare_lock+0x78/0xec) [ 82.257079] [] (clk_prepare_lock) from [] (clk_core_get_rate+0xc/0x5c) [ 82.265309] [] (clk_core_get_rate) from [] (i2s_trigger+0x490/0x6d4) [ 82.273369] [] (i2s_trigger) from [] (soc_pcm_trigger+0x100/0x140) [ 82.281254] [] (soc_pcm_trigger) from [] (snd_pcm_do_start+0x2c/0x30) [ 82.289400] [] (snd_pcm_do_start) from [] (snd_pcm_action_single+0x38/0x78) [ 82.298065] [] (snd_pcm_action_single) from [] (snd_pcm_ioctl+0x910/0x1268) [ 82.306734] [] (snd_pcm_ioctl) from [] (do_vfs_ioctl+0x90/0x9ec) [ 82.314443] [] (do_vfs_ioctl) from [] (ksys_ioctl+0x34/0x60) [ 82.321808] [] (ksys_ioctl) from [] (ret_fast_syscall+0x0/0x28) [ 82.329431] Exception stack(0xeb875fa8 to 0xeb875ff0) [ 82.334459] 5fa0: 00033c18 b6e31000 00000004 00004142 00033d80 00033d80 [ 82.342605] 5fc0: 00033c18 b6e31000 00008000 00000036 00008000 00000000 beea38a8 00008000 [ 82.350748] 5fe0: b6e3142c beea384c b6da9a30 b6c9212c [ 82.355789] [ 82.357245] ====================================================== [ 82.363397] WARNING: possible circular locking dependency detected [ 82.369551] 5.0.0-rc5-00192-ga6e6caca8f03 #191 Tainted: G W [ 82.376395] ------------------------------------------------------ [ 82.382548] speaker-test/1554 is trying to acquire lock: [ 82.387834] 6d2007f4 (prepare_lock){+.+.}, at: clk_prepare_lock+0x78/0xec [ 82.394593] [ 82.394593] but task is already holding lock: [ 82.400398] 6ac279bf (&(&pri_dai->spinlock)->rlock){..-.}, at: i2s_trigger+0x64/0x6d4 [ 82.408197] [ 82.408197] which lock already depends on the new lock. [ 82.416343] [ 82.416343] the existing dependency chain (in reverse order) is: [ 82.423795] [ 82.423795] -> #1 (&(&pri_dai->spinlock)->rlock){..-.}: [ 82.430472] clk_mux_set_parent+0x34/0xb8 [ 82.434975] clk_core_set_parent_nolock+0x1c4/0x52c [ 82.440347] clk_set_parent+0x38/0x6c [ 82.444509] of_clk_set_defaults+0xc8/0x308 [ 82.449186] of_clk_add_provider+0x84/0xd0 [ 82.453779] samsung_i2s_probe+0x408/0x5f8 [ 82.458376] platform_drv_probe+0x48/0x98 [ 82.462879] really_probe+0x224/0x3f4 [ 82.467037] driver_probe_device+0x70/0x1c4 [ 82.471716] bus_for_each_drv+0x44/0x8c [ 82.476049] __device_attach+0xa0/0x138 [ 82.480382] bus_probe_device+0x88/0x90 [ 82.484715] deferred_probe_work_func+0x6c/0xbc [ 82.489741] process_one_work+0x200/0x740 [ 82.494246] worker_thread+0x2c/0x4c8 [ 82.498408] kthread+0x128/0x164 [ 82.502131] ret_from_fork+0x14/0x20 [ 82.506204] (null) [ 82.508976] [ 82.508976] -> #0 (prepare_lock){+.+.}: [ 82.514264] __mutex_lock+0x60/0xa3c [ 82.518336] mutex_lock_nested+0x1c/0x24 [ 82.522756] clk_prepare_lock+0x78/0xec [ 82.527088] clk_core_get_rate+0xc/0x5c [ 82.531421] i2s_trigger+0x490/0x6d4 [ 82.535494] soc_pcm_trigger+0x100/0x140 [ 82.539913] snd_pcm_do_start+0x2c/0x30 [ 82.544246] snd_pcm_action_single+0x38/0x78 [ 82.549012] snd_pcm_ioctl+0x910/0x1268 [ 82.553345] do_vfs_ioctl+0x90/0x9ec [ 82.557417] ksys_ioctl+0x34/0x60 [ 82.561229] ret_fast_syscall+0x0/0x28 [ 82.565477] 0xbeea384c [ 82.568421] [ 82.568421] other info that might help us debug this: [ 82.568421] [ 82.576394] Possible unsafe locking scenario: [ 82.576394] [ 82.582285] CPU0 CPU1 [ 82.586792] ---- ---- [ 82.591297] lock(&(&pri_dai->spinlock)->rlock); [ 82.595977] lock(prepare_lock); [ 82.601782] lock(&(&pri_dai->spinlock)->rlock); [ 82.608975] lock(prepare_lock); [ 82.612268] [ 82.612268] *** DEADLOCK *** Fixes: 647d04f8e07a ("ASoC: samsung: i2s: Ensure the RCLK rate is properly determined") Reported-by: Krzysztof Kozłowski Signed-off-by: Sylwester Nawrocki Signed-off-by: Mark Brown --- sound/soc/samsung/i2s.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index d6c62aa13041..ce00fe2f6aae 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -700,6 +700,7 @@ static int i2s_hw_params(struct snd_pcm_substream *substream, { struct i2s_dai *i2s = to_info(dai); u32 mod, mask = 0, val = 0; + struct clk *rclksrc; unsigned long flags; WARN_ON(!pm_runtime_active(dai->dev)); @@ -782,6 +783,10 @@ static int i2s_hw_params(struct snd_pcm_substream *substream, i2s->frmclk = params_rate(params); + rclksrc = i2s->clk_table[CLK_I2S_RCLK_SRC]; + if (rclksrc && !IS_ERR(rclksrc)) + i2s->rclk_srcrate = clk_get_rate(rclksrc); + return 0; } @@ -886,11 +891,6 @@ static int config_setup(struct i2s_dai *i2s) return 0; if (!(i2s->quirks & QUIRK_NO_MUXPSR)) { - struct clk *rclksrc = i2s->clk_table[CLK_I2S_RCLK_SRC]; - - if (rclksrc && !IS_ERR(rclksrc)) - i2s->rclk_srcrate = clk_get_rate(rclksrc); - psr = i2s->rclk_srcrate / i2s->frmclk / rfs; writel(((psr - 1) << 8) | PSR_PSREN, i2s->addr + I2SPSR); dev_dbg(&i2s->pdev->dev, From 35634ffa1751b6efd8cf75010b509dcb0263e29b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 6 Feb 2019 18:39:40 -0600 Subject: [PATCH 0760/1436] signal: Always notice exiting tasks Recently syzkaller was able to create unkillablle processes by creating a timer that is delivered as a thread local signal on SIGHUP, and receiving SIGHUP SA_NODEFERER. Ultimately causing a loop failing to deliver SIGHUP but always trying. Upon examination it turns out part of the problem is actually most of the solution. Since 2.5 signal delivery has found all fatal signals, marked the signal group for death, and queued SIGKILL in every threads thread queue relying on signal->group_exit_code to preserve the information of which was the actual fatal signal. The conversion of all fatal signals to SIGKILL results in the synchronous signal heuristic in next_signal kicking in and preferring SIGHUP to SIGKILL. Which is especially problematic as all fatal signals have already been transformed into SIGKILL. Instead of dequeueing signals and depending upon SIGKILL to be the first signal dequeued, first test if the signal group has already been marked for death. This guarantees that nothing in the signal queue can prevent a process that needs to exit from exiting. Cc: stable@vger.kernel.org Tested-by: Dmitry Vyukov Reported-by: Dmitry Vyukov Ref: ebf5ebe31d2c ("[PATCH] signal-fixes-2.5.59-A4") History Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Signed-off-by: "Eric W. Biederman" --- kernel/signal.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/signal.c b/kernel/signal.c index 9ca8e5278c8e..5424cb0006bc 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2393,6 +2393,11 @@ relock: goto relock; } + /* Has this task already been marked for death? */ + ksig->info.si_signo = signr = SIGKILL; + if (signal_group_exit(signal)) + goto fatal; + for (;;) { struct k_sigaction *ka; @@ -2488,6 +2493,7 @@ relock: continue; } + fatal: spin_unlock_irq(&sighand->siglock); /* From 7146db3317c67b517258cb5e1b08af387da0618b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 6 Feb 2019 17:51:47 -0600 Subject: [PATCH 0761/1436] signal: Better detection of synchronous signals Recently syzkaller was able to create unkillablle processes by creating a timer that is delivered as a thread local signal on SIGHUP, and receiving SIGHUP SA_NODEFERER. Ultimately causing a loop failing to deliver SIGHUP but always trying. When the stack overflows delivery of SIGHUP fails and force_sigsegv is called. Unfortunately because SIGSEGV is numerically higher than SIGHUP next_signal tries again to deliver a SIGHUP. From a quality of implementation standpoint attempting to deliver the timer SIGHUP signal is wrong. We should attempt to deliver the synchronous SIGSEGV signal we just forced. We can make that happening in a fairly straight forward manner by instead of just looking at the signal number we also look at the si_code. In particular for exceptions (aka synchronous signals) the si_code is always greater than 0. That still has the potential to pick up a number of asynchronous signals as in a few cases the same si_codes that are used for synchronous signals are also used for asynchronous signals, and SI_KERNEL is also included in the list of possible si_codes. Still the heuristic is much better and timer signals are definitely excluded. Which is enough to prevent all known ways for someone sending a process signals fast enough to cause unexpected and arguably incorrect behavior. Cc: stable@vger.kernel.org Fixes: a27341cd5fcb ("Prioritize synchronous signals over 'normal' signals") Tested-by: Dmitry Vyukov Reported-by: Dmitry Vyukov Signed-off-by: "Eric W. Biederman" --- kernel/signal.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/kernel/signal.c b/kernel/signal.c index 5424cb0006bc..99fa8ff06fd9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -688,6 +688,48 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, kernel_siginfo_t *in } EXPORT_SYMBOL_GPL(dequeue_signal); +static int dequeue_synchronous_signal(kernel_siginfo_t *info) +{ + struct task_struct *tsk = current; + struct sigpending *pending = &tsk->pending; + struct sigqueue *q, *sync = NULL; + + /* + * Might a synchronous signal be in the queue? + */ + if (!((pending->signal.sig[0] & ~tsk->blocked.sig[0]) & SYNCHRONOUS_MASK)) + return 0; + + /* + * Return the first synchronous signal in the queue. + */ + list_for_each_entry(q, &pending->list, list) { + /* Synchronous signals have a postive si_code */ + if ((q->info.si_code > SI_USER) && + (sigmask(q->info.si_signo) & SYNCHRONOUS_MASK)) { + sync = q; + goto next; + } + } + return 0; +next: + /* + * Check if there is another siginfo for the same signal. + */ + list_for_each_entry_continue(q, &pending->list, list) { + if (q->info.si_signo == sync->info.si_signo) + goto still_pending; + } + + sigdelset(&pending->signal, sync->info.si_signo); + recalc_sigpending(); +still_pending: + list_del_init(&sync->list); + copy_siginfo(info, &sync->info); + __sigqueue_free(sync); + return info->si_signo; +} + /* * Tell a process that it has a new active signal.. * @@ -2411,7 +2453,15 @@ relock: goto relock; } - signr = dequeue_signal(current, ¤t->blocked, &ksig->info); + /* + * Signals generated by the execution of an instruction + * need to be delivered before any other pending signals + * so that the instruction pointer in the signal stack + * frame points to the faulting instruction. + */ + signr = dequeue_synchronous_signal(&ksig->info); + if (!signr) + signr = dequeue_signal(current, ¤t->blocked, &ksig->info); if (!signr) break; /* will return 0 */ From 50d6b3cf9403879911e06d69c7ef41e43f8f7b4b Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 12 Dec 2018 11:49:47 +0000 Subject: [PATCH 0762/1436] ARM: OMAP2+: fix lack of timer interrupts on CPU1 after hotplug If we have a kernel configured for periodic timer interrupts, and we have cpuidle enabled, then we end up with CPU1 losing timer interupts after a hotplug. This can manifest itself in RCU stall warnings, or userspace becoming unresponsive. The problem is that the kernel initially wants to use the TWD timer for interrupts, but the TWD loses context when we enter the C3 cpuidle state. Nothing reprograms the TWD after idle. We have solved this in the past by switching to broadcast timer ticks, and cpuidle44xx switches to that mode at boot time. However, there is nothing to switch from periodic mode local timers after a hotplug operation. We call tick_broadcast_enter() in omap_enter_idle_coupled(), which one would expect would take care of the issue, but internally this only deals with one-shot local timers - tick_broadcast_enable() on the other hand only deals with periodic local timers. So, we need to call both. Signed-off-by: Russell King [tony@atomide.com: just standardized the subject line] Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/cpuidle44xx.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index a8b291f00109..dae514c8276a 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -152,6 +152,10 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, mpuss_can_lose_context = (cx->mpu_state == PWRDM_POWER_RET) && (cx->mpu_logic_state == PWRDM_POWER_OFF); + /* Enter broadcast mode for periodic timers */ + tick_broadcast_enable(); + + /* Enter broadcast mode for one-shot timers */ tick_broadcast_enter(); /* @@ -218,15 +222,6 @@ fail: return index; } -/* - * For each cpu, setup the broadcast timer because local timers - * stops for the states above C1. - */ -static void omap_setup_broadcast_timer(void *arg) -{ - tick_broadcast_enable(); -} - static struct cpuidle_driver omap4_idle_driver = { .name = "omap4_idle", .owner = THIS_MODULE, @@ -319,8 +314,5 @@ int __init omap4_idle_init(void) if (!cpu_clkdm[0] || !cpu_clkdm[1]) return -ENODEV; - /* Configure the broadcast timer on each cpu */ - on_each_cpu(omap_setup_broadcast_timer, NULL, 1); - return cpuidle_register(idle_driver, cpu_online_mask); } From d04ca383860bef90a0dab4eb397907f7f05e839e Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 6 Feb 2019 15:34:16 +0100 Subject: [PATCH 0763/1436] mt76x0u: fix suspend/resume We need to reset MCU and do other initializations on resume otherwise MT7610U device will fail to initialize, what cause system hung due to USB requests timeouts. Patch fixes 4.19 -> 4.20 regression. Cc: stable@vger.kernel.org # 4.20+ Signed-off-by: Stanislaw Gruszka Acked-by: Lorenzo Bianconi Signed-off-by: Kalle Valo --- .../net/wireless/mediatek/mt76/mt76x0/usb.c | 48 ++++++++++++------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c index 0e6b43bb4678..a5ea3ba495a4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c @@ -158,6 +158,32 @@ static const struct ieee80211_ops mt76x0u_ops = { .get_txpower = mt76x02_get_txpower, }; +static int mt76x0u_init_hardware(struct mt76x02_dev *dev) +{ + int err; + + mt76x0_chip_onoff(dev, true, true); + + if (!mt76x02_wait_for_mac(&dev->mt76)) + return -ETIMEDOUT; + + err = mt76x0u_mcu_init(dev); + if (err < 0) + return err; + + mt76x0_init_usb_dma(dev); + err = mt76x0_init_hardware(dev); + if (err < 0) + return err; + + mt76_rmw(dev, MT_US_CYC_CFG, MT_US_CYC_CNT, 0x1e); + mt76_wr(dev, MT_TXOP_CTRL_CFG, + FIELD_PREP(MT_TXOP_TRUN_EN, 0x3f) | + FIELD_PREP(MT_TXOP_EXT_CCA_DLY, 0x58)); + + return 0; +} + static int mt76x0u_register_device(struct mt76x02_dev *dev) { struct ieee80211_hw *hw = dev->mt76.hw; @@ -171,26 +197,10 @@ static int mt76x0u_register_device(struct mt76x02_dev *dev) if (err < 0) goto out_err; - mt76x0_chip_onoff(dev, true, true); - if (!mt76x02_wait_for_mac(&dev->mt76)) { - err = -ETIMEDOUT; - goto out_err; - } - - err = mt76x0u_mcu_init(dev); + err = mt76x0u_init_hardware(dev); if (err < 0) goto out_err; - mt76x0_init_usb_dma(dev); - err = mt76x0_init_hardware(dev); - if (err < 0) - goto out_err; - - mt76_rmw(dev, MT_US_CYC_CFG, MT_US_CYC_CNT, 0x1e); - mt76_wr(dev, MT_TXOP_CTRL_CFG, - FIELD_PREP(MT_TXOP_TRUN_EN, 0x3f) | - FIELD_PREP(MT_TXOP_EXT_CCA_DLY, 0x58)); - err = mt76x0_register_device(dev); if (err < 0) goto out_err; @@ -301,6 +311,8 @@ static int __maybe_unused mt76x0_suspend(struct usb_interface *usb_intf, mt76u_stop_queues(&dev->mt76); mt76x0u_mac_stop(dev); + clear_bit(MT76_STATE_MCU_RUNNING, &dev->mt76.state); + mt76x0_chip_onoff(dev, false, false); usb_kill_urb(usb->mcu.res.urb); return 0; @@ -328,7 +340,7 @@ static int __maybe_unused mt76x0_resume(struct usb_interface *usb_intf) tasklet_enable(&usb->rx_tasklet); tasklet_enable(&usb->tx_tasklet); - ret = mt76x0_init_hardware(dev); + ret = mt76x0u_init_hardware(dev); if (ret) goto err; From 4cd3016ce996494f78fdfd87ea35c8ca5d0b413e Mon Sep 17 00:00:00 2001 From: Jurica Vukadin Date: Thu, 7 Feb 2019 16:29:37 +0100 Subject: [PATCH 0764/1436] ALSA: hda - Add quirk for HP EliteBook 840 G5 This enables mute LED support and fixes switching jacks when the laptop is docked. Signed-off-by: Jurica Vukadin Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 152f54137082..a4ee7656d9ee 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -924,6 +924,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x807C, "HP EliteBook 820 G3", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x80FD, "HP ProBook 640 G2", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x828c, "HP EliteBook 840 G4", CXT_FIXUP_HP_DOCK), + SND_PCI_QUIRK(0x103c, 0x83b2, "HP EliteBook 840 G5", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x83b3, "HP EliteBook 830 G5", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x83d3, "HP ProBook 640 G4", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE), From 8f289805616e81f7c1690931aa8a586c76f4fa88 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Wed, 6 Feb 2019 21:45:29 -0800 Subject: [PATCH 0765/1436] net: phy: fixed_phy: Fix fixed_phy not checking GPIO Fix fixed_phy not checking GPIO if no link_update callback is registered. In the original version all users registered a link_update callback so the issue was masked. Fixes: a5597008dbc2 ("phy: fixed_phy: Add gpio to determine link up/down.") Reviewed-by: Andrew Lunn Signed-off-by: Moritz Fischer Signed-off-by: David S. Miller --- drivers/net/phy/fixed_phy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index f136a23c1a35..d810f914aaa4 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -85,11 +85,11 @@ static int fixed_mdio_read(struct mii_bus *bus, int phy_addr, int reg_num) s = read_seqcount_begin(&fp->seqcount); fp->status.link = !fp->no_carrier; /* Issue callback if user registered it. */ - if (fp->link_update) { + if (fp->link_update) fp->link_update(fp->phydev->attached_dev, &fp->status); - fixed_phy_update(fp); - } + /* Check the GPIO for change in status */ + fixed_phy_update(fp); state = fp->status; } while (read_seqcount_retry(&fp->seqcount, s)); From cfa39381173d5f969daf43582c95ad679189cbc9 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sat, 26 Jan 2019 01:54:33 +0100 Subject: [PATCH 0766/1436] kvm: fix kvm_ioctl_create_device() reference counting (CVE-2019-6974) kvm_ioctl_create_device() does the following: 1. creates a device that holds a reference to the VM object (with a borrowed reference, the VM's refcount has not been bumped yet) 2. initializes the device 3. transfers the reference to the device to the caller's file descriptor table 4. calls kvm_get_kvm() to turn the borrowed reference to the VM into a real reference The ownership transfer in step 3 must not happen before the reference to the VM becomes a proper, non-borrowed reference, which only happens in step 4. After step 3, an attacker can close the file descriptor and drop the borrowed reference, which can cause the refcount of the kvm object to drop to zero. This means that we need to grab a reference for the device before anon_inode_getfd(), otherwise the VM can disappear from under us. Fixes: 852b6d57dc7f ("kvm: add device control API") Cc: stable@kernel.org Signed-off-by: Jann Horn Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5ecea812cb6a..585845203db8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3000,8 +3000,10 @@ static int kvm_ioctl_create_device(struct kvm *kvm, if (ops->init) ops->init(dev); + kvm_get_kvm(kvm); ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); if (ret < 0) { + kvm_put_kvm(kvm); mutex_lock(&kvm->lock); list_del(&dev->vm_node); mutex_unlock(&kvm->lock); @@ -3009,7 +3011,6 @@ static int kvm_ioctl_create_device(struct kvm *kvm, return ret; } - kvm_get_kvm(kvm); cd->fd = ret; return 0; } From 353c0956a618a07ba4bbe7ad00ff29fe70e8412a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 29 Jan 2019 18:41:16 +0100 Subject: [PATCH 0767/1436] KVM: x86: work around leak of uninitialized stack contents (CVE-2019-7222) Bugzilla: 1671930 Emulation of certain instructions (VMXON, VMCLEAR, VMPTRLD, VMWRITE with memory operand, INVEPT, INVVPID) can incorrectly inject a page fault when passed an operand that points to an MMIO address. The page fault will use uninitialized kernel stack memory as the CR2 and error code. The right behavior would be to abort the VM with a KVM_EXIT_INTERNAL_ERROR exit to userspace; however, it is not an easy fix, so for now just ensure that the error code and CR2 are zero. Embargoed until Feb 7th 2019. Reported-by: Felix Wilhelm Cc: stable@kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3d27206f6c01..e67ecf25e690 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5116,6 +5116,13 @@ int kvm_read_guest_virt(struct kvm_vcpu *vcpu, { u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; + /* + * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED + * is returned, but our callers are not ready for that and they blindly + * call kvm_inject_page_fault. Ensure that they at least do not leak + * uninitialized kernel stack memory into cr2 and error code. + */ + memset(exception, 0, sizeof(*exception)); return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception); } From ecec76885bcfe3294685dc363fd1273df0d5d65f Mon Sep 17 00:00:00 2001 From: Peter Shier Date: Thu, 11 Oct 2018 11:46:46 -0700 Subject: [PATCH 0768/1436] KVM: nVMX: unconditionally cancel preemption timer in free_nested (CVE-2019-7221) Bugzilla: 1671904 There are multiple code paths where an hrtimer may have been started to emulate an L1 VMX preemption timer that can result in a call to free_nested without an intervening L2 exit where the hrtimer is normally cancelled. Unconditionally cancel in free_nested to cover all cases. Embargoed until Feb 7th 2019. Signed-off-by: Peter Shier Reported-by: Jim Mattson Reviewed-by: Jim Mattson Reported-by: Felix Wilhelm Cc: stable@kernel.org Message-Id: <20181011184646.154065-1-pshier@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 8ff20523661b..d8ea4ebd79e7 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -211,6 +211,7 @@ static void free_nested(struct kvm_vcpu *vcpu) if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) return; + hrtimer_cancel(&vmx->nested.preemption_timer); vmx->nested.vmxon = false; vmx->nested.smm.vmxon = false; free_vpid(vmx->nested.vpid02); From 1db64e8733f653814f041ffe1428524494ef6123 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 7 Feb 2019 11:36:32 +0200 Subject: [PATCH 0769/1436] devlink: Add devlink formatted message (fmsg) API Devlink fmsg is a mechanism to pass descriptors between drivers and devlink, in json-like format. The API allows the driver to add nested attributes such as object, object pair and value array, in addition to attributes such as name and value. Driver can use this API to fill the fmsg context in a format which will be translated by the devlink to the netlink message later. There is no memory allocation in advance (other than the initial list head), and it dynamically allocates messages descriptors and add them to the list on the fly. When it needs to send the data using SKBs to the netlink layer, it fragments the data between different SKBs. In order to do this fragmentation, it uses virtual nests attributes, to avoid actual nesting use which cannot be divided between different SKBs. Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 149 +++++++++++ include/uapi/linux/devlink.h | 8 + net/core/devlink.c | 483 +++++++++++++++++++++++++++++++++++ 3 files changed, 640 insertions(+) diff --git a/include/net/devlink.h b/include/net/devlink.h index 74d992a68a06..7c5722e816aa 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -448,6 +448,8 @@ struct devlink_info_req; typedef void devlink_snapshot_data_dest_t(const void *data); +struct devlink_fmsg; + struct devlink_ops { int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack); int (*port_type_set)(struct devlink_port *devlink_port, @@ -639,6 +641,37 @@ int devlink_info_version_running_put(struct devlink_info_req *req, const char *version_name, const char *version_value); +int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg); +int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg); + +int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name); +int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg); + +int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, + const char *name); +int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg); + +int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value); +int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value); +int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value); +int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value); +int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value); +int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, + u16 value_len); + +int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, + bool value); +int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, + u8 value); +int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, + u32 value); +int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, + u64 value); +int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, + const char *value); +int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, + const void *value, u16 value_len); + #else static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, @@ -971,6 +1004,122 @@ devlink_info_version_running_put(struct devlink_info_req *req, { return 0; } + +static inline int +devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg) +{ + return 0; +} + +static inline int +devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg) +{ + return 0; +} + +static inline int +devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name) +{ + return 0; +} + +static inline int +devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg) +{ + return 0; +} + +static inline int +devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, + const char *name) +{ + return 0; +} + +static inline int +devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg) +{ + return 0; +} + +static inline int +devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value) +{ + return 0; +} + +static inline int +devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value) +{ + return 0; +} + +static inline int +devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value) +{ + return 0; +} + +static inline int +devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value) +{ + return 0; +} + +static inline int +devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value) +{ + return 0; +} + +static inline int +devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, + u16 value_len) +{ + return 0; +} + +static inline int +devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, + bool value) +{ + return 0; +} + +static inline int +devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, + u8 value) +{ + return 0; +} + +static inline int +devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, + u32 value) +{ + return 0; +} + +static inline int +devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, + u64 value) +{ + return 0; +} + +static inline int +devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, + const char *value) +{ + return 0; +} + +static inline int +devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, + const void *value, u16 value_len) +{ + return 0; +} #endif #if IS_REACHABLE(CONFIG_NET_DEVLINK) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 054b2d1a4537..076692209a9b 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -302,6 +302,14 @@ enum devlink_attr { DEVLINK_ATTR_SB_POOL_CELL_SIZE, /* u32 */ + DEVLINK_ATTR_FMSG, /* nested */ + DEVLINK_ATTR_FMSG_OBJ_NEST_START, /* flag */ + DEVLINK_ATTR_FMSG_PAIR_NEST_START, /* flag */ + DEVLINK_ATTR_FMSG_ARR_NEST_START, /* flag */ + DEVLINK_ATTR_FMSG_NEST_END, /* flag */ + DEVLINK_ATTR_FMSG_OBJ_NAME, /* string */ + DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE, /* u8 */ + DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA, /* dynamic */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index cd0d393bc62d..03883697fcf0 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3879,6 +3879,489 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, return msg->len; } +struct devlink_fmsg_item { + struct list_head list; + int attrtype; + u8 nla_type; + u16 len; + int value[0]; +}; + +struct devlink_fmsg { + struct list_head item_list; +}; + +static struct devlink_fmsg *devlink_fmsg_alloc(void) +{ + struct devlink_fmsg *fmsg; + + fmsg = kzalloc(sizeof(*fmsg), GFP_KERNEL); + if (!fmsg) + return NULL; + + INIT_LIST_HEAD(&fmsg->item_list); + + return fmsg; +} + +static void devlink_fmsg_free(struct devlink_fmsg *fmsg) +{ + struct devlink_fmsg_item *item, *tmp; + + list_for_each_entry_safe(item, tmp, &fmsg->item_list, list) { + list_del(&item->list); + kfree(item); + } + kfree(fmsg); +} + +static int devlink_fmsg_nest_common(struct devlink_fmsg *fmsg, + int attrtype) +{ + struct devlink_fmsg_item *item; + + item = kzalloc(sizeof(*item), GFP_KERNEL); + if (!item) + return -ENOMEM; + + item->attrtype = attrtype; + list_add_tail(&item->list, &fmsg->item_list); + + return 0; +} + +int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg) +{ + return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_OBJ_NEST_START); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_start); + +static int devlink_fmsg_nest_end(struct devlink_fmsg *fmsg) +{ + return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_NEST_END); +} + +int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg) +{ + return devlink_fmsg_nest_end(fmsg); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_end); + +#define DEVLINK_FMSG_MAX_SIZE (GENLMSG_DEFAULT_SIZE - GENL_HDRLEN - NLA_HDRLEN) + +static int devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name) +{ + struct devlink_fmsg_item *item; + + if (strlen(name) + 1 > DEVLINK_FMSG_MAX_SIZE) + return -EMSGSIZE; + + item = kzalloc(sizeof(*item) + strlen(name) + 1, GFP_KERNEL); + if (!item) + return -ENOMEM; + + item->nla_type = NLA_NUL_STRING; + item->len = strlen(name) + 1; + item->attrtype = DEVLINK_ATTR_FMSG_OBJ_NAME; + memcpy(&item->value, name, item->len); + list_add_tail(&item->list, &fmsg->item_list); + + return 0; +} + +int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name) +{ + int err; + + err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START); + if (err) + return err; + + err = devlink_fmsg_put_name(fmsg, name); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_start); + +int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg) +{ + return devlink_fmsg_nest_end(fmsg); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_end); + +int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, + const char *name) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_ARR_NEST_START); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_start); + +int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg) +{ + int err; + + err = devlink_fmsg_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_end); + +static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg, + const void *value, u16 value_len, + u8 value_nla_type) +{ + struct devlink_fmsg_item *item; + + if (value_len > DEVLINK_FMSG_MAX_SIZE) + return -EMSGSIZE; + + item = kzalloc(sizeof(*item) + value_len, GFP_KERNEL); + if (!item) + return -ENOMEM; + + item->nla_type = value_nla_type; + item->len = value_len; + item->attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA; + memcpy(&item->value, value, item->len); + list_add_tail(&item->list, &fmsg->item_list); + + return 0; +} + +int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value) +{ + return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_bool_put); + +int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value) +{ + return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_u8_put); + +int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value) +{ + return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put); + +int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value) +{ + return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_u64_put); + +int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value) +{ + return devlink_fmsg_put_value(fmsg, value, strlen(value) + 1, + NLA_NUL_STRING); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_string_put); + +int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, + u16 value_len) +{ + return devlink_fmsg_put_value(fmsg, value, value_len, NLA_BINARY); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_binary_put); + +int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, + bool value) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_bool_put(fmsg, value); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_bool_pair_put); + +int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, + u8 value) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_u8_put(fmsg, value); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_u8_pair_put); + +int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, + u32 value) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_u32_put(fmsg, value); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_u32_pair_put); + +int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, + u64 value) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_u64_put(fmsg, value); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_u64_pair_put); + +int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, + const char *value) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_string_put(fmsg, value); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_string_pair_put); + +int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, + const void *value, u16 value_len) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_binary_put(fmsg, value, value_len); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_put); + +static int +devlink_fmsg_item_fill_type(struct devlink_fmsg_item *msg, struct sk_buff *skb) +{ + switch (msg->nla_type) { + case NLA_FLAG: + case NLA_U8: + case NLA_U32: + case NLA_U64: + case NLA_NUL_STRING: + case NLA_BINARY: + return nla_put_u8(skb, DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE, + msg->nla_type); + default: + return -EINVAL; + } +} + +static int +devlink_fmsg_item_fill_data(struct devlink_fmsg_item *msg, struct sk_buff *skb) +{ + int attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA; + u8 tmp; + + switch (msg->nla_type) { + case NLA_FLAG: + /* Always provide flag data, regardless of its value */ + tmp = *(bool *) msg->value; + + return nla_put_u8(skb, attrtype, tmp); + case NLA_U8: + return nla_put_u8(skb, attrtype, *(u8 *) msg->value); + case NLA_U32: + return nla_put_u32(skb, attrtype, *(u32 *) msg->value); + case NLA_U64: + return nla_put_u64_64bit(skb, attrtype, *(u64 *) msg->value, + DEVLINK_ATTR_PAD); + case NLA_NUL_STRING: + return nla_put_string(skb, attrtype, (char *) &msg->value); + case NLA_BINARY: + return nla_put(skb, attrtype, msg->len, (void *) &msg->value); + default: + return -EINVAL; + } +} + +static int +devlink_fmsg_prepare_skb(struct devlink_fmsg *fmsg, struct sk_buff *skb, + int *start) +{ + struct devlink_fmsg_item *item; + struct nlattr *fmsg_nlattr; + int i = 0; + int err; + + fmsg_nlattr = nla_nest_start(skb, DEVLINK_ATTR_FMSG); + if (!fmsg_nlattr) + return -EMSGSIZE; + + list_for_each_entry(item, &fmsg->item_list, list) { + if (i < *start) { + i++; + continue; + } + + switch (item->attrtype) { + case DEVLINK_ATTR_FMSG_OBJ_NEST_START: + case DEVLINK_ATTR_FMSG_PAIR_NEST_START: + case DEVLINK_ATTR_FMSG_ARR_NEST_START: + case DEVLINK_ATTR_FMSG_NEST_END: + err = nla_put_flag(skb, item->attrtype); + break; + case DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA: + err = devlink_fmsg_item_fill_type(item, skb); + if (err) + break; + err = devlink_fmsg_item_fill_data(item, skb); + break; + case DEVLINK_ATTR_FMSG_OBJ_NAME: + err = nla_put_string(skb, item->attrtype, + (char *) &item->value); + break; + default: + err = -EINVAL; + break; + } + if (!err) + *start = ++i; + else + break; + } + + nla_nest_end(skb, fmsg_nlattr); + return err; +} + +static int devlink_fmsg_snd(struct devlink_fmsg *fmsg, + struct genl_info *info, + enum devlink_command cmd, int flags) +{ + struct nlmsghdr *nlh; + struct sk_buff *skb; + bool last = false; + int index = 0; + void *hdr; + int err; + + while (!last) { + int tmp_index = index; + + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, + &devlink_nl_family, flags | NLM_F_MULTI, cmd); + if (!hdr) { + err = -EMSGSIZE; + goto nla_put_failure; + } + + err = devlink_fmsg_prepare_skb(fmsg, skb, &index); + if (!err) + last = true; + else if (err != -EMSGSIZE || tmp_index == index) + goto nla_put_failure; + + genlmsg_end(skb, hdr); + err = genlmsg_reply(skb, info); + if (err) + return err; + } + + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq, + NLMSG_DONE, 0, flags | NLM_F_MULTI); + if (!nlh) { + err = -EMSGSIZE; + goto nla_put_failure; + } + err = genlmsg_reply(skb, info); + if (err) + return err; + + return 0; + +nla_put_failure: + nlmsg_free(skb); + return err; +} + static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, From a0bdcc59d194d9d2ccb679015e66d8562b0b9e84 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 7 Feb 2019 11:36:33 +0200 Subject: [PATCH 0770/1436] devlink: Add health reporter create/destroy functionality Devlink health reporter is an instance for reporting, diagnosing and recovering from run time errors discovered by the reporters. Define it's data structure and supported operations. In addition, expose devlink API to create and destroy a reporter. Each devlink instance will hold it's own reporters list. As part of the allocation, driver shall provide a set of callbacks which will be used by devlink in order to handle health reports and user commands related to this reporter. In addition, driver is entitled to provide some priv pointer, which can be fetched from the reporter by devlink_health_reporter_priv function. For each reporter, devlink will hold a metadata of statistics, dump msg and status. For passing dumps and diagnose data to the user-space, it will use devlink fmsg API. Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 53 +++++++++++++++++++++++++ net/core/devlink.c | 92 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+) diff --git a/include/net/devlink.h b/include/net/devlink.h index 7c5722e816aa..3dfe30235878 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -30,6 +30,7 @@ struct devlink { struct list_head param_list; struct list_head region_list; u32 snapshot_id; + struct list_head reporter_list; struct devlink_dpipe_headers *dpipe_headers; const struct devlink_ops *ops; struct device *dev; @@ -449,6 +450,27 @@ struct devlink_info_req; typedef void devlink_snapshot_data_dest_t(const void *data); struct devlink_fmsg; +struct devlink_health_reporter; + +/** + * struct devlink_health_reporter_ops - Reporter operations + * @name: reporter name + * @recover: callback to recover from reported error + * if priv_ctx is NULL, run a full recover + * @dump: callback to dump an object + * if priv_ctx is NULL, run a full dump + * @diagnose: callback to diagnose the current status + */ + +struct devlink_health_reporter_ops { + char *name; + int (*recover)(struct devlink_health_reporter *reporter, + void *priv_ctx); + int (*dump)(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx); + int (*diagnose)(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg); +}; struct devlink_ops { int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack); @@ -672,6 +694,17 @@ int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, const void *value, u16 value_len); +struct devlink_health_reporter * +devlink_health_reporter_create(struct devlink *devlink, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, bool auto_recover, + void *priv); +void +devlink_health_reporter_destroy(struct devlink_health_reporter *reporter); + +void * +devlink_health_reporter_priv(struct devlink_health_reporter *reporter); + #else static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, @@ -1120,6 +1153,26 @@ devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, { return 0; } + +static inline struct devlink_health_reporter * +devlink_health_reporter_create(struct devlink *devlink, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, bool auto_recover, + void *priv) +{ + return NULL; +} + +static inline void +devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) +{ +} + +static inline void * +devlink_health_reporter_priv(struct devlink_health_reporter *reporter) +{ + return NULL; +} #endif #if IS_REACHABLE(CONFIG_NET_DEVLINK) diff --git a/net/core/devlink.c b/net/core/devlink.c index 03883697fcf0..341548d7f1f1 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4362,6 +4362,97 @@ nla_put_failure: return err; } +struct devlink_health_reporter { + struct list_head list; + void *priv; + const struct devlink_health_reporter_ops *ops; + struct devlink *devlink; + u64 graceful_period; + bool auto_recover; + u8 health_state; +}; + +void * +devlink_health_reporter_priv(struct devlink_health_reporter *reporter) +{ + return reporter->priv; +} +EXPORT_SYMBOL_GPL(devlink_health_reporter_priv); + +static struct devlink_health_reporter * +devlink_health_reporter_find_by_name(struct devlink *devlink, + const char *reporter_name) +{ + struct devlink_health_reporter *reporter; + + list_for_each_entry(reporter, &devlink->reporter_list, list) + if (!strcmp(reporter->ops->name, reporter_name)) + return reporter; + return NULL; +} + +/** + * devlink_health_reporter_create - create devlink health reporter + * + * @devlink: devlink + * @ops: ops + * @graceful_period: to avoid recovery loops, in msecs + * @auto_recover: auto recover when error occurs + * @priv: priv + */ +struct devlink_health_reporter * +devlink_health_reporter_create(struct devlink *devlink, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, bool auto_recover, + void *priv) +{ + struct devlink_health_reporter *reporter; + + mutex_lock(&devlink->lock); + if (devlink_health_reporter_find_by_name(devlink, ops->name)) { + reporter = ERR_PTR(-EEXIST); + goto unlock; + } + + if (WARN_ON(auto_recover && !ops->recover) || + WARN_ON(graceful_period && !ops->recover)) { + reporter = ERR_PTR(-EINVAL); + goto unlock; + } + + reporter = kzalloc(sizeof(*reporter), GFP_KERNEL); + if (!reporter) { + reporter = ERR_PTR(-ENOMEM); + goto unlock; + } + + reporter->priv = priv; + reporter->ops = ops; + reporter->devlink = devlink; + reporter->graceful_period = graceful_period; + reporter->auto_recover = auto_recover; + list_add_tail(&reporter->list, &devlink->reporter_list); +unlock: + mutex_unlock(&devlink->lock); + return reporter; +} +EXPORT_SYMBOL_GPL(devlink_health_reporter_create); + +/** + * devlink_health_reporter_destroy - destroy devlink health reporter + * + * @reporter: devlink health reporter to destroy + */ +void +devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) +{ + mutex_lock(&reporter->devlink->lock); + list_del(&reporter->list); + mutex_unlock(&reporter->devlink->lock); + kfree(reporter); +} +EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy); + static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, @@ -4670,6 +4761,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) INIT_LIST_HEAD(&devlink->resource_list); INIT_LIST_HEAD(&devlink->param_list); INIT_LIST_HEAD(&devlink->region_list); + INIT_LIST_HEAD(&devlink->reporter_list); mutex_init(&devlink->lock); return devlink; } From c8e1da0bf9238e460c73ebc5a07ce93697e5e207 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 7 Feb 2019 11:36:34 +0200 Subject: [PATCH 0771/1436] devlink: Add health report functionality Upon error discover, every driver can report it to the devlink health mechanism via devlink_health_report function, using the appropriate reporter registered to it. Driver can pass error specific context which will be delivered to it as part of the dump / recovery callbacks. Once an error is reported, devlink health will do the following actions: * A log is being send to the kernel trace events buffer * Health status and statistics are being updated for the reporter instance * Object dump is being taken and stored at the reporter instance (as long as there is no other dump which is already stored) * Auto recovery attempt is being done. Depends on: - Auto Recovery configuration - Grace period vs. Time since last recover Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 9 +++ include/trace/events/devlink.h | 65 ++++++++++++++++++ net/core/devlink.c | 119 +++++++++++++++++++++++++++++++++ 3 files changed, 193 insertions(+) diff --git a/include/net/devlink.h b/include/net/devlink.h index 3dfe30235878..c12ad6e9095d 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -704,6 +704,8 @@ devlink_health_reporter_destroy(struct devlink_health_reporter *reporter); void * devlink_health_reporter_priv(struct devlink_health_reporter *reporter); +int devlink_health_report(struct devlink_health_reporter *reporter, + const char *msg, void *priv_ctx); #else @@ -1173,6 +1175,13 @@ devlink_health_reporter_priv(struct devlink_health_reporter *reporter) { return NULL; } + +static inline int +devlink_health_report(struct devlink_health_reporter *reporter, + const char *msg, void *priv_ctx) +{ + return 0; +} #endif #if IS_REACHABLE(CONFIG_NET_DEVLINK) diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h index 40705364a50f..191ddf67d769 100644 --- a/include/trace/events/devlink.h +++ b/include/trace/events/devlink.h @@ -75,6 +75,71 @@ TRACE_EVENT(devlink_hwerr, __get_str(driver_name), __entry->err, __get_str(msg)) ); +/* + * Tracepoint for devlink health message: + */ +TRACE_EVENT(devlink_health_report, + TP_PROTO(const struct devlink *devlink, const char *reporter_name, + const char *msg), + + TP_ARGS(devlink, reporter_name, msg), + + TP_STRUCT__entry( + __string(bus_name, devlink->dev->bus->name) + __string(dev_name, dev_name(devlink->dev)) + __string(driver_name, devlink->dev->driver->name) + __string(reporter_name, msg) + __string(msg, msg) + ), + + TP_fast_assign( + __assign_str(bus_name, devlink->dev->bus->name); + __assign_str(dev_name, dev_name(devlink->dev)); + __assign_str(driver_name, devlink->dev->driver->name); + __assign_str(reporter_name, reporter_name); + __assign_str(msg, msg); + ), + + TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: %s", + __get_str(bus_name), __get_str(dev_name), + __get_str(driver_name), __get_str(reporter_name), + __get_str(msg)) +); + +/* + * Tracepoint for devlink health recover aborted message: + */ +TRACE_EVENT(devlink_health_recover_aborted, + TP_PROTO(const struct devlink *devlink, const char *reporter_name, + bool health_state, u64 time_since_last_recover), + + TP_ARGS(devlink, reporter_name, health_state, time_since_last_recover), + + TP_STRUCT__entry( + __string(bus_name, devlink->dev->bus->name) + __string(dev_name, dev_name(devlink->dev)) + __string(driver_name, devlink->dev->driver->name) + __string(reporter_name, reporter_name) + __field(bool, health_state) + __field(u64, time_since_last_recover) + ), + + TP_fast_assign( + __assign_str(bus_name, devlink->dev->bus->name); + __assign_str(dev_name, dev_name(devlink->dev)); + __assign_str(driver_name, devlink->dev->driver->name); + __assign_str(reporter_name, reporter_name); + __entry->health_state = health_state; + __entry->time_since_last_recover = time_since_last_recover; + ), + + TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: health_state=%d time_since_last_recover=%llu recover aborted", + __get_str(bus_name), __get_str(dev_name), + __get_str(driver_name), __get_str(reporter_name), + __entry->health_state, + __entry->time_since_last_recover) +); + #endif /* _TRACE_DEVLINK_H */ /* This part must be outside protection */ diff --git a/net/core/devlink.c b/net/core/devlink.c index 341548d7f1f1..3eaa290831aa 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4367,9 +4367,20 @@ struct devlink_health_reporter { void *priv; const struct devlink_health_reporter_ops *ops; struct devlink *devlink; + struct devlink_fmsg *dump_fmsg; + struct mutex dump_lock; /* lock parallel read/write from dump buffers */ u64 graceful_period; bool auto_recover; u8 health_state; + u64 dump_ts; + u64 error_count; + u64 recovery_count; + u64 last_recovery_ts; +}; + +enum devlink_health_reporter_state { + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY, + DEVLINK_HEALTH_REPORTER_STATE_ERROR, }; void * @@ -4431,6 +4442,7 @@ devlink_health_reporter_create(struct devlink *devlink, reporter->devlink = devlink; reporter->graceful_period = graceful_period; reporter->auto_recover = auto_recover; + mutex_init(&reporter->dump_lock); list_add_tail(&reporter->list, &devlink->reporter_list); unlock: mutex_unlock(&devlink->lock); @@ -4449,10 +4461,117 @@ devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) mutex_lock(&reporter->devlink->lock); list_del(&reporter->list); mutex_unlock(&reporter->devlink->lock); + if (reporter->dump_fmsg) + devlink_fmsg_free(reporter->dump_fmsg); kfree(reporter); } EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy); +static int +devlink_health_reporter_recover(struct devlink_health_reporter *reporter, + void *priv_ctx) +{ + int err; + + if (!reporter->ops->recover) + return -EOPNOTSUPP; + + err = reporter->ops->recover(reporter, priv_ctx); + if (err) + return err; + + reporter->recovery_count++; + reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY; + reporter->last_recovery_ts = jiffies; + + return 0; +} + +static void +devlink_health_dump_clear(struct devlink_health_reporter *reporter) +{ + if (!reporter->dump_fmsg) + return; + devlink_fmsg_free(reporter->dump_fmsg); + reporter->dump_fmsg = NULL; +} + +static int devlink_health_do_dump(struct devlink_health_reporter *reporter, + void *priv_ctx) +{ + int err; + + if (!reporter->ops->dump) + return 0; + + if (reporter->dump_fmsg) + return 0; + + reporter->dump_fmsg = devlink_fmsg_alloc(); + if (!reporter->dump_fmsg) { + err = -ENOMEM; + return err; + } + + err = devlink_fmsg_obj_nest_start(reporter->dump_fmsg); + if (err) + goto dump_err; + + err = reporter->ops->dump(reporter, reporter->dump_fmsg, + priv_ctx); + if (err) + goto dump_err; + + err = devlink_fmsg_obj_nest_end(reporter->dump_fmsg); + if (err) + goto dump_err; + + reporter->dump_ts = jiffies; + + return 0; + +dump_err: + devlink_health_dump_clear(reporter); + return err; +} + +int devlink_health_report(struct devlink_health_reporter *reporter, + const char *msg, void *priv_ctx) +{ + struct devlink *devlink = reporter->devlink; + + /* write a log message of the current error */ + WARN_ON(!msg); + trace_devlink_health_report(devlink, reporter->ops->name, msg); + reporter->error_count++; + + /* abort if the previous error wasn't recovered */ + if (reporter->auto_recover && + (reporter->health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY || + jiffies - reporter->last_recovery_ts < + msecs_to_jiffies(reporter->graceful_period))) { + trace_devlink_health_recover_aborted(devlink, + reporter->ops->name, + reporter->health_state, + jiffies - + reporter->last_recovery_ts); + return -ECANCELED; + } + + reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; + + mutex_lock(&reporter->dump_lock); + /* store current dump of current error, for later analysis */ + devlink_health_do_dump(reporter, priv_ctx); + mutex_unlock(&reporter->dump_lock); + + if (reporter->auto_recover) + return devlink_health_reporter_recover(reporter, priv_ctx); + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_health_report); + static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, From 7afe335a8bede4e2839b0e0fa36ef629fe4a0206 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 7 Feb 2019 11:36:35 +0200 Subject: [PATCH 0772/1436] devlink: Add health get command Add devlink health get command to provide reporter/s data for user space. Add the ability to get data per reporter or dump data from all available reporters. Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 11 +++ net/core/devlink.c | 149 +++++++++++++++++++++++++++++++++++ 2 files changed, 160 insertions(+) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 076692209a9b..d8f20d6ce139 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -96,6 +96,8 @@ enum devlink_command { DEVLINK_CMD_INFO_GET, /* can dump */ + DEVLINK_CMD_HEALTH_REPORTER_GET, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -310,6 +312,15 @@ enum devlink_attr { DEVLINK_ATTR_FMSG_OBJ_NAME, /* string */ DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE, /* u8 */ DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA, /* dynamic */ + + DEVLINK_ATTR_HEALTH_REPORTER, /* nested */ + DEVLINK_ATTR_HEALTH_REPORTER_NAME, /* string */ + DEVLINK_ATTR_HEALTH_REPORTER_STATE, /* u8 */ + DEVLINK_ATTR_HEALTH_REPORTER_ERR, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_RECOVER, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, /* u8 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 3eaa290831aa..86f7c0e5d4bc 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4572,6 +4572,146 @@ int devlink_health_report(struct devlink_health_reporter *reporter, } EXPORT_SYMBOL_GPL(devlink_health_report); +static struct devlink_health_reporter * +devlink_health_reporter_get_from_info(struct devlink *devlink, + struct genl_info *info) +{ + char *reporter_name; + + if (!info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]) + return NULL; + + reporter_name = + nla_data(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]); + return devlink_health_reporter_find_by_name(devlink, reporter_name); +} + +static int +devlink_nl_health_reporter_fill(struct sk_buff *msg, + struct devlink *devlink, + struct devlink_health_reporter *reporter, + enum devlink_command cmd, u32 portid, + u32 seq, int flags) +{ + struct nlattr *reporter_attr; + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto genlmsg_cancel; + + reporter_attr = nla_nest_start(msg, DEVLINK_ATTR_HEALTH_REPORTER); + if (!reporter_attr) + goto genlmsg_cancel; + if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME, + reporter->ops->name)) + goto reporter_nest_cancel; + if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE, + reporter->health_state)) + goto reporter_nest_cancel; + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR, + reporter->error_count, DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER, + reporter->recovery_count, DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, + reporter->graceful_period, + DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, + reporter->auto_recover)) + goto reporter_nest_cancel; + if (reporter->dump_fmsg && + nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, + jiffies_to_msecs(reporter->dump_ts), + DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + + nla_nest_end(msg, reporter_attr); + genlmsg_end(msg, hdr); + return 0; + +reporter_nest_cancel: + nla_nest_end(msg, reporter_attr); +genlmsg_cancel: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + struct sk_buff *msg; + int err; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_health_reporter_fill(msg, devlink, reporter, + DEVLINK_CMD_HEALTH_REPORTER_GET, + info->snd_portid, info->snd_seq, + 0); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int +devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink_health_reporter *reporter; + struct devlink *devlink; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + continue; + mutex_lock(&devlink->lock); + list_for_each_entry(reporter, &devlink->reporter_list, + list) { + if (idx < start) { + idx++; + continue; + } + err = devlink_nl_health_reporter_fill(msg, devlink, + reporter, + DEVLINK_CMD_HEALTH_REPORTER_GET, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) { + mutex_unlock(&devlink->lock); + goto out; + } + idx++; + } + mutex_unlock(&devlink->lock); + } +out: + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, @@ -4597,6 +4737,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 }, [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 }, + [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -4840,6 +4981,14 @@ static const struct genl_ops devlink_nl_ops[] = { .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET, + .doit = devlink_nl_cmd_health_reporter_get_doit, + .dumpit = devlink_nl_cmd_health_reporter_get_dumpit, + .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ + }, }; static struct genl_family devlink_nl_family __ro_after_init = { From a1e55ec0a0c6969cb7e9d9080a84041bb7b2b6e6 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 7 Feb 2019 11:36:36 +0200 Subject: [PATCH 0773/1436] devlink: Add health set command Add devlink health set command, in order to set configuration parameters for a specific reporter. Supported parameters are: - graceful_period: Time interval between auto recoveries (in msec) - auto_recover: Determines if the devlink shall execute recover upon receiving error for the reporter Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 1 + net/core/devlink.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index d8f20d6ce139..b03065a99884 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -97,6 +97,7 @@ enum devlink_command { DEVLINK_CMD_INFO_GET, /* can dump */ DEVLINK_CMD_HEALTH_REPORTER_GET, + DEVLINK_CMD_HEALTH_REPORTER_SET, /* add new commands above here */ __DEVLINK_CMD_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 86f7c0e5d4bc..0b231fb76e59 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4712,6 +4712,33 @@ out: return msg->len; } +static int +devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + if (!reporter->ops->recover && + (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] || + info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])) + return -EOPNOTSUPP; + + if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]) + reporter->graceful_period = + nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]); + + if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]) + reporter->auto_recover = + nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]); + + return 0; +} + static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, @@ -4738,6 +4765,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 }, + [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -4989,6 +5018,13 @@ static const struct genl_ops devlink_nl_ops[] = { .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET, + .doit = devlink_nl_cmd_health_reporter_set_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, }; static struct genl_family devlink_nl_family __ro_after_init = { From 20a0943a5b237f7d59dc581e9e3637f5c87f1fde Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 7 Feb 2019 11:36:37 +0200 Subject: [PATCH 0774/1436] devlink: Add health recover command Add devlink health recover command to the uapi, in order to allow the user to execute a recover operation over a specific reporter. Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 1 + net/core/devlink.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index b03065a99884..a3a97e6edad8 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -98,6 +98,7 @@ enum devlink_command { DEVLINK_CMD_HEALTH_REPORTER_GET, DEVLINK_CMD_HEALTH_REPORTER_SET, + DEVLINK_CMD_HEALTH_REPORTER_RECOVER, /* add new commands above here */ __DEVLINK_CMD_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 0b231fb76e59..0e6b0e034863 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4739,6 +4739,19 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, return 0; } +static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + return devlink_health_reporter_recover(reporter, NULL); +} + static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, @@ -5025,6 +5038,13 @@ static const struct genl_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER, + .doit = devlink_nl_cmd_health_reporter_recover_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, }; static struct genl_family devlink_nl_family __ro_after_init = { From fca42a2794e31379855c7d687055da43a6e05eef Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 7 Feb 2019 11:36:38 +0200 Subject: [PATCH 0775/1436] devlink: Add health diagnose command Add devlink health diagnose command, in order to run a diagnose operation over a specific reporter. It is expected from driver's callback for diagnose command to fill it via the devlink fmsg API. Devlink will parse it and convert it to netlink nla API in order to pass it to the user. Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 1 + net/core/devlink.c | 46 ++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index a3a97e6edad8..09be37137841 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -99,6 +99,7 @@ enum devlink_command { DEVLINK_CMD_HEALTH_REPORTER_GET, DEVLINK_CMD_HEALTH_REPORTER_SET, DEVLINK_CMD_HEALTH_REPORTER_RECOVER, + DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, /* add new commands above here */ __DEVLINK_CMD_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 0e6b0e034863..1e8613db2bf7 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4752,6 +4752,45 @@ static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, return devlink_health_reporter_recover(reporter, NULL); } +static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + struct devlink_fmsg *fmsg; + int err; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + if (!reporter->ops->diagnose) + return -EOPNOTSUPP; + + fmsg = devlink_fmsg_alloc(); + if (!fmsg) + return -ENOMEM; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + goto out; + + err = reporter->ops->diagnose(reporter, fmsg); + if (err) + goto out; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + goto out; + + err = devlink_fmsg_snd(fmsg, info, + DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 0); + +out: + devlink_fmsg_free(fmsg); + return err; +} + static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, @@ -5045,6 +5084,13 @@ static const struct genl_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, + .doit = devlink_nl_cmd_health_reporter_diagnose_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, }; static struct genl_family devlink_nl_family __ro_after_init = { From 35455e23e6f3cffe20e2b948e57597a8dc240b1e Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 7 Feb 2019 11:36:39 +0200 Subject: [PATCH 0776/1436] devlink: Add health dump {get,clear} commands Add devlink health dump commands, in order to run an dump operation over a specific reporter. The supported operations are dump_get in order to get last saved dump (if not exist, dump now) and dump_clear to clear last saved dump. It is expected from driver's callback for diagnose command to fill it via the devlink fmsg API. Devlink will parse it and convert it to netlink nla API in order to pass it to the user. Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 2 ++ net/core/devlink.c | 63 ++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 09be37137841..72d9f7c89190 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -100,6 +100,8 @@ enum devlink_command { DEVLINK_CMD_HEALTH_REPORTER_SET, DEVLINK_CMD_HEALTH_REPORTER_RECOVER, DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, + DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, + DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, /* add new commands above here */ __DEVLINK_CMD_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 1e8613db2bf7..7fbdba547d4f 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4791,6 +4791,53 @@ out: return err; } +static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + int err; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + if (!reporter->ops->dump) + return -EOPNOTSUPP; + + mutex_lock(&reporter->dump_lock); + err = devlink_health_do_dump(reporter, NULL); + if (err) + goto out; + + err = devlink_fmsg_snd(reporter->dump_fmsg, info, + DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, 0); + +out: + mutex_unlock(&reporter->dump_lock); + return err; +} + +static int +devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + if (!reporter->ops->dump) + return -EOPNOTSUPP; + + mutex_lock(&reporter->dump_lock); + devlink_health_dump_clear(reporter); + mutex_unlock(&reporter->dump_lock); + return 0; +} + static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, @@ -5091,6 +5138,22 @@ static const struct genl_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, + .doit = devlink_nl_cmd_health_reporter_dump_get_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, + .doit = devlink_nl_cmd_health_reporter_dump_clear_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, + }, }; static struct genl_family devlink_nl_family __ro_after_init = { From de8650a820715ea619b687b4a315794593be1d59 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 7 Feb 2019 11:36:40 +0200 Subject: [PATCH 0777/1436] net/mlx5e: Add tx reporter support Add mlx5e tx reporter to devlink health reporters. This reporter will be responsible for diagnosing, reporting and recovering of tx errors. This patch declares the TX reporter operations and creates it using the devlink health API. Currently, this reporter supports reporting and recovering from send error CQE only. In addition, it adds diagnose information for the open SQs. For a local SQ recover (due to driver error report), in case of SQ recover failure, the recover operation will be considered as a failure. For a full tx recover, an attempt to close and open the channels will be done. If this one passed successfully, it will be considered as a successful recover. The SQ recover from error CQE flow is not a new feature in the driver, this patch re-organize the functions and adapt them for the devlink health API. For this purpose, move code from en_main.c to a new file named reporter_tx.c. Diagnose output: $devlink health diagnose pci/0000:00:09.0 reporter tx -j -p { "SQs": [ { "sqn": 138, "HW state": 1, "stopped": false },{ "sqn": 142, "HW state": 1, "stopped": false } ] } $devlink health diagnose pci/0000:00:09.0 reporter tx SQs: sqn: 138 HW state: 1 stopped: false sqn: 142 HW state: 1 stopped: false Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Acked-by: Saeed Mahameed Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 18 +- .../ethernet/mellanox/mlx5/core/en/reporter.h | 14 + .../mellanox/mlx5/core/en/reporter_tx.c | 259 ++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 136 +-------- .../net/ethernet/mellanox/mlx5/core/en_tx.c | 5 +- 6 files changed, 306 insertions(+), 128 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9de9abacf7f6..6bb2a860b15b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -22,7 +22,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ # mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ - en_selftest.o en/port.o en/monitor_stats.o + en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o # # Netdev extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 6dd74ef69389..66e510b16243 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -387,10 +387,7 @@ struct mlx5e_txqsq { struct mlx5e_channel *channel; int txq_ix; u32 rate_limit; - struct mlx5e_txqsq_recover { - struct work_struct recover_work; - u64 last_recover; - } recover; + struct work_struct recover_work; } ____cacheline_aligned_in_smp; struct mlx5e_dma_info { @@ -683,6 +680,13 @@ struct mlx5e_rss_params { u8 hfunc; }; +struct mlx5e_modify_sq_param { + int curr_state; + int next_state; + int rl_update; + int rl_index; +}; + struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; @@ -738,6 +742,7 @@ struct mlx5e_priv { #ifdef CONFIG_MLX5_EN_TLS struct mlx5e_tls *tls; #endif + struct devlink_health_reporter *tx_reporter; }; struct mlx5e_profile { @@ -868,6 +873,11 @@ void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, + struct mlx5e_modify_sq_param *p); +void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq); +void mlx5e_tx_disable_queue(struct netdev_queue *txq); + static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) { return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h new file mode 100644 index 000000000000..1c90059db5f8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5E_EN_REPORTER_H +#define __MLX5E_EN_REPORTER_H + +#include +#include "en.h" + +int mlx5e_tx_reporter_create(struct mlx5e_priv *priv); +void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv); +void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c new file mode 100644 index 000000000000..35568e4820de --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -0,0 +1,259 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include +#include "reporter.h" +#include "lib/eq.h" + +#define MLX5E_TX_REPORTER_PER_SQ_MAX_LEN 256 + +struct mlx5e_tx_err_ctx { + int (*recover)(struct mlx5e_txqsq *sq); + struct mlx5e_txqsq *sq; +}; + +static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) +{ + unsigned long exp_time = jiffies + msecs_to_jiffies(2000); + + while (time_before(jiffies, exp_time)) { + if (sq->cc == sq->pc) + return 0; + + msleep(20); + } + + netdev_err(sq->channel->netdev, + "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", + sq->sqn, sq->cc, sq->pc); + + return -ETIMEDOUT; +} + +static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) +{ + WARN_ONCE(sq->cc != sq->pc, + "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", + sq->sqn, sq->cc, sq->pc); + sq->cc = 0; + sq->dma_fifo_cc = 0; + sq->pc = 0; +} + +static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state) +{ + struct mlx5_core_dev *mdev = sq->channel->mdev; + struct net_device *dev = sq->channel->netdev; + struct mlx5e_modify_sq_param msp = {0}; + int err; + + msp.curr_state = curr_state; + msp.next_state = MLX5_SQC_STATE_RST; + + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn); + return err; + } + + memset(&msp, 0, sizeof(msp)); + msp.curr_state = MLX5_SQC_STATE_RST; + msp.next_state = MLX5_SQC_STATE_RDY; + + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn); + return err; + } + + return 0; +} + +static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq) +{ + struct mlx5_core_dev *mdev = sq->channel->mdev; + struct net_device *dev = sq->channel->netdev; + u8 state; + int err; + + if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) + return 0; + + err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); + if (err) { + netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", + sq->sqn, err); + return err; + } + + if (state != MLX5_SQC_STATE_ERR) { + netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn); + return -EINVAL; + } + + mlx5e_tx_disable_queue(sq->txq); + + err = mlx5e_wait_for_sq_flush(sq); + if (err) + return err; + + /* At this point, no new packets will arrive from the stack as TXQ is + * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all + * pending WQEs. SQ can safely reset the SQ. + */ + + err = mlx5e_sq_to_ready(sq, state); + if (err) + return err; + + mlx5e_reset_txqsq_cc_pc(sq); + sq->stats->recover++; + mlx5e_activate_txqsq(sq); + + return 0; +} + +void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq) +{ + char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN]; + struct mlx5e_tx_err_ctx err_ctx = {0}; + + err_ctx.sq = sq; + err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; + sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn); + + devlink_health_report(sq->channel->priv->tx_reporter, err_str, + &err_ctx); +} + +/* state lock cannot be grabbed within this function. + * It can cause a dead lock or a read-after-free. + */ +int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx) +{ + return err_ctx->recover(err_ctx->sq); +} + +static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv) +{ + int err; + + rtnl_lock(); + mutex_lock(&priv->state_lock); + mlx5e_close_locked(priv->netdev); + err = mlx5e_open_locked(priv->netdev); + mutex_unlock(&priv->state_lock); + rtnl_unlock(); + + return err; +} + +static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter, + void *context) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_tx_err_ctx *err_ctx = context; + + return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) : + mlx5e_tx_reporter_recover_all(priv); +} + +static int +mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, + u32 sqn, u8 state, bool stopped) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sqn); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state); + if (err) + return err; + + err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + int i, err = 0; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); + if (err) + goto unlock; + + for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; + i++) { + struct mlx5e_txqsq *sq = priv->txq2sq[i]; + u8 state; + + err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); + if (err) + break; + + err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq->sqn, + state, + netif_xmit_stopped(sq->txq)); + if (err) + break; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + goto unlock; + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { + .name = "tx", + .recover = mlx5e_tx_reporter_recover, + .diagnose = mlx5e_tx_reporter_diagnose, +}; + +#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 + +int mlx5e_tx_reporter_create(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct devlink *devlink = priv_to_devlink(mdev); + + priv->tx_reporter = + devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops, + MLX5_REPORTER_TX_GRACEFUL_PERIOD, + true, priv); + if (IS_ERR_OR_NULL(priv->tx_reporter)) + netdev_warn(priv->netdev, + "Failed to create tx reporter, err = %ld\n", + PTR_ERR(priv->tx_reporter)); + return PTR_ERR_OR_ZERO(priv->tx_reporter); +} + +void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv) +{ + if (IS_ERR_OR_NULL(priv->tx_reporter)) + return; + + devlink_health_reporter_destroy(priv->tx_reporter); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 099d307e6f25..93e1f037b019 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -51,6 +51,7 @@ #include "en/xdp.h" #include "lib/eq.h" #include "en/monitor_stats.h" +#include "en/reporter.h" struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; @@ -1159,7 +1160,7 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) return 0; } -static void mlx5e_sq_recover(struct work_struct *work); +static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work); static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, int txq_ix, struct mlx5e_params *params, @@ -1181,7 +1182,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; - INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover); + INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); if (mlx5_accel_is_tls_device(c->priv->mdev)) @@ -1269,15 +1270,8 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev, return err; } -struct mlx5e_modify_sq_param { - int curr_state; - int next_state; - bool rl_update; - int rl_index; -}; - -static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, - struct mlx5e_modify_sq_param *p) +int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, + struct mlx5e_modify_sq_param *p) { void *in; void *sqc; @@ -1375,17 +1369,7 @@ err_free_txqsq: return err; } -static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) -{ - WARN_ONCE(sq->cc != sq->pc, - "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", - sq->sqn, sq->cc, sq->pc); - sq->cc = 0; - sq->dma_fifo_cc = 0; - sq->pc = 0; -} - -static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) +void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) { sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix); clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); @@ -1394,7 +1378,7 @@ static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) netif_tx_start_queue(sq->txq); } -static inline void netif_tx_disable_queue(struct netdev_queue *txq) +void mlx5e_tx_disable_queue(struct netdev_queue *txq) { __netif_tx_lock_bh(txq); netif_tx_stop_queue(txq); @@ -1410,7 +1394,7 @@ static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) /* prevent netif_tx_wake_queue */ napi_synchronize(&c->napi); - netif_tx_disable_queue(sq->txq); + mlx5e_tx_disable_queue(sq->txq); /* last doorbell out, godspeed .. */ if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) { @@ -1430,6 +1414,7 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) struct mlx5_rate_limit rl = {0}; cancel_work_sync(&sq->dim.work); + cancel_work_sync(&sq->recover_work); mlx5e_destroy_sq(mdev, sq->sqn); if (sq->rate_limit) { rl.rate = sq->rate_limit; @@ -1439,105 +1424,12 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) mlx5e_free_txqsq(sq); } -static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) +static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) { - unsigned long exp_time = jiffies + msecs_to_jiffies(2000); + struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq, + recover_work); - while (time_before(jiffies, exp_time)) { - if (sq->cc == sq->pc) - return 0; - - msleep(20); - } - - netdev_err(sq->channel->netdev, - "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", - sq->sqn, sq->cc, sq->pc); - - return -ETIMEDOUT; -} - -static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state) -{ - struct mlx5_core_dev *mdev = sq->channel->mdev; - struct net_device *dev = sq->channel->netdev; - struct mlx5e_modify_sq_param msp = {0}; - int err; - - msp.curr_state = curr_state; - msp.next_state = MLX5_SQC_STATE_RST; - - err = mlx5e_modify_sq(mdev, sq->sqn, &msp); - if (err) { - netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn); - return err; - } - - memset(&msp, 0, sizeof(msp)); - msp.curr_state = MLX5_SQC_STATE_RST; - msp.next_state = MLX5_SQC_STATE_RDY; - - err = mlx5e_modify_sq(mdev, sq->sqn, &msp); - if (err) { - netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn); - return err; - } - - return 0; -} - -static void mlx5e_sq_recover(struct work_struct *work) -{ - struct mlx5e_txqsq_recover *recover = - container_of(work, struct mlx5e_txqsq_recover, - recover_work); - struct mlx5e_txqsq *sq = container_of(recover, struct mlx5e_txqsq, - recover); - struct mlx5_core_dev *mdev = sq->channel->mdev; - struct net_device *dev = sq->channel->netdev; - u8 state; - int err; - - err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); - if (err) { - netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", - sq->sqn, err); - return; - } - - if (state != MLX5_RQC_STATE_ERR) { - netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn); - return; - } - - netif_tx_disable_queue(sq->txq); - - if (mlx5e_wait_for_sq_flush(sq)) - return; - - /* If the interval between two consecutive recovers per SQ is too - * short, don't recover to avoid infinite loop of ERR_CQE -> recover. - * If we reached this state, there is probably a bug that needs to be - * fixed. let's keep the queue close and let tx timeout cleanup. - */ - if (jiffies_to_msecs(jiffies - recover->last_recover) < - MLX5E_SQ_RECOVER_MIN_INTERVAL) { - netdev_err(dev, "Recover SQ 0x%x canceled, too many error CQEs\n", - sq->sqn); - return; - } - - /* At this point, no new packets will arrive from the stack as TXQ is - * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all - * pending WQEs. SQ can safely reset the SQ. - */ - if (mlx5e_sq_to_ready(sq, state)) - return; - - mlx5e_reset_txqsq_cc_pc(sq); - sq->stats->recover++; - recover->last_recover = jiffies; - mlx5e_activate_txqsq(sq); + mlx5e_tx_reporter_err_cqe(sq); } static int mlx5e_open_icosq(struct mlx5e_channel *c, @@ -3236,6 +3128,7 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) { int tc; + mlx5e_tx_reporter_destroy(priv); for (tc = 0; tc < priv->profile->max_tc; tc++) mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); } @@ -4953,6 +4846,7 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_initialize(priv); #endif + mlx5e_tx_reporter_create(priv); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 598ad7e4d5c9..189211295e48 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -513,8 +513,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) &sq->state)) { mlx5e_dump_error_cqe(sq, (struct mlx5_err_cqe *)cqe); - queue_work(cq->channel->priv->wq, - &sq->recover.recover_work); + if (!IS_ERR_OR_NULL(cq->channel->priv->tx_reporter)) + queue_work(cq->channel->priv->wq, + &sq->recover_work); } stats->cqe_err++; } From 7d91126b1aea9eea7fdc9f5cd9e315002f33768d Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 7 Feb 2019 11:36:41 +0200 Subject: [PATCH 0778/1436] net/mlx5e: Add tx timeout support for mlx5e tx reporter With this patch, ndo_tx_timeout callback will be redirected to the tx reporter in order to detect a tx timeout error and report it to the devlink health. (The watchdog detects tx timeouts, but the driver verify the issue still exists before launching any recover method). In addition, recover from tx timeout in case of lost interrupt was added to the tx reporter recover method. The tx timeout recover from lost interrupt is not a new feature in the driver, this patch re-organize the functionality and move it to the tx reporter recovery flow. tx timeout example: (with auto_recover set to false, if set to true, the manual recover and diagnose sections are irrelevant) $cat /sys/kernel/debug/tracing/trace ... devlink_health_report: bus_name=pci dev_name=0000:00:09.0 driver_name=mlx5_core reporter_name=tx: TX timeout on queue: 0, SQ: 0x8a, CQ: 0x35, SQ Cons: 0x2 SQ Prod: 0x2, usecs since last trans: 14912000 $devlink health show pci/0000:00:09.0: name tx state healthy #err 1 #recover 0 last_dump_ts N/A parameters: grace_period 500 auto_recover false $devlink health diagnose pci/0000:00:09.0 reporter tx -j -p { "SQs": [ { "sqn": 138, "HW state": 1, "stopped": true },{ "sqn": 142, "HW state": 1, "stopped": false } ] } $devlink health diagnose pci/0000:00:09.0 reporter tx SQs: sqn: 138 HW state: 1 stopped: true sqn: 142 HW state: 1 stopped: false $devlink health recover pci/0000:00:09 reporter tx $devlink health show pci/0000:00:09.0: name tx state healthy #err 1 #recover 1 last_dump_ts N/A parameters: grace_period 500 auto_recover false Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Acked-by: Saeed Mahameed Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlx5/core/en/reporter.h | 1 + .../mellanox/mlx5/core/en/reporter_tx.c | 38 +++++++++++++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 53 ++++++------------- 3 files changed, 55 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h index 1c90059db5f8..e78e92753d73 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h @@ -10,5 +10,6 @@ int mlx5e_tx_reporter_create(struct mlx5e_priv *priv); void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv); void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq); +int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 35568e4820de..0aebfb377cf0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -126,6 +126,44 @@ void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq) &err_ctx); } +static int mlx5e_tx_reporter_timeout_recover(struct mlx5e_txqsq *sq) +{ + struct mlx5_eq_comp *eq = sq->cq.mcq.eq; + u32 eqe_count; + int ret; + + netdev_err(sq->channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", + eq->core.eqn, eq->core.cons_index, eq->core.irqn); + + eqe_count = mlx5_eq_poll_irq_disabled(eq); + ret = eqe_count ? true : false; + if (!eqe_count) { + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + return ret; + } + + netdev_err(sq->channel->netdev, "Recover %d eqes on EQ 0x%x\n", + eqe_count, eq->core.eqn); + sq->channel->stats->eq_rearm++; + return ret; +} + +int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq) +{ + char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN]; + struct mlx5e_tx_err_ctx err_ctx; + + err_ctx.sq = sq; + err_ctx.recover = mlx5e_tx_reporter_timeout_recover; + sprintf(err_str, + "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", + sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, + jiffies_to_usecs(jiffies - sq->txq->trans_start)); + + return devlink_health_report(sq->channel->priv->tx_reporter, err_str, + &err_ctx); +} + /* state lock cannot be grabbed within this function. * It can cause a dead lock or a read-after-free. */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 93e1f037b019..d81ebba7c04e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4116,31 +4116,13 @@ netdev_features_t mlx5e_features_check(struct sk_buff *skb, return features; } -static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev, - struct mlx5e_txqsq *sq) -{ - struct mlx5_eq_comp *eq = sq->cq.mcq.eq; - u32 eqe_count; - - netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", - eq->core.eqn, eq->core.cons_index, eq->core.irqn); - - eqe_count = mlx5_eq_poll_irq_disabled(eq); - if (!eqe_count) - return false; - - netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->core.eqn); - sq->channel->stats->eq_rearm++; - return true; -} - static void mlx5e_tx_timeout_work(struct work_struct *work) { struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, tx_timeout_work); - struct net_device *dev = priv->netdev; - bool reopen_channels = false; - int i, err; + bool report_failed = false; + int err; + int i; rtnl_lock(); mutex_lock(&priv->state_lock); @@ -4149,31 +4131,22 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) goto unlock; for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) { - struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i); + struct netdev_queue *dev_queue = + netdev_get_tx_queue(priv->netdev, i); struct mlx5e_txqsq *sq = priv->txq2sq[i]; if (!netif_xmit_stopped(dev_queue)) continue; - netdev_err(dev, - "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", - i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, - jiffies_to_usecs(jiffies - dev_queue->trans_start)); - - /* If we recover a lost interrupt, most likely TX timeout will - * be resolved, skip reopening channels - */ - if (!mlx5e_tx_timeout_eq_recover(dev, sq)) { - clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - reopen_channels = true; - } + if (mlx5e_tx_reporter_timeout(sq)) + report_failed = true; } - if (!reopen_channels) + if (!report_failed) goto unlock; - mlx5e_close_locked(dev); - err = mlx5e_open_locked(dev); + mlx5e_close_locked(priv->netdev); + err = mlx5e_open_locked(priv->netdev); if (err) netdev_err(priv->netdev, "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n", @@ -4189,6 +4162,12 @@ static void mlx5e_tx_timeout(struct net_device *dev) struct mlx5e_priv *priv = netdev_priv(dev); netdev_err(dev, "TX timeout detected\n"); + + if (IS_ERR_OR_NULL(priv->tx_reporter)) { + netdev_err_once(priv->netdev, "tx timeout will not be handled, no valid tx reporter\n"); + return; + } + queue_work(priv->wq, &priv->tx_timeout_work); } From db2ab7a08f06aa4dd784b87bafeb3169ee7f0d95 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 7 Feb 2019 11:36:42 +0200 Subject: [PATCH 0779/1436] devlink: Add Documentation/networking/devlink-health.txt This patch adds a new file to add information about devlink health mechanism. Signed-off-by: Aya Levin Signed-off-by: Eran Ben Elisha Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- Documentation/networking/devlink-health.txt | 86 +++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 Documentation/networking/devlink-health.txt diff --git a/Documentation/networking/devlink-health.txt b/Documentation/networking/devlink-health.txt new file mode 100644 index 000000000000..1db3fbea0831 --- /dev/null +++ b/Documentation/networking/devlink-health.txt @@ -0,0 +1,86 @@ +The health mechanism is targeted for Real Time Alerting, in order to know when +something bad had happened to a PCI device +- Provide alert debug information +- Self healing +- If problem needs vendor support, provide a way to gather all needed debugging + information. + +The main idea is to unify and centralize driver health reports in the +generic devlink instance and allow the user to set different +attributes of the health reporting and recovery procedures. + +The devlink health reporter: +Device driver creates a "health reporter" per each error/health type. +Error/Health type can be a known/generic (eg pci error, fw error, rx/tx error) +or unknown (driver specific). +For each registered health reporter a driver can issue error/health reports +asynchronously. All health reports handling is done by devlink. +Device driver can provide specific callbacks for each "health reporter", e.g. + - Recovery procedures + - Diagnostics and object dump procedures + - OOB initial parameters +Different parts of the driver can register different types of health reporters +with different handlers. + +Once an error is reported, devlink health will do the following actions: + * A log is being send to the kernel trace events buffer + * Health status and statistics are being updated for the reporter instance + * Object dump is being taken and saved at the reporter instance (as long as + there is no other dump which is already stored) + * Auto recovery attempt is being done. Depends on: + - Auto-recovery configuration + - Grace period vs. time passed since last recover + +The user interface: +User can access/change each reporter's parameters and driver specific callbacks +via devlink, e.g per error type (per health reporter) + - Configure reporter's generic parameters (like: disable/enable auto recovery) + - Invoke recovery procedure + - Run diagnostics + - Object dump + +The devlink health interface (via netlink): +DEVLINK_CMD_HEALTH_REPORTER_GET + Retrieves status and configuration info per DEV and reporter. +DEVLINK_CMD_HEALTH_REPORTER_SET + Allows reporter-related configuration setting. +DEVLINK_CMD_HEALTH_REPORTER_RECOVER + Triggers a reporter's recovery procedure. +DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE + Retrieves diagnostics data from a reporter on a device. +DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET + Retrieves the last stored dump. Devlink health + saves a single dump. If an dump is not already stored by the devlink + for this reporter, devlink generates a new dump. + dump output is defined by the reporter. +DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR + Clears the last saved dump file for the specified reporter. + + + netlink + +--------------------------+ + | | + | + | + | | | + +--------------------------+ + |request for ops + |(diagnose, + mlx5_core devlink |recover, + |dump) ++--------+ +--------------------------+ +| | | reporter| | +| | | +---------v----------+ | +| | ops execution | | | | +| <----------------------------------+ | | +| | | | | | +| | | + ^------------------+ | +| | | | request for ops | +| | | | (recover, dump) | +| | | | | +| | | +-+------------------+ | +| | health report | | health handler | | +| +-------------------------------> | | +| | | +--------------------+ | +| | health reporter create | | +| +----------------------------> | ++--------+ +--------------------------+ From c0a47e44c0980b3b23ee31fa7936d70ea5dce491 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 7 Feb 2019 18:36:10 +0800 Subject: [PATCH 0780/1436] geneve: should not call rt6_lookup() when ipv6 was disabled When we add a new GENEVE device with IPv6 remote, checking only for IS_ENABLED(CONFIG_IPV6) is not enough as we may disable IPv6 in the kernel command line (ipv6.disable=1), and calling rt6_lookup() would cause a NULL pointer dereference. v2: - don't mix declarations and code (reported by Stefano Brivio, Eric Dumazet) - there's no need to use in6_dev_get() as we only need to check that idev exists (reported by David Ahern). This is under RTNL, so we can simply use __in6_dev_get() instead (Stefano, Eric). Reported-by: Jianlin Shi Fixes: c40e89fd358e9 ("geneve: configure MTU based on a lower device") Cc: Alexey Kodanev Signed-off-by: Hangbin Liu Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller --- drivers/net/geneve.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 58bbba8582b0..3377ac66a347 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1512,9 +1512,13 @@ static void geneve_link_config(struct net_device *dev, } #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: { - struct rt6_info *rt = rt6_lookup(geneve->net, - &info->key.u.ipv6.dst, NULL, 0, - NULL, 0); + struct rt6_info *rt; + + if (!__in6_dev_get(dev)) + break; + + rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0, + NULL, 0); if (rt && rt->dst.dev) ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN; From 173656accaf583698bac3f9e269884ba60d51ef4 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 7 Feb 2019 18:36:11 +0800 Subject: [PATCH 0781/1436] sit: check if IPv6 enabled before calling ip6_err_gen_icmpv6_unreach() If we disabled IPv6 from the kernel command line (ipv6.disable=1), we should not call ip6_err_gen_icmpv6_unreach(). This: ip link add sit1 type sit local 192.0.2.1 remote 192.0.2.2 ttl 1 ip link set sit1 up ip addr add 198.51.100.1/24 dev sit1 ping 198.51.100.2 if IPv6 is disabled at boot time, will crash the kernel. v2: there's no need to use in6_dev_get(), use __in6_dev_get() instead, as we only need to check that idev exists and we are under rcu_read_lock() (from netif_receive_skb_internal()). Reported-by: Jianlin Shi Fixes: ca15a078bd90 ("sit: generate icmpv6 error when receiving icmpv4 error") Cc: Oussama Ghorbel Signed-off-by: Hangbin Liu Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller --- net/ipv6/sit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1e03305c0549..e8a1dabef803 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -546,7 +546,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info) } err = 0; - if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) + if (__in6_dev_get(skb->dev) && + !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) goto out; if (t->parms.iph.daddr == 0) From 2bc16b9f3223d049b57202ee702fcb5b9b507019 Mon Sep 17 00:00:00 2001 From: Manuel Reinhardt Date: Thu, 31 Jan 2019 15:32:35 +0100 Subject: [PATCH 0782/1436] ALSA: usb-audio: Fix implicit fb endpoint setup by quirk The commit a60945fd08e4 ("ALSA: usb-audio: move implicit fb quirks to separate function") introduced an error in the handling of quirks for implicit feedback endpoints. This commit fixes this. If a quirk successfully sets up an implicit feedback endpoint, usb-audio no longer tries to find the implicit fb endpoint itself. Fixes: a60945fd08e4 ("ALSA: usb-audio: move implicit fb quirks to separate function") Signed-off-by: Manuel Reinhardt Cc: Signed-off-by: Takashi Iwai --- sound/usb/pcm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 382847154227..db114f3977e0 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -314,6 +314,9 @@ static int search_roland_implicit_fb(struct usb_device *dev, int ifnum, return 0; } +/* Setup an implicit feedback endpoint from a quirk. Returns 0 if no quirk + * applies. Returns 1 if a quirk was found. + */ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, struct usb_device *dev, struct usb_interface_descriptor *altsd, @@ -384,7 +387,7 @@ add_sync_ep: subs->data_endpoint->sync_master = subs->sync_endpoint; - return 0; + return 1; } static int set_sync_endpoint(struct snd_usb_substream *subs, @@ -423,6 +426,10 @@ static int set_sync_endpoint(struct snd_usb_substream *subs, if (err < 0) return err; + /* endpoint set by quirk */ + if (err > 0) + return 0; + if (altsd->bNumEndpoints < 2) return 0; From fc4aa1ca1628d31d1e2770b96a3bd8e25eef118d Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 7 Feb 2019 12:18:02 +0000 Subject: [PATCH 0783/1436] net: vxlan: Free a leaked vetoed multicast rdst When an rdst is rejected by a driver, the current code removes it from the remote list, but neglects to free it. This is triggered by tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh and shows as the following kmemleak trace: unreferenced object 0xffff88817fa3d888 (size 96): comm "softirq", pid 0, jiffies 4372702718 (age 165.252s) hex dump (first 32 bytes): 02 00 00 00 c6 33 64 03 80 f5 a2 61 81 88 ff ff .....3d....a.... 06 df 71 ae ff ff ff ff 0c 00 00 00 04 d2 6a 6b ..q...........jk backtrace: [<00000000296b27ac>] kmem_cache_alloc_trace+0x1ae/0x370 [<0000000075c86dc6>] vxlan_fdb_append.part.12+0x62/0x3b0 [vxlan] [<00000000e0414b63>] vxlan_fdb_update+0xc61/0x1020 [vxlan] [<00000000f330c4bd>] vxlan_fdb_add+0x2e8/0x3d0 [vxlan] [<0000000008f81c2c>] rtnl_fdb_add+0x4c2/0xa10 [<00000000bdc4b270>] rtnetlink_rcv_msg+0x6dd/0x970 [<000000006701f2ce>] netlink_rcv_skb+0x290/0x410 [<00000000c08a5487>] rtnetlink_rcv+0x15/0x20 [<00000000d5f54b1e>] netlink_unicast+0x43f/0x5e0 [<00000000db4336bb>] netlink_sendmsg+0x789/0xcd0 [<00000000e1ee26b6>] sock_sendmsg+0xba/0x100 [<00000000ba409802>] ___sys_sendmsg+0x631/0x960 [<000000003c332113>] __sys_sendmsg+0xea/0x180 [<00000000f4139144>] __x64_sys_sendmsg+0x78/0xb0 [<000000006d1ddc59>] do_syscall_64+0x94/0x410 [<00000000c8defa9a>] entry_SYSCALL_64_after_hwframe+0x49/0xbe Move vxlan_dst_free() up and schedule a call thereof to plug this leak. Fixes: 61f46fe8c646 ("vxlan: Allow vetoing of FDB notifications") Signed-off-by: Petr Machata Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index ef45c3c925be..c0cd1c022e77 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -869,6 +869,14 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f, call_rcu(&f->rcu, vxlan_fdb_free); } +static void vxlan_dst_free(struct rcu_head *head) +{ + struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu); + + dst_cache_destroy(&rd->dst_cache); + kfree(rd); +} + static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan, union vxlan_addr *ip, __u16 state, __u16 flags, @@ -941,8 +949,10 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan, err_notify: if ((flags & NLM_F_REPLACE) && rc) *rd = oldrd; - else if ((flags & NLM_F_APPEND) && rc) + else if ((flags & NLM_F_APPEND) && rc) { list_del_rcu(&rd->list); + call_rcu(&rd->rcu, vxlan_dst_free); + } return err; } @@ -1013,14 +1023,6 @@ static int vxlan_fdb_update(struct vxlan_dev *vxlan, } } -static void vxlan_dst_free(struct rcu_head *head) -{ - struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu); - - dst_cache_destroy(&rd->dst_cache); - kfree(rd); -} - static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f, struct vxlan_rdst *rd, bool swdev_notify) { From 8a96669d77897ff3613157bf43f875739205d66d Mon Sep 17 00:00:00 2001 From: Yifeng Li Date: Wed, 6 Feb 2019 15:07:21 +0800 Subject: [PATCH 0784/1436] mips: loongson64: remove unreachable(), fix loongson_poweroff(). On my Yeeloong 8089, I noticed the machine fails to shutdown properly, and often, the function mach_prepare_reboot() is unexpectedly executed, thus the machine reboots instead. A wait loop is needed to ensure the system is in a well-defined state before going down. In commit 997e93d4df16 ("MIPS: Hang more efficiently on halt/powerdown/restart"), a general superset of the wait loop for all platforms is already provided, so we don't need to implement our own. This commit simply removes the unreachable() compiler marco after mach_prepare_reboot(), thus allowing the execution of machine_hang(). My test shows that the machine is now able to shutdown successfully. Please note that there are two different bugs preventing the machine from shutting down, another work-in-progress commit is needed to fix a lockup in cpufreq / i8259 driver, please read Reference, this commit does not fix that bug. Reference: https://lkml.org/lkml/2019/2/5/908 Signed-off-by: Yifeng Li Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Huacai Chen Cc: Ralf Baechle Cc: James Hogan Cc: linux-kernel@vger.kernel.org Cc: Aaro Koskinen Cc: stable@vger.kernel.org # v4.17+ --- arch/mips/loongson64/common/reset.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/mips/loongson64/common/reset.c b/arch/mips/loongson64/common/reset.c index a60715e11306..b26892ce871c 100644 --- a/arch/mips/loongson64/common/reset.c +++ b/arch/mips/loongson64/common/reset.c @@ -59,7 +59,12 @@ static void loongson_poweroff(void) { #ifndef CONFIG_LEFI_FIRMWARE_INTERFACE mach_prepare_shutdown(); - unreachable(); + + /* + * It needs a wait loop here, but mips/kernel/reset.c already calls + * a generic delay loop, machine_hang(), so simply return. + */ + return; #else void (*fw_poweroff)(void) = (void *)loongson_sysconf.poweroff_addr; From 05dc6001af0630e200ad5ea08707187fe5537e6d Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Wed, 6 Feb 2019 13:46:17 +0200 Subject: [PATCH 0785/1436] mips: cm: reprime error cause Accordingly to the documentation ---cut--- The GCR_ERROR_CAUSE.ERR_TYPE field and the GCR_ERROR_MULT.ERR_TYPE fields can be cleared by either a reset or by writing the current value of GCR_ERROR_CAUSE.ERR_TYPE to the GCR_ERROR_CAUSE.ERR_TYPE register. ---cut--- Do exactly this. Original value of cm_error may be safely written back; it clears error cause and keeps other bits untouched. Fixes: 3885c2b463f6 ("MIPS: CM: Add support for reporting CM cache errors") Signed-off-by: Vladimir Kondratiev Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v4.3+ --- arch/mips/kernel/mips-cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index 8f5bd04f320a..7f3f136572de 100644 --- a/arch/mips/kernel/mips-cm.c +++ b/arch/mips/kernel/mips-cm.c @@ -457,5 +457,5 @@ void mips_cm_error_report(void) } /* reprime cause register */ - write_gcr_error_cause(0); + write_gcr_error_cause(cm_error); } From 0803de78049fe1b0baf44bcddc727b036fb9139b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 7 Feb 2019 11:55:39 +0100 Subject: [PATCH 0786/1436] blktrace: Show requests without sector Currently, blktrace will not show requests that don't have any data as rq->__sector is initialized to -1 which is out of device range and thus discarded by act_log_check(). This is most notably the case for cache flush requests sent to the device. Fix the problem by making blk_rq_trace_sector() return 0 for requests without initialized sector. Reviewed-by: Johannes Thumshirn Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 8804753805ac..7bb2d8de9f30 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -116,7 +116,13 @@ extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes); static inline sector_t blk_rq_trace_sector(struct request *rq) { - return blk_rq_is_passthrough(rq) ? 0 : blk_rq_pos(rq); + /* + * Tracing should ignore starting sector for passthrough requests and + * requests where starting sector didn't get set. + */ + if (blk_rq_is_passthrough(rq) || blk_rq_pos(rq) == (sector_t)-1) + return 0; + return blk_rq_pos(rq); } static inline unsigned int blk_rq_trace_nr_sectors(struct request *rq) From 41c80be24be4eae27571ce524def9467c9705ce5 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Feb 2019 15:56:14 +0100 Subject: [PATCH 0787/1436] s390/net: move pnet constants There is no need to define these PNETID related constants in the pnet.h file, since they are just used locally within pnet.c. Just code cleanup, no functional change. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- arch/s390/include/asm/pnet.h | 8 -------- arch/s390/net/pnet.c | 8 ++++++++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/pnet.h b/arch/s390/include/asm/pnet.h index 6e278584f8f1..5739276b458d 100644 --- a/arch/s390/include/asm/pnet.h +++ b/arch/s390/include/asm/pnet.h @@ -11,13 +11,5 @@ #include #include -#define PNETIDS_LEN 64 /* Total utility string length in bytes - * to cover up to 4 PNETIDs of 16 bytes - * for up to 4 device ports - */ -#define MAX_PNETID_LEN 16 /* Max.length of a single port PNETID */ -#define MAX_PNETID_PORTS (PNETIDS_LEN / MAX_PNETID_LEN) - /* Max. # of ports with a PNETID */ - int pnet_id_by_dev_port(struct device *dev, unsigned short port, u8 *pnetid); #endif /* _ASM_S390_PNET_H */ diff --git a/arch/s390/net/pnet.c b/arch/s390/net/pnet.c index e22f1b10a6c7..9ecdbdf59781 100644 --- a/arch/s390/net/pnet.c +++ b/arch/s390/net/pnet.c @@ -13,6 +13,14 @@ #include #include +#define PNETIDS_LEN 64 /* Total utility string length in bytes + * to cover up to 4 PNETIDs of 16 bytes + * for up to 4 device ports + */ +#define MAX_PNETID_LEN 16 /* Max.length of a single port PNETID */ +#define MAX_PNETID_PORTS (PNETIDS_LEN / MAX_PNETID_LEN) + /* Max. # of ports with a PNETID */ + /* * Get the PNETIDs from a device. * s390 hardware supports the definition of a so-called Physical Network From b03faa1fafc8018295401dc558bdc76362d860a4 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Feb 2019 15:56:15 +0100 Subject: [PATCH 0788/1436] net/smc: postpone release of clcsock According to RFC7609 (http://www.rfc-editor.org/info/rfc7609) first the SMC-R connection is shut down and then the normal TCP connection FIN processing drives cleanup of the internal TCP connection. The unconditional release of the clcsock during active socket closing has to be postponed if the peer has not yet signalled socket closing. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 33 +++++++++++++++++---------------- net/smc/smc_close.c | 7 ++++++- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 369870b0ef79..60ccc8f50368 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -145,32 +145,33 @@ static int smc_release(struct socket *sock) rc = smc_close_active(smc); sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; + } else { + if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT) + sock_put(sk); /* passive closing */ + if (sk->sk_state == SMC_LISTEN) { + /* wake up clcsock accept */ + rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); + } + sk->sk_state = SMC_CLOSED; + sk->sk_state_change(sk); } sk->sk_prot->unhash(sk); - if (smc->clcsock) { - if (smc->use_fallback && sk->sk_state == SMC_LISTEN) { - /* wake up clcsock accept */ - rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); + if (sk->sk_state == SMC_CLOSED) { + if (smc->clcsock) { + mutex_lock(&smc->clcsock_release_lock); + sock_release(smc->clcsock); + smc->clcsock = NULL; + mutex_unlock(&smc->clcsock_release_lock); } - mutex_lock(&smc->clcsock_release_lock); - sock_release(smc->clcsock); - smc->clcsock = NULL; - mutex_unlock(&smc->clcsock_release_lock); - } - if (smc->use_fallback) { - if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT) - sock_put(sk); /* passive closing */ - sk->sk_state = SMC_CLOSED; - sk->sk_state_change(sk); + if (!smc->use_fallback) + smc_conn_free(&smc->conn); } /* detach socket */ sock_orphan(sk); sock->sk = NULL; - if (!smc->use_fallback && sk->sk_state == SMC_CLOSED) - smc_conn_free(&smc->conn); release_sock(sk); sock_put(sk); /* final sock_put */ diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index ea2b87f29469..0e60dd741698 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -405,8 +405,13 @@ wakeup: if (old_state != sk->sk_state) { sk->sk_state_change(sk); if ((sk->sk_state == SMC_CLOSED) && - (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) + (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { smc_conn_free(conn); + if (smc->clcsock) { + sock_release(smc->clcsock); + smc->clcsock = NULL; + } + } } release_sock(sk); sock_put(sk); /* sock_hold done by schedulers of close_work */ From a225d2cd88d3303e7c6d1481578a4f23d5f92350 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Feb 2019 15:56:16 +0100 Subject: [PATCH 0789/1436] net/smc: use smc_curs_copy() for SMC-D SMC already provides a wrapper for atomic64 calls to be architecture independent. Use this wrapper for SMC-D as well. Reported-by: Jens Remus Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_cdc.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index b5bfe38c7f9b..e8c214b992b6 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -245,17 +245,18 @@ static inline void smcr_cdc_msg_to_host(struct smc_host_cdc_msg *local, } static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local, - struct smcd_cdc_msg *peer) + struct smcd_cdc_msg *peer, + struct smc_connection *conn) { union smc_host_cursor temp; temp.wrap = peer->prod.wrap; temp.count = peer->prod.count; - atomic64_set(&local->prod.acurs, atomic64_read(&temp.acurs)); + smc_curs_copy(&local->prod, &temp, conn); temp.wrap = peer->cons.wrap; temp.count = peer->cons.count; - atomic64_set(&local->cons.acurs, atomic64_read(&temp.acurs)); + smc_curs_copy(&local->cons, &temp, conn); local->prod_flags = peer->cons.prod_flags; local->conn_state_flags = peer->cons.conn_state_flags; } @@ -265,7 +266,7 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local, struct smc_connection *conn) { if (conn->lgr->is_smcd) - smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer); + smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer, conn); else smcr_cdc_msg_to_host(local, peer, conn); } From 62c7139f3ed011379fbbef832b4b15e3c10b355f Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 7 Feb 2019 15:56:17 +0100 Subject: [PATCH 0790/1436] net/smc: unlock LGR pending lock earlier for SMC-D If SMC client and server connections are both established at the same time, smc_connect_ism() cannot send a CLC confirm message while smc_listen_work() is waiting for one due to lock contention. This can result in timeouts in smc_clc_wait_msg() and failed SMC connections. In case of SMC-D, the LGR pending lock is not needed while smc_listen_work() is waiting for the CLC confirm message. So, this patch releases the lock earlier for SMC-D to avoid the locking issue. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 60ccc8f50368..cf49ed05007b 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1287,24 +1287,28 @@ static void smc_listen_work(struct work_struct *work) return; } + /* SMC-D does not need this lock any more */ + if (ism_supported) + mutex_unlock(&smc_create_lgr_pending); + /* receive SMC Confirm CLC message */ reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), SMC_CLC_CONFIRM, CLC_WAIT_TIME); if (reason_code) { - mutex_unlock(&smc_create_lgr_pending); + if (!ism_supported) + mutex_unlock(&smc_create_lgr_pending); smc_listen_decline(new_smc, reason_code, local_contact); return; } /* finish worker */ if (!ism_supported) { - if (smc_listen_rdma_finish(new_smc, &cclc, local_contact)) { - mutex_unlock(&smc_create_lgr_pending); + rc = smc_listen_rdma_finish(new_smc, &cclc, local_contact); + mutex_unlock(&smc_create_lgr_pending); + if (rc) return; - } } smc_conn_save_peer_info(new_smc, &cclc); - mutex_unlock(&smc_create_lgr_pending); smc_listen_out_connected(new_smc); } From 72a36a8aecb520f8af6529476dae16f7b6a3d87e Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 7 Feb 2019 15:56:18 +0100 Subject: [PATCH 0791/1436] net/smc: use client and server LGR pending locks for SMC-R If SMC client and server connections are both established at the same time, smc_connect_rdma() cannot send a CLC confirm message while smc_listen_work() is waiting for one due to lock contention. This can result in timeouts in smc_clc_wait_msg() and failed SMC connections. In case of SMC-R, there are two types of LGRs (client and server LGRs) which can be protected by separate locks. So, this patch splits the LGR pending lock into two separate locks for client and server to avoid the locking issue for SMC-R. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 44 ++++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index cf49ed05007b..48ea7669161f 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -42,8 +42,11 @@ #include "smc_rx.h" #include "smc_close.h" -static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group - * creation +static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group + * creation on server + */ +static DEFINE_MUTEX(smc_client_lgr_pending); /* serialize link group + * creation on client */ static void smc_tcp_listen_work(struct work_struct *); @@ -477,7 +480,12 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code, { if (local_contact == SMC_FIRST_CONTACT) smc_lgr_forget(smc->conn.lgr); - mutex_unlock(&smc_create_lgr_pending); + if (smc->conn.lgr->is_smcd) + /* there is only one lgr role for SMC-D; use server lock */ + mutex_unlock(&smc_server_lgr_pending); + else + mutex_unlock(&smc_client_lgr_pending); + smc_conn_free(&smc->conn); return reason_code; } @@ -562,7 +570,7 @@ static int smc_connect_rdma(struct smc_sock *smc, struct smc_link *link; int reason_code = 0; - mutex_lock(&smc_create_lgr_pending); + mutex_lock(&smc_client_lgr_pending); local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev, ibport, ntoh24(aclc->qpn), &aclc->lcl, NULL, 0); @@ -573,7 +581,8 @@ static int smc_connect_rdma(struct smc_sock *smc, reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */ else reason_code = SMC_CLC_DECL_INTERR; /* other error */ - return smc_connect_abort(smc, reason_code, 0); + mutex_unlock(&smc_client_lgr_pending); + return reason_code; } link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK]; @@ -617,7 +626,7 @@ static int smc_connect_rdma(struct smc_sock *smc, return smc_connect_abort(smc, reason_code, local_contact); } - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_client_lgr_pending); smc_copy_sock_settings_to_clc(smc); if (smc->sk.sk_state == SMC_INIT) @@ -634,11 +643,14 @@ static int smc_connect_ism(struct smc_sock *smc, int local_contact = SMC_FIRST_CONTACT; int rc = 0; - mutex_lock(&smc_create_lgr_pending); + /* there is only one lgr role for SMC-D; use server lock */ + mutex_lock(&smc_server_lgr_pending); local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0, NULL, ismdev, aclc->gid); - if (local_contact < 0) - return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0); + if (local_contact < 0) { + mutex_unlock(&smc_server_lgr_pending); + return SMC_CLC_DECL_MEM; + } /* Create send and receive buffers */ if (smc_buf_create(smc, true)) @@ -652,7 +664,7 @@ static int smc_connect_ism(struct smc_sock *smc, rc = smc_clc_send_confirm(smc); if (rc) return smc_connect_abort(smc, rc, local_contact); - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); smc_copy_sock_settings_to_clc(smc); if (smc->sk.sk_state == SMC_INIT) @@ -1251,7 +1263,7 @@ static void smc_listen_work(struct work_struct *work) return; } - mutex_lock(&smc_create_lgr_pending); + mutex_lock(&smc_server_lgr_pending); smc_close_init(new_smc); smc_rx_init(new_smc); smc_tx_init(new_smc); @@ -1273,7 +1285,7 @@ static void smc_listen_work(struct work_struct *work) &local_contact) || smc_listen_rdma_reg(new_smc, local_contact))) { /* SMC not supported, decline */ - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP, local_contact); return; @@ -1282,21 +1294,21 @@ static void smc_listen_work(struct work_struct *work) /* send SMC Accept CLC message */ rc = smc_clc_send_accept(new_smc, local_contact); if (rc) { - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); smc_listen_decline(new_smc, rc, local_contact); return; } /* SMC-D does not need this lock any more */ if (ism_supported) - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); /* receive SMC Confirm CLC message */ reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), SMC_CLC_CONFIRM, CLC_WAIT_TIME); if (reason_code) { if (!ism_supported) - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); smc_listen_decline(new_smc, reason_code, local_contact); return; } @@ -1304,7 +1316,7 @@ static void smc_listen_work(struct work_struct *work) /* finish worker */ if (!ism_supported) { rc = smc_listen_rdma_finish(new_smc, &cclc, local_contact); - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); if (rc) return; } From 8fc002b01a4e3ecc223201c304e3f2217899aff6 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Thu, 7 Feb 2019 15:56:19 +0100 Subject: [PATCH 0792/1436] net/smc: move code to clear the conn->lgr field The lgr field of an smc_connection is set in smc_conn_create() and should be cleared in smc_conn_free() for consistency reasons, so move the responsible code. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 35c1cdc93e1c..a1a6d351ae1b 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -118,7 +118,6 @@ static void __smc_lgr_unregister_conn(struct smc_connection *conn) rb_erase(&conn->alert_node, &lgr->conns_all); lgr->conns_num--; conn->alert_token_local = 0; - conn->lgr = NULL; sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */ } @@ -331,8 +330,9 @@ void smc_conn_free(struct smc_connection *conn) } else { smc_cdc_tx_dismiss_slots(conn); } - smc_lgr_unregister_conn(conn); /* unsets conn->lgr */ + smc_lgr_unregister_conn(conn); smc_buf_unuse(conn, lgr); /* allow buffer reuse */ + conn->lgr = NULL; if (!lgr->conns_num) smc_lgr_schedule_free_work(lgr); @@ -462,6 +462,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) sock_hold(&smc->sk); /* sock_put in close work */ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; __smc_lgr_unregister_conn(conn); + conn->lgr = NULL; write_unlock_bh(&lgr->conns_lock); if (!schedule_work(&conn->close_work)) sock_put(&smc->sk); From 232dc8ef647658a5352da807d9e994e0e03b43cd Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Thu, 7 Feb 2019 15:56:20 +0100 Subject: [PATCH 0793/1436] net/smc: original socket family in inet_sock_diag Commit ed75986f4aae ("net/smc: ipv6 support for smc_diag.c") changed the value of the diag_family field. The idea was to indicate the family of the IP address in the inet_diag_sockid field. But the change makes it impossible to distinguish an inet_sock_diag response message from SMC sock_diag response. This patch restores the original behaviour and sends AF_SMC as value of the diag_family field. Fixes: ed75986f4aae ("net/smc: ipv6 support for smc_diag.c") Reported-by: Eugene Syromiatnikov Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_diag.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index dbf64a93d68a..371b4cf31fcd 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -38,6 +38,7 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk) { struct smc_sock *smc = smc_sk(sk); + r->diag_family = sk->sk_family; if (!smc->clcsock) return; r->id.idiag_sport = htons(smc->clcsock->sk->sk_num); @@ -45,14 +46,12 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk) r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if; sock_diag_save_cookie(sk, r->id.idiag_cookie); if (sk->sk_protocol == SMCPROTO_SMC) { - r->diag_family = PF_INET; memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr; r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr; #if IS_ENABLED(CONFIG_IPV6) } else if (sk->sk_protocol == SMCPROTO_SMC6) { - r->diag_family = PF_INET6; memcpy(&r->id.idiag_src, &smc->clcsock->sk->sk_v6_rcv_saddr, sizeof(smc->clcsock->sk->sk_v6_rcv_saddr)); memcpy(&r->id.idiag_dst, &smc->clcsock->sk->sk_v6_daddr, From 120382714c0456037b23b6e0c12f04bf2736e5e4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 7 Feb 2019 16:19:05 +0000 Subject: [PATCH 0794/1436] dt-bindings: phy: Armada 38x common phy bindings Add the Marvell Armada 38x common phy bindings. Signed-off-by: Russell King Signed-off-by: David S. Miller --- .../bindings/phy/phy-armada38x-comphy.txt | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 Documentation/devicetree/bindings/phy/phy-armada38x-comphy.txt diff --git a/Documentation/devicetree/bindings/phy/phy-armada38x-comphy.txt b/Documentation/devicetree/bindings/phy/phy-armada38x-comphy.txt new file mode 100644 index 000000000000..ad49e5c01334 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/phy-armada38x-comphy.txt @@ -0,0 +1,40 @@ +mvebu armada 38x comphy driver +------------------------------ + +This comphy controller can be found on Marvell Armada 38x. It provides a +number of shared PHYs used by various interfaces (network, sata, usb, +PCIe...). + +Required properties: + +- compatible: should be "marvell,armada-380-comphy" +- reg: should contain the comphy register location and length. +- #address-cells: should be 1. +- #size-cells: should be 0. + +A sub-node is required for each comphy lane provided by the comphy. + +Required properties (child nodes): + +- reg: comphy lane number. +- #phy-cells : from the generic phy bindings, must be 1. Defines the + input port to use for a given comphy lane. + +Example: + + comphy: phy@18300 { + compatible = "marvell,armada-380-comphy"; + reg = <0x18300 0x100>; + #address-cells = <1>; + #size-cells = <0>; + + cpm_comphy0: phy@0 { + reg = <0>; + #phy-cells = <1>; + }; + + cpm_comphy1: phy@1 { + reg = <1>; + #phy-cells = <1>; + }; + }; From 14dc100b4411a77a0cb63d049caf184434b18377 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 7 Feb 2019 16:19:10 +0000 Subject: [PATCH 0795/1436] phy: armada38x: add common phy support Add support for the Armada 38x common phy to allow us to change the speed of the Ethernet serdes lane. This driver only supports manipulation of the speed, it does not support configuration of the common phy. Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/phy/marvell/Kconfig | 10 + drivers/phy/marvell/Makefile | 1 + drivers/phy/marvell/phy-armada38x-comphy.c | 237 +++++++++++++++++++++ 3 files changed, 248 insertions(+) create mode 100644 drivers/phy/marvell/phy-armada38x-comphy.c diff --git a/drivers/phy/marvell/Kconfig b/drivers/phy/marvell/Kconfig index 6fb4b56e4c14..224ea4e6a46d 100644 --- a/drivers/phy/marvell/Kconfig +++ b/drivers/phy/marvell/Kconfig @@ -21,6 +21,16 @@ config PHY_BERLIN_USB help Enable this to support the USB PHY on Marvell Berlin SoCs. +config PHY_MVEBU_A38X_COMPHY + tristate "Marvell Armada 38x comphy driver" + depends on ARCH_MVEBU || COMPILE_TEST + depends on OF + select GENERIC_PHY + help + This driver allows to control the comphy, an hardware block providing + shared serdes PHYs on Marvell Armada 38x. Its serdes lanes can be + used by various controllers (Ethernet, sata, usb, PCIe...). + config PHY_MVEBU_CP110_COMPHY tristate "Marvell CP110 comphy driver" depends on ARCH_MVEBU || COMPILE_TEST diff --git a/drivers/phy/marvell/Makefile b/drivers/phy/marvell/Makefile index 3975b144f8ec..59b6c03ef756 100644 --- a/drivers/phy/marvell/Makefile +++ b/drivers/phy/marvell/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_ARMADA375_USBCLUSTER_PHY) += phy-armada375-usb2.o obj-$(CONFIG_PHY_BERLIN_SATA) += phy-berlin-sata.o obj-$(CONFIG_PHY_BERLIN_USB) += phy-berlin-usb.o +obj-$(CONFIG_PHY_MVEBU_A38X_COMPHY) += phy-armada38x-comphy.o obj-$(CONFIG_PHY_MVEBU_CP110_COMPHY) += phy-mvebu-cp110-comphy.o obj-$(CONFIG_PHY_MVEBU_SATA) += phy-mvebu-sata.o obj-$(CONFIG_PHY_PXA_28NM_HSIC) += phy-pxa-28nm-hsic.o diff --git a/drivers/phy/marvell/phy-armada38x-comphy.c b/drivers/phy/marvell/phy-armada38x-comphy.c new file mode 100644 index 000000000000..3e00bc679d4e --- /dev/null +++ b/drivers/phy/marvell/phy-armada38x-comphy.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 Russell King, Deep Blue Solutions Ltd. + * + * Partly derived from CP110 comphy driver by Antoine Tenart + * + */ +#include +#include +#include +#include +#include +#include + +#define MAX_A38X_COMPHY 6 +#define MAX_A38X_PORTS 3 + +#define COMPHY_CFG1 0x00 +#define COMPHY_CFG1_GEN_TX(x) ((x) << 26) +#define COMPHY_CFG1_GEN_TX_MSK COMPHY_CFG1_GEN_TX(15) +#define COMPHY_CFG1_GEN_RX(x) ((x) << 22) +#define COMPHY_CFG1_GEN_RX_MSK COMPHY_CFG1_GEN_RX(15) +#define GEN_SGMII_1_25GBPS 6 +#define GEN_SGMII_3_125GBPS 8 + +#define COMPHY_STAT1 0x18 +#define COMPHY_STAT1_PLL_RDY_TX BIT(3) +#define COMPHY_STAT1_PLL_RDY_RX BIT(2) + +#define COMPHY_SELECTOR 0xfc + +struct a38x_comphy; + +struct a38x_comphy_lane { + void __iomem *base; + struct a38x_comphy *priv; + unsigned int n; + + int port; +}; + +struct a38x_comphy { + void __iomem *base; + struct device *dev; + struct a38x_comphy_lane lane[MAX_A38X_COMPHY]; +}; + +static const u8 gbe_mux[MAX_A38X_COMPHY][MAX_A38X_PORTS] = { + { 0, 0, 0 }, + { 4, 5, 0 }, + { 0, 4, 0 }, + { 0, 0, 4 }, + { 0, 3, 0 }, + { 0, 0, 3 }, +}; + +static void a38x_comphy_set_reg(struct a38x_comphy_lane *lane, + unsigned int offset, u32 mask, u32 value) +{ + u32 val; + + val = readl_relaxed(lane->base + offset) & ~mask; + writel(val | value, lane->base + offset); +} + +static void a38x_comphy_set_speed(struct a38x_comphy_lane *lane, + unsigned int gen_tx, unsigned int gen_rx) +{ + a38x_comphy_set_reg(lane, COMPHY_CFG1, + COMPHY_CFG1_GEN_TX_MSK | COMPHY_CFG1_GEN_RX_MSK, + COMPHY_CFG1_GEN_TX(gen_tx) | + COMPHY_CFG1_GEN_RX(gen_rx)); +} + +static int a38x_comphy_poll(struct a38x_comphy_lane *lane, + unsigned int offset, u32 mask, u32 value) +{ + u32 val; + int ret; + + ret = readl_relaxed_poll_timeout_atomic(lane->base + offset, val, + (val & mask) == value, + 1000, 150000); + + if (ret) + dev_err(lane->priv->dev, + "comphy%u: timed out waiting for status\n", lane->n); + + return ret; +} + +/* + * We only support changing the speed for comphys configured for GBE. + * Since that is all we do, we only poll for PLL ready status. + */ +static int a38x_comphy_set_mode(struct phy *phy, enum phy_mode mode, int sub) +{ + struct a38x_comphy_lane *lane = phy_get_drvdata(phy); + unsigned int gen; + + if (mode != PHY_MODE_ETHERNET) + return -EINVAL; + + switch (sub) { + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_1000BASEX: + gen = GEN_SGMII_1_25GBPS; + break; + + case PHY_INTERFACE_MODE_2500BASEX: + gen = GEN_SGMII_3_125GBPS; + break; + + default: + return -EINVAL; + } + + a38x_comphy_set_speed(lane, gen, gen); + + return a38x_comphy_poll(lane, COMPHY_STAT1, + COMPHY_STAT1_PLL_RDY_TX | + COMPHY_STAT1_PLL_RDY_RX, + COMPHY_STAT1_PLL_RDY_TX | + COMPHY_STAT1_PLL_RDY_RX); +} + +static const struct phy_ops a38x_comphy_ops = { + .set_mode = a38x_comphy_set_mode, + .owner = THIS_MODULE, +}; + +static struct phy *a38x_comphy_xlate(struct device *dev, + struct of_phandle_args *args) +{ + struct a38x_comphy_lane *lane; + struct phy *phy; + u32 val; + + if (WARN_ON(args->args[0] >= MAX_A38X_PORTS)) + return ERR_PTR(-EINVAL); + + phy = of_phy_simple_xlate(dev, args); + if (IS_ERR(phy)) + return phy; + + lane = phy_get_drvdata(phy); + if (lane->port >= 0) + return ERR_PTR(-EBUSY); + + lane->port = args->args[0]; + + val = readl_relaxed(lane->priv->base + COMPHY_SELECTOR); + val = (val >> (4 * lane->n)) & 0xf; + + if (!gbe_mux[lane->n][lane->port] || + val != gbe_mux[lane->n][lane->port]) { + dev_warn(lane->priv->dev, + "comphy%u: not configured for GBE\n", lane->n); + phy = ERR_PTR(-EINVAL); + } + + return phy; +} + +static int a38x_comphy_probe(struct platform_device *pdev) +{ + struct phy_provider *provider; + struct device_node *child; + struct a38x_comphy *priv; + struct resource *res; + void __iomem *base; + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); + + priv->dev = &pdev->dev; + priv->base = base; + + for_each_available_child_of_node(pdev->dev.of_node, child) { + struct phy *phy; + int ret; + u32 val; + + ret = of_property_read_u32(child, "reg", &val); + if (ret < 0) { + dev_err(&pdev->dev, "missing 'reg' property (%d)\n", + ret); + continue; + } + + if (val >= MAX_A38X_COMPHY || priv->lane[val].base) { + dev_err(&pdev->dev, "invalid 'reg' property\n"); + continue; + } + + phy = devm_phy_create(&pdev->dev, child, &a38x_comphy_ops); + if (IS_ERR(phy)) + return PTR_ERR(phy); + + priv->lane[val].base = base + 0x28 * val; + priv->lane[val].priv = priv; + priv->lane[val].n = val; + priv->lane[val].port = -1; + phy_set_drvdata(phy, &priv->lane[val]); + } + + dev_set_drvdata(&pdev->dev, priv); + + provider = devm_of_phy_provider_register(&pdev->dev, a38x_comphy_xlate); + + return PTR_ERR_OR_ZERO(provider); +} + +static const struct of_device_id a38x_comphy_of_match_table[] = { + { .compatible = "marvell,armada-380-comphy" }, + { }, +}; +MODULE_DEVICE_TABLE(of, a38x_comphy_of_match_table); + +static struct platform_driver a38x_comphy_driver = { + .probe = a38x_comphy_probe, + .driver = { + .name = "armada-38x-comphy", + .of_match_table = a38x_comphy_of_match_table, + }, +}; +module_platform_driver(a38x_comphy_driver); + +MODULE_AUTHOR("Russell King "); +MODULE_DESCRIPTION("Common PHY driver for Armada 38x SoCs"); +MODULE_LICENSE("GPL v2"); From f3a6a9f3704aa9665c14b015d63e0f92a0533a3f Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 7 Feb 2019 16:19:15 +0000 Subject: [PATCH 0796/1436] ARM: dts: add description for Armada 38x common phy Add the DT description for the Armada 38x common phy. Signed-off-by: Russell King Signed-off-by: David S. Miller --- arch/arm/boot/dts/armada-38x.dtsi | 37 +++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi index 929459c42760..7b2e2bd6479b 100644 --- a/arch/arm/boot/dts/armada-38x.dtsi +++ b/arch/arm/boot/dts/armada-38x.dtsi @@ -335,6 +335,43 @@ #clock-cells = <1>; }; + comphy: phy@18300 { + compatible = "marvell,armada-380-comphy"; + reg = <0x18300 0x100>; + #address-cells = <1>; + #size-cells = <0>; + + comphy0: phy@0 { + reg = <0>; + #phy-cells = <1>; + }; + + comphy1: phy@1 { + reg = <1>; + #phy-cells = <1>; + }; + + comphy2: phy@2 { + reg = <2>; + #phy-cells = <1>; + }; + + comphy3: phy@3 { + reg = <3>; + #phy-cells = <1>; + }; + + comphy4: phy@4 { + reg = <4>; + #phy-cells = <1>; + }; + + comphy5: phy@5 { + reg = <5>; + #phy-cells = <1>; + }; + }; + coreclk: mvebu-sar@18600 { compatible = "marvell,armada-380-core-clock"; reg = <0x18600 0x04>; From 4ca124f4d96d7c976f2753c874d095c0de83d280 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 7 Feb 2019 16:19:21 +0000 Subject: [PATCH 0797/1436] dt-bindings: net: mvneta: add phys property Add an optional phys property to the mvneta binding documentation for the common phy. Reviewed-by: Rob Herring Signed-off-by: Russell King Signed-off-by: David S. Miller --- .../devicetree/bindings/net/marvell-armada-370-neta.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt index bedcfd5a52cd..691f886cfc4a 100644 --- a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt +++ b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt @@ -19,7 +19,7 @@ Optional properties: "marvell,armada-370-neta" and 9800B for others. - clock-names: List of names corresponding to clocks property; shall be "core" for core clock and "bus" for the optional bus clock. - +- phys: comphy for the ethernet port, see ../phy/phy-bindings.txt Optional properties (valid only for Armada XP/38x): From a10c1c8191e04c21769656c2ca8e1c69a6218954 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 7 Feb 2019 16:19:26 +0000 Subject: [PATCH 0798/1436] net: marvell: neta: add comphy support Add support for the common phy binding, so that we can reconfigure the comphy according to the desired ethernet speed. This will allow us to support 1000base-X and 2500base-X SFPs dynamically on SolidRun Clearfog. Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 45 ++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 9d4568eb2297..d134d3538f9b 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -436,6 +437,7 @@ struct mvneta_port { struct device_node *dn; unsigned int tx_csum_limit; struct phylink *phylink; + struct phy *comphy; struct mvneta_bm *bm_priv; struct mvneta_bm_pool *pool_long; @@ -3151,6 +3153,8 @@ static void mvneta_start_dev(struct mvneta_port *pp) { int cpu; + WARN_ON(phy_power_on(pp->comphy)); + mvneta_max_rx_size_set(pp, pp->pkt_size); mvneta_txq_max_tx_size_set(pp, pp->pkt_size); @@ -3213,6 +3217,8 @@ static void mvneta_stop_dev(struct mvneta_port *pp) mvneta_tx_reset(pp); mvneta_rx_reset(pp); + + WARN_ON(phy_power_off(pp->comphy)); } static void mvneta_percpu_enable(void *arg) @@ -3338,6 +3344,7 @@ static int mvneta_set_mac_addr(struct net_device *dev, void *addr) static void mvneta_validate(struct net_device *ndev, unsigned long *supported, struct phylink_link_state *state) { + struct mvneta_port *pp = netdev_priv(ndev); __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; /* We only support QSGMII, SGMII, 802.3z and RGMII modes */ @@ -3358,8 +3365,13 @@ static void mvneta_validate(struct net_device *ndev, unsigned long *supported, phylink_set(mask, Pause); /* Half-duplex at speeds higher than 100Mbit is unsupported */ - phylink_set(mask, 1000baseT_Full); - phylink_set(mask, 1000baseX_Full); + if (pp->comphy || state->interface != PHY_INTERFACE_MODE_2500BASEX) { + phylink_set(mask, 1000baseT_Full); + phylink_set(mask, 1000baseX_Full); + } + if (pp->comphy || state->interface == PHY_INTERFACE_MODE_2500BASEX) { + phylink_set(mask, 2500baseX_Full); + } if (!phy_interface_mode_is_8023z(state->interface)) { /* 10M and 100M are only supported in non-802.3z mode */ @@ -3373,6 +3385,11 @@ static void mvneta_validate(struct net_device *ndev, unsigned long *supported, __ETHTOOL_LINK_MODE_MASK_NBITS); bitmap_and(state->advertising, state->advertising, mask, __ETHTOOL_LINK_MODE_MASK_NBITS); + + /* We can only operate at 2500BaseX or 1000BaseX. If requested + * to advertise both, only report advertising at 2500BaseX. + */ + phylink_helper_basex_speed(state); } static int mvneta_mac_link_state(struct net_device *ndev, @@ -3384,7 +3401,9 @@ static int mvneta_mac_link_state(struct net_device *ndev, gmac_stat = mvreg_read(pp, MVNETA_GMAC_STATUS); if (gmac_stat & MVNETA_GMAC_SPEED_1000) - state->speed = SPEED_1000; + state->speed = + state->interface == PHY_INTERFACE_MODE_2500BASEX ? + SPEED_2500 : SPEED_1000; else if (gmac_stat & MVNETA_GMAC_SPEED_100) state->speed = SPEED_100; else @@ -3499,12 +3518,20 @@ static void mvneta_mac_config(struct net_device *ndev, unsigned int mode, MVNETA_GMAC_FORCE_LINK_DOWN); } + /* When at 2.5G, the link partner can send frames with shortened * preambles. */ if (state->speed == SPEED_2500) new_ctrl4 |= MVNETA_GMAC4_SHORT_PREAMBLE_ENABLE; + if (pp->comphy && + (state->interface == PHY_INTERFACE_MODE_SGMII || + state->interface == PHY_INTERFACE_MODE_1000BASEX || + state->interface == PHY_INTERFACE_MODE_2500BASEX)) + WARN_ON(phy_set_mode_ext(pp->comphy, PHY_MODE_ETHERNET, + state->interface)); + if (new_ctrl0 != gmac_ctrl0) mvreg_write(pp, MVNETA_GMAC_CTRL_0, new_ctrl0); if (new_ctrl2 != gmac_ctrl2) @@ -4404,7 +4431,7 @@ static int mvneta_port_power_up(struct mvneta_port *pp, int phy_mode) if (phy_mode == PHY_INTERFACE_MODE_QSGMII) mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_QSGMII_SERDES_PROTO); else if (phy_mode == PHY_INTERFACE_MODE_SGMII || - phy_mode == PHY_INTERFACE_MODE_1000BASEX) + phy_interface_mode_is_8023z(phy_mode)) mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_SGMII_SERDES_PROTO); else if (!phy_interface_mode_is_rgmii(phy_mode)) return -EINVAL; @@ -4421,6 +4448,7 @@ static int mvneta_probe(struct platform_device *pdev) struct mvneta_port *pp; struct net_device *dev; struct phylink *phylink; + struct phy *comphy; const char *dt_mac_addr; char hw_mac_addr[ETH_ALEN]; const char *mac_from; @@ -4446,6 +4474,14 @@ static int mvneta_probe(struct platform_device *pdev) goto err_free_irq; } + comphy = devm_of_phy_get(&pdev->dev, dn, NULL); + if (comphy == ERR_PTR(-EPROBE_DEFER)) { + err = -EPROBE_DEFER; + goto err_free_irq; + } else if (IS_ERR(comphy)) { + comphy = NULL; + } + phylink = phylink_create(dev, pdev->dev.fwnode, phy_mode, &mvneta_phylink_ops); if (IS_ERR(phylink)) { @@ -4462,6 +4498,7 @@ static int mvneta_probe(struct platform_device *pdev) pp = netdev_priv(dev); spin_lock_init(&pp->lock); pp->phylink = phylink; + pp->comphy = comphy; pp->phy_interface = phy_mode; pp->dn = dn; From f548ced15f909169f748dff9991eefbf7648b7f9 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 7 Feb 2019 16:19:31 +0000 Subject: [PATCH 0799/1436] ARM: dts: clearfog: add comphy settings for Ethernet interfaces Add the comphy settings for the Ethernet interfaces. Signed-off-by: Russell King Signed-off-by: David S. Miller --- arch/arm/boot/dts/armada-388-clearfog.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/armada-388-clearfog.dtsi b/arch/arm/boot/dts/armada-388-clearfog.dtsi index 1b0d0680c8b6..0d81600ca247 100644 --- a/arch/arm/boot/dts/armada-388-clearfog.dtsi +++ b/arch/arm/boot/dts/armada-388-clearfog.dtsi @@ -93,6 +93,7 @@ bm,pool-long = <2>; bm,pool-short = <1>; buffer-manager = <&bm>; + phys = <&comphy1 1>; phy-mode = "sgmii"; status = "okay"; }; @@ -103,6 +104,7 @@ bm,pool-short = <1>; buffer-manager = <&bm>; managed = "in-band-status"; + phys = <&comphy5 2>; phy-mode = "sgmii"; sfp = <&sfp>; status = "okay"; From 71bd106d2567675668e253cba3960e3c4bf2e80e Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Thu, 7 Feb 2019 09:52:10 -0800 Subject: [PATCH 0800/1436] net: fixed-phy: Add fixed_phy_register_with_gpiod() API Add fixed_phy_register_with_gpiod() API. It lets users create a fixed_phy instance that uses a GPIO descriptor which was obtained externally e.g. through platform data. This enables platform devices (non-DT based) to use GPIOs for link status. Signed-off-by: Moritz Fischer Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/fixed_phy.c | 32 +++++++++++++++++++++++++------- include/linux/phy_fixed.h | 15 +++++++++++++++ 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index d810f914aaa4..b0d1368c3400 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -229,12 +229,12 @@ static struct gpio_desc *fixed_phy_get_gpiod(struct device_node *np) } #endif -struct phy_device *fixed_phy_register(unsigned int irq, - struct fixed_phy_status *status, - struct device_node *np) +static struct phy_device *__fixed_phy_register(unsigned int irq, + struct fixed_phy_status *status, + struct device_node *np, + struct gpio_desc *gpiod) { struct fixed_mdio_bus *fmb = &platform_fmb; - struct gpio_desc *gpiod = NULL; struct phy_device *phy; int phy_addr; int ret; @@ -243,9 +243,11 @@ struct phy_device *fixed_phy_register(unsigned int irq, return ERR_PTR(-EPROBE_DEFER); /* Check if we have a GPIO associated with this fixed phy */ - gpiod = fixed_phy_get_gpiod(np); - if (IS_ERR(gpiod)) - return ERR_CAST(gpiod); + if (!gpiod) { + gpiod = fixed_phy_get_gpiod(np); + if (IS_ERR(gpiod)) + return ERR_CAST(gpiod); + } /* Get the next available PHY address, up to PHY_MAX_ADDR */ phy_addr = ida_simple_get(&phy_fixed_ida, 0, PHY_MAX_ADDR, GFP_KERNEL); @@ -308,8 +310,24 @@ struct phy_device *fixed_phy_register(unsigned int irq, return phy; } + +struct phy_device *fixed_phy_register(unsigned int irq, + struct fixed_phy_status *status, + struct device_node *np) +{ + return __fixed_phy_register(irq, status, np, NULL); +} EXPORT_SYMBOL_GPL(fixed_phy_register); +struct phy_device * +fixed_phy_register_with_gpiod(unsigned int irq, + struct fixed_phy_status *status, + struct gpio_desc *gpiod) +{ + return __fixed_phy_register(irq, status, NULL, gpiod); +} +EXPORT_SYMBOL_GPL(fixed_phy_register_with_gpiod); + void fixed_phy_unregister(struct phy_device *phy) { phy_device_remove(phy); diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index c78fc203db43..1e5d86ebdaeb 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -19,6 +19,12 @@ extern int fixed_phy_add(unsigned int irq, int phy_id, extern struct phy_device *fixed_phy_register(unsigned int irq, struct fixed_phy_status *status, struct device_node *np); + +extern struct phy_device * +fixed_phy_register_with_gpiod(unsigned int irq, + struct fixed_phy_status *status, + struct gpio_desc *gpiod); + extern void fixed_phy_unregister(struct phy_device *phydev); extern int fixed_phy_set_link_update(struct phy_device *phydev, int (*link_update)(struct net_device *, @@ -35,6 +41,15 @@ static inline struct phy_device *fixed_phy_register(unsigned int irq, { return ERR_PTR(-ENODEV); } + +static inline struct phy_device * +fixed_phy_register_with_gpiod(unsigned int irq, + struct fixed_phy_status *status, + struct gpio_desc *gpiod) +{ + return ERR_PTR(-ENODEV); +} + static inline void fixed_phy_unregister(struct phy_device *phydev) { } From 998a8a8387ff5f65da456d1fc448dbb926fb5d78 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 7 Feb 2019 21:41:46 +0100 Subject: [PATCH 0801/1436] net: phy: let genphy_c45_read_link manage the devices to check Let genphy_c45_read_link manage the devices to check, this removes overhead from callers. Add C22EXT to the list of excluded devices because it doesn't implement the status register. According to the 802.3 clause 45 spec registers 29.0 - 29.4 are reserved. At the moment we have very few clause 45 PHY drivers, so we are lacking experience whether other drivers will have to exclude further devices, or may need to check PHY XS. If we should figure out that list of devices to check needs to be configurable, I think best will be to add a device list member to struct phy_driver. v2: - adjusted commit message - exclude also device C22EXT from link checking Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 10 +--------- drivers/net/phy/phy-c45.c | 18 ++++++++++-------- include/linux/phy.h | 2 +- include/uapi/linux/mdio.h | 2 ++ 4 files changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 296a537cdfcb..96a79c6c7810 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -428,16 +428,8 @@ static int mv3310_read_10gbr_status(struct phy_device *phydev) static int mv3310_read_status(struct phy_device *phydev) { - u32 mmd_mask = phydev->c45_ids.devices_in_package; int val; - /* The vendor devads do not report link status. Avoid the PHYXS - * instance as there are three, and its status depends on the MAC - * being appropriately configured for the negotiated speed. - */ - mmd_mask &= ~(BIT(MDIO_MMD_VEND1) | BIT(MDIO_MMD_VEND2) | - BIT(MDIO_MMD_PHYXS)); - phydev->speed = SPEED_UNKNOWN; phydev->duplex = DUPLEX_UNKNOWN; linkmode_zero(phydev->lp_advertising); @@ -453,7 +445,7 @@ static int mv3310_read_status(struct phy_device *phydev) if (val & MDIO_STAT1_LSTATUS) return mv3310_read_10gbr_status(phydev); - val = genphy_c45_read_link(phydev, mmd_mask); + val = genphy_c45_read_link(phydev); if (val < 0) return val; diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index c92d0fb7ec4f..6adfe1f6319e 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -118,17 +118,24 @@ EXPORT_SYMBOL_GPL(genphy_c45_aneg_done); /** * genphy_c45_read_link - read the overall link status from the MMDs * @phydev: target phy_device struct - * @mmd_mask: MMDs to read status from * * Read the link status from the specified MMDs, and if they all indicate * that the link is up, set phydev->link to 1. If an error is encountered, * a negative errno will be returned, otherwise zero. */ -int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask) +int genphy_c45_read_link(struct phy_device *phydev) { + u32 mmd_mask = phydev->c45_ids.devices_in_package; int val, devad; bool link = true; + /* The vendor devads and C22EXT do not report link status. Avoid the + * PHYXS instance as its status may depend on the MAC being + * appropriately configured for the negotiated speed. + */ + mmd_mask &= ~(MDIO_DEVS_VEND1 | MDIO_DEVS_VEND2 | MDIO_DEVS_C22EXT | + MDIO_DEVS_PHYXS); + while (mmd_mask && link) { devad = __ffs(mmd_mask); mmd_mask &= ~BIT(devad); @@ -266,16 +273,11 @@ EXPORT_SYMBOL_GPL(gen10g_config_aneg); int gen10g_read_status(struct phy_device *phydev) { - u32 mmd_mask = phydev->c45_ids.devices_in_package; - /* For now just lie and say it's 10G all the time */ phydev->speed = SPEED_10000; phydev->duplex = DUPLEX_FULL; - /* Avoid reading the vendor MMDs */ - mmd_mask &= ~(BIT(MDIO_MMD_VEND1) | BIT(MDIO_MMD_VEND2)); - - return genphy_c45_read_link(phydev, mmd_mask); + return genphy_c45_read_link(phydev); } EXPORT_SYMBOL_GPL(gen10g_read_status); diff --git a/include/linux/phy.h b/include/linux/phy.h index 237dd035858a..f41bf651f6a0 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1094,7 +1094,7 @@ int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, /* Clause 45 PHY */ int genphy_c45_restart_aneg(struct phy_device *phydev); int genphy_c45_aneg_done(struct phy_device *phydev); -int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask); +int genphy_c45_read_link(struct phy_device *phydev); int genphy_c45_read_lpa(struct phy_device *phydev); int genphy_c45_read_pma(struct phy_device *phydev); int genphy_c45_pma_setup_forced(struct phy_device *phydev); diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index d435b00d64ad..2e6e309f0847 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -123,6 +123,8 @@ #define MDIO_DEVS_TC MDIO_DEVS_PRESENT(MDIO_MMD_TC) #define MDIO_DEVS_AN MDIO_DEVS_PRESENT(MDIO_MMD_AN) #define MDIO_DEVS_C22EXT MDIO_DEVS_PRESENT(MDIO_MMD_C22EXT) +#define MDIO_DEVS_VEND1 MDIO_DEVS_PRESENT(MDIO_MMD_VEND1) +#define MDIO_DEVS_VEND2 MDIO_DEVS_PRESENT(MDIO_MMD_VEND2) /* Control register 2. */ #define MDIO_PMA_CTRL2_TYPE 0x000f /* PMA/PMD type selection */ From 39841cc1cbb69344539c98a1fa9d858ed124c7ba Mon Sep 17 00:00:00 2001 From: Arun Parameswaran Date: Thu, 7 Feb 2019 16:01:18 -0800 Subject: [PATCH 0802/1436] net: dsa: b53: Fix for failure when irq is not defined in dt Fixes the issues with non BCM58XX chips in the b53 driver failing, when the irq is not specified in the device tree. Removed the check for BCM58XX in b53_srab_prepare_irq(), so the 'port->irq' will be set to '-EXIO' if the irq is not specified in the device tree. Fixes: 16994374a6fc ("net: dsa: b53: Make SRAB driver manage port interrupts") Fixes: b2ddc48a81b5 ("net: dsa: b53: Do not fail when IRQ are not initialized") Signed-off-by: Arun Parameswaran Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_srab.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c index 90f514252987..d9c56a779c08 100644 --- a/drivers/net/dsa/b53/b53_srab.c +++ b/drivers/net/dsa/b53/b53_srab.c @@ -511,9 +511,6 @@ static void b53_srab_prepare_irq(struct platform_device *pdev) /* Clear all pending interrupts */ writel(0xffffffff, priv->regs + B53_SRAB_INTR); - if (dev->pdata && dev->pdata->chip_id != BCM58XX_DEVICE_ID) - return; - for (i = 0; i < B53_N_PORTS; i++) { port = &priv->port_intrs[i]; From 62b8cea62e8bad0511260faab8e8de04c76a69af Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 7 Feb 2019 11:29:24 -0800 Subject: [PATCH 0803/1436] tools/bpf: add missing strings.h include Few files in libbpf are using bzero() function (defined in strings.h header), but don't include corresponding header. When libbpf is added as a dependency to pahole, this undeterministically causes warnings on some machines: bpf.c:225:2: warning: implicit declaration of function 'bzero' [-Wimplicit-function-declaration] bzero(&attr, sizeof(attr)); ^~~~~ Signed-off-by: Andrii Nakryiko Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 1 + tools/lib/bpf/btf.c | 1 + tools/lib/bpf/libbpf.c | 1 + 3 files changed, 3 insertions(+) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 3defad77dc7a..92fd27fe0599 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -22,6 +22,7 @@ */ #include +#include #include #include #include diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index ab6528c935a1..4324eb47d214 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 47969aa0faf8..8d64ada5f728 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include From a4021a3579c52d5a5131820aeb94f531a7b082a7 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 7 Feb 2019 09:34:51 -0800 Subject: [PATCH 0804/1436] tools/bpf: add log_level to bpf_load_program_attr The kernel verifier has three levels of logs: 0: no logs 1: logs mostly useful > 1: verbose Current libbpf API functions bpf_load_program_xattr() and bpf_load_program() cannot specify log_level. The bcc, however, provides an interface for user to specify log_level 2 for verbose output. This patch added log_level into structure bpf_load_program_attr, so users, including bcc, can use bpf_load_program_xattr() to change log_level. The supported log_level is 0, 1, and 2. The bpf selftest test_sock.c is modified to enable log_level = 2. If the "verbose" in test_sock.c is changed to true, the test will output logs like below: $ ./test_sock func#0 @0 0: R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1 0: (bf) r6 = r1 1: R1=ctx(id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0) R10=fp0,call_-1 1: (61) r7 = *(u32 *)(r6 +28) invalid bpf_context access off=28 size=4 Test case: bind4 load with invalid access: src_ip6 .. [PASS] ... Test case: bind6 allow all .. [PASS] Summary: 16 PASSED, 0 FAILED Some test_sock tests are negative tests and verbose verifier log will be printed out as shown in the above. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 22 +++++++++++++++++----- tools/lib/bpf/bpf.h | 1 + tools/testing/selftests/bpf/test_sock.c | 9 ++++++++- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 92fd27fe0599..a5261f39e2bd 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -215,10 +215,15 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, { void *finfo = NULL, *linfo = NULL; union bpf_attr attr; + __u32 log_level; __u32 name_len; int fd; - if (!load_attr) + if (!load_attr || !log_buf != !log_buf_sz) + return -EINVAL; + + log_level = load_attr->log_level; + if (log_level > 2 || (log_level && !log_buf)) return -EINVAL; name_len = load_attr->name ? strlen(load_attr->name) : 0; @@ -229,9 +234,16 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, attr.insn_cnt = (__u32)load_attr->insns_cnt; attr.insns = ptr_to_u64(load_attr->insns); attr.license = ptr_to_u64(load_attr->license); - attr.log_buf = ptr_to_u64(NULL); - attr.log_size = 0; - attr.log_level = 0; + + attr.log_level = log_level; + if (log_level) { + attr.log_buf = ptr_to_u64(log_buf); + attr.log_size = log_buf_sz; + } else { + attr.log_buf = ptr_to_u64(NULL); + attr.log_size = 0; + } + attr.kern_version = load_attr->kern_version; attr.prog_ifindex = load_attr->prog_ifindex; attr.prog_btf_fd = load_attr->prog_btf_fd; @@ -287,7 +299,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, goto done; } - if (!log_buf || !log_buf_sz) + if (log_level || !log_buf) goto done; /* Try again with log */ diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index ed09eed2dc3b..6ffdd79bea89 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -85,6 +85,7 @@ struct bpf_load_program_attr { __u32 line_info_rec_size; const void *line_info; __u32 line_info_cnt; + __u32 log_level; }; /* Flags to direct loading requirements */ diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c index 561ffb6d6433..fb679ac3d4b0 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ -20,6 +20,7 @@ #define MAX_INSNS 512 char bpf_log_buf[BPF_LOG_BUF_SIZE]; +static bool verbose = false; struct sock_test { const char *descr; @@ -325,6 +326,7 @@ static int load_sock_prog(const struct bpf_insn *prog, enum bpf_attach_type attach_type) { struct bpf_load_program_attr attr; + int ret; memset(&attr, 0, sizeof(struct bpf_load_program_attr)); attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK; @@ -332,8 +334,13 @@ static int load_sock_prog(const struct bpf_insn *prog, attr.insns = prog; attr.insns_cnt = probe_prog_length(attr.insns); attr.license = "GPL"; + attr.log_level = 2; - return bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE); + ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE); + if (verbose && ret < 0) + fprintf(stderr, "%s\n", bpf_log_buf); + + return ret; } static int attach_sock_prog(int cgfd, int progfd, From 1358c13a48c43f5e4de0c1835291837a27b9720c Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Thu, 7 Feb 2019 15:36:11 +0200 Subject: [PATCH 0805/1436] crypto: ccree - fix resume race condition on init We were enabling autosuspend, which is using data set by the hash module, prior to the hash module being inited, casuing a crash on resume as part of the startup sequence if the race was lost. This was never a real problem because the PM infra was using low res timers so we were always winning the race, until commit 8234f6734c5d ("PM-runtime: Switch autosuspend over to using hrtimers") changed that :-) Fix this by seperating the PM setup and enablement and doing the latter only at the end of the init sequence. Signed-off-by: Gilad Ben-Yossef Cc: Vincent Guittot Cc: stable@kernel.org # v4.20 Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_driver.c | 7 ++++--- drivers/crypto/ccree/cc_pm.c | 13 ++++++------- drivers/crypto/ccree/cc_pm.h | 3 +++ 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c index 8ada308d72ee..b0125ad65825 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -380,7 +380,7 @@ static int init_cc_resources(struct platform_device *plat_dev) rc = cc_ivgen_init(new_drvdata); if (rc) { dev_err(dev, "cc_ivgen_init failed\n"); - goto post_power_mgr_err; + goto post_buf_mgr_err; } /* Allocate crypto algs */ @@ -403,6 +403,9 @@ static int init_cc_resources(struct platform_device *plat_dev) goto post_hash_err; } + /* All set, we can allow autosuspend */ + cc_pm_go(new_drvdata); + /* If we got here and FIPS mode is enabled * it means all FIPS test passed, so let TEE * know we're good. @@ -417,8 +420,6 @@ post_cipher_err: cc_cipher_free(new_drvdata); post_ivgen_err: cc_ivgen_fini(new_drvdata); -post_power_mgr_err: - cc_pm_fini(new_drvdata); post_buf_mgr_err: cc_buffer_mgr_fini(new_drvdata); post_req_mgr_err: diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c index d990f472e89f..6ff7e75ad90e 100644 --- a/drivers/crypto/ccree/cc_pm.c +++ b/drivers/crypto/ccree/cc_pm.c @@ -100,20 +100,19 @@ int cc_pm_put_suspend(struct device *dev) int cc_pm_init(struct cc_drvdata *drvdata) { - int rc = 0; struct device *dev = drvdata_to_dev(drvdata); /* must be before the enabling to avoid resdundent suspending */ pm_runtime_set_autosuspend_delay(dev, CC_SUSPEND_TIMEOUT); pm_runtime_use_autosuspend(dev); /* activate the PM module */ - rc = pm_runtime_set_active(dev); - if (rc) - return rc; - /* enable the PM module*/ - pm_runtime_enable(dev); + return pm_runtime_set_active(dev); +} - return rc; +/* enable the PM module*/ +void cc_pm_go(struct cc_drvdata *drvdata) +{ + pm_runtime_enable(drvdata_to_dev(drvdata)); } void cc_pm_fini(struct cc_drvdata *drvdata) diff --git a/drivers/crypto/ccree/cc_pm.h b/drivers/crypto/ccree/cc_pm.h index 020a5403c58b..f62624357020 100644 --- a/drivers/crypto/ccree/cc_pm.h +++ b/drivers/crypto/ccree/cc_pm.h @@ -16,6 +16,7 @@ extern const struct dev_pm_ops ccree_pm; int cc_pm_init(struct cc_drvdata *drvdata); +void cc_pm_go(struct cc_drvdata *drvdata); void cc_pm_fini(struct cc_drvdata *drvdata); int cc_pm_suspend(struct device *dev); int cc_pm_resume(struct device *dev); @@ -29,6 +30,8 @@ static inline int cc_pm_init(struct cc_drvdata *drvdata) return 0; } +static void cc_pm_go(struct cc_drvdata *drvdata) {} + static inline void cc_pm_fini(struct cc_drvdata *drvdata) {} static inline int cc_pm_suspend(struct device *dev) From e8b22d0a329f0fb5c7ef95406872d268f01ee3b1 Mon Sep 17 00:00:00 2001 From: Matti Kurkela Date: Thu, 7 Feb 2019 23:49:23 -0800 Subject: [PATCH 0806/1436] Input: elantech - enable 3rd button support on Fujitsu CELSIUS H780 Like Fujitsu CELSIUS H760, the H780 also has a three-button Elantech touchpad, but the driver needs to be told so to enable the middle touchpad button. The elantech_dmi_force_crc_enabled quirk was not necessary with the H780. Also document the fw_version and caps values detected for both H760 and H780 models. Signed-off-by: Matti Kurkela Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 9fe075c137dc..a7f8b1614559 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1119,6 +1119,8 @@ static int elantech_get_resolution_v4(struct psmouse *psmouse, * Asus UX31 0x361f00 20, 15, 0e clickpad * Asus UX32VD 0x361f02 00, 15, 0e clickpad * Avatar AVIU-145A2 0x361f00 ? clickpad + * Fujitsu CELSIUS H760 0x570f02 40, 14, 0c 3 hw buttons (**) + * Fujitsu CELSIUS H780 0x5d0f02 41, 16, 0d 3 hw buttons (**) * Fujitsu LIFEBOOK E544 0x470f00 d0, 12, 09 2 hw buttons * Fujitsu LIFEBOOK E546 0x470f00 50, 12, 09 2 hw buttons * Fujitsu LIFEBOOK E547 0x470f00 50, 12, 09 2 hw buttons @@ -1171,6 +1173,13 @@ static const struct dmi_system_id elantech_dmi_has_middle_button[] = { DMI_MATCH(DMI_PRODUCT_NAME, "CELSIUS H760"), }, }, + { + /* Fujitsu H780 also has a middle button */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "CELSIUS H780"), + }, + }, #endif { } }; From dcf6e2e38a1c7ccbc535de5e1d9b14998847499d Mon Sep 17 00:00:00 2001 From: Zachary Hays Date: Thu, 7 Feb 2019 10:03:08 -0500 Subject: [PATCH 0807/1436] mmc: block: handle complete_work on separate workqueue The kblockd workqueue is created with the WQ_MEM_RECLAIM flag set. This generates a rescuer thread for that queue that will trigger when the CPU is under heavy load and collect the uncompleted work. In the case of mmc, this creates the possibility of a deadlock when there are multiple partitions on the device as other blk-mq work is also run on the same queue. For example: - worker 0 claims the mmc host to work on partition 1 - worker 1 attempts to claim the host for partition 2 but has to wait for worker 0 to finish - worker 0 schedules complete_work to release the host - rescuer thread is triggered after time-out and collects the dangling work - rescuer thread attempts to complete the work in order starting with claim host - the task to release host is now blocked by a task to claim it and will never be called The above results in multiple hung tasks that lead to failures to mount partitions. Handling complete_work on a separate workqueue avoids this by keeping the work completion tasks separate from the other blk-mq work. This allows the host to be released without getting blocked by other tasks attempting to claim the host. Signed-off-by: Zachary Hays Fixes: 81196976ed94 ("mmc: block: Add blk-mq support") Cc: Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 10 +++++++++- include/linux/mmc/card.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index aef1185f383d..14f3fdb8c6bb 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2112,7 +2112,7 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq) if (waiting) wake_up(&mq->wait); else - kblockd_schedule_work(&mq->complete_work); + queue_work(mq->card->complete_wq, &mq->complete_work); return; } @@ -2924,6 +2924,13 @@ static int mmc_blk_probe(struct mmc_card *card) mmc_fixup_device(card, mmc_blk_fixups); + card->complete_wq = alloc_workqueue("mmc_complete", + WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); + if (unlikely(!card->complete_wq)) { + pr_err("Failed to create mmc completion workqueue"); + return -ENOMEM; + } + md = mmc_blk_alloc(card); if (IS_ERR(md)) return PTR_ERR(md); @@ -2987,6 +2994,7 @@ static void mmc_blk_remove(struct mmc_card *card) pm_runtime_put_noidle(&card->dev); mmc_blk_remove_req(md); dev_set_drvdata(&card->dev, NULL); + destroy_workqueue(card->complete_wq); } static int _mmc_blk_suspend(struct mmc_card *card) diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index de7377815b6b..8ef330027b13 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -308,6 +308,7 @@ struct mmc_card { unsigned int nr_parts; unsigned int bouncesz; /* Bounce buffer size */ + struct workqueue_struct *complete_wq; /* Private workqueue */ }; static inline bool mmc_large_sector(struct mmc_card *card) From 6f568ebe2afefdc33a6fb06ef20a94f8b96455f1 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 6 Feb 2019 10:56:02 -0800 Subject: [PATCH 0808/1436] futex: Fix barrier comment The current comment for the barrier that guarantees that waiter increment is always before taking the hb spinlock (barrier (A)) needs to be fixed as it is misplaced. This is obviously referring to hb_waiters_inc, which is a full barrier. Reported-by: Peter Zijlstra Signed-off-by: Davidlohr Bueso Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20190206185602.949-1-dave@stgolabs.net --- kernel/futex.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index fdd312da0992..5ec2473a3497 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2221,11 +2221,11 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) * decrement the counter at queue_unlock() when some error has * occurred and we don't end up adding the task to the list. */ - hb_waiters_inc(hb); + hb_waiters_inc(hb); /* implies smp_mb(); (A) */ q->lock_ptr = &hb->lock; - spin_lock(&hb->lock); /* implies smp_mb(); (A) */ + spin_lock(&hb->lock); return hb; } From 1a1fb985f2e2b85ec0d3dc2e519ee48389ec2434 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 29 Jan 2019 23:15:12 +0100 Subject: [PATCH 0809/1436] futex: Handle early deadlock return correctly commit 56222b212e8e ("futex: Drop hb->lock before enqueueing on the rtmutex") changed the locking rules in the futex code so that the hash bucket lock is not longer held while the waiter is enqueued into the rtmutex wait list. This made the lock and the unlock path symmetric, but unfortunately the possible early exit from __rt_mutex_proxy_start() due to a detected deadlock was not updated accordingly. That allows a concurrent unlocker to observe inconsitent state which triggers the warning in the unlock path. futex_lock_pi() futex_unlock_pi() lock(hb->lock) queue(hb_waiter) lock(hb->lock) lock(rtmutex->wait_lock) unlock(hb->lock) // acquired hb->lock hb_waiter = futex_top_waiter() lock(rtmutex->wait_lock) __rt_mutex_proxy_start() ---> fail remove(rtmutex_waiter); ---> returns -EDEADLOCK unlock(rtmutex->wait_lock) // acquired wait_lock wake_futex_pi() rt_mutex_next_owner() --> returns NULL --> WARN lock(hb->lock) unqueue(hb_waiter) The problem is caused by the remove(rtmutex_waiter) in the failure case of __rt_mutex_proxy_start() as this lets the unlocker observe a waiter in the hash bucket but no waiter on the rtmutex, i.e. inconsistent state. The original commit handles this correctly for the other early return cases (timeout, signal) by delaying the removal of the rtmutex waiter until the returning task reacquired the hash bucket lock. Treat the failure case of __rt_mutex_proxy_start() in the same way and let the existing cleanup code handle the eventual handover of the rtmutex gracefully. The regular rt_mutex_proxy_start() gains the rtmutex waiter removal for the failure case, so that the other callsites are still operating correctly. Add proper comments to the code so all these details are fully documented. Thanks to Peter for helping with the analysis and writing the really valuable code comments. Fixes: 56222b212e8e ("futex: Drop hb->lock before enqueueing on the rtmutex") Reported-by: Heiko Carstens Co-developed-by: Peter Zijlstra Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Tested-by: Heiko Carstens Cc: Martin Schwidefsky Cc: linux-s390@vger.kernel.org Cc: Stefan Liebler Cc: Sebastian Sewior Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1901292311410.1950@nanos.tec.linutronix.de --- kernel/futex.c | 28 ++++++++++++++++++---------- kernel/locking/rtmutex.c | 37 ++++++++++++++++++++++++++++++++----- 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 5ec2473a3497..a0514e01c3eb 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2861,35 +2861,39 @@ retry_private: * and BUG when futex_unlock_pi() interleaves with this. * * Therefore acquire wait_lock while holding hb->lock, but drop the - * latter before calling rt_mutex_start_proxy_lock(). This still fully - * serializes against futex_unlock_pi() as that does the exact same - * lock handoff sequence. + * latter before calling __rt_mutex_start_proxy_lock(). This + * interleaves with futex_unlock_pi() -- which does a similar lock + * handoff -- such that the latter can observe the futex_q::pi_state + * before __rt_mutex_start_proxy_lock() is done. */ raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); spin_unlock(q.lock_ptr); + /* + * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter + * such that futex_unlock_pi() is guaranteed to observe the waiter when + * it sees the futex_q::pi_state. + */ ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); if (ret) { if (ret == 1) ret = 0; - - spin_lock(q.lock_ptr); - goto no_block; + goto cleanup; } - if (unlikely(to)) hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); +cleanup: spin_lock(q.lock_ptr); /* - * If we failed to acquire the lock (signal/timeout), we must + * If we failed to acquire the lock (deadlock/signal/timeout), we must * first acquire the hb->lock before removing the lock from the - * rt_mutex waitqueue, such that we can keep the hb and rt_mutex - * wait lists consistent. + * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait + * lists consistent. * * In particular; it is important that futex_unlock_pi() can not * observe this inconsistency. @@ -3013,6 +3017,10 @@ retry: * there is no point where we hold neither; and therefore * wake_futex_pi() must observe a state consistent with what we * observed. + * + * In particular; this forces __rt_mutex_start_proxy() to + * complete such that we're guaranteed to observe the + * rt_waiter. Also see the WARN in wake_futex_pi(). */ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); spin_unlock(&hb->lock); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 581edcc63c26..978d63a8261c 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1726,12 +1726,33 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, rt_mutex_set_owner(lock, NULL); } +/** + * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task + * @lock: the rt_mutex to take + * @waiter: the pre-initialized rt_mutex_waiter + * @task: the task to prepare + * + * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock + * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. + * + * NOTE: does _NOT_ remove the @waiter on failure; must either call + * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this. + * + * Returns: + * 0 - task blocked on lock + * 1 - acquired the lock for task, caller should wake it up + * <0 - error + * + * Special API call for PI-futex support. + */ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct task_struct *task) { int ret; + lockdep_assert_held(&lock->wait_lock); + if (try_to_take_rt_mutex(lock, task, NULL)) return 1; @@ -1749,9 +1770,6 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, ret = 0; } - if (unlikely(ret)) - remove_waiter(lock, waiter); - debug_rt_mutex_print_deadlock(waiter); return ret; @@ -1763,12 +1781,18 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, * @waiter: the pre-initialized rt_mutex_waiter * @task: the task to prepare * + * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock + * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. + * + * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter + * on failure. + * * Returns: * 0 - task blocked on lock * 1 - acquired the lock for task, caller should wake it up * <0 - error * - * Special API call for FUTEX_REQUEUE_PI support. + * Special API call for PI-futex support. */ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, @@ -1778,6 +1802,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, raw_spin_lock_irq(&lock->wait_lock); ret = __rt_mutex_start_proxy_lock(lock, waiter, task); + if (unlikely(ret)) + remove_waiter(lock, waiter); raw_spin_unlock_irq(&lock->wait_lock); return ret; @@ -1845,7 +1871,8 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, * @lock: the rt_mutex we were woken on * @waiter: the pre-initialized rt_mutex_waiter * - * Attempt to clean up after a failed rt_mutex_wait_proxy_lock(). + * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or + * rt_mutex_wait_proxy_lock(). * * Unless we acquired the lock; we're still enqueued on the wait-list and can * in fact still be granted ownership until we're removed. Therefore we can From 0521e8be211cd20d547bff9da2534b7ed6f2c1b9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 Feb 2019 13:08:59 +0100 Subject: [PATCH 0810/1436] x86/mm/cpa: Fix set_mce_nospec() The recent commit fe0937b24ff5 ("x86/mm/cpa: Fold cpa_flush_range() and cpa_flush_array() into a single cpa_flush() function") accidentally made the call to make_addr_canonical_again() go away, which breaks set_mce_nospec(). Re-instate the call to convert the address back into canonical form right before invoking either CLFLUSH or INVLPG. Rename the function while at it to be shorter (and less MAGA). Fixes: fe0937b24ff5 ("x86/mm/cpa: Fold cpa_flush_range() and cpa_flush_array() into a single cpa_flush() function") Reported-by: Tony Luck Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Tony Luck Cc: Linus Torvalds Cc: Dan Williams Cc: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Rik van Riel Link: https://lkml.kernel.org/r/20190208120859.GH32511@hirez.programming.kicks-ass.net --- arch/x86/mm/pageattr.c | 50 +++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 4f8972311a77..14e6119838a6 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -230,6 +230,29 @@ static bool __cpa_pfn_in_highmap(unsigned long pfn) #endif +/* + * See set_mce_nospec(). + * + * Machine check recovery code needs to change cache mode of poisoned pages to + * UC to avoid speculative access logging another error. But passing the + * address of the 1:1 mapping to set_memory_uc() is a fine way to encourage a + * speculative access. So we cheat and flip the top bit of the address. This + * works fine for the code that updates the page tables. But at the end of the + * process we need to flush the TLB and cache and the non-canonical address + * causes a #GP fault when used by the INVLPG and CLFLUSH instructions. + * + * But in the common case we already have a canonical address. This code + * will fix the top bit if needed and is a no-op otherwise. + */ +static inline unsigned long fix_addr(unsigned long addr) +{ +#ifdef CONFIG_X86_64 + return (long)(addr << 1) >> 1; +#else + return addr; +#endif +} + static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx) { if (cpa->flags & CPA_PAGES_ARRAY) { @@ -313,7 +336,7 @@ void __cpa_flush_tlb(void *data) unsigned int i; for (i = 0; i < cpa->numpages; i++) - __flush_tlb_one_kernel(__cpa_addr(cpa, i)); + __flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i))); } static void cpa_flush(struct cpa_data *data, int cache) @@ -347,7 +370,7 @@ static void cpa_flush(struct cpa_data *data, int cache) * Only flush present addresses: */ if (pte && (pte_val(*pte) & _PAGE_PRESENT)) - clflush_cache_range_opt((void *)addr, PAGE_SIZE); + clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE); } mb(); } @@ -1627,29 +1650,6 @@ out: return ret; } -/* - * Machine check recovery code needs to change cache mode of poisoned - * pages to UC to avoid speculative access logging another error. But - * passing the address of the 1:1 mapping to set_memory_uc() is a fine - * way to encourage a speculative access. So we cheat and flip the top - * bit of the address. This works fine for the code that updates the - * page tables. But at the end of the process we need to flush the cache - * and the non-canonical address causes a #GP fault when used by the - * CLFLUSH instruction. - * - * But in the common case we already have a canonical address. This code - * will fix the top bit if needed and is a no-op otherwise. - */ -static inline unsigned long make_addr_canonical_again(unsigned long addr) -{ -#ifdef CONFIG_X86_64 - return (long)(addr << 1) >> 1; -#else - return addr; -#endif -} - - static int change_page_attr_set_clr(unsigned long *addr, int numpages, pgprot_t mask_set, pgprot_t mask_clr, int force_split, int in_flag, From 00a399cad1a063e7665f06b6497a807db20441fd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Feb 2019 07:30:44 +0100 Subject: [PATCH 0811/1436] ALSA: pcm: Revert capture stream behavior change in blocking mode In the commit 62ba568f7aef ("ALSA: pcm: Return 0 when size < start_threshold in capture"), we changed the behavior of __snd_pcm_lib_xfer() to return immediately with 0 when a capture stream has a high start_threshold. This was intended to be a correction of the behavior consistency and looked harmless, but this was the culprit of the recent breakage reported by syzkaller, which was fixed by the commit e190161f96b8 ("ALSA: pcm: Fix tight loop of OSS capture stream"). At the time for the OSS fix, I didn't touch the behavior for ALSA native API, as assuming that this behavior actually is good. But this turned out to be also broken actually for a similar deployment, e.g. one thread goes to a write loop in blocking mode while another thread controls the start/stop of the stream manually. Overall, the original commit is harmful, and it brings less merit to keep that behavior. Let's revert it. Fixes: 62ba568f7aef ("ALSA: pcm: Return 0 when size < start_threshold in capture") Fixes: e190161f96b8 ("ALSA: pcm: Fix tight loop of OSS capture stream") Cc: Signed-off-by: Takashi Iwai --- sound/core/pcm_lib.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 6c99fa8ac5fa..6c0b30391ba9 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -2112,13 +2112,6 @@ int pcm_lib_apply_appl_ptr(struct snd_pcm_substream *substream, return 0; } -/* allow waiting for a capture stream that hasn't been started */ -#if IS_ENABLED(CONFIG_SND_PCM_OSS) -#define wait_capture_start(substream) ((substream)->oss.oss) -#else -#define wait_capture_start(substream) false -#endif - /* the common loop for read/write data */ snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream, void *data, bool interleaved, @@ -2184,16 +2177,11 @@ snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream, snd_pcm_update_hw_ptr(substream); if (!is_playback && - runtime->status->state == SNDRV_PCM_STATE_PREPARED) { - if (size >= runtime->start_threshold) { - err = snd_pcm_start(substream); - if (err < 0) - goto _end_unlock; - } else if (!wait_capture_start(substream)) { - /* nothing to do */ - err = 0; + runtime->status->state == SNDRV_PCM_STATE_PREPARED && + size >= runtime->start_threshold) { + err = snd_pcm_start(substream); + if (err < 0) goto _end_unlock; - } } avail = snd_pcm_avail(substream); From 9ab7d228e972b50d06ee28076ab5d0f93e862463 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sat, 12 Jan 2019 10:07:23 +0100 Subject: [PATCH 0812/1436] MAINTAINERS: unify reference to xen-devel list In the linux kernel MAINTAINERS file, largely "xen-devel@lists.xenproject.org (moderated for non-subscribers)" is used to refer to the xen-devel mailing list. The DRM DRIVERS FOR XEN section entry mentions xen-devel@lists.xen.org instead, but that is just the same mailing list as the mailing list above. Signed-off-by: Lukas Bulwahn Signed-off-by: Juergen Gross --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fc9fe92bfa68..9ada1dc4fe83 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5092,7 +5092,7 @@ DRM DRIVERS FOR XEN M: Oleksandr Andrushchenko T: git git://anongit.freedesktop.org/drm/drm-misc L: dri-devel@lists.freedesktop.org -L: xen-devel@lists.xen.org +L: xen-devel@lists.xenproject.org (moderated for non-subscribers) S: Supported F: drivers/gpu/drm/xen/ F: Documentation/gpu/xen-front.rst From 82abf33766712d8446ea137a3400165e31bd12c7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 7 Dec 2018 11:16:53 -0800 Subject: [PATCH 0813/1436] drm/sched: Always trace the dependencies we wait on, to fix a race. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The entity->dependency can go away completely once we've called drm_sched_entity_add_dependency_cb() (if the cb is called before we get around to tracing). The tracepoint is more useful if we trace every dependency instead of just ones that get callbacks installed, anyway, so just do that. Fixes any easy-to-produce OOPS when tracing the scheduler on V3D with "perf record -a -e gpu_scheduler:.\* glxgears" and DEBUG_SLAB enabled. Signed-off-by: Eric Anholt Reviewed-by: Christian König Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/sched_entity.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 4463d3826ecb..e2942c9a11a7 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -440,13 +440,10 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) while ((entity->dependency = sched->ops->dependency(sched_job, entity))) { + trace_drm_sched_job_wait_dep(sched_job, entity->dependency); - if (drm_sched_entity_add_dependency_cb(entity)) { - - trace_drm_sched_job_wait_dep(sched_job, - entity->dependency); + if (drm_sched_entity_add_dependency_cb(entity)) return NULL; - } } /* skip jobs from entity that marked guilty */ From 727962f030c23422a01e8b22d0f463815fb15ec4 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 24 Jan 2019 12:06:00 -0500 Subject: [PATCH 0814/1436] drm/amd/display: Expose connector VRR range via debugfs [Why] It's useful to know the min and max vrr range for IGT testing. [How] Expose the min and max vfreq for the connector via a debugfs file on the connector, "vrr_range". Example usage: cat /sys/kernel/debug/dri/0/DP-1/vrr_range Signed-off-by: Nicholas Kazlauskas Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 9a7ac58eb18e..ddd75a4d8ba5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -671,6 +671,25 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us return bytes_from_user; } +/* + * Returns the min and max vrr vfreq through the connector's debugfs file. + * Example usage: cat /sys/kernel/debug/dri/0/DP-1/vrr_range + */ +static int vrr_range_show(struct seq_file *m, void *data) +{ + struct drm_connector *connector = m->private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + + if (connector->status != connector_status_connected) + return -ENODEV; + + seq_printf(m, "Min: %u\n", (unsigned int)aconnector->min_vfreq); + seq_printf(m, "Max: %u\n", (unsigned int)aconnector->max_vfreq); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(vrr_range); + static const struct file_operations dp_link_settings_debugfs_fops = { .owner = THIS_MODULE, .read = dp_link_settings_read, @@ -697,7 +716,8 @@ static const struct { } dp_debugfs_entries[] = { {"link_settings", &dp_link_settings_debugfs_fops}, {"phy_settings", &dp_phy_settings_debugfs_fop}, - {"test_pattern", &dp_phy_test_pattern_fops} + {"test_pattern", &dp_phy_test_pattern_fops}, + {"vrr_range", &vrr_range_fops} }; int connector_debugfs_init(struct amdgpu_dm_connector *connector) From c675e06a98a474f7ad0af32ce467613da818da52 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 8 Feb 2019 13:55:31 +0100 Subject: [PATCH 0815/1436] ipvlan: decouple l3s mode dependencies from other modes Right now ipvlan has a hard dependency on CONFIG_NETFILTER and otherwise it cannot be built. However, the only ipvlan operation mode that actually depends on netfilter is l3s, everything else is independent of it. Break this hard dependency such that users are able to use ipvlan l3 mode on systems where netfilter is not compiled in. Therefore, this adds a hidden CONFIG_IPVLAN_L3S bool which is defaulting to y when CONFIG_NETFILTER is set in order to retain existing behavior for l3s. All l3s related code is refactored into ipvlan_l3s.c that is compiled in when enabled. Signed-off-by: Daniel Borkmann Cc: Mahesh Bandewar Cc: Florian Westphal Cc: Martynas Pumputis Acked-by: Florian Westphal Signed-off-by: David S. Miller --- drivers/net/Kconfig | 6 +- drivers/net/ipvlan/Makefile | 3 +- drivers/net/ipvlan/ipvlan.h | 37 ++++- drivers/net/ipvlan/ipvlan_core.c | 105 +------------- drivers/net/ipvlan/ipvlan_l3s.c | 227 +++++++++++++++++++++++++++++++ drivers/net/ipvlan/ipvlan_main.c | 117 ++-------------- 6 files changed, 287 insertions(+), 208 deletions(-) create mode 100644 drivers/net/ipvlan/ipvlan_l3s.c diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index edb1c023a753..7f9727f64f55 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -145,13 +145,15 @@ config MACVTAP To compile this driver as a module, choose M here: the module will be called macvtap. +config IPVLAN_L3S + depends on NETFILTER + def_bool y + select NET_L3_MASTER_DEV config IPVLAN tristate "IP-VLAN support" depends on INET depends on IPV6 || !IPV6 - depends on NETFILTER - select NET_L3_MASTER_DEV ---help--- This allows one to create virtual devices off of a main interface and packets will be delivered based on the dest L3 (IPv6/IPv4 addr) diff --git a/drivers/net/ipvlan/Makefile b/drivers/net/ipvlan/Makefile index 8a2c64dc9641..3ee95367a994 100644 --- a/drivers/net/ipvlan/Makefile +++ b/drivers/net/ipvlan/Makefile @@ -5,4 +5,5 @@ obj-$(CONFIG_IPVLAN) += ipvlan.o obj-$(CONFIG_IPVTAP) += ipvtap.o -ipvlan-objs := ipvlan_core.o ipvlan_main.o +ipvlan-objs-$(CONFIG_IPVLAN_L3S) += ipvlan_l3s.o +ipvlan-objs := ipvlan_core.o ipvlan_main.o $(ipvlan-objs-y) diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index adb826f55e60..b906d2f6bd04 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -165,10 +165,9 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, const void *iaddr, bool is_v6); bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6); void ipvlan_ht_addr_del(struct ipvl_addr *addr); -struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, - u16 proto); -unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state); +struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h, + int addr_type, bool use_dest); +void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type); void ipvlan_count_rx(const struct ipvl_dev *ipvlan, unsigned int len, bool success, bool mcast); int ipvlan_link_new(struct net *src_net, struct net_device *dev, @@ -177,6 +176,36 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev, void ipvlan_link_delete(struct net_device *dev, struct list_head *head); void ipvlan_link_setup(struct net_device *dev); int ipvlan_link_register(struct rtnl_link_ops *ops); +#ifdef CONFIG_IPVLAN_L3S +int ipvlan_l3s_register(struct ipvl_port *port); +void ipvlan_l3s_unregister(struct ipvl_port *port); +void ipvlan_migrate_l3s_hook(struct net *oldnet, struct net *newnet); +int ipvlan_l3s_init(void); +void ipvlan_l3s_cleanup(void); +#else +static inline int ipvlan_l3s_register(struct ipvl_port *port) +{ + return -ENOTSUPP; +} + +static inline void ipvlan_l3s_unregister(struct ipvl_port *port) +{ +} + +static inline void ipvlan_migrate_l3s_hook(struct net *oldnet, + struct net *newnet) +{ +} + +static inline int ipvlan_l3s_init(void) +{ + return 0; +} + +static inline void ipvlan_l3s_cleanup(void) +{ +} +#endif /* CONFIG_IPVLAN_L3S */ static inline bool netif_is_ipvlan_port(const struct net_device *dev) { diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 1a8132eb2a3e..e0f5bc82b10c 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -138,7 +138,7 @@ bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6) return ret; } -static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type) +void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type) { void *lyr3h = NULL; @@ -355,9 +355,8 @@ out: return ret; } -static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, - void *lyr3h, int addr_type, - bool use_dest) +struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h, + int addr_type, bool use_dest) { struct ipvl_addr *addr = NULL; @@ -647,7 +646,9 @@ int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) case IPVLAN_MODE_L2: return ipvlan_xmit_mode_l2(skb, dev); case IPVLAN_MODE_L3: +#ifdef CONFIG_IPVLAN_L3S case IPVLAN_MODE_L3S: +#endif return ipvlan_xmit_mode_l3(skb, dev); } @@ -743,8 +744,10 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) return ipvlan_handle_mode_l2(pskb, port); case IPVLAN_MODE_L3: return ipvlan_handle_mode_l3(pskb, port); +#ifdef CONFIG_IPVLAN_L3S case IPVLAN_MODE_L3S: return RX_HANDLER_PASS; +#endif } /* Should not reach here */ @@ -753,97 +756,3 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) kfree_skb(skb); return RX_HANDLER_CONSUMED; } - -static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb, - struct net_device *dev) -{ - struct ipvl_addr *addr = NULL; - struct ipvl_port *port; - void *lyr3h; - int addr_type; - - if (!dev || !netif_is_ipvlan_port(dev)) - goto out; - - port = ipvlan_port_get_rcu(dev); - if (!port || port->mode != IPVLAN_MODE_L3S) - goto out; - - lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type); - if (!lyr3h) - goto out; - - addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); -out: - return addr; -} - -struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, - u16 proto) -{ - struct ipvl_addr *addr; - struct net_device *sdev; - - addr = ipvlan_skb_to_addr(skb, dev); - if (!addr) - goto out; - - sdev = addr->master->dev; - switch (proto) { - case AF_INET: - { - int err; - struct iphdr *ip4h = ip_hdr(skb); - - err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr, - ip4h->tos, sdev); - if (unlikely(err)) - goto out; - break; - } -#if IS_ENABLED(CONFIG_IPV6) - case AF_INET6: - { - struct dst_entry *dst; - struct ipv6hdr *ip6h = ipv6_hdr(skb); - int flags = RT6_LOOKUP_F_HAS_SADDR; - struct flowi6 fl6 = { - .flowi6_iif = sdev->ifindex, - .daddr = ip6h->daddr, - .saddr = ip6h->saddr, - .flowlabel = ip6_flowinfo(ip6h), - .flowi6_mark = skb->mark, - .flowi6_proto = ip6h->nexthdr, - }; - - skb_dst_drop(skb); - dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, - skb, flags); - skb_dst_set(skb, dst); - break; - } -#endif - default: - break; - } - -out: - return skb; -} - -unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct ipvl_addr *addr; - unsigned int len; - - addr = ipvlan_skb_to_addr(skb, skb->dev); - if (!addr) - goto out; - - skb->dev = addr->master->dev; - len = skb->len + ETH_HLEN; - ipvlan_count_rx(addr->master, len, true, false); -out: - return NF_ACCEPT; -} diff --git a/drivers/net/ipvlan/ipvlan_l3s.c b/drivers/net/ipvlan/ipvlan_l3s.c new file mode 100644 index 000000000000..9a2f24078a54 --- /dev/null +++ b/drivers/net/ipvlan/ipvlan_l3s.c @@ -0,0 +1,227 @@ +/* Copyright (c) 2014 Mahesh Bandewar + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + */ + +#include "ipvlan.h" + +static unsigned int ipvlan_netid __read_mostly; + +struct ipvlan_netns { + unsigned int ipvl_nf_hook_refcnt; +}; + +static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb, + struct net_device *dev) +{ + struct ipvl_addr *addr = NULL; + struct ipvl_port *port; + int addr_type; + void *lyr3h; + + if (!dev || !netif_is_ipvlan_port(dev)) + goto out; + + port = ipvlan_port_get_rcu(dev); + if (!port || port->mode != IPVLAN_MODE_L3S) + goto out; + + lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type); + if (!lyr3h) + goto out; + + addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); +out: + return addr; +} + +static struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, + struct sk_buff *skb, u16 proto) +{ + struct ipvl_addr *addr; + struct net_device *sdev; + + addr = ipvlan_skb_to_addr(skb, dev); + if (!addr) + goto out; + + sdev = addr->master->dev; + switch (proto) { + case AF_INET: + { + struct iphdr *ip4h = ip_hdr(skb); + int err; + + err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr, + ip4h->tos, sdev); + if (unlikely(err)) + goto out; + break; + } +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + { + struct dst_entry *dst; + struct ipv6hdr *ip6h = ipv6_hdr(skb); + int flags = RT6_LOOKUP_F_HAS_SADDR; + struct flowi6 fl6 = { + .flowi6_iif = sdev->ifindex, + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + .flowlabel = ip6_flowinfo(ip6h), + .flowi6_mark = skb->mark, + .flowi6_proto = ip6h->nexthdr, + }; + + skb_dst_drop(skb); + dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, + skb, flags); + skb_dst_set(skb, dst); + break; + } +#endif + default: + break; + } +out: + return skb; +} + +static const struct l3mdev_ops ipvl_l3mdev_ops = { + .l3mdev_l3_rcv = ipvlan_l3_rcv, +}; + +static unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct ipvl_addr *addr; + unsigned int len; + + addr = ipvlan_skb_to_addr(skb, skb->dev); + if (!addr) + goto out; + + skb->dev = addr->master->dev; + len = skb->len + ETH_HLEN; + ipvlan_count_rx(addr->master, len, true, false); +out: + return NF_ACCEPT; +} + +static const struct nf_hook_ops ipvl_nfops[] = { + { + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = INT_MAX, + }, +#if IS_ENABLED(CONFIG_IPV6) + { + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = INT_MAX, + }, +#endif +}; + +static int ipvlan_register_nf_hook(struct net *net) +{ + struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); + int err = 0; + + if (!vnet->ipvl_nf_hook_refcnt) { + err = nf_register_net_hooks(net, ipvl_nfops, + ARRAY_SIZE(ipvl_nfops)); + if (!err) + vnet->ipvl_nf_hook_refcnt = 1; + } else { + vnet->ipvl_nf_hook_refcnt++; + } + + return err; +} + +static void ipvlan_unregister_nf_hook(struct net *net) +{ + struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); + + if (WARN_ON(!vnet->ipvl_nf_hook_refcnt)) + return; + + vnet->ipvl_nf_hook_refcnt--; + if (!vnet->ipvl_nf_hook_refcnt) + nf_unregister_net_hooks(net, ipvl_nfops, + ARRAY_SIZE(ipvl_nfops)); +} + +void ipvlan_migrate_l3s_hook(struct net *oldnet, struct net *newnet) +{ + struct ipvlan_netns *old_vnet; + + ASSERT_RTNL(); + + old_vnet = net_generic(oldnet, ipvlan_netid); + if (!old_vnet->ipvl_nf_hook_refcnt) + return; + + ipvlan_register_nf_hook(newnet); + ipvlan_unregister_nf_hook(oldnet); +} + +static void ipvlan_ns_exit(struct net *net) +{ + struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); + + if (WARN_ON_ONCE(vnet->ipvl_nf_hook_refcnt)) { + vnet->ipvl_nf_hook_refcnt = 0; + nf_unregister_net_hooks(net, ipvl_nfops, + ARRAY_SIZE(ipvl_nfops)); + } +} + +static struct pernet_operations ipvlan_net_ops = { + .id = &ipvlan_netid, + .size = sizeof(struct ipvlan_netns), + .exit = ipvlan_ns_exit, +}; + +int ipvlan_l3s_init(void) +{ + return register_pernet_subsys(&ipvlan_net_ops); +} + +void ipvlan_l3s_cleanup(void) +{ + unregister_pernet_subsys(&ipvlan_net_ops); +} + +int ipvlan_l3s_register(struct ipvl_port *port) +{ + struct net_device *dev = port->dev; + int ret; + + ASSERT_RTNL(); + + ret = ipvlan_register_nf_hook(read_pnet(&port->pnet)); + if (!ret) { + dev->l3mdev_ops = &ipvl_l3mdev_ops; + dev->priv_flags |= IFF_L3MDEV_MASTER; + } + + return ret; +} + +void ipvlan_l3s_unregister(struct ipvl_port *port) +{ + struct net_device *dev = port->dev; + + ASSERT_RTNL(); + + dev->priv_flags &= ~IFF_L3MDEV_MASTER; + ipvlan_unregister_nf_hook(read_pnet(&port->pnet)); + dev->l3mdev_ops = NULL; +} diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 19bdde60680c..8ec73d973079 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -9,73 +9,10 @@ #include "ipvlan.h" -static unsigned int ipvlan_netid __read_mostly; - -struct ipvlan_netns { - unsigned int ipvl_nf_hook_refcnt; -}; - -static const struct nf_hook_ops ipvl_nfops[] = { - { - .hook = ipvlan_nf_input, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = INT_MAX, - }, -#if IS_ENABLED(CONFIG_IPV6) - { - .hook = ipvlan_nf_input, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_IN, - .priority = INT_MAX, - }, -#endif -}; - -static const struct l3mdev_ops ipvl_l3mdev_ops = { - .l3mdev_l3_rcv = ipvlan_l3_rcv, -}; - -static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) -{ - ipvlan->dev->mtu = dev->mtu; -} - -static int ipvlan_register_nf_hook(struct net *net) -{ - struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); - int err = 0; - - if (!vnet->ipvl_nf_hook_refcnt) { - err = nf_register_net_hooks(net, ipvl_nfops, - ARRAY_SIZE(ipvl_nfops)); - if (!err) - vnet->ipvl_nf_hook_refcnt = 1; - } else { - vnet->ipvl_nf_hook_refcnt++; - } - - return err; -} - -static void ipvlan_unregister_nf_hook(struct net *net) -{ - struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); - - if (WARN_ON(!vnet->ipvl_nf_hook_refcnt)) - return; - - vnet->ipvl_nf_hook_refcnt--; - if (!vnet->ipvl_nf_hook_refcnt) - nf_unregister_net_hooks(net, ipvl_nfops, - ARRAY_SIZE(ipvl_nfops)); -} - static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval, struct netlink_ext_ack *extack) { struct ipvl_dev *ipvlan; - struct net_device *mdev = port->dev; unsigned int flags; int err; @@ -97,17 +34,12 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval, } if (nval == IPVLAN_MODE_L3S) { /* New mode is L3S */ - err = ipvlan_register_nf_hook(read_pnet(&port->pnet)); - if (!err) { - mdev->l3mdev_ops = &ipvl_l3mdev_ops; - mdev->priv_flags |= IFF_L3MDEV_MASTER; - } else + err = ipvlan_l3s_register(port); + if (err) goto fail; } else if (port->mode == IPVLAN_MODE_L3S) { /* Old mode was L3S */ - mdev->priv_flags &= ~IFF_L3MDEV_MASTER; - ipvlan_unregister_nf_hook(read_pnet(&port->pnet)); - mdev->l3mdev_ops = NULL; + ipvlan_l3s_unregister(port); } port->mode = nval; } @@ -166,11 +98,8 @@ static void ipvlan_port_destroy(struct net_device *dev) struct ipvl_port *port = ipvlan_port_get_rtnl(dev); struct sk_buff *skb; - if (port->mode == IPVLAN_MODE_L3S) { - dev->priv_flags &= ~IFF_L3MDEV_MASTER; - ipvlan_unregister_nf_hook(dev_net(dev)); - dev->l3mdev_ops = NULL; - } + if (port->mode == IPVLAN_MODE_L3S) + ipvlan_l3s_unregister(port); netdev_rx_handler_unregister(dev); cancel_work_sync(&port->wq); while ((skb = __skb_dequeue(&port->backlog)) != NULL) { @@ -446,6 +375,11 @@ static const struct header_ops ipvlan_header_ops = { .cache_update = eth_header_cache_update, }; +static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) +{ + ipvlan->dev->mtu = dev->mtu; +} + static bool netif_is_ipvlan(const struct net_device *dev) { /* both ipvlan and ipvtap devices use the same netdev_ops */ @@ -781,7 +715,6 @@ static int ipvlan_device_event(struct notifier_block *unused, case NETDEV_REGISTER: { struct net *oldnet, *newnet = dev_net(dev); - struct ipvlan_netns *old_vnet; oldnet = read_pnet(&port->pnet); if (net_eq(newnet, oldnet)) @@ -789,12 +722,7 @@ static int ipvlan_device_event(struct notifier_block *unused, write_pnet(&port->pnet, newnet); - old_vnet = net_generic(oldnet, ipvlan_netid); - if (!old_vnet->ipvl_nf_hook_refcnt) - break; - - ipvlan_register_nf_hook(newnet); - ipvlan_unregister_nf_hook(oldnet); + ipvlan_migrate_l3s_hook(oldnet, newnet); break; } case NETDEV_UNREGISTER: @@ -1068,23 +996,6 @@ static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = { }; #endif -static void ipvlan_ns_exit(struct net *net) -{ - struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); - - if (WARN_ON_ONCE(vnet->ipvl_nf_hook_refcnt)) { - vnet->ipvl_nf_hook_refcnt = 0; - nf_unregister_net_hooks(net, ipvl_nfops, - ARRAY_SIZE(ipvl_nfops)); - } -} - -static struct pernet_operations ipvlan_net_ops = { - .id = &ipvlan_netid, - .size = sizeof(struct ipvlan_netns), - .exit = ipvlan_ns_exit, -}; - static int __init ipvlan_init_module(void) { int err; @@ -1099,13 +1010,13 @@ static int __init ipvlan_init_module(void) register_inetaddr_notifier(&ipvlan_addr4_notifier_block); register_inetaddr_validator_notifier(&ipvlan_addr4_vtor_notifier_block); - err = register_pernet_subsys(&ipvlan_net_ops); + err = ipvlan_l3s_init(); if (err < 0) goto error; err = ipvlan_link_register(&ipvlan_link_ops); if (err < 0) { - unregister_pernet_subsys(&ipvlan_net_ops); + ipvlan_l3s_cleanup(); goto error; } @@ -1126,7 +1037,7 @@ error: static void __exit ipvlan_cleanup_module(void) { rtnl_link_unregister(&ipvlan_link_ops); - unregister_pernet_subsys(&ipvlan_net_ops); + ipvlan_l3s_cleanup(); unregister_netdevice_notifier(&ipvlan_notifier_block); unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block); unregister_inetaddr_validator_notifier( From b9ad6de534279daa1158f52693f6c0f54c6de6ae Mon Sep 17 00:00:00 2001 From: Paul Fox Date: Fri, 8 Feb 2019 16:25:37 +0000 Subject: [PATCH 0816/1436] sfc: add bundle partition definitions to mtd Signed-off-by: Paul Fox Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 2 ++ drivers/net/ethernet/sfc/mcdi_pcol.h | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 6062e99fa69b..bc92d73047c6 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -6046,6 +6046,8 @@ static const struct efx_ef10_nvram_type_info efx_ef10_nvram_types[] = { { NVRAM_PARTITION_TYPE_DYNCONFIG_DEFAULTS, 0, 0, "sfc_dynamic_cfg_dflt" }, { NVRAM_PARTITION_TYPE_ROMCONFIG_DEFAULTS, 0, 0, "sfc_exp_rom_cfg_dflt" }, { NVRAM_PARTITION_TYPE_STATUS, 0, 0, "sfc_status" }, + { NVRAM_PARTITION_TYPE_BUNDLE, 0, 0, "sfc_bundle" }, + { NVRAM_PARTITION_TYPE_BUNDLE_METADATA, 0, 0, "sfc_bundle_metadata" }, }; #define EF10_NVRAM_PARTITION_COUNT ARRAY_SIZE(efx_ef10_nvram_types) diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h index 3839eec783ea..20a5523bf9f3 100644 --- a/drivers/net/ethernet/sfc/mcdi_pcol.h +++ b/drivers/net/ethernet/sfc/mcdi_pcol.h @@ -6784,6 +6784,14 @@ * subset of the information stored in this partition. */ #define NVRAM_PARTITION_TYPE_FRU_INFORMATION 0x1d00 +/* enum: Bundle image partition */ +#define NVRAM_PARTITION_TYPE_BUNDLE 0x1e00 +/* enum: Bundle metadata partition that holds additional information related to + * a bundle update in TLV format + */ +#define NVRAM_PARTITION_TYPE_BUNDLE_METADATA 0x1e01 +/* enum: Bundle update non-volatile log output partition */ +#define NVRAM_PARTITION_TYPE_BUNDLE_LOG 0x1e02 /* enum: Start of reserved value range (firmware may use for any purpose) */ #define NVRAM_PARTITION_TYPE_RESERVED_VALUES_MIN 0xff00 /* enum: End of reserved value range (firmware may use for any purpose) */ From 8c772a9bfc7c07c76f4a58b58910452fbb20843b Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 25 Jan 2019 08:12:47 +0800 Subject: [PATCH 0817/1436] blk-iolatency: fix IO hang due to negative inflight counter Our test reported the following stack, and vmcore showed that ->inflight counter is -1. [ffffc9003fcc38d0] __schedule at ffffffff8173d95d [ffffc9003fcc3958] schedule at ffffffff8173de26 [ffffc9003fcc3970] io_schedule at ffffffff810bb6b6 [ffffc9003fcc3988] blkcg_iolatency_throttle at ffffffff813911cb [ffffc9003fcc3a20] rq_qos_throttle at ffffffff813847f3 [ffffc9003fcc3a48] blk_mq_make_request at ffffffff8137468a [ffffc9003fcc3b08] generic_make_request at ffffffff81368b49 [ffffc9003fcc3b68] submit_bio at ffffffff81368d7d [ffffc9003fcc3bb8] ext4_io_submit at ffffffffa031be00 [ext4] [ffffc9003fcc3c00] ext4_writepages at ffffffffa03163de [ext4] [ffffc9003fcc3d68] do_writepages at ffffffff811c49ae [ffffc9003fcc3d78] __filemap_fdatawrite_range at ffffffff811b6188 [ffffc9003fcc3e30] filemap_write_and_wait_range at ffffffff811b6301 [ffffc9003fcc3e60] ext4_sync_file at ffffffffa030cee8 [ext4] [ffffc9003fcc3ea8] vfs_fsync_range at ffffffff8128594b [ffffc9003fcc3ee8] do_fsync at ffffffff81285abd [ffffc9003fcc3f18] sys_fsync at ffffffff81285d50 [ffffc9003fcc3f28] do_syscall_64 at ffffffff81003c04 [ffffc9003fcc3f50] entry_SYSCALL_64_after_swapgs at ffffffff81742b8e The ->inflight counter may be negative (-1) if 1) blk-iolatency was disabled when the IO was issued, 2) blk-iolatency was enabled before this IO reached its endio, 3) the ->inflight counter is decreased from 0 to -1 in endio() In fact the hang can be easily reproduced by the below script, H=/sys/fs/cgroup/unified/ P=/sys/fs/cgroup/unified/test echo "+io" > $H/cgroup.subtree_control mkdir -p $P echo $$ > $P/cgroup.procs xfs_io -f -d -c "pwrite 0 4k" /dev/sdg echo "`cat /sys/block/sdg/dev` target=1000000" > $P/io.latency xfs_io -f -d -c "pwrite 0 4k" /dev/sdg This fixes the problem by freezing the queue so that while enabling/disabling iolatency, there is no inflight rq running. Note that quiesce_queue is not needed as this only updating iolatency configuration about which dispatching request_queue doesn't care. Signed-off-by: Liu Bo Signed-off-by: Jens Axboe --- block/blk-iolatency.c | 52 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index fc714ef402a6..1893686a9c1f 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -72,6 +72,7 @@ #include #include #include +#include #include "blk-rq-qos.h" #include "blk-stat.h" @@ -601,6 +602,9 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) return; enabled = blk_iolatency_enabled(iolat->blkiolat); + if (!enabled) + return; + while (blkg && blkg->parent) { iolat = blkg_to_lat(blkg); if (!iolat) { @@ -610,7 +614,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) rqw = &iolat->rq_wait; atomic_dec(&rqw->inflight); - if (!enabled || iolat->min_lat_nsec == 0) + if (iolat->min_lat_nsec == 0) goto next; iolatency_record_time(iolat, &bio->bi_issue, now, issue_as_root); @@ -754,10 +758,13 @@ int blk_iolatency_init(struct request_queue *q) return 0; } -static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) +/* + * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise + * return 0. + */ +static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) { struct iolatency_grp *iolat = blkg_to_lat(blkg); - struct blk_iolatency *blkiolat = iolat->blkiolat; u64 oldval = iolat->min_lat_nsec; iolat->min_lat_nsec = val; @@ -766,9 +773,10 @@ static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) BLKIOLATENCY_MAX_WIN_SIZE); if (!oldval && val) - atomic_inc(&blkiolat->enabled); + return 1; if (oldval && !val) - atomic_dec(&blkiolat->enabled); + return -1; + return 0; } static void iolatency_clear_scaling(struct blkcg_gq *blkg) @@ -800,6 +808,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, u64 lat_val = 0; u64 oldval; int ret; + int enable = 0; ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx); if (ret) @@ -834,7 +843,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, blkg = ctx.blkg; oldval = iolat->min_lat_nsec; - iolatency_set_min_lat_nsec(blkg, lat_val); + enable = iolatency_set_min_lat_nsec(blkg, lat_val); + if (enable) { + WARN_ON_ONCE(!blk_get_queue(blkg->q)); + blkg_get(blkg); + } + if (oldval != iolat->min_lat_nsec) { iolatency_clear_scaling(blkg); } @@ -842,6 +856,24 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, ret = 0; out: blkg_conf_finish(&ctx); + if (ret == 0 && enable) { + struct iolatency_grp *tmp = blkg_to_lat(blkg); + struct blk_iolatency *blkiolat = tmp->blkiolat; + + blk_mq_freeze_queue(blkg->q); + + if (enable == 1) + atomic_inc(&blkiolat->enabled); + else if (enable == -1) + atomic_dec(&blkiolat->enabled); + else + WARN_ON_ONCE(1); + + blk_mq_unfreeze_queue(blkg->q); + + blkg_put(blkg); + blk_put_queue(blkg->q); + } return ret ?: nbytes; } @@ -977,8 +1009,14 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd) { struct iolatency_grp *iolat = pd_to_lat(pd); struct blkcg_gq *blkg = lat_to_blkg(iolat); + struct blk_iolatency *blkiolat = iolat->blkiolat; + int ret; - iolatency_set_min_lat_nsec(blkg, 0); + ret = iolatency_set_min_lat_nsec(blkg, 0); + if (ret == 1) + atomic_inc(&blkiolat->enabled); + if (ret == -1) + atomic_dec(&blkiolat->enabled); iolatency_clear_scaling(blkg); } From 391f552af213985d3d324c60004475759a7030c5 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 25 Jan 2019 08:12:48 +0800 Subject: [PATCH 0818/1436] Blk-iolatency: warn on negative inflight IO counter This is to catch any unexpected negative value of inflight IO counter. Signed-off-by: Liu Bo Signed-off-by: Jens Axboe --- block/blk-iolatency.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 1893686a9c1f..2620baa1f699 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -592,6 +592,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) u64 now = ktime_to_ns(ktime_get()); bool issue_as_root = bio_issue_as_root_blkg(bio); bool enabled = false; + int inflight = 0; blkg = bio->bi_blkg; if (!blkg || !bio_flagged(bio, BIO_TRACKED)) @@ -613,7 +614,8 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) } rqw = &iolat->rq_wait; - atomic_dec(&rqw->inflight); + inflight = atomic_dec_return(&rqw->inflight); + WARN_ON_ONCE(inflight < 0); if (iolat->min_lat_nsec == 0) goto next; iolatency_record_time(iolat, &bio->bi_issue, now, From 2698484178ca5cbfdde189b1d8809e1528f82a10 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 25 Jan 2019 08:12:49 +0800 Subject: [PATCH 0819/1436] blk-mq: remove duplicated definition of blk_mq_freeze_queue As the prototype has been defined in "include/linux/blk-mq.h", the one in "block/blk-mq.h" can be removed then. Signed-off-by: Liu Bo Signed-off-by: Jens Axboe --- block/blk-mq.h | 1 - 1 file changed, 1 deletion(-) diff --git a/block/blk-mq.h b/block/blk-mq.h index d943d46b0785..d0b3dd54ef8d 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -36,7 +36,6 @@ struct blk_mq_ctx { struct kobject kobj; } ____cacheline_aligned_in_smp; -void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); From c0bc5d8e2badeebe14b4005da794a8705b69320e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 8 Feb 2019 17:41:13 +0100 Subject: [PATCH 0820/1436] nfp: flower: remove unused index from nfp_fl_pedit() Static checker warning complains on uninitialized variable: drivers/net/ethernet/netronome/nfp/flower/action.c:618 nfp_fl_pedit() error: uninitialized symbol 'idx'. Which is actually never used from the functions that take it as parameter. Remove it. Fixes: 738678817573 ("drivers: net: use flow action infrastructure") Reported-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- .../ethernet/netronome/nfp/flower/action.c | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 583e97c99e68..eeda4ed98333 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -345,7 +345,7 @@ static void nfp_fl_set_helper32(u32 value, u32 mask, u8 *p_exact, u8 *p_mask) } static int -nfp_fl_set_eth(const struct flow_action_entry *act, int idx, u32 off, +nfp_fl_set_eth(const struct flow_action_entry *act, u32 off, struct nfp_fl_set_eth *set_eth) { u32 exact, mask; @@ -376,7 +376,7 @@ struct ipv4_ttl_word { }; static int -nfp_fl_set_ip4(const struct flow_action_entry *act, int idx, u32 off, +nfp_fl_set_ip4(const struct flow_action_entry *act, u32 off, struct nfp_fl_set_ip4_addrs *set_ip_addr, struct nfp_fl_set_ip4_ttl_tos *set_ip_ttl_tos) { @@ -505,7 +505,7 @@ nfp_fl_set_ip6_hop_limit_flow_label(u32 off, __be32 exact, __be32 mask, } static int -nfp_fl_set_ip6(const struct flow_action_entry *act, int idx, u32 off, +nfp_fl_set_ip6(const struct flow_action_entry *act, u32 off, struct nfp_fl_set_ipv6_addr *ip_dst, struct nfp_fl_set_ipv6_addr *ip_src, struct nfp_fl_set_ipv6_tc_hl_fl *ip_hl_fl) @@ -541,7 +541,7 @@ nfp_fl_set_ip6(const struct flow_action_entry *act, int idx, u32 off, } static int -nfp_fl_set_tport(const struct flow_action_entry *act, int idx, u32 off, +nfp_fl_set_tport(const struct flow_action_entry *act, u32 off, struct nfp_fl_set_tport *set_tport, int opcode) { u32 exact, mask; @@ -598,8 +598,8 @@ nfp_fl_pedit(const struct flow_action_entry *act, struct nfp_fl_set_eth set_eth; size_t act_size = 0; u8 ip_proto = 0; - int idx, err; u32 offset; + int err; memset(&set_ip6_tc_hl_fl, 0, sizeof(set_ip6_tc_hl_fl)); memset(&set_ip_ttl_tos, 0, sizeof(set_ip_ttl_tos)); @@ -614,22 +614,22 @@ nfp_fl_pedit(const struct flow_action_entry *act, switch (htype) { case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: - err = nfp_fl_set_eth(act, idx, offset, &set_eth); + err = nfp_fl_set_eth(act, offset, &set_eth); break; case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: - err = nfp_fl_set_ip4(act, idx, offset, &set_ip_addr, + err = nfp_fl_set_ip4(act, offset, &set_ip_addr, &set_ip_ttl_tos); break; case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: - err = nfp_fl_set_ip6(act, idx, offset, &set_ip6_dst, + err = nfp_fl_set_ip6(act, offset, &set_ip6_dst, &set_ip6_src, &set_ip6_tc_hl_fl); break; case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: - err = nfp_fl_set_tport(act, idx, offset, &set_tport, + err = nfp_fl_set_tport(act, offset, &set_tport, NFP_FL_ACTION_OPCODE_SET_TCP); break; case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: - err = nfp_fl_set_tport(act, idx, offset, &set_tport, + err = nfp_fl_set_tport(act, offset, &set_tport, NFP_FL_ACTION_OPCODE_SET_UDP); break; default: From a9b6d9ef0965d352a7c87175d7e9575b0e31d6d8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 8 Feb 2019 12:37:33 -0600 Subject: [PATCH 0821/1436] veth: Mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enabling -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/veth.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index f412ea1cef18..fbf890ebbeae 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -540,8 +540,10 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq, goto xdp_xmit; default: bpf_warn_invalid_xdp_action(act); + /* fall through */ case XDP_ABORTED: trace_xdp_exception(rq->dev, xdp_prog, act); + /* fall through */ case XDP_DROP: goto err_xdp; } @@ -661,8 +663,10 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb, goto xdp_xmit; default: bpf_warn_invalid_xdp_action(act); + /* fall through */ case XDP_ABORTED: trace_xdp_exception(rq->dev, xdp_prog, act); + /* fall through */ case XDP_DROP: goto drop; } From 2067458cf67490ba1013c918803852fe5e56221a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 8 Feb 2019 12:53:56 -0600 Subject: [PATCH 0822/1436] net: appletalk: cops: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Notice that, in this particular case, the fall-through annotation is placed at the beginning of the code comment, which is what GCC is expecting to find. Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enabling -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/appletalk/cops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c index f90bb723985f..b3c63d2f16aa 100644 --- a/drivers/net/appletalk/cops.c +++ b/drivers/net/appletalk/cops.c @@ -301,7 +301,7 @@ static int __init cops_probe1(struct net_device *dev, int ioaddr) dev->irq = cops_irq(ioaddr, board); if (dev->irq) break; - /* No IRQ found on this port, fallthrough */ + /* fall through - Once no IRQ found on this port. */ case 1: retval = -EINVAL; goto err_out; From 209d6e7b47e3547b1fdf84de2d0c9d18cbf335cf Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 8 Feb 2019 13:07:29 -0600 Subject: [PATCH 0823/1436] net: usb: pegasus: Mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enabling -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/usb/pegasus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index f4247b275e09..63e44e746ccc 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -1011,6 +1011,7 @@ static int pegasus_ioctl(struct net_device *net, struct ifreq *rq, int cmd) switch (cmd) { case SIOCDEVPRIVATE: data[0] = pegasus->phy; + /* fall through */ case SIOCDEVPRIVATE + 1: read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]); res = 0; From 0d156a371542b14cf9bdd75c8e8453172864e964 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 8 Feb 2019 13:09:06 -0600 Subject: [PATCH 0824/1436] net: usb: rtl8150: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enabling -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/usb/rtl8150.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 3f145e4c6c08..59dbdbb5feff 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -847,6 +847,7 @@ static int rtl8150_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) switch (cmd) { case SIOCDEVPRIVATE: data[0] = dev->phy; + /* fall through */ case SIOCDEVPRIVATE + 1: read_mii_word(dev, dev->phy, (data[1] & 0x1f), &data[3]); break; From 1323f75fba732a20a694b66ee9eac22387664a4c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 8 Feb 2019 13:14:12 -0600 Subject: [PATCH 0825/1436] net: fddi: skfp: Mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enabling -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/fddi/skfp/pcmplc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/fddi/skfp/pcmplc.c b/drivers/net/fddi/skfp/pcmplc.c index 6ef44c480bd5..f29f5a6a45ab 100644 --- a/drivers/net/fddi/skfp/pcmplc.c +++ b/drivers/net/fddi/skfp/pcmplc.c @@ -851,6 +851,7 @@ static void pcm_fsm(struct s_smc *smc, struct s_phy *phy, int cmd) case ACTIONS(PC5_SIGNAL) : ACTIONS_DONE() ; + /* fall through */ case PC5_SIGNAL : if ((cmd != PC_SIGNAL) && (cmd != PC_TIMEOUT_LCT)) break ; From 671f2f96813569fcc4bac306cb5a61752de9b91d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 8 Feb 2019 13:16:47 -0600 Subject: [PATCH 0826/1436] net: wimax/i2400m: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enabling -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/wimax/i2400m/usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c index f8eb66ef2944..73842a830645 100644 --- a/drivers/net/wimax/i2400m/usb.c +++ b/drivers/net/wimax/i2400m/usb.c @@ -210,6 +210,7 @@ retry: msleep(10); /* give the device some time */ goto retry; } + /* fall through */ case -EINVAL: /* while removing driver */ case -ENODEV: /* dev disconnect ... */ case -ENOENT: /* just ignore it */ From d29d87f7e61226c339d1212beff6b82f653acd67 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 8 Feb 2019 11:19:36 -0800 Subject: [PATCH 0827/1436] btf: separate btf creation and loading This change splits out previous btf__new functionality of constructing struct btf and loading it into kernel into two: - btf__new() just creates and initializes struct btf - btf__load() attempts to load existing struct btf into kernel btf__free will still close BTF fd, if it was ever loaded successfully into kernel. This change allows users of libbpf to manipulate BTF using its API, without the need to unnecessarily load it into kernel. One of the intended use cases is pahole, which will do DWARF to BTF conversion and then use libbpf to do type deduplication, while then handling ELF sections overwriting and other concerns on its own. Fixes: 2d3feca8c44f ("bpf: btf: print map dump and lookup with btf info") Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 54 ++++++++++++++++++++++------------------ tools/lib/bpf/btf.h | 1 + tools/lib/bpf/libbpf.c | 2 +- tools/lib/bpf/libbpf.map | 1 + 4 files changed, 33 insertions(+), 25 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 4324eb47d214..46db0a3b5cb7 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -367,8 +367,6 @@ void btf__free(struct btf *btf) struct btf *btf__new(__u8 *data, __u32 size) { - __u32 log_buf_size = 0; - char *log_buf = NULL; struct btf *btf; int err; @@ -378,15 +376,6 @@ struct btf *btf__new(__u8 *data, __u32 size) btf->fd = -1; - log_buf = malloc(BPF_LOG_BUF_SIZE); - if (!log_buf) { - err = -ENOMEM; - goto done; - } - - *log_buf = 0; - log_buf_size = BPF_LOG_BUF_SIZE; - btf->data = malloc(size); if (!btf->data) { err = -ENOMEM; @@ -396,17 +385,6 @@ struct btf *btf__new(__u8 *data, __u32 size) memcpy(btf->data, data, size); btf->data_size = size; - btf->fd = bpf_load_btf(btf->data, btf->data_size, - log_buf, log_buf_size, false); - - if (btf->fd == -1) { - err = -errno; - pr_warning("Error loading BTF: %s(%d)\n", strerror(errno), errno); - if (log_buf && *log_buf) - pr_warning("%s\n", log_buf); - goto done; - } - err = btf_parse_hdr(btf); if (err) goto done; @@ -418,8 +396,6 @@ struct btf *btf__new(__u8 *data, __u32 size) err = btf_parse_type_sec(btf); done: - free(log_buf); - if (err) { btf__free(btf); return ERR_PTR(err); @@ -428,6 +404,36 @@ done: return btf; } +int btf__load(struct btf *btf) +{ + __u32 log_buf_size = BPF_LOG_BUF_SIZE; + char *log_buf = NULL; + int err = 0; + + if (btf->fd >= 0) + return -EEXIST; + + log_buf = malloc(log_buf_size); + if (!log_buf) + return -ENOMEM; + + *log_buf = 0; + + btf->fd = bpf_load_btf(btf->data, btf->data_size, + log_buf, log_buf_size, false); + if (btf->fd < 0) { + err = -errno; + pr_warning("Error loading BTF: %s(%d)\n", strerror(errno), errno); + if (*log_buf) + pr_warning("%s\n", log_buf); + goto done; + } + +done: + free(log_buf); + return err; +} + int btf__fd(const struct btf *btf) { return btf->fd; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index b393da90cc85..f55b7bc98d9e 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -57,6 +57,7 @@ struct btf_ext_header { LIBBPF_API void btf__free(struct btf *btf); LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size); +LIBBPF_API int btf__load(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8d64ada5f728..e3c39edfb9d3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -836,7 +836,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) obj->efile.maps_shndx = idx; else if (strcmp(name, BTF_ELF_SEC) == 0) { obj->btf = btf__new(data->d_buf, data->d_size); - if (IS_ERR(obj->btf)) { + if (IS_ERR(obj->btf) || btf__load(obj->btf)) { pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", BTF_ELF_SEC, PTR_ERR(obj->btf)); obj->btf = NULL; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 89c1149e32ee..f5372df143f4 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -137,6 +137,7 @@ LIBBPF_0.0.2 { btf__get_map_kv_tids; btf__get_nr_types; btf__get_strings; + btf__load; btf_ext__free; btf_ext__func_info_rec_size; btf_ext__line_info_rec_size; From 02c874460f3d9213096323ac8a937fb486a4e70d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 8 Feb 2019 11:19:37 -0800 Subject: [PATCH 0828/1436] btf: expose API to work with raw btf data This patch exposes new API btf__get_raw_data() that allows to get a copy of raw BTF data out of struct btf. This is useful for external programs that need to manipulate raw data, e.g., pahole using btf__dedup() to deduplicate BTF type info and then writing it back to file. Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 6 ++++++ tools/lib/bpf/btf.h | 1 + tools/lib/bpf/libbpf.map | 1 + 3 files changed, 8 insertions(+) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 46db0a3b5cb7..4fba0aa989df 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -439,6 +439,12 @@ int btf__fd(const struct btf *btf) return btf->fd; } +const void *btf__get_raw_data(const struct btf *btf, __u32 *size) +{ + *size = btf->data_size; + return btf->data; +} + void btf__get_strings(const struct btf *btf, const char **strings, __u32 *str_len) { diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index f55b7bc98d9e..10fe412461fe 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -66,6 +66,7 @@ LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__fd(const struct btf *btf); +LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); LIBBPF_API void btf__get_strings(const struct btf *btf, const char **strings, __u32 *str_len); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index f5372df143f4..9e10467f8cbb 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -136,6 +136,7 @@ LIBBPF_0.0.2 { btf__dedup; btf__get_map_kv_tids; btf__get_nr_types; + btf__get_raw_data; btf__get_strings; btf__load; btf_ext__free; From ae4ab4b4117d23da49f04a7e1fe82a41e6074eeb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 8 Feb 2019 11:19:38 -0800 Subject: [PATCH 0829/1436] btf: expose API to work with raw btf_ext data This patch changes struct btf_ext to retain original data in sequential block of memory, which makes it possible to expose btf_ext__get_raw_data() interface similar to btf__get_raw_data(), allowing users of libbpf to get access to raw representation of .BTF.ext section. Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 102 +++++++++++++++++++++------------------ tools/lib/bpf/btf.h | 2 + tools/lib/bpf/libbpf.map | 1 + 3 files changed, 57 insertions(+), 48 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 4fba0aa989df..f6b724ed1bdd 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -42,9 +42,8 @@ struct btf { struct btf_ext_info { /* - * info points to a deep copy of the individual info section - * (e.g. func_info and line_info) from the .BTF.ext. - * It does not include the __u32 rec_size. + * info points to the individual info section (e.g. func_info and + * line_info) from the .BTF.ext. It does not include the __u32 rec_size. */ void *info; __u32 rec_size; @@ -52,8 +51,13 @@ struct btf_ext_info { }; struct btf_ext { + union { + struct btf_ext_header *hdr; + void *data; + }; struct btf_ext_info func_info; struct btf_ext_info line_info; + __u32 data_size; }; struct btf_ext_info_sec { @@ -596,7 +600,7 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, return 0; } -struct btf_ext_sec_copy_param { +struct btf_ext_sec_setup_param { __u32 off; __u32 len; __u32 min_rec_size; @@ -604,20 +608,14 @@ struct btf_ext_sec_copy_param { const char *desc; }; -static int btf_ext_copy_info(struct btf_ext *btf_ext, - __u8 *data, __u32 data_size, - struct btf_ext_sec_copy_param *ext_sec) +static int btf_ext_setup_info(struct btf_ext *btf_ext, + struct btf_ext_sec_setup_param *ext_sec) { - const struct btf_ext_header *hdr = (struct btf_ext_header *)data; const struct btf_ext_info_sec *sinfo; struct btf_ext_info *ext_info; __u32 info_left, record_size; /* The start of the info sec (including the __u32 record_size). */ - const void *info; - - /* data and data_size do not include btf_ext_header from now on */ - data = data + hdr->hdr_len; - data_size -= hdr->hdr_len; + void *info; if (ext_sec->off & 0x03) { pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n", @@ -625,16 +623,15 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, return -EINVAL; } - if (data_size < ext_sec->off || - ext_sec->len > data_size - ext_sec->off) { + info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off; + info_left = ext_sec->len; + + if (btf_ext->data + btf_ext->data_size < info + ext_sec->len) { pr_debug("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n", - ext_sec->desc, ext_sec->off, ext_sec->len); + ext_sec->desc, ext_sec->off, ext_sec->len); return -EINVAL; } - info = data + ext_sec->off; - info_left = ext_sec->len; - /* At least a record size */ if (info_left < sizeof(__u32)) { pr_debug(".BTF.ext %s record size not found\n", ext_sec->desc); @@ -646,7 +643,7 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, if (record_size < ext_sec->min_rec_size || record_size & 0x03) { pr_debug("%s section in .BTF.ext has invalid record size %u\n", - ext_sec->desc, record_size); + ext_sec->desc, record_size); return -EINVAL; } @@ -692,42 +689,35 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, ext_info = ext_sec->ext_info; ext_info->len = ext_sec->len - sizeof(__u32); ext_info->rec_size = record_size; - ext_info->info = malloc(ext_info->len); - if (!ext_info->info) - return -ENOMEM; - memcpy(ext_info->info, info + sizeof(__u32), ext_info->len); + ext_info->info = info + sizeof(__u32); return 0; } -static int btf_ext_copy_func_info(struct btf_ext *btf_ext, - __u8 *data, __u32 data_size) +static int btf_ext_setup_func_info(struct btf_ext *btf_ext) { - const struct btf_ext_header *hdr = (struct btf_ext_header *)data; - struct btf_ext_sec_copy_param param = { - .off = hdr->func_info_off, - .len = hdr->func_info_len, + struct btf_ext_sec_setup_param param = { + .off = btf_ext->hdr->func_info_off, + .len = btf_ext->hdr->func_info_len, .min_rec_size = sizeof(struct bpf_func_info_min), .ext_info = &btf_ext->func_info, .desc = "func_info" }; - return btf_ext_copy_info(btf_ext, data, data_size, ¶m); + return btf_ext_setup_info(btf_ext, ¶m); } -static int btf_ext_copy_line_info(struct btf_ext *btf_ext, - __u8 *data, __u32 data_size) +static int btf_ext_setup_line_info(struct btf_ext *btf_ext) { - const struct btf_ext_header *hdr = (struct btf_ext_header *)data; - struct btf_ext_sec_copy_param param = { - .off = hdr->line_info_off, - .len = hdr->line_info_len, + struct btf_ext_sec_setup_param param = { + .off = btf_ext->hdr->line_info_off, + .len = btf_ext->hdr->line_info_len, .min_rec_size = sizeof(struct bpf_line_info_min), .ext_info = &btf_ext->line_info, .desc = "line_info", }; - return btf_ext_copy_info(btf_ext, data, data_size, ¶m); + return btf_ext_setup_info(btf_ext, ¶m); } static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) @@ -767,9 +757,7 @@ void btf_ext__free(struct btf_ext *btf_ext) { if (!btf_ext) return; - - free(btf_ext->func_info.info); - free(btf_ext->line_info.info); + free(btf_ext->data); free(btf_ext); } @@ -786,13 +774,23 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size) if (!btf_ext) return ERR_PTR(-ENOMEM); - err = btf_ext_copy_func_info(btf_ext, data, size); - if (err) { - btf_ext__free(btf_ext); - return ERR_PTR(err); + btf_ext->data_size = size; + btf_ext->data = malloc(size); + if (!btf_ext->data) { + err = -ENOMEM; + goto done; } + memcpy(btf_ext->data, data, size); - err = btf_ext_copy_line_info(btf_ext, data, size); + err = btf_ext_setup_func_info(btf_ext); + if (err) + goto done; + + err = btf_ext_setup_line_info(btf_ext); + if (err) + goto done; + +done: if (err) { btf_ext__free(btf_ext); return ERR_PTR(err); @@ -801,6 +799,12 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size) return btf_ext; } +const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size) +{ + *size = btf_ext->data_size; + return btf_ext->data; +} + static int btf_ext_reloc_info(const struct btf *btf, const struct btf_ext_info *ext_info, const char *sec_name, __u32 insns_cnt, @@ -849,7 +853,8 @@ static int btf_ext_reloc_info(const struct btf *btf, return -ENOENT; } -int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ext, +int btf_ext__reloc_func_info(const struct btf *btf, + const struct btf_ext *btf_ext, const char *sec_name, __u32 insns_cnt, void **func_info, __u32 *cnt) { @@ -857,7 +862,8 @@ int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ex insns_cnt, func_info, cnt); } -int btf_ext__reloc_line_info(const struct btf *btf, const struct btf_ext *btf_ext, +int btf_ext__reloc_line_info(const struct btf *btf, + const struct btf_ext *btf_ext, const char *sec_name, __u32 insns_cnt, void **line_info, __u32 *cnt) { diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 10fe412461fe..0306b54d54eb 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -78,6 +78,8 @@ LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size); LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); +LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext* btf_ext, + __u32 *size); LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ext, const char *sec_name, __u32 insns_cnt, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 9e10467f8cbb..eb78c7c261d9 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -141,6 +141,7 @@ LIBBPF_0.0.2 { btf__load; btf_ext__free; btf_ext__func_info_rec_size; + btf_ext__get_raw_data; btf_ext__line_info_rec_size; btf_ext__new; btf_ext__reloc_func_info; From 49b57e0d01db73c99f86d68480fb9b4014bb1060 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 8 Feb 2019 11:19:39 -0800 Subject: [PATCH 0830/1436] tools/bpf: remove btf__get_strings() superseded by raw data API Now that we have btf__get_raw_data() it's trivial for tests to iterate over all strings for testing purposes, which eliminates the need for btf__get_strings() API. Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 7 ----- tools/lib/bpf/btf.h | 2 -- tools/lib/bpf/libbpf.map | 1 - tools/testing/selftests/bpf/test_btf.c | 39 +++++++++++++++++--------- 4 files changed, 26 insertions(+), 23 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index f6b724ed1bdd..6953fedb88ff 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -449,13 +449,6 @@ const void *btf__get_raw_data(const struct btf *btf, __u32 *size) return btf->data; } -void btf__get_strings(const struct btf *btf, const char **strings, - __u32 *str_len) -{ - *strings = btf->strings; - *str_len = btf->hdr->str_len; -} - const char *btf__name_by_offset(const struct btf *btf, __u32 offset) { if (offset < btf->hdr->str_len) diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 0306b54d54eb..94bbc249b0f1 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -67,8 +67,6 @@ LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); -LIBBPF_API void btf__get_strings(const struct btf *btf, const char **strings, - __u32 *str_len); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index eb78c7c261d9..5fc8222209f8 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -137,7 +137,6 @@ LIBBPF_0.0.2 { btf__get_map_kv_tids; btf__get_nr_types; btf__get_raw_data; - btf__get_strings; btf__load; btf_ext__free; btf_ext__func_info_rec_size; diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 447acc34db94..bbcacba39590 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -5882,15 +5882,17 @@ static void dump_btf_strings(const char *strs, __u32 len) static int do_test_dedup(unsigned int test_num) { const struct btf_dedup_test *test = &dedup_tests[test_num - 1]; - int err = 0, i; - __u32 test_nr_types, expect_nr_types, test_str_len, expect_str_len; - void *raw_btf; - unsigned int raw_btf_size; + __u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size; + const struct btf_header *test_hdr, *expect_hdr; struct btf *test_btf = NULL, *expect_btf = NULL; + const void *test_btf_data, *expect_btf_data; const char *ret_test_next_str, *ret_expect_next_str; const char *test_strs, *expect_strs; const char *test_str_cur, *test_str_end; const char *expect_str_cur, *expect_str_end; + unsigned int raw_btf_size; + void *raw_btf; + int err = 0, i; fprintf(stderr, "BTF dedup test[%u] (%s):", test_num, test->descr); @@ -5927,23 +5929,34 @@ static int do_test_dedup(unsigned int test_num) goto done; } - btf__get_strings(test_btf, &test_strs, &test_str_len); - btf__get_strings(expect_btf, &expect_strs, &expect_str_len); - if (CHECK(test_str_len != expect_str_len, - "test_str_len:%u != expect_str_len:%u", - test_str_len, expect_str_len)) { + test_btf_data = btf__get_raw_data(test_btf, &test_btf_size); + expect_btf_data = btf__get_raw_data(expect_btf, &expect_btf_size); + if (CHECK(test_btf_size != expect_btf_size, + "test_btf_size:%u != expect_btf_size:%u", + test_btf_size, expect_btf_size)) { + err = -1; + goto done; + } + + test_hdr = test_btf_data; + test_strs = test_btf_data + test_hdr->str_off; + expect_hdr = expect_btf_data; + expect_strs = expect_btf_data + expect_hdr->str_off; + if (CHECK(test_hdr->str_len != expect_hdr->str_len, + "test_hdr->str_len:%u != expect_hdr->str_len:%u", + test_hdr->str_len, expect_hdr->str_len)) { fprintf(stderr, "\ntest strings:\n"); - dump_btf_strings(test_strs, test_str_len); + dump_btf_strings(test_strs, test_hdr->str_len); fprintf(stderr, "\nexpected strings:\n"); - dump_btf_strings(expect_strs, expect_str_len); + dump_btf_strings(expect_strs, expect_hdr->str_len); err = -1; goto done; } test_str_cur = test_strs; - test_str_end = test_strs + test_str_len; + test_str_end = test_strs + test_hdr->str_len; expect_str_cur = expect_strs; - expect_str_end = expect_strs + expect_str_len; + expect_str_end = expect_strs + expect_hdr->str_len; while (test_str_cur < test_str_end && expect_str_cur < expect_str_end) { size_t test_len, expect_len; From 7499a288bf1a4a49be9d72beb0a5c7b9aa6ffec9 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 8 Feb 2019 13:58:38 -0600 Subject: [PATCH 0831/1436] xen-netback: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enabling -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/xen-netback/xenbus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 2625740bdc4a..330ddb64930f 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -655,7 +655,7 @@ static void frontend_changed(struct xenbus_device *dev, set_backend_state(be, XenbusStateClosed); if (xenbus_dev_is_online(dev)) break; - /* fall through if not online */ + /* fall through - if not online */ case XenbusStateUnknown: set_backend_state(be, XenbusStateClosed); device_unregister(&dev->dev); From 13c80dda84c4c0c0f52f8dc2e8f083ca3ccc2aa0 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 8 Feb 2019 17:56:31 +0100 Subject: [PATCH 0832/1436] MAINTAINERS: Update the ocores i2c bus driver maintainer, etc The listed maintainer has not been responding to emails for a while. Add myself as a second maintainer. Add the platform data include file, which was not listed. Signed-off-by: Andrew Lunn Signed-off-by: Wolfram Sang --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8c68de3cfd80..e832eed65767 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11307,10 +11307,12 @@ F: include/dt-bindings/ OPENCORES I2C BUS DRIVER M: Peter Korsgaard +M: Andrew Lunn L: linux-i2c@vger.kernel.org S: Maintained F: Documentation/i2c/busses/i2c-ocores F: drivers/i2c/busses/i2c-ocores.c +F: include/linux/platform_data/i2c-ocores.h OPENRISC ARCHITECTURE M: Jonas Bonn From 7c62cfb8c5744b377e9f33806e0db87a00dc6884 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Feb 2019 11:22:45 +0000 Subject: [PATCH 0833/1436] devlink: publish params only after driver init is done Currently, user can do dump or get of param values right after the devlink params are registered. However the driver may not be initialized which is an issue. The same problem happens during notification upon param registration. Allow driver to publish devlink params whenever it is ready to handle get() ops. Note that this cannot be resolved by init reordering, as the "driverinit" params have to be available before the driver is initialized (it needs the param values there). Signed-off-by: Jiri Pirko Cc: Michael Chan Cc: Tariq Toukan Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnxt/bnxt_devlink.c | 3 ++ drivers/net/ethernet/mellanox/mlx4/main.c | 1 + drivers/net/ethernet/mellanox/mlxsw/core.c | 5 ++ include/net/devlink.h | 11 +++++ net/core/devlink.c | 48 ++++++++++++++++++- 5 files changed, 67 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index a6abfa4fb168..2955e404fd18 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -258,6 +258,9 @@ int bnxt_dl_register(struct bnxt *bp) netdev_err(bp->dev, "devlink_port_params_register failed"); goto err_dl_port_unreg; } + + devlink_params_publish(dl); + return 0; err_dl_port_unreg: diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index bdb8dd161923..1f6e16d5ea6b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3981,6 +3981,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) goto err_params_unregister; + devlink_params_publish(devlink); pci_save_state(pdev); return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 4f6fa515394e..b505d3858235 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1062,6 +1062,9 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_driver_init; } + if (mlxsw_driver->params_register && !reload) + devlink_params_publish(devlink); + return 0; err_driver_init: @@ -1131,6 +1134,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, return; } + if (mlxsw_core->driver->params_unregister && !reload) + devlink_params_unpublish(devlink); if (mlxsw_core->driver->fini) mlxsw_core->driver->fini(mlxsw_core); mlxsw_thermal_fini(mlxsw_core->thermal); diff --git a/include/net/devlink.h b/include/net/devlink.h index c12ad6e9095d..2b384a38911b 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -358,6 +358,7 @@ struct devlink_param_item { const struct devlink_param *param; union devlink_param_value driverinit_value; bool driverinit_value_valid; + bool published; }; enum devlink_param_generic_id { @@ -618,6 +619,8 @@ int devlink_params_register(struct devlink *devlink, void devlink_params_unregister(struct devlink *devlink, const struct devlink_param *params, size_t params_count); +void devlink_params_publish(struct devlink *devlink); +void devlink_params_unpublish(struct devlink *devlink); int devlink_port_params_register(struct devlink_port *devlink_port, const struct devlink_param *params, size_t params_count); @@ -724,6 +727,14 @@ static inline void devlink_unregister(struct devlink *devlink) { } +static inline void devlink_params_publish(struct devlink *devlink) +{ +} + +static inline void devlink_params_unpublish(struct devlink *devlink) +{ +} + static inline void devlink_free(struct devlink *devlink) { kfree(devlink); diff --git a/net/core/devlink.c b/net/core/devlink.c index 7fbdba547d4f..e6a015b8ac9b 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2858,6 +2858,7 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, u32 portid, u32 seq, int flags) { union devlink_param_value param_value[DEVLINK_PARAM_CMODE_MAX + 1]; + bool param_value_set[DEVLINK_PARAM_CMODE_MAX + 1] = {}; const struct devlink_param *param = param_item->param; struct devlink_param_gset_ctx ctx; struct nlattr *param_values_list; @@ -2876,12 +2877,15 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, return -EOPNOTSUPP; param_value[i] = param_item->driverinit_value; } else { + if (!param_item->published) + continue; ctx.cmode = i; err = devlink_param_get(devlink, param, &ctx); if (err) return err; param_value[i] = ctx.val; } + param_value_set[i] = true; } hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); @@ -2916,7 +2920,7 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, goto param_nest_cancel; for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) { - if (!devlink_param_cmode_is_supported(param, i)) + if (!param_value_set[i]) continue; err = devlink_nl_param_value_fill_one(msg, param->type, i, param_value[i]); @@ -5886,6 +5890,48 @@ void devlink_params_unregister(struct devlink *devlink, } EXPORT_SYMBOL_GPL(devlink_params_unregister); +/** + * devlink_params_publish - publish configuration parameters + * + * @devlink: devlink + * + * Publish previously registered configuration parameters. + */ +void devlink_params_publish(struct devlink *devlink) +{ + struct devlink_param_item *param_item; + + list_for_each_entry(param_item, &devlink->param_list, list) { + if (param_item->published) + continue; + param_item->published = true; + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_NEW); + } +} +EXPORT_SYMBOL_GPL(devlink_params_publish); + +/** + * devlink_params_unpublish - unpublish configuration parameters + * + * @devlink: devlink + * + * Unpublish previously registered configuration parameters. + */ +void devlink_params_unpublish(struct devlink *devlink) +{ + struct devlink_param_item *param_item; + + list_for_each_entry(param_item, &devlink->param_list, list) { + if (!param_item->published) + continue; + param_item->published = false; + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_DEL); + } +} +EXPORT_SYMBOL_GPL(devlink_params_unpublish); + /** * devlink_port_params_register - register port configuration parameters * From bb72e68bd1f2a86c37a990e9d905f34b712ae2b6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Feb 2019 11:22:46 +0000 Subject: [PATCH 0834/1436] lib: objagg: fix typo in objagg_stats_put() docstring Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- lib/objagg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/objagg.c b/lib/objagg.c index c9b457a91153..dae390bcef1a 100644 --- a/lib/objagg.c +++ b/lib/objagg.c @@ -485,7 +485,7 @@ const struct objagg_stats *objagg_stats_get(struct objagg *objagg) EXPORT_SYMBOL(objagg_stats_get); /** - * objagg_stats_puts - puts stats of the objagg instance + * objagg_stats_put - puts stats of the objagg instance * @objagg_stats: objagg instance stats * * Note: all locking must be provided by the caller. From 9069a3817d82b01b3a55da382c774e3575946130 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Feb 2019 11:22:46 +0000 Subject: [PATCH 0835/1436] lib: objagg: implement optimization hints assembly and use hints for object creation Implement simple greedy algo to find more optimized root-delta tree for a given objagg instance. This "hints" can be used by a driver to: 1) check if the hints are better (driver's choice) than the original objagg tree. Driver does comparison of objagg stats and hints stats. 2) use the hints to create a new objagg instance which will construct the root-delta tree according to the passed hints. Currently, only a simple greedy algorithm is implemented. Basically it finds the roots according to the maximal possible user count including deltas. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_acl_erp.c | 37 +- include/linux/objagg.h | 20 +- lib/objagg.c | 575 +++++++++++++++++- lib/test_objagg.c | 194 +++++- 4 files changed, 803 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c index 2941967e1cc5..302070a74f2e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c @@ -1200,6 +1200,32 @@ mlxsw_sp_acl_erp_delta_fill(const struct mlxsw_sp_acl_erp_key *parent_key, return 0; } +static bool mlxsw_sp_acl_erp_delta_check(void *priv, const void *parent_obj, + const void *obj) +{ + const struct mlxsw_sp_acl_erp_key *parent_key = parent_obj; + const struct mlxsw_sp_acl_erp_key *key = obj; + u16 delta_start; + u8 delta_mask; + int err; + + err = mlxsw_sp_acl_erp_delta_fill(parent_key, key, + &delta_start, &delta_mask); + return err ? false : true; +} + +static int mlxsw_sp_acl_erp_hints_obj_cmp(const void *obj1, const void *obj2) +{ + const struct mlxsw_sp_acl_erp_key *key1 = obj1; + const struct mlxsw_sp_acl_erp_key *key2 = obj2; + + /* For hints purposes, two objects are considered equal + * in case the masks are the same. Does not matter what + * the "ctcam" value is. + */ + return memcmp(key1->mask, key2->mask, sizeof(key1->mask)); +} + static void *mlxsw_sp_acl_erp_delta_create(void *priv, void *parent_obj, void *obj) { @@ -1254,12 +1280,17 @@ static void mlxsw_sp_acl_erp_delta_destroy(void *priv, void *delta_priv) kfree(delta); } -static void *mlxsw_sp_acl_erp_root_create(void *priv, void *obj) +static void *mlxsw_sp_acl_erp_root_create(void *priv, void *obj, + unsigned int root_id) { struct mlxsw_sp_acl_atcam_region *aregion = priv; struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table; struct mlxsw_sp_acl_erp_key *key = obj; + if (!key->ctcam && + root_id != OBJAGG_OBJ_ROOT_ID_INVALID && + root_id >= MLXSW_SP_ACL_ERP_MAX_PER_REGION) + return ERR_PTR(-ENOBUFS); return erp_table->ops->erp_create(erp_table, key); } @@ -1273,6 +1304,8 @@ static void mlxsw_sp_acl_erp_root_destroy(void *priv, void *root_priv) static const struct objagg_ops mlxsw_sp_acl_erp_objagg_ops = { .obj_size = sizeof(struct mlxsw_sp_acl_erp_key), + .delta_check = mlxsw_sp_acl_erp_delta_check, + .hints_obj_cmp = mlxsw_sp_acl_erp_hints_obj_cmp, .delta_create = mlxsw_sp_acl_erp_delta_create, .delta_destroy = mlxsw_sp_acl_erp_delta_destroy, .root_create = mlxsw_sp_acl_erp_root_create, @@ -1290,7 +1323,7 @@ mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion) return ERR_PTR(-ENOMEM); erp_table->objagg = objagg_create(&mlxsw_sp_acl_erp_objagg_ops, - aregion); + NULL, aregion); if (IS_ERR(erp_table->objagg)) { err = PTR_ERR(erp_table->objagg); goto err_objagg_create; diff --git a/include/linux/objagg.h b/include/linux/objagg.h index 34f38c186ea0..a675286df1af 100644 --- a/include/linux/objagg.h +++ b/include/linux/objagg.h @@ -6,14 +6,19 @@ struct objagg_ops { size_t obj_size; + bool (*delta_check)(void *priv, const void *parent_obj, + const void *obj); + int (*hints_obj_cmp)(const void *obj1, const void *obj2); void * (*delta_create)(void *priv, void *parent_obj, void *obj); void (*delta_destroy)(void *priv, void *delta_priv); - void * (*root_create)(void *priv, void *obj); + void * (*root_create)(void *priv, void *obj, unsigned int root_id); +#define OBJAGG_OBJ_ROOT_ID_INVALID UINT_MAX void (*root_destroy)(void *priv, void *root_priv); }; struct objagg; struct objagg_obj; +struct objagg_hints; const void *objagg_obj_root_priv(const struct objagg_obj *objagg_obj); const void *objagg_obj_delta_priv(const struct objagg_obj *objagg_obj); @@ -21,7 +26,8 @@ const void *objagg_obj_raw(const struct objagg_obj *objagg_obj); struct objagg_obj *objagg_obj_get(struct objagg *objagg, void *obj); void objagg_obj_put(struct objagg *objagg, struct objagg_obj *objagg_obj); -struct objagg *objagg_create(const struct objagg_ops *ops, void *priv); +struct objagg *objagg_create(const struct objagg_ops *ops, + struct objagg_hints *hints, void *priv); void objagg_destroy(struct objagg *objagg); struct objagg_obj_stats { @@ -43,4 +49,14 @@ struct objagg_stats { const struct objagg_stats *objagg_stats_get(struct objagg *objagg); void objagg_stats_put(const struct objagg_stats *objagg_stats); +enum objagg_opt_algo_type { + OBJAGG_OPT_ALGO_SIMPLE_GREEDY, +}; + +struct objagg_hints *objagg_hints_get(struct objagg *objagg, + enum objagg_opt_algo_type opt_algo_type); +void objagg_hints_put(struct objagg_hints *objagg_hints); +const struct objagg_stats * +objagg_hints_stats_get(struct objagg_hints *objagg_hints); + #endif diff --git a/lib/objagg.c b/lib/objagg.c index dae390bcef1a..befe8a47d080 100644 --- a/lib/objagg.c +++ b/lib/objagg.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -11,6 +12,34 @@ #define CREATE_TRACE_POINTS #include +struct objagg_hints { + struct rhashtable node_ht; + struct rhashtable_params ht_params; + struct list_head node_list; + unsigned int node_count; + unsigned int root_count; + unsigned int refcount; + const struct objagg_ops *ops; +}; + +struct objagg_hints_node { + struct rhash_head ht_node; /* member of objagg_hints->node_ht */ + struct list_head list; /* member of objagg_hints->node_list */ + struct objagg_hints_node *parent; + unsigned int root_id; + struct objagg_obj_stats_info stats_info; + unsigned long obj[0]; +}; + +static struct objagg_hints_node * +objagg_hints_lookup(struct objagg_hints *objagg_hints, void *obj) +{ + if (!objagg_hints) + return NULL; + return rhashtable_lookup_fast(&objagg_hints->node_ht, obj, + objagg_hints->ht_params); +} + struct objagg { const struct objagg_ops *ops; void *priv; @@ -18,6 +47,8 @@ struct objagg { struct rhashtable_params ht_params; struct list_head obj_list; unsigned int obj_count; + struct ida root_ida; + struct objagg_hints *hints; }; struct objagg_obj { @@ -30,6 +61,7 @@ struct objagg_obj { void *delta_priv; /* user delta private */ void *root_priv; /* user root private */ }; + unsigned int root_id; unsigned int refcount; /* counts number of users of this object * including nested objects */ @@ -130,7 +162,8 @@ static struct objagg_obj *objagg_obj_lookup(struct objagg *objagg, void *obj) static int objagg_obj_parent_assign(struct objagg *objagg, struct objagg_obj *objagg_obj, - struct objagg_obj *parent) + struct objagg_obj *parent, + bool take_parent_ref) { void *delta_priv; @@ -144,7 +177,8 @@ static int objagg_obj_parent_assign(struct objagg *objagg, */ objagg_obj->parent = parent; objagg_obj->delta_priv = delta_priv; - objagg_obj_ref_inc(objagg_obj->parent); + if (take_parent_ref) + objagg_obj_ref_inc(objagg_obj->parent); trace_objagg_obj_parent_assign(objagg, objagg_obj, parent, parent->refcount); @@ -164,7 +198,7 @@ static int objagg_obj_parent_lookup_assign(struct objagg *objagg, if (!objagg_obj_is_root(objagg_obj_cur)) continue; err = objagg_obj_parent_assign(objagg, objagg_obj, - objagg_obj_cur); + objagg_obj_cur, true); if (!err) return 0; } @@ -184,16 +218,68 @@ static void objagg_obj_parent_unassign(struct objagg *objagg, __objagg_obj_put(objagg, objagg_obj->parent); } -static int objagg_obj_root_create(struct objagg *objagg, - struct objagg_obj *objagg_obj) +static int objagg_obj_root_id_alloc(struct objagg *objagg, + struct objagg_obj *objagg_obj, + struct objagg_hints_node *hnode) { - objagg_obj->root_priv = objagg->ops->root_create(objagg->priv, - objagg_obj->obj); - if (IS_ERR(objagg_obj->root_priv)) - return PTR_ERR(objagg_obj->root_priv); + unsigned int min, max; + int root_id; + /* In case there are no hints available, the root id is invalid. */ + if (!objagg->hints) { + objagg_obj->root_id = OBJAGG_OBJ_ROOT_ID_INVALID; + return 0; + } + + if (hnode) { + min = hnode->root_id; + max = hnode->root_id; + } else { + /* For objects with no hint, start after the last + * hinted root_id. + */ + min = objagg->hints->root_count; + max = ~0; + } + + root_id = ida_alloc_range(&objagg->root_ida, min, max, GFP_KERNEL); + + if (root_id < 0) + return root_id; + objagg_obj->root_id = root_id; + return 0; +} + +static void objagg_obj_root_id_free(struct objagg *objagg, + struct objagg_obj *objagg_obj) +{ + if (!objagg->hints) + return; + ida_free(&objagg->root_ida, objagg_obj->root_id); +} + +static int objagg_obj_root_create(struct objagg *objagg, + struct objagg_obj *objagg_obj, + struct objagg_hints_node *hnode) +{ + int err; + + err = objagg_obj_root_id_alloc(objagg, objagg_obj, hnode); + if (err) + return err; + objagg_obj->root_priv = objagg->ops->root_create(objagg->priv, + objagg_obj->obj, + objagg_obj->root_id); + if (IS_ERR(objagg_obj->root_priv)) { + err = PTR_ERR(objagg_obj->root_priv); + goto err_root_create; + } trace_objagg_obj_root_create(objagg, objagg_obj); return 0; + +err_root_create: + objagg_obj_root_id_free(objagg, objagg_obj); + return err; } static void objagg_obj_root_destroy(struct objagg *objagg, @@ -201,19 +287,69 @@ static void objagg_obj_root_destroy(struct objagg *objagg, { trace_objagg_obj_root_destroy(objagg, objagg_obj); objagg->ops->root_destroy(objagg->priv, objagg_obj->root_priv); + objagg_obj_root_id_free(objagg, objagg_obj); +} + +static struct objagg_obj *__objagg_obj_get(struct objagg *objagg, void *obj); + +static int objagg_obj_init_with_hints(struct objagg *objagg, + struct objagg_obj *objagg_obj, + bool *hint_found) +{ + struct objagg_hints_node *hnode; + struct objagg_obj *parent; + int err; + + hnode = objagg_hints_lookup(objagg->hints, objagg_obj->obj); + if (!hnode) { + *hint_found = false; + return 0; + } + *hint_found = true; + + if (!hnode->parent) + return objagg_obj_root_create(objagg, objagg_obj, hnode); + + parent = __objagg_obj_get(objagg, hnode->parent->obj); + if (IS_ERR(parent)) + return PTR_ERR(parent); + + err = objagg_obj_parent_assign(objagg, objagg_obj, parent, false); + if (err) { + *hint_found = false; + err = 0; + goto err_parent_assign; + } + + return 0; + +err_parent_assign: + objagg_obj_put(objagg, parent); + return err; } static int objagg_obj_init(struct objagg *objagg, struct objagg_obj *objagg_obj) { + bool hint_found; int err; + /* First, try to use hints if they are available and + * if they provide result. + */ + err = objagg_obj_init_with_hints(objagg, objagg_obj, &hint_found); + if (err) + return err; + + if (hint_found) + return 0; + /* Try to find if the object can be aggregated under an existing one. */ err = objagg_obj_parent_lookup_assign(objagg, objagg_obj); if (!err) return 0; /* If aggregation is not possible, make the object a root. */ - return objagg_obj_root_create(objagg, objagg_obj); + return objagg_obj_root_create(objagg, objagg_obj, NULL); } static void objagg_obj_fini(struct objagg *objagg, @@ -349,8 +485,9 @@ EXPORT_SYMBOL(objagg_obj_put); /** * objagg_create - creates a new objagg instance - * @ops: user-specific callbacks - * @priv: pointer to a private data passed to the ops + * @ops: user-specific callbacks + * @objagg_hints: hints, can be NULL + * @priv: pointer to a private data passed to the ops * * Note: all locking must be provided by the caller. * @@ -374,18 +511,25 @@ EXPORT_SYMBOL(objagg_obj_put); * Returns a pointer to newly created objagg instance in case of success, * otherwise it returns pointer error using ERR_PTR macro. */ -struct objagg *objagg_create(const struct objagg_ops *ops, void *priv) +struct objagg *objagg_create(const struct objagg_ops *ops, + struct objagg_hints *objagg_hints, void *priv) { struct objagg *objagg; int err; if (WARN_ON(!ops || !ops->root_create || !ops->root_destroy || - !ops->delta_create || !ops->delta_destroy)) + !ops->delta_check || !ops->delta_create || + !ops->delta_destroy)) return ERR_PTR(-EINVAL); + objagg = kzalloc(sizeof(*objagg), GFP_KERNEL); if (!objagg) return ERR_PTR(-ENOMEM); objagg->ops = ops; + if (objagg_hints) { + objagg->hints = objagg_hints; + objagg_hints->refcount++; + } objagg->priv = priv; INIT_LIST_HEAD(&objagg->obj_list); @@ -397,6 +541,8 @@ struct objagg *objagg_create(const struct objagg_ops *ops, void *priv) if (err) goto err_rhashtable_init; + ida_init(&objagg->root_ida); + trace_objagg_create(objagg); return objagg; @@ -415,8 +561,11 @@ EXPORT_SYMBOL(objagg_create); void objagg_destroy(struct objagg *objagg) { trace_objagg_destroy(objagg); + ida_destroy(&objagg->root_ida); WARN_ON(!list_empty(&objagg->obj_list)); rhashtable_destroy(&objagg->obj_ht); + if (objagg->hints) + objagg_hints_put(objagg->hints); kfree(objagg); } EXPORT_SYMBOL(objagg_destroy); @@ -496,6 +645,404 @@ void objagg_stats_put(const struct objagg_stats *objagg_stats) } EXPORT_SYMBOL(objagg_stats_put); +static struct objagg_hints_node * +objagg_hints_node_create(struct objagg_hints *objagg_hints, + struct objagg_obj *objagg_obj, size_t obj_size, + struct objagg_hints_node *parent_hnode) +{ + unsigned int user_count = objagg_obj->stats.user_count; + struct objagg_hints_node *hnode; + int err; + + hnode = kzalloc(sizeof(*hnode) + obj_size, GFP_KERNEL); + if (!hnode) + return ERR_PTR(-ENOMEM); + memcpy(hnode->obj, &objagg_obj->obj, obj_size); + hnode->stats_info.stats.user_count = user_count; + hnode->stats_info.stats.delta_user_count = user_count; + if (parent_hnode) { + parent_hnode->stats_info.stats.delta_user_count += user_count; + } else { + hnode->root_id = objagg_hints->root_count++; + hnode->stats_info.is_root = true; + } + hnode->stats_info.objagg_obj = objagg_obj; + + err = rhashtable_insert_fast(&objagg_hints->node_ht, &hnode->ht_node, + objagg_hints->ht_params); + if (err) + goto err_ht_insert; + + list_add(&hnode->list, &objagg_hints->node_list); + hnode->parent = parent_hnode; + objagg_hints->node_count++; + + return hnode; + +err_ht_insert: + kfree(hnode); + return ERR_PTR(err); +} + +static void objagg_hints_flush(struct objagg_hints *objagg_hints) +{ + struct objagg_hints_node *hnode, *tmp; + + list_for_each_entry_safe(hnode, tmp, &objagg_hints->node_list, list) { + list_del(&hnode->list); + rhashtable_remove_fast(&objagg_hints->node_ht, &hnode->ht_node, + objagg_hints->ht_params); + kfree(hnode); + } +} + +struct objagg_tmp_node { + struct objagg_obj *objagg_obj; + bool crossed_out; +}; + +struct objagg_tmp_graph { + struct objagg_tmp_node *nodes; + unsigned long nodes_count; + unsigned long *edges; +}; + +static int objagg_tmp_graph_edge_index(struct objagg_tmp_graph *graph, + int parent_index, int index) +{ + return index * graph->nodes_count + parent_index; +} + +static void objagg_tmp_graph_edge_set(struct objagg_tmp_graph *graph, + int parent_index, int index) +{ + int edge_index = objagg_tmp_graph_edge_index(graph, index, + parent_index); + + __set_bit(edge_index, graph->edges); +} + +static bool objagg_tmp_graph_is_edge(struct objagg_tmp_graph *graph, + int parent_index, int index) +{ + int edge_index = objagg_tmp_graph_edge_index(graph, index, + parent_index); + + return test_bit(edge_index, graph->edges); +} + +static unsigned int objagg_tmp_graph_node_weight(struct objagg_tmp_graph *graph, + unsigned int index) +{ + struct objagg_tmp_node *node = &graph->nodes[index]; + unsigned int weight = node->objagg_obj->stats.user_count; + int j; + + /* Node weight is sum of node users and all other nodes users + * that this node can represent with delta. + */ + + if (node->crossed_out) + return 0; + for (j = 0; j < graph->nodes_count; j++) { + if (!objagg_tmp_graph_is_edge(graph, index, j)) + continue; + node = &graph->nodes[j]; + if (node->crossed_out) + continue; + weight += node->objagg_obj->stats.user_count; + } + return weight; +} + +static int objagg_tmp_graph_node_max_weight(struct objagg_tmp_graph *graph) +{ + unsigned int max_weight = 0; + unsigned int weight; + int max_index = -1; + int i; + + for (i = 0; i < graph->nodes_count; i++) { + weight = objagg_tmp_graph_node_weight(graph, i); + if (weight > max_weight) { + max_weight = weight; + max_index = i; + } + } + return max_index; +} + +static struct objagg_tmp_graph *objagg_tmp_graph_create(struct objagg *objagg) +{ + unsigned int nodes_count = objagg->obj_count; + struct objagg_tmp_graph *graph; + struct objagg_tmp_node *node; + struct objagg_tmp_node *pnode; + struct objagg_obj *objagg_obj; + size_t alloc_size; + int i, j; + + graph = kzalloc(sizeof(*graph), GFP_KERNEL); + if (!graph) + return NULL; + + graph->nodes = kcalloc(nodes_count, sizeof(*graph->nodes), GFP_KERNEL); + if (!graph->nodes) + goto err_nodes_alloc; + graph->nodes_count = nodes_count; + + alloc_size = BITS_TO_LONGS(nodes_count * nodes_count) * + sizeof(unsigned long); + graph->edges = kzalloc(alloc_size, GFP_KERNEL); + if (!graph->edges) + goto err_edges_alloc; + + i = 0; + list_for_each_entry(objagg_obj, &objagg->obj_list, list) { + node = &graph->nodes[i++]; + node->objagg_obj = objagg_obj; + } + + /* Assemble a temporary graph. Insert edge X->Y in case Y can be + * in delta of X. + */ + for (i = 0; i < nodes_count; i++) { + for (j = 0; j < nodes_count; j++) { + if (i == j) + continue; + pnode = &graph->nodes[i]; + node = &graph->nodes[j]; + if (objagg->ops->delta_check(objagg->priv, + pnode->objagg_obj->obj, + node->objagg_obj->obj)) { + objagg_tmp_graph_edge_set(graph, i, j); + + } + } + } + return graph; + +err_edges_alloc: + kfree(graph->nodes); +err_nodes_alloc: + kfree(graph); + return NULL; +} + +static void objagg_tmp_graph_destroy(struct objagg_tmp_graph *graph) +{ + kfree(graph->edges); + kfree(graph->nodes); + kfree(graph); +} + +static int +objagg_opt_simple_greedy_fillup_hints(struct objagg_hints *objagg_hints, + struct objagg *objagg) +{ + struct objagg_hints_node *hnode, *parent_hnode; + struct objagg_tmp_graph *graph; + struct objagg_tmp_node *node; + int index; + int j; + int err; + + graph = objagg_tmp_graph_create(objagg); + if (!graph) + return -ENOMEM; + + /* Find the nodes from the ones that can accommodate most users + * and cross them out of the graph. Save them to the hint list. + */ + while ((index = objagg_tmp_graph_node_max_weight(graph)) != -1) { + node = &graph->nodes[index]; + node->crossed_out = true; + hnode = objagg_hints_node_create(objagg_hints, + node->objagg_obj, + objagg->ops->obj_size, + NULL); + if (IS_ERR(hnode)) { + err = PTR_ERR(hnode); + goto out; + } + parent_hnode = hnode; + for (j = 0; j < graph->nodes_count; j++) { + if (!objagg_tmp_graph_is_edge(graph, index, j)) + continue; + node = &graph->nodes[j]; + if (node->crossed_out) + continue; + node->crossed_out = true; + hnode = objagg_hints_node_create(objagg_hints, + node->objagg_obj, + objagg->ops->obj_size, + parent_hnode); + if (IS_ERR(hnode)) { + err = PTR_ERR(hnode); + goto out; + } + } + } + + err = 0; +out: + objagg_tmp_graph_destroy(graph); + return err; +} + +struct objagg_opt_algo { + int (*fillup_hints)(struct objagg_hints *objagg_hints, + struct objagg *objagg); +}; + +static const struct objagg_opt_algo objagg_opt_simple_greedy = { + .fillup_hints = objagg_opt_simple_greedy_fillup_hints, +}; + + +static const struct objagg_opt_algo *objagg_opt_algos[] = { + [OBJAGG_OPT_ALGO_SIMPLE_GREEDY] = &objagg_opt_simple_greedy, +}; + +static int objagg_hints_obj_cmp(struct rhashtable_compare_arg *arg, + const void *obj) +{ + struct rhashtable *ht = arg->ht; + struct objagg_hints *objagg_hints = + container_of(ht, struct objagg_hints, node_ht); + const struct objagg_ops *ops = objagg_hints->ops; + const char *ptr = obj; + + ptr += ht->p.key_offset; + return ops->hints_obj_cmp ? ops->hints_obj_cmp(ptr, arg->key) : + memcmp(ptr, arg->key, ht->p.key_len); +} + +/** + * objagg_hints_get - obtains hints instance + * @objagg: objagg instance + * @opt_algo_type: type of hints finding algorithm + * + * Note: all locking must be provided by the caller. + * + * According to the algo type, the existing objects of objagg instance + * are going to be went-through to assemble an optimal tree. We call this + * tree hints. These hints can be later on used for creation of + * a new objagg instance. There, the future object creations are going + * to be consulted with these hints in order to find out, where exactly + * the new object should be put as a root or delta. + * + * Returns a pointer to hints instance in case of success, + * otherwise it returns pointer error using ERR_PTR macro. + */ +struct objagg_hints *objagg_hints_get(struct objagg *objagg, + enum objagg_opt_algo_type opt_algo_type) +{ + const struct objagg_opt_algo *algo = objagg_opt_algos[opt_algo_type]; + struct objagg_hints *objagg_hints; + int err; + + objagg_hints = kzalloc(sizeof(*objagg_hints), GFP_KERNEL); + if (!objagg_hints) + return ERR_PTR(-ENOMEM); + + objagg_hints->ops = objagg->ops; + objagg_hints->refcount = 1; + + INIT_LIST_HEAD(&objagg_hints->node_list); + + objagg_hints->ht_params.key_len = objagg->ops->obj_size; + objagg_hints->ht_params.key_offset = + offsetof(struct objagg_hints_node, obj); + objagg_hints->ht_params.head_offset = + offsetof(struct objagg_hints_node, ht_node); + objagg_hints->ht_params.obj_cmpfn = objagg_hints_obj_cmp; + + err = rhashtable_init(&objagg_hints->node_ht, &objagg_hints->ht_params); + if (err) + goto err_rhashtable_init; + + err = algo->fillup_hints(objagg_hints, objagg); + if (err) + goto err_fillup_hints; + + if (WARN_ON(objagg_hints->node_count != objagg->obj_count)) + goto err_node_count_check; + + return objagg_hints; + +err_node_count_check: +err_fillup_hints: + objagg_hints_flush(objagg_hints); + rhashtable_destroy(&objagg_hints->node_ht); +err_rhashtable_init: + kfree(objagg_hints); + return ERR_PTR(err); +} +EXPORT_SYMBOL(objagg_hints_get); + +/** + * objagg_hints_put - puts hints instance + * @objagg_hints: objagg hints instance + * + * Note: all locking must be provided by the caller. + */ +void objagg_hints_put(struct objagg_hints *objagg_hints) +{ + if (--objagg_hints->refcount) + return; + objagg_hints_flush(objagg_hints); + rhashtable_destroy(&objagg_hints->node_ht); + kfree(objagg_hints); +} +EXPORT_SYMBOL(objagg_hints_put); + +/** + * objagg_hints_stats_get - obtains stats of the hints instance + * @objagg_hints: hints instance + * + * Note: all locking must be provided by the caller. + * + * The returned structure contains statistics of all objects + * currently in use, ordered by following rules: + * 1) Root objects are always on lower indexes than the rest. + * 2) Objects with higher delta user count are always on lower + * indexes. + * 3) In case multiple objects have the same delta user count, + * the objects are ordered by user count. + * + * Returns a pointer to stats instance in case of success, + * otherwise it returns pointer error using ERR_PTR macro. + */ +const struct objagg_stats * +objagg_hints_stats_get(struct objagg_hints *objagg_hints) +{ + struct objagg_stats *objagg_stats; + struct objagg_hints_node *hnode; + int i; + + objagg_stats = kzalloc(struct_size(objagg_stats, stats_info, + objagg_hints->node_count), + GFP_KERNEL); + if (!objagg_stats) + return ERR_PTR(-ENOMEM); + + i = 0; + list_for_each_entry(hnode, &objagg_hints->node_list, list) { + memcpy(&objagg_stats->stats_info[i], &hnode->stats_info, + sizeof(objagg_stats->stats_info[0])); + i++; + } + objagg_stats->stats_info_count = i; + + sort(objagg_stats->stats_info, objagg_stats->stats_info_count, + sizeof(struct objagg_obj_stats_info), + objagg_stats_info_sort_cmp_func, NULL); + + return objagg_stats; +} +EXPORT_SYMBOL(objagg_hints_stats_get); + MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Jiri Pirko "); MODULE_DESCRIPTION("Object aggregation manager"); diff --git a/lib/test_objagg.c b/lib/test_objagg.c index ab57144bb0cd..3744573b6365 100644 --- a/lib/test_objagg.c +++ b/lib/test_objagg.c @@ -87,6 +87,15 @@ static void world_obj_put(struct world *world, struct objagg *objagg, #define MAX_KEY_ID_DIFF 5 +static bool delta_check(void *priv, const void *parent_obj, const void *obj) +{ + const struct tokey *parent_key = parent_obj; + const struct tokey *key = obj; + int diff = key->id - parent_key->id; + + return diff >= 0 && diff <= MAX_KEY_ID_DIFF; +} + static void *delta_create(void *priv, void *parent_obj, void *obj) { struct tokey *parent_key = parent_obj; @@ -95,7 +104,7 @@ static void *delta_create(void *priv, void *parent_obj, void *obj) int diff = key->id - parent_key->id; struct delta *delta; - if (diff < 0 || diff > MAX_KEY_ID_DIFF) + if (!delta_check(priv, parent_obj, obj)) return ERR_PTR(-EINVAL); delta = kzalloc(sizeof(*delta), GFP_KERNEL); @@ -115,7 +124,7 @@ static void delta_destroy(void *priv, void *delta_priv) kfree(delta); } -static void *root_create(void *priv, void *obj) +static void *root_create(void *priv, void *obj, unsigned int id) { struct world *world = priv; struct tokey *key = obj; @@ -268,6 +277,12 @@ stats_put: return err; } +static bool delta_check_dummy(void *priv, const void *parent_obj, + const void *obj) +{ + return false; +} + static void *delta_create_dummy(void *priv, void *parent_obj, void *obj) { return ERR_PTR(-EOPNOTSUPP); @@ -279,6 +294,7 @@ static void delta_destroy_dummy(void *priv, void *delta_priv) static const struct objagg_ops nodelta_ops = { .obj_size = sizeof(struct tokey), + .delta_check = delta_check_dummy, .delta_create = delta_create_dummy, .delta_destroy = delta_destroy_dummy, .root_create = root_create, @@ -292,7 +308,7 @@ static int test_nodelta(void) int i; int err; - objagg = objagg_create(&nodelta_ops, &world); + objagg = objagg_create(&nodelta_ops, NULL, &world); if (IS_ERR(objagg)) return PTR_ERR(objagg); @@ -357,6 +373,7 @@ err_stats_second_zero: static const struct objagg_ops delta_ops = { .obj_size = sizeof(struct tokey), + .delta_check = delta_check, .delta_create = delta_create, .delta_destroy = delta_destroy, .root_create = root_create, @@ -793,7 +810,7 @@ static int test_delta(void) int i; int err; - objagg = objagg_create(&delta_ops, &world); + objagg = objagg_create(&delta_ops, NULL, &world); if (IS_ERR(objagg)) return PTR_ERR(objagg); @@ -815,6 +832,170 @@ err_do_action_item: return err; } +struct hints_case { + const unsigned int *key_ids; + size_t key_ids_count; + struct expect_stats expect_stats; + struct expect_stats expect_stats_hints; +}; + +static const unsigned int hints_case_key_ids[] = { + 1, 7, 3, 5, 3, 1, 30, 8, 8, 5, 6, 8, +}; + +static const struct hints_case hints_case = { + .key_ids = hints_case_key_ids, + .key_ids_count = ARRAY_SIZE(hints_case_key_ids), + .expect_stats = + EXPECT_STATS(7, ROOT(1, 2, 7), ROOT(7, 1, 4), ROOT(30, 1, 1), + DELTA(8, 3), DELTA(3, 2), + DELTA(5, 2), DELTA(6, 1)), + .expect_stats_hints = + EXPECT_STATS(7, ROOT(3, 2, 9), ROOT(1, 2, 2), ROOT(30, 1, 1), + DELTA(8, 3), DELTA(5, 2), + DELTA(6, 1), DELTA(7, 1)), +}; + +static void __pr_debug_stats(const struct objagg_stats *stats) +{ + int i; + + for (i = 0; i < stats->stats_info_count; i++) + pr_debug("Stat index %d key %u: u %d, d %d, %s\n", i, + obj_to_key_id(stats->stats_info[i].objagg_obj), + stats->stats_info[i].stats.user_count, + stats->stats_info[i].stats.delta_user_count, + stats->stats_info[i].is_root ? "root" : "noroot"); +} + +static void pr_debug_stats(struct objagg *objagg) +{ + const struct objagg_stats *stats; + + stats = objagg_stats_get(objagg); + if (IS_ERR(stats)) + return; + __pr_debug_stats(stats); + objagg_stats_put(stats); +} + +static void pr_debug_hints_stats(struct objagg_hints *objagg_hints) +{ + const struct objagg_stats *stats; + + stats = objagg_hints_stats_get(objagg_hints); + if (IS_ERR(stats)) + return; + __pr_debug_stats(stats); + objagg_stats_put(stats); +} + +static int check_expect_hints_stats(struct objagg_hints *objagg_hints, + const struct expect_stats *expect_stats, + const char **errmsg) +{ + const struct objagg_stats *stats; + int err; + + stats = objagg_hints_stats_get(objagg_hints); + if (IS_ERR(stats)) + return PTR_ERR(stats); + err = __check_expect_stats(stats, expect_stats, errmsg); + objagg_stats_put(stats); + return err; +} + +static int test_hints_case(const struct hints_case *hints_case) +{ + struct objagg_obj *objagg_obj; + struct objagg_hints *hints; + struct world world2 = {}; + struct world world = {}; + struct objagg *objagg2; + struct objagg *objagg; + const char *errmsg; + int i; + int err; + + objagg = objagg_create(&delta_ops, NULL, &world); + if (IS_ERR(objagg)) + return PTR_ERR(objagg); + + for (i = 0; i < hints_case->key_ids_count; i++) { + objagg_obj = world_obj_get(&world, objagg, + hints_case->key_ids[i]); + if (IS_ERR(objagg_obj)) { + err = PTR_ERR(objagg_obj); + goto err_world_obj_get; + } + } + + pr_debug_stats(objagg); + err = check_expect_stats(objagg, &hints_case->expect_stats, &errmsg); + if (err) { + pr_err("Stats: %s\n", errmsg); + goto err_check_expect_stats; + } + + hints = objagg_hints_get(objagg, OBJAGG_OPT_ALGO_SIMPLE_GREEDY); + if (IS_ERR(hints)) { + err = PTR_ERR(hints); + goto err_hints_get; + } + + pr_debug_hints_stats(hints); + err = check_expect_hints_stats(hints, &hints_case->expect_stats_hints, + &errmsg); + if (err) { + pr_err("Hints stats: %s\n", errmsg); + goto err_check_expect_hints_stats; + } + + objagg2 = objagg_create(&delta_ops, hints, &world2); + if (IS_ERR(objagg)) + return PTR_ERR(objagg); + + for (i = 0; i < hints_case->key_ids_count; i++) { + objagg_obj = world_obj_get(&world2, objagg2, + hints_case->key_ids[i]); + if (IS_ERR(objagg_obj)) { + err = PTR_ERR(objagg_obj); + goto err_world2_obj_get; + } + } + + pr_debug_stats(objagg2); + err = check_expect_stats(objagg2, &hints_case->expect_stats_hints, + &errmsg); + if (err) { + pr_err("Stats2: %s\n", errmsg); + goto err_check_expect_stats2; + } + + err = 0; + +err_check_expect_stats2: +err_world2_obj_get: + for (i--; i >= 0; i--) + world_obj_put(&world2, objagg, hints_case->key_ids[i]); + objagg_hints_put(hints); + objagg_destroy(objagg2); + i = hints_case->key_ids_count; +err_check_expect_hints_stats: +err_hints_get: +err_check_expect_stats: +err_world_obj_get: + for (i--; i >= 0; i--) + world_obj_put(&world, objagg, hints_case->key_ids[i]); + + objagg_destroy(objagg); + return err; +} +static int test_hints(void) +{ + return test_hints_case(&hints_case); +} + static int __init test_objagg_init(void) { int err; @@ -822,7 +1003,10 @@ static int __init test_objagg_init(void) err = test_nodelta(); if (err) return err; - return test_delta(); + err = test_delta(); + if (err) + return err; + return test_hints(); } static void __exit test_objagg_exit(void) From 204f6a8c413ec41a7ec5e3f0f0590d4318f7a1f2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Feb 2019 11:22:47 +0000 Subject: [PATCH 0836/1436] lib: objagg: add root count to stats Count number of roots and add it to stats. It is handy for the library user to have this stats available as it can act upon it without counting roots itself. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/objagg.h | 1 + lib/objagg.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/include/linux/objagg.h b/include/linux/objagg.h index a675286df1af..78021777df46 100644 --- a/include/linux/objagg.h +++ b/include/linux/objagg.h @@ -42,6 +42,7 @@ struct objagg_obj_stats_info { }; struct objagg_stats { + unsigned int root_count; unsigned int stats_info_count; struct objagg_obj_stats_info stats_info[]; }; diff --git a/lib/objagg.c b/lib/objagg.c index befe8a47d080..781f41c3c47d 100644 --- a/lib/objagg.c +++ b/lib/objagg.c @@ -621,6 +621,8 @@ const struct objagg_stats *objagg_stats_get(struct objagg *objagg) objagg_stats->stats_info[i].objagg_obj = objagg_obj; objagg_stats->stats_info[i].is_root = objagg_obj_is_root(objagg_obj); + if (objagg_stats->stats_info[i].is_root) + objagg_stats->root_count++; i++; } objagg_stats->stats_info_count = i; @@ -1031,6 +1033,8 @@ objagg_hints_stats_get(struct objagg_hints *objagg_hints) list_for_each_entry(hnode, &objagg_hints->node_list, list) { memcpy(&objagg_stats->stats_info[i], &hnode->stats_info, sizeof(objagg_stats->stats_info[0])); + if (objagg_stats->stats_info[i].is_root) + objagg_stats->root_count++; i++; } objagg_stats->stats_info_count = i; From 0f54236da0c2b6664c4d08407fe8c2b7ad17d955 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Feb 2019 11:22:48 +0000 Subject: [PATCH 0837/1436] mlxsw: spectrum_acl: Split region struct into region and vregion Do the split of region struct so the new region struct is related to the actual HW region, whereas vregion struct is a SW abstration of that. This split prepares possibility for vregion to hold 2 HW regions which is needed for region ERP rehash flow. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_acl_tcam.c | 309 +++++++++++------- .../mellanox/mlxsw/spectrum_acl_tcam.h | 5 +- 2 files changed, 185 insertions(+), 129 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 11456e1f236f..092a461cd5fb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -153,7 +153,7 @@ struct mlxsw_sp_acl_tcam_pattern { struct mlxsw_sp_acl_tcam_group { struct mlxsw_sp_acl_tcam *tcam; u16 id; - struct list_head region_list; + struct list_head vregion_list; unsigned int region_count; struct rhashtable chunk_ht; struct mlxsw_sp_acl_tcam_group_ops *ops; @@ -163,12 +163,20 @@ struct mlxsw_sp_acl_tcam_group { struct mlxsw_afk_element_usage tmplt_elusage; }; -struct mlxsw_sp_acl_tcam_chunk { - struct list_head list; /* Member of a TCAM region */ - struct rhash_head ht_node; /* Member of a chunk HT */ - unsigned int priority; /* Priority within the region and group */ - struct mlxsw_sp_acl_tcam_group *group; +struct mlxsw_sp_acl_tcam_vregion { struct mlxsw_sp_acl_tcam_region *region; + struct list_head list; /* Member of a TCAM group */ + struct list_head chunk_list; /* List of chunks under this vregion */ + struct mlxsw_sp_acl_tcam_group *group; + struct mlxsw_afk_key_info *key_info; +}; + +struct mlxsw_sp_acl_tcam_chunk { + struct list_head list; /* Member of a TCAM vregion */ + struct rhash_head ht_node; /* Member of a chunk HT */ + unsigned int priority; /* Priority within the vregion and group */ + struct mlxsw_sp_acl_tcam_group *group; + struct mlxsw_sp_acl_tcam_vregion *vregion; unsigned int ref_count; unsigned long priv[0]; /* priv has to be always the last item */ @@ -190,13 +198,14 @@ static const struct rhashtable_params mlxsw_sp_acl_tcam_chunk_ht_params = { static int mlxsw_sp_acl_tcam_group_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_group *group) { - struct mlxsw_sp_acl_tcam_region *region; + struct mlxsw_sp_acl_tcam_vregion *vregion; char pagt_pl[MLXSW_REG_PAGT_LEN]; int acl_index = 0; mlxsw_reg_pagt_pack(pagt_pl, group->id); - list_for_each_entry(region, &group->region_list, list) - mlxsw_reg_pagt_acl_id_pack(pagt_pl, acl_index++, region->id); + list_for_each_entry(vregion, &group->vregion_list, list) + mlxsw_reg_pagt_acl_id_pack(pagt_pl, acl_index++, + vregion->region->id); mlxsw_reg_pagt_size_set(pagt_pl, acl_index); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pagt), pagt_pl); } @@ -219,7 +228,7 @@ mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp *mlxsw_sp, memcpy(&group->tmplt_elusage, tmplt_elusage, sizeof(group->tmplt_elusage)); } - INIT_LIST_HEAD(&group->region_list); + INIT_LIST_HEAD(&group->vregion_list); err = mlxsw_sp_acl_tcam_group_id_get(tcam, &group->id); if (err) return err; @@ -243,7 +252,7 @@ static void mlxsw_sp_acl_tcam_group_del(struct mlxsw_sp *mlxsw_sp, rhashtable_destroy(&group->chunk_ht); mlxsw_sp_acl_tcam_group_id_put(tcam, group->id); - WARN_ON(!list_empty(&group->region_list)); + WARN_ON(!list_empty(&group->vregion_list)); } static int @@ -283,140 +292,150 @@ mlxsw_sp_acl_tcam_group_id(struct mlxsw_sp_acl_tcam_group *group) } static unsigned int -mlxsw_sp_acl_tcam_region_prio(struct mlxsw_sp_acl_tcam_region *region) +mlxsw_sp_acl_tcam_vregion_prio(struct mlxsw_sp_acl_tcam_vregion *vregion) { struct mlxsw_sp_acl_tcam_chunk *chunk; - if (list_empty(®ion->chunk_list)) + if (list_empty(&vregion->chunk_list)) return 0; - /* As a priority of a region, return priority of the first chunk */ - chunk = list_first_entry(®ion->chunk_list, typeof(*chunk), list); + /* As a priority of a vregion, return priority of the first chunk */ + chunk = list_first_entry(&vregion->chunk_list, + typeof(*chunk), list); return chunk->priority; } static unsigned int -mlxsw_sp_acl_tcam_region_max_prio(struct mlxsw_sp_acl_tcam_region *region) +mlxsw_sp_acl_tcam_vregion_max_prio(struct mlxsw_sp_acl_tcam_vregion *vregion) { struct mlxsw_sp_acl_tcam_chunk *chunk; - if (list_empty(®ion->chunk_list)) + if (list_empty(&vregion->chunk_list)) return 0; - chunk = list_last_entry(®ion->chunk_list, typeof(*chunk), list); + chunk = list_last_entry(&vregion->chunk_list, + typeof(*chunk), list); return chunk->priority; } -static void -mlxsw_sp_acl_tcam_group_list_add(struct mlxsw_sp_acl_tcam_group *group, - struct mlxsw_sp_acl_tcam_region *region) -{ - struct mlxsw_sp_acl_tcam_region *region2; - struct list_head *pos; - - /* Position the region inside the list according to priority */ - list_for_each(pos, &group->region_list) { - region2 = list_entry(pos, typeof(*region2), list); - if (mlxsw_sp_acl_tcam_region_prio(region2) > - mlxsw_sp_acl_tcam_region_prio(region)) - break; - } - list_add_tail(®ion->list, pos); - group->region_count++; -} - -static void -mlxsw_sp_acl_tcam_group_list_del(struct mlxsw_sp_acl_tcam_group *group, - struct mlxsw_sp_acl_tcam_region *region) -{ - group->region_count--; - list_del(®ion->list); -} - static int mlxsw_sp_acl_tcam_group_region_attach(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_group *group, struct mlxsw_sp_acl_tcam_region *region) { + struct mlxsw_sp_acl_tcam_group *group = region->vregion->group; int err; if (group->region_count == group->tcam->max_group_size) return -ENOBUFS; - mlxsw_sp_acl_tcam_group_list_add(group, region); - err = mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group); if (err) - goto err_group_update; - region->group = group; + return err; + group->region_count++; return 0; - -err_group_update: - mlxsw_sp_acl_tcam_group_list_del(group, region); - mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group); - return err; } static void mlxsw_sp_acl_tcam_group_region_detach(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_region *region) { - struct mlxsw_sp_acl_tcam_group *group = region->group; + struct mlxsw_sp_acl_tcam_group *group = region->vregion->group; - mlxsw_sp_acl_tcam_group_list_del(group, region); + group->region_count--; mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group); } -static struct mlxsw_sp_acl_tcam_region * -mlxsw_sp_acl_tcam_group_region_find(struct mlxsw_sp_acl_tcam_group *group, - unsigned int priority, - struct mlxsw_afk_element_usage *elusage, - bool *p_need_split) +static int +mlxsw_sp_acl_tcam_group_vregion_attach(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + struct mlxsw_sp_acl_tcam_vregion *vregion) { - struct mlxsw_sp_acl_tcam_region *region, *region2; + struct mlxsw_sp_acl_tcam_vregion *vregion2; + struct list_head *pos; + int err; + + /* Position the vregion inside the list according to priority */ + list_for_each(pos, &group->vregion_list) { + vregion2 = list_entry(pos, typeof(*vregion2), list); + if (mlxsw_sp_acl_tcam_vregion_prio(vregion2) > + mlxsw_sp_acl_tcam_vregion_prio(vregion)) + break; + } + list_add_tail(&vregion->list, pos); + vregion->group = group; + + err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp, vregion->region); + if (err) + goto err_region_attach; + + return 0; + +err_region_attach: + list_del(&vregion->list); + return err; +} + +static void +mlxsw_sp_acl_tcam_group_vregion_detach(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion) +{ + list_del(&vregion->list); + mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, vregion->region); +} + +static struct mlxsw_sp_acl_tcam_vregion * +mlxsw_sp_acl_tcam_group_vregion_find(struct mlxsw_sp_acl_tcam_group *group, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage, + bool *p_need_split) +{ + struct mlxsw_sp_acl_tcam_vregion *vregion, *vregion2; struct list_head *pos; bool issubset; - list_for_each(pos, &group->region_list) { - region = list_entry(pos, typeof(*region), list); + list_for_each(pos, &group->vregion_list) { + vregion = list_entry(pos, typeof(*vregion), list); /* First, check if the requested priority does not rather belong - * under some of the next regions. + * under some of the next vregions. */ - if (pos->next != &group->region_list) { /* not last */ - region2 = list_entry(pos->next, typeof(*region2), list); - if (priority >= mlxsw_sp_acl_tcam_region_prio(region2)) + if (pos->next != &group->vregion_list) { /* not last */ + vregion2 = list_entry(pos->next, typeof(*vregion2), + list); + if (priority >= + mlxsw_sp_acl_tcam_vregion_prio(vregion2)) continue; } - issubset = mlxsw_afk_key_info_subset(region->key_info, elusage); + issubset = mlxsw_afk_key_info_subset(vregion->key_info, + elusage); /* If requested element usage would not fit and the priority - * is lower than the currently inspected region we cannot - * use this region, so return NULL to indicate new region has + * is lower than the currently inspected vregion we cannot + * use this region, so return NULL to indicate new vregion has * to be created. */ if (!issubset && - priority < mlxsw_sp_acl_tcam_region_prio(region)) + priority < mlxsw_sp_acl_tcam_vregion_prio(vregion)) return NULL; /* If requested element usage would not fit and the priority - * is higher than the currently inspected region we cannot - * use this region. There is still some hope that the next - * region would be the fit. So let it be processed and + * is higher than the currently inspected vregion we cannot + * use this vregion. There is still some hope that the next + * vregion would be the fit. So let it be processed and * eventually break at the check right above this. */ if (!issubset && - priority > mlxsw_sp_acl_tcam_region_max_prio(region)) + priority > mlxsw_sp_acl_tcam_vregion_max_prio(vregion)) continue; - /* Indicate if the region needs to be split in order to add + /* Indicate if the vregion needs to be split in order to add * the requested priority. Split is needed when requested - * element usage won't fit into the found region. + * element usage won't fit into the found vregion. */ *p_need_split = !issubset; - return region; + return vregion; } - return NULL; /* New region has to be created. */ + return NULL; /* New vregion has to be created. */ } static void @@ -511,24 +530,18 @@ mlxsw_sp_acl_tcam_region_disable(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_acl_tcam_region * mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam, - struct mlxsw_afk_element_usage *elusage) + struct mlxsw_sp_acl_tcam_vregion *vregion) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl); struct mlxsw_sp_acl_tcam_region *region; int err; region = kzalloc(sizeof(*region) + ops->region_priv_size, GFP_KERNEL); if (!region) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(®ion->chunk_list); region->mlxsw_sp = mlxsw_sp; - - region->key_info = mlxsw_afk_key_info_get(afk, elusage); - if (IS_ERR(region->key_info)) { - err = PTR_ERR(region->key_info); - goto err_key_info_get; - } + region->vregion = vregion; + region->key_info = vregion->key_info; err = mlxsw_sp_acl_tcam_region_id_get(tcam, ®ion->id); if (err) @@ -561,8 +574,6 @@ err_tcam_region_alloc: err_tcam_region_associate: mlxsw_sp_acl_tcam_region_id_put(tcam, region->id); err_region_id_get: - mlxsw_afk_key_info_put(region->key_info); -err_key_info_get: kfree(region); return ERR_PTR(err); } @@ -576,11 +587,56 @@ mlxsw_sp_acl_tcam_region_destroy(struct mlxsw_sp *mlxsw_sp, ops->region_fini(mlxsw_sp, region->priv); mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region); mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region); - mlxsw_sp_acl_tcam_region_id_put(region->group->tcam, region->id); - mlxsw_afk_key_info_put(region->key_info); + mlxsw_sp_acl_tcam_region_id_put(region->vregion->group->tcam, + region->id); kfree(region); } +static struct mlxsw_sp_acl_tcam_vregion * +mlxsw_sp_acl_tcam_vregion_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, + struct mlxsw_afk_element_usage *elusage) +{ + struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl); + struct mlxsw_sp_acl_tcam_vregion *vregion; + int err; + + vregion = kzalloc(sizeof(*vregion), GFP_KERNEL); + if (!vregion) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&vregion->chunk_list); + + vregion->key_info = mlxsw_afk_key_info_get(afk, elusage); + if (IS_ERR(vregion->key_info)) { + err = PTR_ERR(vregion->key_info); + goto err_key_info_get; + } + + vregion->region = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, tcam, + vregion); + if (IS_ERR(vregion->region)) { + err = PTR_ERR(vregion->region); + goto err_region_create; + } + + return vregion; + +err_region_create: + mlxsw_afk_key_info_put(vregion->key_info); +err_key_info_get: + kfree(vregion); + return ERR_PTR(err); +} + +static void +mlxsw_sp_acl_tcam_vregion_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion) +{ + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, vregion->region); + mlxsw_afk_key_info_put(vregion->key_info); + kfree(vregion); +} + static int mlxsw_sp_acl_tcam_chunk_assoc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_group *group, @@ -588,48 +644,49 @@ mlxsw_sp_acl_tcam_chunk_assoc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_afk_element_usage *elusage, struct mlxsw_sp_acl_tcam_chunk *chunk) { - struct mlxsw_sp_acl_tcam_region *region; - bool region_created = false; + struct mlxsw_sp_acl_tcam_vregion *vregion; + bool vregion_created = false; bool need_split; int err; - region = mlxsw_sp_acl_tcam_group_region_find(group, priority, elusage, - &need_split); - if (region && need_split) { + vregion = mlxsw_sp_acl_tcam_group_vregion_find(group, priority, elusage, + &need_split); + if (vregion && need_split) { /* According to priority, the chunk should belong to an - * existing region. However, this chunk needs elements - * that region does not contain. We need to split the existing - * region into two and create a new region for this chunk + * existing vregion. However, this chunk needs elements + * that vregion does not contain. We need to split the existing + * vregion into two and create a new vregion for this chunk * in between. This is not supported now. */ return -EOPNOTSUPP; } - if (!region) { - struct mlxsw_afk_element_usage region_elusage; + if (!vregion) { + struct mlxsw_afk_element_usage vregion_elusage; mlxsw_sp_acl_tcam_group_use_patterns(group, elusage, - ®ion_elusage); - region = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, group->tcam, - ®ion_elusage); - if (IS_ERR(region)) - return PTR_ERR(region); - region_created = true; + &vregion_elusage); + vregion = mlxsw_sp_acl_tcam_vregion_create(mlxsw_sp, + group->tcam, + &vregion_elusage); + if (IS_ERR(vregion)) + return PTR_ERR(vregion); + vregion_created = true; } - chunk->region = region; - list_add_tail(&chunk->list, ®ion->chunk_list); + chunk->vregion = vregion; + list_add_tail(&chunk->list, &vregion->chunk_list); - if (!region_created) + if (!vregion_created) return 0; - err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp, group, region); + err = mlxsw_sp_acl_tcam_group_vregion_attach(mlxsw_sp, group, vregion); if (err) - goto err_group_region_attach; + goto err_group_vregion_attach; return 0; -err_group_region_attach: - mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, region); +err_group_vregion_attach: + mlxsw_sp_acl_tcam_vregion_destroy(mlxsw_sp, vregion); return err; } @@ -637,12 +694,12 @@ static void mlxsw_sp_acl_tcam_chunk_deassoc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_chunk *chunk) { - struct mlxsw_sp_acl_tcam_region *region = chunk->region; + struct mlxsw_sp_acl_tcam_vregion *vregion = chunk->vregion; list_del(&chunk->list); - if (list_empty(®ion->chunk_list)) { - mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, region); - mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, region); + if (list_empty(&vregion->chunk_list)) { + mlxsw_sp_acl_tcam_group_vregion_detach(mlxsw_sp, vregion); + mlxsw_sp_acl_tcam_vregion_destroy(mlxsw_sp, vregion); } } @@ -671,7 +728,7 @@ mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp, if (err) goto err_chunk_assoc; - ops->chunk_init(chunk->region->priv, chunk->priv, priority); + ops->chunk_init(chunk->vregion->region->priv, chunk->priv, priority); err = rhashtable_insert_fast(&group->chunk_ht, &chunk->ht_node, mlxsw_sp_acl_tcam_chunk_ht_params); @@ -713,7 +770,7 @@ mlxsw_sp_acl_tcam_chunk_get(struct mlxsw_sp *mlxsw_sp, chunk = rhashtable_lookup_fast(&group->chunk_ht, &priority, mlxsw_sp_acl_tcam_chunk_ht_params); if (chunk) { - if (WARN_ON(!mlxsw_afk_key_info_subset(chunk->region->key_info, + if (WARN_ON(!mlxsw_afk_key_info_subset(chunk->vregion->key_info, elusage))) return ERR_PTR(-EINVAL); chunk->ref_count++; @@ -753,7 +810,7 @@ static int mlxsw_sp_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp, if (IS_ERR(chunk)) return PTR_ERR(chunk); - region = chunk->region; + region = chunk->vregion->region; err = ops->entry_add(mlxsw_sp, region->priv, chunk->priv, entry->priv, rulei); @@ -773,7 +830,7 @@ static void mlxsw_sp_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp, { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk; - struct mlxsw_sp_acl_tcam_region *region = chunk->region; + struct mlxsw_sp_acl_tcam_region *region = chunk->vregion->region; ops->entry_del(mlxsw_sp, region->priv, chunk->priv, entry->priv); mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk); @@ -786,7 +843,7 @@ mlxsw_sp_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk; - struct mlxsw_sp_acl_tcam_region *region = chunk->region; + struct mlxsw_sp_acl_tcam_region *region = chunk->vregion->region; return ops->entry_action_replace(mlxsw_sp, region->priv, entry->priv, rulei); @@ -799,7 +856,7 @@ mlxsw_sp_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp, { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk; - struct mlxsw_sp_acl_tcam_region *region = chunk->region; + struct mlxsw_sp_acl_tcam_region *region = chunk->vregion->region; return ops->entry_activity_get(mlxsw_sp, region->priv, entry->priv, activity); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h index 0858d5b06353..7e961a64eeaf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h @@ -67,11 +67,10 @@ mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp, (MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN * BITS_PER_BYTE) struct mlxsw_sp_acl_tcam_group; +struct mlxsw_sp_acl_tcam_vregion; struct mlxsw_sp_acl_tcam_region { - struct list_head list; /* Member of a TCAM group */ - struct list_head chunk_list; /* List of chunks under this region */ - struct mlxsw_sp_acl_tcam_group *group; + struct mlxsw_sp_acl_tcam_vregion *vregion; enum mlxsw_reg_ptar_key_type key_type; u16 id; /* ACL ID and region ID - they are same */ char tcam_region_info[MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN]; From b2d6b4d2beee81e1c30501954dc4ba198bb996c4 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Feb 2019 11:22:49 +0000 Subject: [PATCH 0838/1436] mlxsw: spectrum_acl: Split chunk struct into chunk and vchunk Do the split of chunk struct so the new chunk struct is related to the actual HW chunk (differs between Spectrum and Spectrum-2), whereas vchunk struct is a SW abstraction of that. This split prepares possibility for vchunk to hold 2 HW chunks which is needed for region ERP rehash flow. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_acl_tcam.c | 268 ++++++++++-------- 1 file changed, 156 insertions(+), 112 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 092a461cd5fb..091df6e0f805 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -155,7 +155,7 @@ struct mlxsw_sp_acl_tcam_group { u16 id; struct list_head vregion_list; unsigned int region_count; - struct rhashtable chunk_ht; + struct rhashtable vchunk_ht; struct mlxsw_sp_acl_tcam_group_ops *ops; const struct mlxsw_sp_acl_tcam_pattern *patterns; unsigned int patterns_count; @@ -166,32 +166,39 @@ struct mlxsw_sp_acl_tcam_group { struct mlxsw_sp_acl_tcam_vregion { struct mlxsw_sp_acl_tcam_region *region; struct list_head list; /* Member of a TCAM group */ - struct list_head chunk_list; /* List of chunks under this vregion */ + struct list_head vchunk_list; /* List of vchunks under this vregion */ struct mlxsw_sp_acl_tcam_group *group; struct mlxsw_afk_key_info *key_info; }; +struct mlxsw_sp_acl_tcam_vchunk; + struct mlxsw_sp_acl_tcam_chunk { + struct mlxsw_sp_acl_tcam_vchunk *vchunk; + unsigned long priv[0]; + /* priv has to be always the last item */ +}; + +struct mlxsw_sp_acl_tcam_vchunk { + struct mlxsw_sp_acl_tcam_chunk *chunk; struct list_head list; /* Member of a TCAM vregion */ struct rhash_head ht_node; /* Member of a chunk HT */ unsigned int priority; /* Priority within the vregion and group */ struct mlxsw_sp_acl_tcam_group *group; struct mlxsw_sp_acl_tcam_vregion *vregion; unsigned int ref_count; - unsigned long priv[0]; - /* priv has to be always the last item */ }; struct mlxsw_sp_acl_tcam_entry { - struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_vchunk *vchunk; unsigned long priv[0]; /* priv has to be always the last item */ }; -static const struct rhashtable_params mlxsw_sp_acl_tcam_chunk_ht_params = { +static const struct rhashtable_params mlxsw_sp_acl_tcam_vchunk_ht_params = { .key_len = sizeof(unsigned int), - .key_offset = offsetof(struct mlxsw_sp_acl_tcam_chunk, priority), - .head_offset = offsetof(struct mlxsw_sp_acl_tcam_chunk, ht_node), + .key_offset = offsetof(struct mlxsw_sp_acl_tcam_vchunk, priority), + .head_offset = offsetof(struct mlxsw_sp_acl_tcam_vchunk, ht_node), .automatic_shrinking = true, }; @@ -233,8 +240,8 @@ mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp *mlxsw_sp, if (err) return err; - err = rhashtable_init(&group->chunk_ht, - &mlxsw_sp_acl_tcam_chunk_ht_params); + err = rhashtable_init(&group->vchunk_ht, + &mlxsw_sp_acl_tcam_vchunk_ht_params); if (err) goto err_rhashtable_init; @@ -250,7 +257,7 @@ static void mlxsw_sp_acl_tcam_group_del(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_tcam *tcam = group->tcam; - rhashtable_destroy(&group->chunk_ht); + rhashtable_destroy(&group->vchunk_ht); mlxsw_sp_acl_tcam_group_id_put(tcam, group->id); WARN_ON(!list_empty(&group->vregion_list)); } @@ -294,26 +301,26 @@ mlxsw_sp_acl_tcam_group_id(struct mlxsw_sp_acl_tcam_group *group) static unsigned int mlxsw_sp_acl_tcam_vregion_prio(struct mlxsw_sp_acl_tcam_vregion *vregion) { - struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_vchunk *vchunk; - if (list_empty(&vregion->chunk_list)) + if (list_empty(&vregion->vchunk_list)) return 0; - /* As a priority of a vregion, return priority of the first chunk */ - chunk = list_first_entry(&vregion->chunk_list, - typeof(*chunk), list); - return chunk->priority; + /* As a priority of a vregion, return priority of the first vchunk */ + vchunk = list_first_entry(&vregion->vchunk_list, + typeof(*vchunk), list); + return vchunk->priority; } static unsigned int mlxsw_sp_acl_tcam_vregion_max_prio(struct mlxsw_sp_acl_tcam_vregion *vregion) { - struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_vchunk *vchunk; - if (list_empty(&vregion->chunk_list)) + if (list_empty(&vregion->vchunk_list)) return 0; - chunk = list_last_entry(&vregion->chunk_list, - typeof(*chunk), list); - return chunk->priority; + vchunk = list_last_entry(&vregion->vchunk_list, + typeof(*vchunk), list); + return vchunk->priority; } static int @@ -604,7 +611,7 @@ mlxsw_sp_acl_tcam_vregion_create(struct mlxsw_sp *mlxsw_sp, vregion = kzalloc(sizeof(*vregion), GFP_KERNEL); if (!vregion) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&vregion->chunk_list); + INIT_LIST_HEAD(&vregion->vchunk_list); vregion->key_info = mlxsw_afk_key_info_get(afk, elusage); if (IS_ERR(vregion->key_info)) { @@ -638,11 +645,11 @@ mlxsw_sp_acl_tcam_vregion_destroy(struct mlxsw_sp *mlxsw_sp, } static int -mlxsw_sp_acl_tcam_chunk_assoc(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_group *group, - unsigned int priority, - struct mlxsw_afk_element_usage *elusage, - struct mlxsw_sp_acl_tcam_chunk *chunk) +mlxsw_sp_acl_tcam_vchunk_assoc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage, + struct mlxsw_sp_acl_tcam_vchunk *vchunk) { struct mlxsw_sp_acl_tcam_vregion *vregion; bool vregion_created = false; @@ -652,10 +659,10 @@ mlxsw_sp_acl_tcam_chunk_assoc(struct mlxsw_sp *mlxsw_sp, vregion = mlxsw_sp_acl_tcam_group_vregion_find(group, priority, elusage, &need_split); if (vregion && need_split) { - /* According to priority, the chunk should belong to an - * existing vregion. However, this chunk needs elements + /* According to priority, the vchunk should belong to an + * existing vregion. However, this vchunk needs elements * that vregion does not contain. We need to split the existing - * vregion into two and create a new vregion for this chunk + * vregion into two and create a new vregion for this vchunk * in between. This is not supported now. */ return -EOPNOTSUPP; @@ -673,8 +680,8 @@ mlxsw_sp_acl_tcam_chunk_assoc(struct mlxsw_sp *mlxsw_sp, vregion_created = true; } - chunk->vregion = vregion; - list_add_tail(&chunk->list, &vregion->chunk_list); + vchunk->vregion = vregion; + list_add_tail(&vchunk->list, &vregion->vchunk_list); if (!vregion_created) return 0; @@ -691,13 +698,13 @@ err_group_vregion_attach: } static void -mlxsw_sp_acl_tcam_chunk_deassoc(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_chunk *chunk) +mlxsw_sp_acl_tcam_vchunk_deassoc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vchunk *vchunk) { - struct mlxsw_sp_acl_tcam_vregion *vregion = chunk->vregion; + struct mlxsw_sp_acl_tcam_vregion *vregion = vchunk->vregion; - list_del(&chunk->list); - if (list_empty(&vregion->chunk_list)) { + list_del(&vchunk->list); + if (list_empty(&vregion->vchunk_list)) { mlxsw_sp_acl_tcam_group_vregion_detach(mlxsw_sp, vregion); mlxsw_sp_acl_tcam_vregion_destroy(mlxsw_sp, vregion); } @@ -705,44 +712,19 @@ mlxsw_sp_acl_tcam_chunk_deassoc(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_acl_tcam_chunk * mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_group *group, - unsigned int priority, - struct mlxsw_afk_element_usage *elusage) + struct mlxsw_sp_acl_tcam_vchunk *vchunk, + struct mlxsw_sp_acl_tcam_region *region) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; struct mlxsw_sp_acl_tcam_chunk *chunk; - int err; - - if (priority == MLXSW_SP_ACL_TCAM_CATCHALL_PRIO) - return ERR_PTR(-EINVAL); chunk = kzalloc(sizeof(*chunk) + ops->chunk_priv_size, GFP_KERNEL); if (!chunk) return ERR_PTR(-ENOMEM); - chunk->priority = priority; - chunk->group = group; - chunk->ref_count = 1; - - err = mlxsw_sp_acl_tcam_chunk_assoc(mlxsw_sp, group, priority, - elusage, chunk); - if (err) - goto err_chunk_assoc; - - ops->chunk_init(chunk->vregion->region->priv, chunk->priv, priority); - - err = rhashtable_insert_fast(&group->chunk_ht, &chunk->ht_node, - mlxsw_sp_acl_tcam_chunk_ht_params); - if (err) - goto err_rhashtable_insert; + chunk->vchunk = vchunk; + ops->chunk_init(region->priv, chunk->priv, vchunk->priority); return chunk; - -err_rhashtable_insert: - ops->chunk_fini(chunk->priv); - mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk); -err_chunk_assoc: - kfree(chunk); - return ERR_PTR(err); } static void @@ -750,42 +732,100 @@ mlxsw_sp_acl_tcam_chunk_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_chunk *chunk) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_sp_acl_tcam_group *group = chunk->group; - rhashtable_remove_fast(&group->chunk_ht, &chunk->ht_node, - mlxsw_sp_acl_tcam_chunk_ht_params); ops->chunk_fini(chunk->priv); - mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk); kfree(chunk); } -static struct mlxsw_sp_acl_tcam_chunk * -mlxsw_sp_acl_tcam_chunk_get(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_group *group, - unsigned int priority, - struct mlxsw_afk_element_usage *elusage) +static struct mlxsw_sp_acl_tcam_vchunk * +mlxsw_sp_acl_tcam_vchunk_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage) { - struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_vchunk *vchunk; + int err; - chunk = rhashtable_lookup_fast(&group->chunk_ht, &priority, - mlxsw_sp_acl_tcam_chunk_ht_params); - if (chunk) { - if (WARN_ON(!mlxsw_afk_key_info_subset(chunk->vregion->key_info, - elusage))) - return ERR_PTR(-EINVAL); - chunk->ref_count++; - return chunk; + if (priority == MLXSW_SP_ACL_TCAM_CATCHALL_PRIO) + return ERR_PTR(-EINVAL); + + vchunk = kzalloc(sizeof(*vchunk), GFP_KERNEL); + if (!vchunk) + return ERR_PTR(-ENOMEM); + vchunk->priority = priority; + vchunk->group = group; + vchunk->ref_count = 1; + + err = mlxsw_sp_acl_tcam_vchunk_assoc(mlxsw_sp, group, priority, + elusage, vchunk); + if (err) + goto err_vchunk_assoc; + + err = rhashtable_insert_fast(&group->vchunk_ht, &vchunk->ht_node, + mlxsw_sp_acl_tcam_vchunk_ht_params); + if (err) + goto err_rhashtable_insert; + + vchunk->chunk = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk, + vchunk->vregion->region); + if (IS_ERR(vchunk->chunk)) { + err = PTR_ERR(vchunk->chunk); + goto err_chunk_create; } - return mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, group, - priority, elusage); + + return vchunk; + +err_chunk_create: + rhashtable_remove_fast(&group->vchunk_ht, &vchunk->ht_node, + mlxsw_sp_acl_tcam_vchunk_ht_params); +err_rhashtable_insert: + mlxsw_sp_acl_tcam_vchunk_deassoc(mlxsw_sp, vchunk); +err_vchunk_assoc: + kfree(vchunk); + return ERR_PTR(err); } -static void mlxsw_sp_acl_tcam_chunk_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_chunk *chunk) +static void +mlxsw_sp_acl_tcam_vchunk_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vchunk *vchunk) { - if (--chunk->ref_count) + struct mlxsw_sp_acl_tcam_group *group = vchunk->group; + + mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk); + rhashtable_remove_fast(&group->vchunk_ht, &vchunk->ht_node, + mlxsw_sp_acl_tcam_vchunk_ht_params); + mlxsw_sp_acl_tcam_vchunk_deassoc(mlxsw_sp, vchunk); + kfree(vchunk); +} + +static struct mlxsw_sp_acl_tcam_vchunk * +mlxsw_sp_acl_tcam_vchunk_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk; + + vchunk = rhashtable_lookup_fast(&group->vchunk_ht, &priority, + mlxsw_sp_acl_tcam_vchunk_ht_params); + if (vchunk) { + if (WARN_ON(!mlxsw_afk_key_info_subset(vchunk->vregion->key_info, + elusage))) + return ERR_PTR(-EINVAL); + vchunk->ref_count++; + return vchunk; + } + return mlxsw_sp_acl_tcam_vchunk_create(mlxsw_sp, group, + priority, elusage); +} + +static void +mlxsw_sp_acl_tcam_vchunk_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vchunk *vchunk) +{ + if (--vchunk->ref_count) return; - mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, chunk); + mlxsw_sp_acl_tcam_vchunk_destroy(mlxsw_sp, vchunk); } static size_t mlxsw_sp_acl_tcam_entry_priv_size(struct mlxsw_sp *mlxsw_sp) @@ -801,27 +841,29 @@ static int mlxsw_sp_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_vchunk *vchunk; struct mlxsw_sp_acl_tcam_region *region; + struct mlxsw_sp_acl_tcam_chunk *chunk; int err; - chunk = mlxsw_sp_acl_tcam_chunk_get(mlxsw_sp, group, rulei->priority, - &rulei->values.elusage); - if (IS_ERR(chunk)) - return PTR_ERR(chunk); + vchunk = mlxsw_sp_acl_tcam_vchunk_get(mlxsw_sp, group, rulei->priority, + &rulei->values.elusage); + if (IS_ERR(vchunk)) + return PTR_ERR(vchunk); - region = chunk->vregion->region; + chunk = vchunk->chunk; + region = vchunk->vregion->region; err = ops->entry_add(mlxsw_sp, region->priv, chunk->priv, entry->priv, rulei); if (err) goto err_entry_add; - entry->chunk = chunk; + entry->vchunk = vchunk; return 0; err_entry_add: - mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk); + mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, vchunk); return err; } @@ -829,11 +871,12 @@ static void mlxsw_sp_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_entry *entry) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk; - struct mlxsw_sp_acl_tcam_region *region = chunk->vregion->region; + struct mlxsw_sp_acl_tcam_vchunk *vchunk = entry->vchunk; + struct mlxsw_sp_acl_tcam_chunk *chunk = vchunk->chunk; + struct mlxsw_sp_acl_tcam_region *region = vchunk->vregion->region; ops->entry_del(mlxsw_sp, region->priv, chunk->priv, entry->priv); - mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk); + mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, vchunk); } static int @@ -842,8 +885,8 @@ mlxsw_sp_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk; - struct mlxsw_sp_acl_tcam_region *region = chunk->vregion->region; + struct mlxsw_sp_acl_tcam_vchunk *vchunk = entry->vchunk; + struct mlxsw_sp_acl_tcam_region *region = vchunk->vregion->region; return ops->entry_action_replace(mlxsw_sp, region->priv, entry->priv, rulei); @@ -855,8 +898,8 @@ mlxsw_sp_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp, bool *activity) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk; - struct mlxsw_sp_acl_tcam_region *region = chunk->vregion->region; + struct mlxsw_sp_acl_tcam_vchunk *vchunk = entry->vchunk; + struct mlxsw_sp_acl_tcam_region *region = vchunk->vregion->region; return ops->entry_activity_get(mlxsw_sp, region->priv, entry->priv, activity); @@ -1033,7 +1076,7 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = { }; struct mlxsw_sp_acl_tcam_mr_ruleset { - struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_vchunk *vchunk; struct mlxsw_sp_acl_tcam_group group; }; @@ -1063,10 +1106,11 @@ mlxsw_sp_acl_tcam_mr_ruleset_add(struct mlxsw_sp *mlxsw_sp, * specific ACL Group ID which must exist in HW before multicast router * is initialized. */ - ruleset->chunk = mlxsw_sp_acl_tcam_chunk_get(mlxsw_sp, &ruleset->group, - 1, tmplt_elusage); - if (IS_ERR(ruleset->chunk)) { - err = PTR_ERR(ruleset->chunk); + ruleset->vchunk = mlxsw_sp_acl_tcam_vchunk_get(mlxsw_sp, + &ruleset->group, 1, + tmplt_elusage); + if (IS_ERR(ruleset->vchunk)) { + err = PTR_ERR(ruleset->vchunk); goto err_chunk_get; } @@ -1082,7 +1126,7 @@ mlxsw_sp_acl_tcam_mr_ruleset_del(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv) { struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv; - mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, ruleset->chunk); + mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, ruleset->vchunk); mlxsw_sp_acl_tcam_group_del(mlxsw_sp, &ruleset->group); } From c4c2dc54293c556b8a05b9da024f92f8225a9eb8 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Feb 2019 11:22:50 +0000 Subject: [PATCH 0839/1436] mlxsw: spectrum_acl: Split entry struct into entry and ventry Do the split of entry struct so the new entry struct is related to the actual HW entry, whereas ventry struct is a SW abstration of that. This split prepares possibility for ventry to hold 2 HW entries which is needed for region ERP rehash flow. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_acl.c | 2 +- .../mellanox/mlxsw/spectrum_acl_tcam.c | 170 +++++++++++------- .../mellanox/mlxsw/spectrum_acl_tcam.h | 2 +- 3 files changed, 108 insertions(+), 66 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 2649d128a57c..38e027815393 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -640,7 +640,7 @@ mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp, int err; mlxsw_sp_acl_ruleset_ref_inc(ruleset); - rule = kzalloc(sizeof(*rule) + ops->rule_priv_size(mlxsw_sp), + rule = kzalloc(sizeof(*rule) + ops->rule_priv_size, GFP_KERNEL); if (!rule) { err = -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 091df6e0f805..36921ed1d2d5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -190,11 +190,16 @@ struct mlxsw_sp_acl_tcam_vchunk { }; struct mlxsw_sp_acl_tcam_entry { - struct mlxsw_sp_acl_tcam_vchunk *vchunk; + struct mlxsw_sp_acl_tcam_ventry *ventry; unsigned long priv[0]; /* priv has to be always the last item */ }; +struct mlxsw_sp_acl_tcam_ventry { + struct mlxsw_sp_acl_tcam_entry *entry; + struct mlxsw_sp_acl_tcam_vchunk *vchunk; +}; + static const struct rhashtable_params mlxsw_sp_acl_tcam_vchunk_ht_params = { .key_len = sizeof(unsigned int), .key_offset = offsetof(struct mlxsw_sp_acl_tcam_vchunk, priority), @@ -828,65 +833,52 @@ mlxsw_sp_acl_tcam_vchunk_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_tcam_vchunk_destroy(mlxsw_sp, vchunk); } -static size_t mlxsw_sp_acl_tcam_entry_priv_size(struct mlxsw_sp *mlxsw_sp) +static struct mlxsw_sp_acl_tcam_entry * +mlxsw_sp_acl_tcam_entry_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry, + struct mlxsw_sp_acl_tcam_region *region, + struct mlxsw_sp_acl_tcam_chunk *chunk, + struct mlxsw_sp_acl_rule_info *rulei) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - - return ops->entry_priv_size; -} - -static int mlxsw_sp_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_group *group, - struct mlxsw_sp_acl_tcam_entry *entry, - struct mlxsw_sp_acl_rule_info *rulei) -{ - const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_sp_acl_tcam_vchunk *vchunk; - struct mlxsw_sp_acl_tcam_region *region; - struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_entry *entry; int err; - vchunk = mlxsw_sp_acl_tcam_vchunk_get(mlxsw_sp, group, rulei->priority, - &rulei->values.elusage); - if (IS_ERR(vchunk)) - return PTR_ERR(vchunk); - - chunk = vchunk->chunk; - region = vchunk->vregion->region; + entry = kzalloc(sizeof(*entry) + ops->entry_priv_size, GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + entry->ventry = ventry; err = ops->entry_add(mlxsw_sp, region->priv, chunk->priv, entry->priv, rulei); if (err) goto err_entry_add; - entry->vchunk = vchunk; - return 0; + return entry; err_entry_add: - mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, vchunk); - return err; + kfree(entry); + return ERR_PTR(err); } -static void mlxsw_sp_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_entry *entry) +static void mlxsw_sp_acl_tcam_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region, + struct mlxsw_sp_acl_tcam_chunk *chunk, + struct mlxsw_sp_acl_tcam_entry *entry) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_sp_acl_tcam_vchunk *vchunk = entry->vchunk; - struct mlxsw_sp_acl_tcam_chunk *chunk = vchunk->chunk; - struct mlxsw_sp_acl_tcam_region *region = vchunk->vregion->region; ops->entry_del(mlxsw_sp, region->priv, chunk->priv, entry->priv); - mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, vchunk); + kfree(entry); } static int mlxsw_sp_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region, struct mlxsw_sp_acl_tcam_entry *entry, struct mlxsw_sp_acl_rule_info *rulei) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_sp_acl_tcam_vchunk *vchunk = entry->vchunk; - struct mlxsw_sp_acl_tcam_region *region = vchunk->vregion->region; return ops->entry_action_replace(mlxsw_sp, region->priv, entry->priv, rulei); @@ -894,17 +886,79 @@ mlxsw_sp_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region, struct mlxsw_sp_acl_tcam_entry *entry, bool *activity) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_sp_acl_tcam_vchunk *vchunk = entry->vchunk; - struct mlxsw_sp_acl_tcam_region *region = vchunk->vregion->region; return ops->entry_activity_get(mlxsw_sp, region->priv, entry->priv, activity); } +static int mlxsw_sp_acl_tcam_ventry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + struct mlxsw_sp_acl_tcam_ventry *ventry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk; + int err; + + vchunk = mlxsw_sp_acl_tcam_vchunk_get(mlxsw_sp, group, rulei->priority, + &rulei->values.elusage); + if (IS_ERR(vchunk)) + return PTR_ERR(vchunk); + + ventry->vchunk = vchunk; + ventry->entry = mlxsw_sp_acl_tcam_entry_create(mlxsw_sp, ventry, + vchunk->vregion->region, + vchunk->chunk, rulei); + if (IS_ERR(ventry->entry)) { + err = PTR_ERR(ventry->entry); + goto err_entry_create; + } + + return 0; + +err_entry_create: + mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, vchunk); + return err; +} + +static void mlxsw_sp_acl_tcam_ventry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk = ventry->vchunk; + + mlxsw_sp_acl_tcam_entry_destroy(mlxsw_sp, vchunk->vregion->region, + vchunk->chunk, ventry->entry); + mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, vchunk); +} + +static int +mlxsw_sp_acl_tcam_ventry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk = ventry->vchunk; + + return mlxsw_sp_acl_tcam_entry_action_replace(mlxsw_sp, + vchunk->vregion->region, + ventry->entry, rulei); +} + +static int +mlxsw_sp_acl_tcam_ventry_activity_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry, + bool *activity) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk = ventry->vchunk; + + return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, + vchunk->vregion->region, + ventry->entry, activity); +} + static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = { MLXSW_AFK_ELEMENT_SRC_SYS_PORT, MLXSW_AFK_ELEMENT_DMAC_32_47, @@ -959,7 +1013,7 @@ struct mlxsw_sp_acl_tcam_flower_ruleset { }; struct mlxsw_sp_acl_tcam_flower_rule { - struct mlxsw_sp_acl_tcam_entry entry; + struct mlxsw_sp_acl_tcam_ventry ventry; }; static int @@ -1017,12 +1071,6 @@ mlxsw_sp_acl_tcam_flower_ruleset_group_id(void *ruleset_priv) return mlxsw_sp_acl_tcam_group_id(&ruleset->group); } -static size_t mlxsw_sp_acl_tcam_flower_rule_priv_size(struct mlxsw_sp *mlxsw_sp) -{ - return sizeof(struct mlxsw_sp_acl_tcam_flower_rule) + - mlxsw_sp_acl_tcam_entry_priv_size(mlxsw_sp); -} - static int mlxsw_sp_acl_tcam_flower_rule_add(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, void *rule_priv, @@ -1031,8 +1079,8 @@ mlxsw_sp_acl_tcam_flower_rule_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv; struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv; - return mlxsw_sp_acl_tcam_entry_add(mlxsw_sp, &ruleset->group, - &rule->entry, rulei); + return mlxsw_sp_acl_tcam_ventry_add(mlxsw_sp, &ruleset->group, + &rule->ventry, rulei); } static void @@ -1040,7 +1088,7 @@ mlxsw_sp_acl_tcam_flower_rule_del(struct mlxsw_sp *mlxsw_sp, void *rule_priv) { struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv; - mlxsw_sp_acl_tcam_entry_del(mlxsw_sp, &rule->entry); + mlxsw_sp_acl_tcam_ventry_del(mlxsw_sp, &rule->ventry); } static int @@ -1057,8 +1105,8 @@ mlxsw_sp_acl_tcam_flower_rule_activity_get(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv; - return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, &rule->entry, - activity); + return mlxsw_sp_acl_tcam_ventry_activity_get(mlxsw_sp, &rule->ventry, + activity); } static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = { @@ -1068,7 +1116,7 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = { .ruleset_bind = mlxsw_sp_acl_tcam_flower_ruleset_bind, .ruleset_unbind = mlxsw_sp_acl_tcam_flower_ruleset_unbind, .ruleset_group_id = mlxsw_sp_acl_tcam_flower_ruleset_group_id, - .rule_priv_size = mlxsw_sp_acl_tcam_flower_rule_priv_size, + .rule_priv_size = sizeof(struct mlxsw_sp_acl_tcam_flower_rule), .rule_add = mlxsw_sp_acl_tcam_flower_rule_add, .rule_del = mlxsw_sp_acl_tcam_flower_rule_del, .rule_action_replace = mlxsw_sp_acl_tcam_flower_rule_action_replace, @@ -1081,7 +1129,7 @@ struct mlxsw_sp_acl_tcam_mr_ruleset { }; struct mlxsw_sp_acl_tcam_mr_rule { - struct mlxsw_sp_acl_tcam_entry entry; + struct mlxsw_sp_acl_tcam_ventry ventry; }; static int @@ -1155,12 +1203,6 @@ mlxsw_sp_acl_tcam_mr_ruleset_group_id(void *ruleset_priv) return mlxsw_sp_acl_tcam_group_id(&ruleset->group); } -static size_t mlxsw_sp_acl_tcam_mr_rule_priv_size(struct mlxsw_sp *mlxsw_sp) -{ - return sizeof(struct mlxsw_sp_acl_tcam_mr_rule) + - mlxsw_sp_acl_tcam_entry_priv_size(mlxsw_sp); -} - static int mlxsw_sp_acl_tcam_mr_rule_add(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, void *rule_priv, @@ -1169,8 +1211,8 @@ mlxsw_sp_acl_tcam_mr_rule_add(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv; struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv; - return mlxsw_sp_acl_tcam_entry_add(mlxsw_sp, &ruleset->group, - &rule->entry, rulei); + return mlxsw_sp_acl_tcam_ventry_add(mlxsw_sp, &ruleset->group, + &rule->ventry, rulei); } static void @@ -1178,7 +1220,7 @@ mlxsw_sp_acl_tcam_mr_rule_del(struct mlxsw_sp *mlxsw_sp, void *rule_priv) { struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv; - mlxsw_sp_acl_tcam_entry_del(mlxsw_sp, &rule->entry); + mlxsw_sp_acl_tcam_ventry_del(mlxsw_sp, &rule->ventry); } static int @@ -1188,8 +1230,8 @@ mlxsw_sp_acl_tcam_mr_rule_action_replace(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv; - return mlxsw_sp_acl_tcam_entry_action_replace(mlxsw_sp, &rule->entry, - rulei); + return mlxsw_sp_acl_tcam_ventry_action_replace(mlxsw_sp, &rule->ventry, + rulei); } static int @@ -1198,8 +1240,8 @@ mlxsw_sp_acl_tcam_mr_rule_activity_get(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv; - return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, &rule->entry, - activity); + return mlxsw_sp_acl_tcam_ventry_activity_get(mlxsw_sp, &rule->ventry, + activity); } static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_mr_ops = { @@ -1209,7 +1251,7 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_mr_ops = { .ruleset_bind = mlxsw_sp_acl_tcam_mr_ruleset_bind, .ruleset_unbind = mlxsw_sp_acl_tcam_mr_ruleset_unbind, .ruleset_group_id = mlxsw_sp_acl_tcam_mr_ruleset_group_id, - .rule_priv_size = mlxsw_sp_acl_tcam_mr_rule_priv_size, + .rule_priv_size = sizeof(struct mlxsw_sp_acl_tcam_mr_rule), .rule_add = mlxsw_sp_acl_tcam_mr_rule_add, .rule_del = mlxsw_sp_acl_tcam_mr_rule_del, .rule_action_replace = mlxsw_sp_acl_tcam_mr_rule_action_replace, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h index 7e961a64eeaf..c6d4aac82ddb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h @@ -43,7 +43,7 @@ struct mlxsw_sp_acl_profile_ops { struct mlxsw_sp_port *mlxsw_sp_port, bool ingress); u16 (*ruleset_group_id)(void *ruleset_priv); - size_t (*rule_priv_size)(struct mlxsw_sp *mlxsw_sp); + size_t rule_priv_size; int (*rule_add)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, void *rule_priv, struct mlxsw_sp_acl_rule_info *rulei); From 29a2102a29085f5505d86bd3cbfe11c6fa08340d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Feb 2019 11:22:51 +0000 Subject: [PATCH 0840/1436] mlxsw: spectrum_acl: Implement basic ERP rehash hits creation Introduce an initial implementation of rehash logic in ERP code. Currently, the rehash is considered as needed only in case number of roots in the hints is smaller than the number of roots actually in use. In that case return hints pointer and let it be obtained through the callpath through the Spectrum-2 TCAM op. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.h | 2 + .../mellanox/mlxsw/spectrum2_acl_tcam.c | 14 ++++ .../mellanox/mlxsw/spectrum_acl_atcam.c | 11 +++ .../mellanox/mlxsw/spectrum_acl_erp.c | 74 +++++++++++++++++++ .../mellanox/mlxsw/spectrum_acl_tcam.h | 6 ++ 5 files changed, 107 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 4fe0996c7cdd..6432a1ce51f3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -708,6 +708,8 @@ struct mlxsw_sp_acl_tcam_ops { void (*region_fini)(struct mlxsw_sp *mlxsw_sp, void *region_priv); int (*region_associate)(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_region *region); + void * (*region_rehash_hints_get)(void *region_priv); + void (*region_rehash_hints_put)(void *hints_priv); size_t chunk_priv_size; void (*chunk_init)(void *region_priv, void *chunk_priv, unsigned int priority); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c index d380b3403960..4808b29294d0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c @@ -166,6 +166,18 @@ mlxsw_sp2_acl_tcam_region_associate(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_acl_atcam_region_associate(mlxsw_sp, region->id); } +static void *mlxsw_sp2_acl_tcam_region_rehash_hints_get(void *region_priv) +{ + struct mlxsw_sp2_acl_tcam_region *region = region_priv; + + return mlxsw_sp_acl_atcam_rehash_hints_get(®ion->aregion); +} + +static void mlxsw_sp2_acl_tcam_region_rehash_hints_put(void *hints_priv) +{ + mlxsw_sp_acl_atcam_rehash_hints_put(hints_priv); +} + static void mlxsw_sp2_acl_tcam_chunk_init(void *region_priv, void *chunk_priv, unsigned int priority) { @@ -243,6 +255,8 @@ const struct mlxsw_sp_acl_tcam_ops mlxsw_sp2_acl_tcam_ops = { .region_init = mlxsw_sp2_acl_tcam_region_init, .region_fini = mlxsw_sp2_acl_tcam_region_fini, .region_associate = mlxsw_sp2_acl_tcam_region_associate, + .region_rehash_hints_get = mlxsw_sp2_acl_tcam_region_rehash_hints_get, + .region_rehash_hints_put = mlxsw_sp2_acl_tcam_region_rehash_hints_put, .chunk_priv_size = sizeof(struct mlxsw_sp2_acl_tcam_chunk), .chunk_init = mlxsw_sp2_acl_tcam_chunk_init, .chunk_fini = mlxsw_sp2_acl_tcam_chunk_fini, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c index a74a390901ac..e98037e4d03e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c @@ -634,3 +634,14 @@ void mlxsw_sp_acl_atcam_fini(struct mlxsw_sp *mlxsw_sp, { mlxsw_sp_acl_erps_fini(mlxsw_sp, atcam); } + +void * +mlxsw_sp_acl_atcam_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion) +{ + return mlxsw_sp_acl_erp_rehash_hints_get(aregion); +} + +void mlxsw_sp_acl_atcam_rehash_hints_put(void *hints_priv) +{ + mlxsw_sp_acl_erp_rehash_hints_put(hints_priv); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c index 302070a74f2e..013ab43a7727 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c @@ -1370,6 +1370,80 @@ mlxsw_sp_acl_erp_region_param_init(struct mlxsw_sp_acl_atcam_region *aregion) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl); } +static int +mlxsw_sp_acl_erp_hints_check(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct objagg_hints *hints, bool *p_rehash_needed) +{ + struct objagg *objagg = aregion->erp_table->objagg; + const struct objagg_stats *ostats; + const struct objagg_stats *hstats; + int err; + + *p_rehash_needed = false; + + ostats = objagg_stats_get(objagg); + if (IS_ERR(ostats)) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get ERP stats\n"); + return PTR_ERR(ostats); + } + + hstats = objagg_hints_stats_get(hints); + if (IS_ERR(hstats)) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get ERP hints stats\n"); + err = PTR_ERR(hstats); + goto err_hints_stats_get; + } + + /* Very basic criterion for now. */ + if (hstats->root_count < ostats->root_count) + *p_rehash_needed = true; + + err = 0; + + objagg_stats_put(hstats); +err_hints_stats_get: + objagg_stats_put(ostats); + return err; +} + +void * +mlxsw_sp_acl_erp_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion) +{ + struct mlxsw_sp *mlxsw_sp = aregion->region->mlxsw_sp; + struct objagg_hints *hints; + bool rehash_needed; + int err; + + hints = objagg_hints_get(aregion->erp_table->objagg, + OBJAGG_OPT_ALGO_SIMPLE_GREEDY); + if (IS_ERR(hints)) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to create ERP hints\n"); + return ERR_CAST(hints); + } + err = mlxsw_sp_acl_erp_hints_check(mlxsw_sp, aregion, hints, + &rehash_needed); + if (err) + goto errout; + + if (!rehash_needed) { + err = -EAGAIN; + goto errout; + } + return hints; + +errout: + objagg_hints_put(hints); + return ERR_PTR(err); +} + +void mlxsw_sp_acl_erp_rehash_hints_put(void *hints_priv) +{ + struct objagg_hints *hints = hints_priv; + + objagg_hints_put(hints); +} + int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion) { struct mlxsw_sp_acl_erp_table *erp_table; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h index c6d4aac82ddb..a90942bc1fe9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h @@ -229,6 +229,9 @@ int mlxsw_sp_acl_atcam_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam *atcam); void mlxsw_sp_acl_atcam_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam *atcam); +void * +mlxsw_sp_acl_atcam_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion); +void mlxsw_sp_acl_atcam_rehash_hints_put(void *hints_priv); struct mlxsw_sp_acl_erp_delta; @@ -259,6 +262,9 @@ void mlxsw_sp_acl_erp_bf_remove(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam_region *aregion, struct mlxsw_sp_acl_erp_mask *erp_mask, struct mlxsw_sp_acl_atcam_entry *aentry); +void * +mlxsw_sp_acl_erp_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion); +void mlxsw_sp_acl_erp_rehash_hints_put(void *hints_priv); int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion); void mlxsw_sp_acl_erp_region_fini(struct mlxsw_sp_acl_atcam_region *aregion); int mlxsw_sp_acl_erps_init(struct mlxsw_sp *mlxsw_sp, From a339bf8aafbc4b9a9234da290b0cc50c50e9c15b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Feb 2019 11:22:52 +0000 Subject: [PATCH 0841/1436] mlxsw: spectrum_acl: Pass hints priv all the way to ERP code The hints priv comes from ERP code and it is possible to obtain it from TCAM code. Add arg to appropriate functions so the hints priv could be passed back down to ERP code. Pass NULL now as the follow-up patches would pass an actual hints priv pointer. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 3 ++- .../net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c | 3 ++- .../net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c | 6 ++++-- .../net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c | 3 ++- .../net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c | 11 +++++++---- .../net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 3 ++- .../net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h | 4 +++- 7 files changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 6432a1ce51f3..3d17b4a368f4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -704,7 +704,8 @@ struct mlxsw_sp_acl_tcam_ops { size_t region_priv_size; int (*region_init)(struct mlxsw_sp *mlxsw_sp, void *region_priv, void *tcam_priv, - struct mlxsw_sp_acl_tcam_region *region); + struct mlxsw_sp_acl_tcam_region *region, + void *hints_priv); void (*region_fini)(struct mlxsw_sp *mlxsw_sp, void *region_priv); int (*region_associate)(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_region *region); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c index 6e444525713f..3a636f753607 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c @@ -112,7 +112,8 @@ mlxsw_sp1_acl_ctcam_region_catchall_del(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp1_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv, void *tcam_priv, - struct mlxsw_sp_acl_tcam_region *_region) + struct mlxsw_sp_acl_tcam_region *_region, + void *hints_priv) { struct mlxsw_sp1_acl_tcam_region *region = region_priv; int err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c index 4808b29294d0..6c66a0f1b79e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c @@ -139,7 +139,8 @@ static void mlxsw_sp2_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv) static int mlxsw_sp2_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv, void *tcam_priv, - struct mlxsw_sp_acl_tcam_region *_region) + struct mlxsw_sp_acl_tcam_region *_region, + void *hints_priv) { struct mlxsw_sp2_acl_tcam_region *region = region_priv; struct mlxsw_sp2_acl_tcam *tcam = tcam_priv; @@ -147,7 +148,8 @@ mlxsw_sp2_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv, region->region = _region; return mlxsw_sp_acl_atcam_region_init(mlxsw_sp, &tcam->atcam, - ®ion->aregion, _region, + ®ion->aregion, + _region, hints_priv, &mlxsw_sp2_acl_ctcam_region_ops); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c index e98037e4d03e..ded4cf658680 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c @@ -318,6 +318,7 @@ mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam *atcam, struct mlxsw_sp_acl_atcam_region *aregion, struct mlxsw_sp_acl_tcam_region *region, + void *hints_priv, const struct mlxsw_sp_acl_ctcam_region_ops *ops) { int err; @@ -334,7 +335,7 @@ mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp, err = aregion->ops->init(aregion); if (err) goto err_ops_init; - err = mlxsw_sp_acl_erp_region_init(aregion); + err = mlxsw_sp_acl_erp_region_init(aregion, hints_priv); if (err) goto err_erp_region_init; err = mlxsw_sp_acl_ctcam_region_init(mlxsw_sp, &aregion->cregion, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c index 013ab43a7727..e935c36638d9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c @@ -1313,7 +1313,8 @@ static const struct objagg_ops mlxsw_sp_acl_erp_objagg_ops = { }; static struct mlxsw_sp_acl_erp_table * -mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion) +mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion, + struct objagg_hints *hints) { struct mlxsw_sp_acl_erp_table *erp_table; int err; @@ -1323,7 +1324,7 @@ mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion) return ERR_PTR(-ENOMEM); erp_table->objagg = objagg_create(&mlxsw_sp_acl_erp_objagg_ops, - NULL, aregion); + hints, aregion); if (IS_ERR(erp_table->objagg)) { err = PTR_ERR(erp_table->objagg); goto err_objagg_create; @@ -1444,12 +1445,14 @@ void mlxsw_sp_acl_erp_rehash_hints_put(void *hints_priv) objagg_hints_put(hints); } -int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion) +int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion, + void *hints_priv) { struct mlxsw_sp_acl_erp_table *erp_table; + struct objagg_hints *hints = hints_priv; int err; - erp_table = mlxsw_sp_acl_erp_table_create(aregion); + erp_table = mlxsw_sp_acl_erp_table_create(aregion, hints); if (IS_ERR(erp_table)) return PTR_ERR(erp_table); aregion->erp_table = erp_table; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 36921ed1d2d5..1077c893438b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -572,7 +572,8 @@ mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp, if (err) goto err_tcam_region_enable; - err = ops->region_init(mlxsw_sp, region->priv, tcam->priv, region); + err = ops->region_init(mlxsw_sp, region->priv, tcam->priv, + region, NULL); if (err) goto err_tcam_region_init; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h index a90942bc1fe9..c2a340270982 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h @@ -206,6 +206,7 @@ mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam *atcam, struct mlxsw_sp_acl_atcam_region *aregion, struct mlxsw_sp_acl_tcam_region *region, + void *hints_priv, const struct mlxsw_sp_acl_ctcam_region_ops *ops); void mlxsw_sp_acl_atcam_region_fini(struct mlxsw_sp_acl_atcam_region *aregion); void mlxsw_sp_acl_atcam_chunk_init(struct mlxsw_sp_acl_atcam_region *aregion, @@ -265,7 +266,8 @@ void mlxsw_sp_acl_erp_bf_remove(struct mlxsw_sp *mlxsw_sp, void * mlxsw_sp_acl_erp_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion); void mlxsw_sp_acl_erp_rehash_hints_put(void *hints_priv); -int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion); +int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion, + void *hints_priv); void mlxsw_sp_acl_erp_region_fini(struct mlxsw_sp_acl_atcam_region *aregion); int mlxsw_sp_acl_erps_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam *atcam); From 5c661f142c1699095fbb80efb48198ef4c6c9d1d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Feb 2019 11:22:53 +0000 Subject: [PATCH 0842/1436] mlxsw: reg: Add multi field to PAGT register For Spectrum-2 this allows parallel lookups in multiple regions. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 11 ++++++++++- .../net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 5f8066ab7d40..227720ce3982 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -2199,6 +2199,14 @@ MLXSW_ITEM32(reg, pagt, size, 0x00, 0, 8); */ MLXSW_ITEM32(reg, pagt, acl_group_id, 0x08, 0, 16); +/* reg_pagt_multi + * Multi-ACL + * 0 - This ACL is the last ACL in the multi-ACL + * 1 - This ACL is part of a multi-ACL + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, pagt, multi, 0x30, 31, 1, 0x04, 0x00, false); + /* reg_pagt_acl_id * ACL identifier * Access: RW @@ -2212,12 +2220,13 @@ static inline void mlxsw_reg_pagt_pack(char *payload, u16 acl_group_id) } static inline void mlxsw_reg_pagt_acl_id_pack(char *payload, int index, - u16 acl_id) + u16 acl_id, bool multi) { u8 size = mlxsw_reg_pagt_size_get(payload); if (index >= size) mlxsw_reg_pagt_size_set(payload, index + 1); + mlxsw_reg_pagt_multi_set(payload, index, multi); mlxsw_reg_pagt_acl_id_set(payload, index, acl_id); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 1077c893438b..12d202afa233 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -217,7 +217,7 @@ static int mlxsw_sp_acl_tcam_group_update(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_pagt_pack(pagt_pl, group->id); list_for_each_entry(vregion, &group->vregion_list, list) mlxsw_reg_pagt_acl_id_pack(pagt_pl, acl_index++, - vregion->region->id); + vregion->region->id, false); mlxsw_reg_pagt_size_set(pagt_pl, acl_index); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pagt), pagt_pl); } From e5e7962ee5c277f769791d1d857d3641200ee0eb Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Feb 2019 11:22:54 +0000 Subject: [PATCH 0843/1436] mlxsw: spectrum_acl: Implement region migration according to hints If the hints are returned, the migration should be started. For that to happen, there is a need to create a second physical region in TCAM with new ERP set by passing the hints and then move chunk by chunk, entry by entry. During the transition, two lookups will occur. One in old region and another in new region. The highest priority rule will be chosen. In an unlikely case that the migration will fail and also rollback to original region will fail the vregion will become in bad state. Everything will work, only no future rehash will be possible. In a follow-up work, this can be resolved by trying to resume the rollback in delayed work and repair the vregion. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_acl_tcam.c | 287 ++++++++++++++++-- .../mellanox/mlxsw/spectrum_acl_tcam.h | 2 + 2 files changed, 268 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 12d202afa233..9239ff4e94c4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -23,6 +23,8 @@ size_t mlxsw_sp_acl_tcam_priv_size(struct mlxsw_sp *mlxsw_sp) return ops->priv_size; } +#define MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_DFLT 5000 /* ms */ + int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam) { @@ -33,6 +35,10 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, size_t alloc_size; int err; + tcam->vregion_rehash_intrvl = + MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_DFLT; + INIT_LIST_HEAD(&tcam->vregion_list); + max_tcam_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_REGIONS); max_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_REGIONS); @@ -165,24 +171,33 @@ struct mlxsw_sp_acl_tcam_group { struct mlxsw_sp_acl_tcam_vregion { struct mlxsw_sp_acl_tcam_region *region; + struct mlxsw_sp_acl_tcam_region *region2; /* Used during migration */ struct list_head list; /* Member of a TCAM group */ + struct list_head tlist; /* Member of a TCAM */ struct list_head vchunk_list; /* List of vchunks under this vregion */ struct mlxsw_sp_acl_tcam_group *group; struct mlxsw_afk_key_info *key_info; + struct mlxsw_sp_acl_tcam *tcam; + struct delayed_work rehash_dw; + struct mlxsw_sp *mlxsw_sp; + bool failed_rollback; /* Indicates failed rollback during migration */ }; struct mlxsw_sp_acl_tcam_vchunk; struct mlxsw_sp_acl_tcam_chunk { struct mlxsw_sp_acl_tcam_vchunk *vchunk; + struct mlxsw_sp_acl_tcam_region *region; unsigned long priv[0]; /* priv has to be always the last item */ }; struct mlxsw_sp_acl_tcam_vchunk { struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_chunk *chunk2; /* Used during migration */ struct list_head list; /* Member of a TCAM vregion */ struct rhash_head ht_node; /* Member of a chunk HT */ + struct list_head ventry_list; unsigned int priority; /* Priority within the vregion and group */ struct mlxsw_sp_acl_tcam_group *group; struct mlxsw_sp_acl_tcam_vregion *vregion; @@ -191,13 +206,16 @@ struct mlxsw_sp_acl_tcam_vchunk { struct mlxsw_sp_acl_tcam_entry { struct mlxsw_sp_acl_tcam_ventry *ventry; + struct mlxsw_sp_acl_tcam_chunk *chunk; unsigned long priv[0]; /* priv has to be always the last item */ }; struct mlxsw_sp_acl_tcam_ventry { struct mlxsw_sp_acl_tcam_entry *entry; + struct list_head list; /* Member of a TCAM vchunk */ struct mlxsw_sp_acl_tcam_vchunk *vchunk; + struct mlxsw_sp_acl_rule_info *rulei; }; static const struct rhashtable_params mlxsw_sp_acl_tcam_vchunk_ht_params = { @@ -215,9 +233,13 @@ static int mlxsw_sp_acl_tcam_group_update(struct mlxsw_sp *mlxsw_sp, int acl_index = 0; mlxsw_reg_pagt_pack(pagt_pl, group->id); - list_for_each_entry(vregion, &group->vregion_list, list) + list_for_each_entry(vregion, &group->vregion_list, list) { + if (vregion->region2) + mlxsw_reg_pagt_acl_id_pack(pagt_pl, acl_index++, + vregion->region2->id, true); mlxsw_reg_pagt_acl_id_pack(pagt_pl, acl_index++, vregion->region->id, false); + } mlxsw_reg_pagt_size_set(pagt_pl, acl_index); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pagt), pagt_pl); } @@ -391,6 +413,9 @@ mlxsw_sp_acl_tcam_group_vregion_detach(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_vregion *vregion) { list_del(&vregion->list); + if (vregion->region2) + mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, + vregion->region2); mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, vregion->region); } @@ -542,7 +567,8 @@ mlxsw_sp_acl_tcam_region_disable(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_acl_tcam_region * mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam, - struct mlxsw_sp_acl_tcam_vregion *vregion) + struct mlxsw_sp_acl_tcam_vregion *vregion, + void *hints_priv) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; struct mlxsw_sp_acl_tcam_region *region; @@ -573,7 +599,7 @@ mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp, goto err_tcam_region_enable; err = ops->region_init(mlxsw_sp, region->priv, tcam->priv, - region, NULL); + region, hints_priv); if (err) goto err_tcam_region_init; @@ -605,11 +631,42 @@ mlxsw_sp_acl_tcam_region_destroy(struct mlxsw_sp *mlxsw_sp, kfree(region); } +static void +mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(struct mlxsw_sp_acl_tcam_vregion *vregion) +{ + unsigned long interval = vregion->tcam->vregion_rehash_intrvl; + + if (!interval) + return; + mlxsw_core_schedule_dw(&vregion->rehash_dw, + msecs_to_jiffies(interval)); +} + +static int +mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion); + +static void mlxsw_sp_acl_tcam_vregion_rehash_work(struct work_struct *work) +{ + struct mlxsw_sp_acl_tcam_vregion *vregion = + container_of(work, struct mlxsw_sp_acl_tcam_vregion, + rehash_dw.work); + + /* TODO: Take rtnl lock here as the rest of the code counts on it + * now. Later, this should be replaced by per-vregion lock. + */ + rtnl_lock(); + mlxsw_sp_acl_tcam_vregion_rehash(vregion->mlxsw_sp, vregion); + rtnl_unlock(); + mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(vregion); +} + static struct mlxsw_sp_acl_tcam_vregion * mlxsw_sp_acl_tcam_vregion_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam, struct mlxsw_afk_element_usage *elusage) { + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl); struct mlxsw_sp_acl_tcam_vregion *vregion; int err; @@ -618,6 +675,8 @@ mlxsw_sp_acl_tcam_vregion_create(struct mlxsw_sp *mlxsw_sp, if (!vregion) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&vregion->vchunk_list); + vregion->tcam = tcam; + vregion->mlxsw_sp = mlxsw_sp; vregion->key_info = mlxsw_afk_key_info_get(afk, elusage); if (IS_ERR(vregion->key_info)) { @@ -626,12 +685,21 @@ mlxsw_sp_acl_tcam_vregion_create(struct mlxsw_sp *mlxsw_sp, } vregion->region = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, tcam, - vregion); + vregion, NULL); if (IS_ERR(vregion->region)) { err = PTR_ERR(vregion->region); goto err_region_create; } + list_add_tail(&vregion->tlist, &tcam->vregion_list); + + if (ops->region_rehash_hints_get) { + /* Create the delayed work for vregion periodic rehash */ + INIT_DELAYED_WORK(&vregion->rehash_dw, + mlxsw_sp_acl_tcam_vregion_rehash_work); + mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(vregion); + } + return vregion; err_region_create: @@ -645,6 +713,13 @@ static void mlxsw_sp_acl_tcam_vregion_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_vregion *vregion) { + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + + if (ops->region_rehash_hints_get) + cancel_delayed_work_sync(&vregion->rehash_dw); + list_del(&vregion->tlist); + if (vregion->region2) + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, vregion->region2); mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, vregion->region); mlxsw_afk_key_info_put(vregion->key_info); kfree(vregion); @@ -728,6 +803,7 @@ mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp, if (!chunk) return ERR_PTR(-ENOMEM); chunk->vchunk = vchunk; + chunk->region = region; ops->chunk_init(region->priv, chunk->priv, vchunk->priority); return chunk; @@ -758,6 +834,7 @@ mlxsw_sp_acl_tcam_vchunk_create(struct mlxsw_sp *mlxsw_sp, vchunk = kzalloc(sizeof(*vchunk), GFP_KERNEL); if (!vchunk) return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&vchunk->ventry_list); vchunk->priority = priority; vchunk->group = group; vchunk->ref_count = 1; @@ -797,6 +874,8 @@ mlxsw_sp_acl_tcam_vchunk_destroy(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_tcam_group *group = vchunk->group; + if (vchunk->chunk2) + mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk2); mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk); rhashtable_remove_fast(&group->vchunk_ht, &vchunk->ht_node, mlxsw_sp_acl_tcam_vchunk_ht_params); @@ -837,9 +916,7 @@ mlxsw_sp_acl_tcam_vchunk_put(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_acl_tcam_entry * mlxsw_sp_acl_tcam_entry_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_ventry *ventry, - struct mlxsw_sp_acl_tcam_region *region, - struct mlxsw_sp_acl_tcam_chunk *chunk, - struct mlxsw_sp_acl_rule_info *rulei) + struct mlxsw_sp_acl_tcam_chunk *chunk) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; struct mlxsw_sp_acl_tcam_entry *entry; @@ -849,9 +926,10 @@ mlxsw_sp_acl_tcam_entry_create(struct mlxsw_sp *mlxsw_sp, if (!entry) return ERR_PTR(-ENOMEM); entry->ventry = ventry; + entry->chunk = chunk; - err = ops->entry_add(mlxsw_sp, region->priv, chunk->priv, - entry->priv, rulei); + err = ops->entry_add(mlxsw_sp, chunk->region->priv, chunk->priv, + entry->priv, ventry->rulei); if (err) goto err_entry_add; @@ -863,13 +941,12 @@ err_entry_add: } static void mlxsw_sp_acl_tcam_entry_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_region *region, - struct mlxsw_sp_acl_tcam_chunk *chunk, struct mlxsw_sp_acl_tcam_entry *entry) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - ops->entry_del(mlxsw_sp, region->priv, chunk->priv, entry->priv); + ops->entry_del(mlxsw_sp, entry->chunk->region->priv, + entry->chunk->priv, entry->priv); kfree(entry); } @@ -887,13 +964,12 @@ mlxsw_sp_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_region *region, struct mlxsw_sp_acl_tcam_entry *entry, bool *activity) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - return ops->entry_activity_get(mlxsw_sp, region->priv, + return ops->entry_activity_get(mlxsw_sp, entry->chunk->region->priv, entry->priv, activity); } @@ -911,14 +987,16 @@ static int mlxsw_sp_acl_tcam_ventry_add(struct mlxsw_sp *mlxsw_sp, return PTR_ERR(vchunk); ventry->vchunk = vchunk; + ventry->rulei = rulei; ventry->entry = mlxsw_sp_acl_tcam_entry_create(mlxsw_sp, ventry, - vchunk->vregion->region, - vchunk->chunk, rulei); + vchunk->chunk); if (IS_ERR(ventry->entry)) { err = PTR_ERR(ventry->entry); goto err_entry_create; } + list_add_tail(&ventry->list, &vchunk->ventry_list); + return 0; err_entry_create: @@ -931,8 +1009,8 @@ static void mlxsw_sp_acl_tcam_ventry_del(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_tcam_vchunk *vchunk = ventry->vchunk; - mlxsw_sp_acl_tcam_entry_destroy(mlxsw_sp, vchunk->vregion->region, - vchunk->chunk, ventry->entry); + list_del(&ventry->list); + mlxsw_sp_acl_tcam_entry_destroy(mlxsw_sp, ventry->entry); mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, vchunk); } @@ -953,13 +1031,180 @@ mlxsw_sp_acl_tcam_ventry_activity_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_ventry *ventry, bool *activity) { - struct mlxsw_sp_acl_tcam_vchunk *vchunk = ventry->vchunk; - return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, - vchunk->vregion->region, ventry->entry, activity); } +static int +mlxsw_sp_acl_tcam_ventry_migrate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry, + struct mlxsw_sp_acl_tcam_chunk *chunk2) +{ + struct mlxsw_sp_acl_tcam_entry *entry2; + + entry2 = mlxsw_sp_acl_tcam_entry_create(mlxsw_sp, ventry, chunk2); + if (IS_ERR(entry2)) + return PTR_ERR(entry2); + mlxsw_sp_acl_tcam_entry_destroy(mlxsw_sp, ventry->entry); + ventry->entry = entry2; + return 0; +} + +static int +mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vchunk *vchunk, + struct mlxsw_sp_acl_tcam_region *region, + bool this_is_rollback) +{ + struct mlxsw_sp_acl_tcam_ventry *ventry; + struct mlxsw_sp_acl_tcam_chunk *chunk2; + int err; + int err2; + + chunk2 = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk, region); + if (IS_ERR(chunk2)) { + if (this_is_rollback) + vchunk->vregion->failed_rollback = true; + return PTR_ERR(chunk2); + } + vchunk->chunk2 = chunk2; + list_for_each_entry(ventry, &vchunk->ventry_list, list) { + err = mlxsw_sp_acl_tcam_ventry_migrate(mlxsw_sp, ventry, + vchunk->chunk2); + if (err) { + if (this_is_rollback) { + vchunk->vregion->failed_rollback = true; + return err; + } + goto rollback; + } + } + mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk); + vchunk->chunk = chunk2; + vchunk->chunk2 = NULL; + return 0; + +rollback: + /* Migrate the entries back to the original chunk. If some entry + * migration fails, there's no good way how to proceed. Set the + * vregion with "failed_rollback" flag. + */ + list_for_each_entry_continue_reverse(ventry, &vchunk->ventry_list, + list) { + err2 = mlxsw_sp_acl_tcam_ventry_migrate(mlxsw_sp, ventry, + vchunk->chunk); + if (err2) { + vchunk->vregion->failed_rollback = true; + goto err_rollback; + } + } + + mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk2); + vchunk->chunk2 = NULL; + +err_rollback: + return err; +} + +static int +mlxsw_sp_acl_tcam_vchunk_migrate_all(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk; + int err; + + list_for_each_entry(vchunk, &vregion->vchunk_list, list) { + err = mlxsw_sp_acl_tcam_vchunk_migrate_one(mlxsw_sp, vchunk, + vregion->region2, + false); + if (err) + goto rollback; + } + return 0; + +rollback: + list_for_each_entry_continue_reverse(vchunk, &vregion->vchunk_list, + list) { + mlxsw_sp_acl_tcam_vchunk_migrate_one(mlxsw_sp, vchunk, + vregion->region, true); + } + return err; +} + +static int +mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion, + void *hints_priv) +{ + struct mlxsw_sp_acl_tcam_region *region2, *unused_region; + int err; + + region2 = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, vregion->tcam, + vregion, hints_priv); + if (IS_ERR(region2)) + return PTR_ERR(region2); + + vregion->region2 = region2; + err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp, region2); + if (err) + goto err_group_region_attach; + + err = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion); + if (!vregion->failed_rollback) { + if (!err) { + /* In case of successful migration, region2 is used and + * the original is unused. + */ + unused_region = vregion->region; + vregion->region = vregion->region2; + } else { + /* In case of failure during migration, the original + * region is still used. + */ + unused_region = vregion->region2; + } + vregion->region2 = NULL; + mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, unused_region); + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, unused_region); + } + return err; + +err_group_region_attach: + vregion->region2 = NULL; + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, region2); + return err; +} + +static int +mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + void *hints_priv; + int err; + + if (vregion->failed_rollback) + return -EBUSY; + + hints_priv = ops->region_rehash_hints_get(vregion->region->priv); + if (IS_ERR(hints_priv)) { + err = PTR_ERR(hints_priv); + if (err != -EAGAIN) + dev_err(mlxsw_sp->bus_info->dev, "Failed get rehash hints\n"); + return err; + } + + err = mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion, hints_priv); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n"); + if (vregion->failed_rollback) + dev_err(mlxsw_sp->bus_info->dev, "Failed to rollback during vregion migration fail\n"); + } + + ops->region_rehash_hints_put(hints_priv); + return err; +} + static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = { MLXSW_AFK_ELEMENT_SRC_SYS_PORT, MLXSW_AFK_ELEMENT_DMAC_32_47, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h index c2a340270982..440a3483ed7b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h @@ -17,6 +17,8 @@ struct mlxsw_sp_acl_tcam { unsigned long *used_groups; /* bit array */ unsigned int max_groups; unsigned int max_group_size; + struct list_head vregion_list; + u32 vregion_rehash_intrvl; /* ms */ unsigned long priv[0]; /* priv has to be always the last item */ }; From 98bbf70c1c41fb9547c3a18c0f1b96f6ebb8eb1d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Feb 2019 11:22:55 +0000 Subject: [PATCH 0844/1436] mlxsw: spectrum: add "acl_region_rehash_interval" devlink param Expose new driver-specific "acl_region_rehash_interval" devlink param which would allow user to alter default ACL region rehash interval. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../networking/devlink-params-mlxsw.txt | 8 +++ drivers/net/ethernet/mellanox/mlxsw/core.h | 5 ++ .../net/ethernet/mellanox/mlxsw/spectrum.c | 69 ++++++++++++++++++- .../net/ethernet/mellanox/mlxsw/spectrum.h | 2 + .../ethernet/mellanox/mlxsw/spectrum_acl.c | 16 +++++ .../mellanox/mlxsw/spectrum_acl_tcam.c | 36 ++++++++++ .../mellanox/mlxsw/spectrum_acl_tcam.h | 5 ++ 7 files changed, 139 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/devlink-params-mlxsw.txt b/Documentation/networking/devlink-params-mlxsw.txt index 2c5c67a920c9..c63ea9fc7009 100644 --- a/Documentation/networking/devlink-params-mlxsw.txt +++ b/Documentation/networking/devlink-params-mlxsw.txt @@ -1,2 +1,10 @@ fw_load_policy [DEVICE, GENERIC] Configuration mode: driverinit + +acl_region_rehash_interval [DEVICE, DRIVER-SPECIFIC] + Sets an interval for periodic ACL region rehashes. + The value is in milliseconds, minimal value is "3000". + Value "0" disables the periodic work. + The first rehash will be run right after value is set. + Type: u32 + Configuration mode: runtime diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 4e114f35ee0d..c8e16a305969 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -394,4 +394,9 @@ static inline void mlxsw_thermal_fini(struct mlxsw_thermal *thermal) #endif +enum mlxsw_devlink_param_id { + MLXSW_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, +}; + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 8dd808b7f931..7c9745cecbbd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4413,6 +4413,71 @@ static void mlxsw_sp_params_unregister(struct mlxsw_core *mlxsw_core) ARRAY_SIZE(mlxsw_sp_devlink_params)); } +static int +mlxsw_sp_params_acl_region_rehash_intrvl_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + ctx->val.vu32 = mlxsw_sp_acl_region_rehash_intrvl_get(mlxsw_sp); + return 0; +} + +static int +mlxsw_sp_params_acl_region_rehash_intrvl_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + return mlxsw_sp_acl_region_rehash_intrvl_set(mlxsw_sp, ctx->val.vu32); +} + +static const struct devlink_param mlxsw_sp2_devlink_params[] = { + DEVLINK_PARAM_DRIVER(MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, + "acl_region_rehash_interval", + DEVLINK_PARAM_TYPE_U32, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlxsw_sp_params_acl_region_rehash_intrvl_get, + mlxsw_sp_params_acl_region_rehash_intrvl_set, + NULL), +}; + +static int mlxsw_sp2_params_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + union devlink_param_value value; + int err; + + err = mlxsw_sp_params_register(mlxsw_core); + if (err) + return err; + + err = devlink_params_register(devlink, mlxsw_sp2_devlink_params, + ARRAY_SIZE(mlxsw_sp2_devlink_params)); + if (err) + goto err_devlink_params_register; + + value.vu32 = 0; + devlink_param_driverinit_value_set(devlink, + MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, + value); + return 0; + +err_devlink_params_register: + mlxsw_sp_params_unregister(mlxsw_core); + return err; +} + +static void mlxsw_sp2_params_unregister(struct mlxsw_core *mlxsw_core) +{ + devlink_params_unregister(priv_to_devlink(mlxsw_core), + mlxsw_sp2_devlink_params, + ARRAY_SIZE(mlxsw_sp2_devlink_params)); + mlxsw_sp_params_unregister(mlxsw_core); +} + static struct mlxsw_driver mlxsw_sp1_driver = { .kind = mlxsw_sp1_driver_name, .priv_size = sizeof(struct mlxsw_sp), @@ -4461,8 +4526,8 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp2_resources_register, - .params_register = mlxsw_sp_params_register, - .params_unregister = mlxsw_sp_params_unregister, + .params_register = mlxsw_sp2_params_register, + .params_unregister = mlxsw_sp2_params_unregister, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, .res_query_enabled = true, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 3d17b4a368f4..ceebc91f4f1d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -690,6 +690,8 @@ struct mlxsw_sp_fid *mlxsw_sp_acl_dummy_fid(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp); +u32 mlxsw_sp_acl_region_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_acl_region_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, u32 val); /* spectrum_acl_tcam.c */ struct mlxsw_sp_acl_tcam; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 38e027815393..a146a44634e9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -912,3 +912,19 @@ void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_afk_destroy(acl->afk); kfree(acl); } + +u32 mlxsw_sp_acl_region_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + + return mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(mlxsw_sp, + &acl->tcam); +} + +int mlxsw_sp_acl_region_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, u32 val) +{ + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + + return mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(mlxsw_sp, + &acl->tcam, val); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 9239ff4e94c4..f2cb37c0d300 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -24,6 +24,7 @@ size_t mlxsw_sp_acl_tcam_priv_size(struct mlxsw_sp *mlxsw_sp) } #define MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_DFLT 5000 /* ms */ +#define MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_MIN 3000 /* ms */ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam) @@ -725,6 +726,41 @@ mlxsw_sp_acl_tcam_vregion_destroy(struct mlxsw_sp *mlxsw_sp, kfree(vregion); } +u32 mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + u32 vregion_rehash_intrvl; + + if (WARN_ON(!ops->region_rehash_hints_get)) + return 0; + vregion_rehash_intrvl = tcam->vregion_rehash_intrvl; + return vregion_rehash_intrvl; +} + +int mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, + u32 val) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + struct mlxsw_sp_acl_tcam_vregion *vregion; + + if (val < MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_MIN && val) + return -EINVAL; + if (WARN_ON(!ops->region_rehash_hints_get)) + return -EOPNOTSUPP; + tcam->vregion_rehash_intrvl = val; + rtnl_lock(); + list_for_each_entry(vregion, &tcam->vregion_list, tlist) { + if (val) + mlxsw_core_schedule_dw(&vregion->rehash_dw, 0); + else + cancel_delayed_work_sync(&vregion->rehash_dw); + } + rtnl_unlock(); + return 0; +} + static int mlxsw_sp_acl_tcam_vchunk_assoc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_group *group, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h index 440a3483ed7b..96bd42a9fbc3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h @@ -28,6 +28,11 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam); void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam); +u32 mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam); +int mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, + u32 val); int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, u32 *priority, bool fillup_priority); From 3985de7260a7342a474a0792fcd23737341657ce Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Feb 2019 11:22:55 +0000 Subject: [PATCH 0845/1436] mlxsw: spectrum_acl: Add couple of vregion rehash tracepoints As vregion rehash is happening in delayed work, add some visibility to the process using a few tracepoints. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_acl_tcam.c | 9 ++- include/trace/events/mlxsw.h | 61 +++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index f2cb37c0d300..7e225a86e3a8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "reg.h" #include "core.h" @@ -1175,6 +1176,8 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_region *region2, *unused_region; int err; + trace_mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion); + region2 = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, vregion->tcam, vregion, hints_priv); if (IS_ERR(region2)) @@ -1219,6 +1222,7 @@ mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, void *hints_priv; int err; + trace_mlxsw_sp_acl_tcam_vregion_rehash(mlxsw_sp, vregion); if (vregion->failed_rollback) return -EBUSY; @@ -1233,8 +1237,11 @@ mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion, hints_priv); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n"); - if (vregion->failed_rollback) + if (vregion->failed_rollback) { + trace_mlxsw_sp_acl_tcam_vregion_rehash_dis(mlxsw_sp, + vregion); dev_err(mlxsw_sp->bus_info->dev, "Failed to rollback during vregion migration fail\n"); + } } ops->region_rehash_hints_put(hints_priv); diff --git a/include/trace/events/mlxsw.h b/include/trace/events/mlxsw.h index 6c2bafcade18..a5ce6df9dc49 100644 --- a/include/trace/events/mlxsw.h +++ b/include/trace/events/mlxsw.h @@ -11,6 +11,7 @@ struct mlxsw_sp; struct mlxsw_sp_acl_atcam_region; +struct mlxsw_sp_acl_tcam_vregion; TRACE_EVENT(mlxsw_sp_acl_atcam_entry_add_ctcam_spill, TP_PROTO(const struct mlxsw_sp *mlxsw_sp, @@ -32,6 +33,66 @@ TRACE_EVENT(mlxsw_sp_acl_atcam_entry_add_ctcam_spill, __entry->mlxsw_sp, __entry->aregion) ); +TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_rehash, + TP_PROTO(const struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_acl_tcam_vregion *vregion), + + TP_ARGS(mlxsw_sp, vregion), + + TP_STRUCT__entry( + __field(const void *, mlxsw_sp) + __field(const void *, vregion) + ), + + TP_fast_assign( + __entry->mlxsw_sp = mlxsw_sp; + __entry->vregion = vregion; + ), + + TP_printk("mlxsw_sp %p, vregion %p", + __entry->mlxsw_sp, __entry->vregion) +); + +TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_migrate, + TP_PROTO(const struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_acl_tcam_vregion *vregion), + + TP_ARGS(mlxsw_sp, vregion), + + TP_STRUCT__entry( + __field(const void *, mlxsw_sp) + __field(const void *, vregion) + ), + + TP_fast_assign( + __entry->mlxsw_sp = mlxsw_sp; + __entry->vregion = vregion; + ), + + TP_printk("mlxsw_sp %p, vregion %p", + __entry->mlxsw_sp, __entry->vregion) +); + +TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_rehash_dis, + TP_PROTO(const struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_acl_tcam_vregion *vregion), + + TP_ARGS(mlxsw_sp, vregion), + + TP_STRUCT__entry( + __field(const void *, mlxsw_sp) + __field(const void *, vregion) + ), + + TP_fast_assign( + __entry->mlxsw_sp = mlxsw_sp; + __entry->vregion = vregion; + ), + + TP_printk("mlxsw_sp %p, vregion %p", + __entry->mlxsw_sp, __entry->vregion) +); + #endif /* _MLXSW_TRACEPOINT_H */ /* This part must be outside protection */ From c478d3c34798f40484307335cf845f5d98fc180e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Feb 2019 11:22:57 +0000 Subject: [PATCH 0846/1436] selftests: mlxsw: spectrum-2: Add simple delta rehash test Track the basic codepaths of delta rehash handling, using mlxsw tracepoints. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/spectrum-2/tc_flower.sh | 77 ++++++++++++++++++- 1 file changed, 74 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index f1922bf597b0..4d5b880b1d96 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -9,11 +9,11 @@ lib_dir=$(dirname $0)/../../../../net/forwarding ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ multiple_masks_test ctcam_edge_cases_test delta_simple_test \ - delta_two_masks_one_key_test bloom_simple_test \ - bloom_complex_test bloom_delta_test" + delta_two_masks_one_key_test delta_simple_rehash_test \ + bloom_simple_test bloom_complex_test bloom_delta_test" NUM_NETIFS=2 source $lib_dir/tc_common.sh -source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh tcflags="skip_hw" @@ -494,6 +494,77 @@ delta_two_masks_one_key_test() log_test "delta two masks one key test ($tcflags)" } +delta_simple_rehash_test() +{ + RET=0 + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + devlink dev param set $DEVLINK_DEV \ + name acl_region_rehash_interval cmode runtime value 0 + check_err $? "Failed to set ACL region rehash interval" + + tp_record_all mlxsw:mlxsw_sp_acl_tcam_vregion_rehash 7 + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash + check_fail $? "Rehash trace was hit even when rehash should be disabled" + + devlink dev param set $DEVLINK_DEV \ + name acl_region_rehash_interval cmode runtime value 3000 + check_err $? "Failed to set ACL region rehash interval" + + sleep 1 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.1.0/25 action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 action drop + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.0.3.0/24 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched a wrong filter" + + tc_check_packets "dev $h2 ingress" 103 1 + check_fail $? "Matched a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tp_record_all mlxsw:* 3 + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash + check_err $? "Rehash trace was not hit" + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate + check_err $? "Migrate trace was not hit" + tp_record_all mlxsw:* 3 + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash + check_err $? "Rehash trace was not hit" + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate + check_fail $? "Migrate trace was hit when no migration should happen" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched a wrong filter after rehash" + + tc_check_packets "dev $h2 ingress" 103 1 + check_fail $? "Matched a wrong filter after rehash" + + tc_check_packets "dev $h2 ingress" 102 2 + check_err $? "Did not match on correct filter after rehash" + + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + log_test "delta simple rehash test ($tcflags)" +} + bloom_simple_test() { # Bloom filter requires that the eRP table is used. This test From b5bfc21af5cb3d53f9cee0ef82eaa43762a90f81 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 6 Feb 2019 10:52:30 +0000 Subject: [PATCH 0847/1436] net: sfp: do not probe SFP module before we're attached When we probe a SFP module, we expect to be able to call the upstream device's module_insert() function so that the upstream link can be configured. However, when the upstream device is delayed, we currently may end up probing the module before the upstream device is available, and lose the module_insert() call. Avoid this by holding off probing the module until the SFP bus is properly connected to both the SFP socket driver and the upstream driver. Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/sfp-bus.c | 2 ++ drivers/net/phy/sfp.c | 30 +++++++++++++++++++++--------- drivers/net/phy/sfp.h | 2 ++ 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index ad9db652874d..fef701bfad62 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -347,6 +347,7 @@ static int sfp_register_bus(struct sfp_bus *bus) return ret; } } + bus->socket_ops->attach(bus->sfp); if (bus->started) bus->socket_ops->start(bus->sfp); bus->netdev->sfp_bus = bus; @@ -362,6 +363,7 @@ static void sfp_unregister_bus(struct sfp_bus *bus) if (bus->registered) { if (bus->started) bus->socket_ops->stop(bus->sfp); + bus->socket_ops->detach(bus->sfp); if (bus->phydev && ops && ops->disconnect_phy) ops->disconnect_phy(bus->upstream); } diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index fd8bb998ae52..68c8fbf099f8 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -184,6 +184,7 @@ struct sfp { struct gpio_desc *gpio[GPIO_MAX]; + bool attached; unsigned int state; struct delayed_work poll; struct delayed_work timeout; @@ -1475,7 +1476,7 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event) */ switch (sfp->sm_mod_state) { default: - if (event == SFP_E_INSERT) { + if (event == SFP_E_INSERT && sfp->attached) { sfp_module_tx_disable(sfp); sfp_sm_ins_next(sfp, SFP_MOD_PROBE, T_PROBE_INIT); } @@ -1607,6 +1608,19 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event) mutex_unlock(&sfp->sm_mutex); } +static void sfp_attach(struct sfp *sfp) +{ + sfp->attached = true; + if (sfp->state & SFP_F_PRESENT) + sfp_sm_event(sfp, SFP_E_INSERT); +} + +static void sfp_detach(struct sfp *sfp) +{ + sfp->attached = false; + sfp_sm_event(sfp, SFP_E_REMOVE); +} + static void sfp_start(struct sfp *sfp) { sfp_sm_event(sfp, SFP_E_DEV_UP); @@ -1667,6 +1681,8 @@ static int sfp_module_eeprom(struct sfp *sfp, struct ethtool_eeprom *ee, } static const struct sfp_socket_ops sfp_module_ops = { + .attach = sfp_attach, + .detach = sfp_detach, .start = sfp_start, .stop = sfp_stop, .module_info = sfp_module_info, @@ -1834,10 +1850,6 @@ static int sfp_probe(struct platform_device *pdev) dev_info(sfp->dev, "Host maximum power %u.%uW\n", sfp->max_power_mW / 1000, (sfp->max_power_mW / 100) % 10); - sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops); - if (!sfp->sfp_bus) - return -ENOMEM; - /* Get the initial state, and always signal TX disable, * since the network interface will not be up. */ @@ -1848,10 +1860,6 @@ static int sfp_probe(struct platform_device *pdev) sfp->state |= SFP_F_RATE_SELECT; sfp_set_state(sfp, sfp->state); sfp_module_tx_disable(sfp); - rtnl_lock(); - if (sfp->state & SFP_F_PRESENT) - sfp_sm_event(sfp, SFP_E_INSERT); - rtnl_unlock(); for (i = 0; i < GPIO_MAX; i++) { if (gpio_flags[i] != GPIOD_IN || !sfp->gpio[i]) @@ -1884,6 +1892,10 @@ static int sfp_probe(struct platform_device *pdev) dev_warn(sfp->dev, "No tx_disable pin: SFP modules will always be emitting.\n"); + sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops); + if (!sfp->sfp_bus) + return -ENOMEM; + return 0; } diff --git a/drivers/net/phy/sfp.h b/drivers/net/phy/sfp.h index 31b0acf337e2..64f54b0bbd8c 100644 --- a/drivers/net/phy/sfp.h +++ b/drivers/net/phy/sfp.h @@ -7,6 +7,8 @@ struct sfp; struct sfp_socket_ops { + void (*attach)(struct sfp *sfp); + void (*detach)(struct sfp *sfp); void (*start)(struct sfp *sfp); void (*stop)(struct sfp *sfp); int (*module_info)(struct sfp *sfp, struct ethtool_modinfo *modinfo); From 83ef97d1d35c36bec37af6dea51858809c2af527 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Wed, 6 Feb 2019 18:56:08 +0100 Subject: [PATCH 0848/1436] net/macb: bindings doc/trivial: fix documentation for sama5d3 10/100 interface This removes a line left while adding the correct compatibility string for sama5d3 10/100 interface. Now use the "atmel,sama5d3-macb" string. Signed-off-by: Nicolas Ferre Reviewed-by: Rob Herring Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/macb.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt index 3e17ac1d5d58..f5c414b10e27 100644 --- a/Documentation/devicetree/bindings/net/macb.txt +++ b/Documentation/devicetree/bindings/net/macb.txt @@ -3,8 +3,7 @@ Required properties: - compatible: Should be "cdns,[-]{macb|gem}" Use "cdns,at91rm9200-emac" Atmel at91rm9200 SoC. - Use "cdns,at91sam9260-macb" for Atmel at91sam9 SoCs or the 10/100Mbit IP - available on sama5d3 SoCs. + Use "cdns,at91sam9260-macb" for Atmel at91sam9 SoCs. Use "cdns,np4-macb" for NP4 SoC devices. Use "cdns,at32ap7000-macb" for other 10/100 usage or use the generic form: "cdns,macb". Use "cdns,pc302-gem" for Picochip picoXcell pc302 and later devices based on From 4973a1276ca89c3f1a1fd32b4c955144e04afd5d Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Wed, 6 Feb 2019 18:56:09 +0100 Subject: [PATCH 0849/1436] net/macb: bindings doc: add sam9x60 binding Add the compatibility sting documentation for sam9x60 10/100 interface. Signed-off-by: Nicolas Ferre Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/macb.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt index f5c414b10e27..174f292d8a3e 100644 --- a/Documentation/devicetree/bindings/net/macb.txt +++ b/Documentation/devicetree/bindings/net/macb.txt @@ -4,6 +4,7 @@ Required properties: - compatible: Should be "cdns,[-]{macb|gem}" Use "cdns,at91rm9200-emac" Atmel at91rm9200 SoC. Use "cdns,at91sam9260-macb" for Atmel at91sam9 SoCs. + Use "cdns,sam9x60-macb" for Microchip sam9x60 SoC. Use "cdns,np4-macb" for NP4 SoC devices. Use "cdns,at32ap7000-macb" for other 10/100 usage or use the generic form: "cdns,macb". Use "cdns,pc302-gem" for Picochip picoXcell pc302 and later devices based on From 3e3e0cdfca483064982329cda07394abff653dd0 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Wed, 6 Feb 2019 18:56:10 +0100 Subject: [PATCH 0850/1436] net: macb: add sam9x60-macb compatibility string Add a new compatibility string for this product. It's using at91sam9260-macb layout but has a newer hardware revision: it's safer to use its own string. Signed-off-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 2b2882615e8b..eaabe8c278ec 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3943,6 +3943,7 @@ static const struct of_device_id macb_dt_ids[] = { { .compatible = "cdns,np4-macb", .data = &np4_config }, { .compatible = "cdns,pc302-gem", .data = &pc302gem_config }, { .compatible = "cdns,gem", .data = &pc302gem_config }, + { .compatible = "cdns,sam9x60-macb", .data = &at91sam9260_config }, { .compatible = "atmel,sama5d2-gem", .data = &sama5d2_config }, { .compatible = "atmel,sama5d3-gem", .data = &sama5d3_config }, { .compatible = "atmel,sama5d3-macb", .data = &sama5d3macb_config }, From c09551c6ff7fe16a79a42133bcecba5fc2fc3291 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 6 Feb 2019 19:18:04 +0100 Subject: [PATCH 0851/1436] net: ipv4: use a dedicated counter for icmp_v4 redirect packets According to the algorithm described in the comment block at the beginning of ip_rt_send_redirect, the host should try to send 'ip_rt_redirect_number' ICMP redirect packets with an exponential backoff and then stop sending them at all assuming that the destination ignores redirects. If the device has previously sent some ICMP error packets that are rate-limited (e.g TTL expired) and continues to receive traffic, the redirect packets will never be transmitted. This happens since peer->rate_tokens will be typically greater than 'ip_rt_redirect_number' and so it will never be reset even if the redirect silence timeout (ip_rt_redirect_silence) has elapsed without receiving any packet requiring redirects. Fix it by using a dedicated counter for the number of ICMP redirect packets that has been sent by the host I have not been able to identify a given commit that introduced the issue since ip_rt_send_redirect implements the same rate-limiting algorithm from commit 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- include/net/inetpeer.h | 1 + net/ipv4/inetpeer.c | 1 + net/ipv4/route.c | 7 +++++-- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 00b5e7825508..74ff688568a0 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -39,6 +39,7 @@ struct inet_peer { u32 metrics[RTAX_MAX]; u32 rate_tokens; /* rate limiting for ICMP */ + u32 n_redirects; unsigned long rate_last; /* * Once inet_peer is queued for deletion (refcnt == 0), following field diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index d757b9642d0d..be778599bfed 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -216,6 +216,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, atomic_set(&p->rid, 0); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; + p->n_redirects = 0; /* 60*HZ is arbitrary, but chosen enough high so that the first * calculation of tokens is at its maximum. */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ce92f73cf104..5163b64f8fb3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -887,13 +887,15 @@ void ip_rt_send_redirect(struct sk_buff *skb) /* No redirected packets during ip_rt_redirect_silence; * reset the algorithm. */ - if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) + if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) { peer->rate_tokens = 0; + peer->n_redirects = 0; + } /* Too many ignored redirects; do not send anything * set dst.rate_last to the last seen redirected packet. */ - if (peer->rate_tokens >= ip_rt_redirect_number) { + if (peer->n_redirects >= ip_rt_redirect_number) { peer->rate_last = jiffies; goto out_put_peer; } @@ -910,6 +912,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; ++peer->rate_tokens; + ++peer->n_redirects; #ifdef CONFIG_IP_ROUTE_VERBOSE if (log_martians && peer->rate_tokens == ip_rt_redirect_number) From 93c0970493c71f264e6c3c7caf1ff24a9e1de786 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 6 Feb 2019 19:39:52 +0100 Subject: [PATCH 0852/1436] net: phy: consider latched link-down status in polling mode The link status value latches link-down events. To get the current status we read the register twice in genphy_update_link(). There's a potential risk that we miss a link-down event in polling mode. This may cause issues if the user e.g. connects his machine to a different network. On the other hand reading the latched value may cause issues in interrupt mode. Following scenario: - After boot link goes up - phy_start() is called triggering an aneg restart, hence link goes down and link-down info is latched. - After aneg has finished link goes up and triggers an interrupt. Interrupt handler reads link status, means it reads the latched "link is down" info. But there won't be another interrupt as long as link stays up, therefore phylib will never recognize that link is up. Deal with both scenarios by reading the register twice in interrupt mode only. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy-c45.c | 10 ++++++++-- drivers/net/phy/phy_device.c | 13 +++++++++---- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 6adfe1f6319e..706a92a8c1b2 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -141,9 +141,15 @@ int genphy_c45_read_link(struct phy_device *phydev) mmd_mask &= ~BIT(devad); /* The link state is latched low so that momentary link - * drops can be detected. Do not double-read the status - * register if the link is down. + * drops can be detected. Do not double-read the status + * in polling mode to detect such short link drops. */ + if (!phy_polling_mode(phydev)) { + val = phy_read_mmd(phydev, devad, MDIO_STAT1); + if (val < 0) + return val; + } + val = phy_read_mmd(phydev, devad, MDIO_STAT1); if (val < 0) return val; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 891e0178b97f..d490cd2a8962 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1729,10 +1729,15 @@ int genphy_update_link(struct phy_device *phydev) { int status; - /* Do a fake read */ - status = phy_read(phydev, MII_BMSR); - if (status < 0) - return status; + /* The link state is latched low so that momentary link + * drops can be detected. Do not double-read the status + * in polling mode to detect such short link drops. + */ + if (!phy_polling_mode(phydev)) { + status = phy_read(phydev, MII_BMSR); + if (status < 0) + return status; + } /* Read link and autonegotiation status */ status = phy_read(phydev, MII_BMSR); From 1f03f2609f3d76622389ae5b60a7c692a47bcc1b Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Feb 2019 12:45:57 -0800 Subject: [PATCH 0853/1436] net: dsa: bcm_sf2: Remove stats mutex We no longer need a dedicated statistics mutex since we leverage b53_common for statistics now. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 1 - drivers/net/dsa/bcm_sf2.h | 3 --- 2 files changed, 4 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 361fbde76654..a97b18595be9 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1062,7 +1062,6 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) dev_set_drvdata(&pdev->dev, priv); spin_lock_init(&priv->indir_lock); - mutex_init(&priv->stats_mutex); mutex_init(&priv->cfp.lock); INIT_LIST_HEAD(&priv->cfp.rules_list); diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index faaef320ec48..83e1d8001447 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -87,9 +87,6 @@ struct bcm_sf2_priv { /* Backing b53_device */ struct b53_device *dev; - /* Mutex protecting access to the MIB counters */ - struct mutex stats_mutex; - struct bcm_sf2_hw_params hw_params; struct bcm_sf2_port_status port_sts[DSA_MAX_PORTS]; From badd62c249f3ef2d5af315ac2666256492e0af49 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Feb 2019 12:45:58 -0800 Subject: [PATCH 0854/1436] net: dsa: bcm_sf2: Prepare for adding CFP statistics In preparation for adding CFP statistics, we will need to overlay the standard B53 statistics, so create specific bcm_sf2_sw_* functions to call into their b53_common.c counterpart. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index a97b18595be9..ce5f4260ea1a 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -894,12 +894,30 @@ static const struct b53_io_ops bcm_sf2_io_ops = { .write64 = bcm_sf2_core_write64, }; +static void bcm_sf2_sw_get_strings(struct dsa_switch *ds, int port, + u32 stringset, uint8_t *data) +{ + b53_get_strings(ds, port, stringset, data); +} + +static void bcm_sf2_sw_get_ethtool_stats(struct dsa_switch *ds, int port, + uint64_t *data) +{ + b53_get_ethtool_stats(ds, port, data); +} + +static int bcm_sf2_sw_get_sset_count(struct dsa_switch *ds, int port, + int sset) +{ + return b53_get_sset_count(ds, port, sset); +} + static const struct dsa_switch_ops bcm_sf2_ops = { .get_tag_protocol = b53_get_tag_protocol, .setup = bcm_sf2_sw_setup, - .get_strings = b53_get_strings, - .get_ethtool_stats = b53_get_ethtool_stats, - .get_sset_count = b53_get_sset_count, + .get_strings = bcm_sf2_sw_get_strings, + .get_ethtool_stats = bcm_sf2_sw_get_ethtool_stats, + .get_sset_count = bcm_sf2_sw_get_sset_count, .get_ethtool_phy_stats = b53_get_ethtool_phy_stats, .get_phy_flags = bcm_sf2_sw_get_phy_flags, .phylink_validate = bcm_sf2_sw_validate, From f4ae9c0840b7e1f9bd2177189a45a21e6f11aa8d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Feb 2019 12:45:59 -0800 Subject: [PATCH 0855/1436] net: dsa: bcm_sf2: Add support for CFP statistics Return CFP policer statistics (Green, Yellow or Red) as part of the standard ethtool statistics. This helps debug when CFP rules may not be hit (0 counter). Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 16 ++++++- drivers/net/dsa/bcm_sf2.h | 5 ++ drivers/net/dsa/bcm_sf2_cfp.c | 88 ++++++++++++++++++++++++++++++++++ drivers/net/dsa/bcm_sf2_regs.h | 4 ++ 4 files changed, 112 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index ce5f4260ea1a..5193da67dcdc 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -897,19 +897,33 @@ static const struct b53_io_ops bcm_sf2_io_ops = { static void bcm_sf2_sw_get_strings(struct dsa_switch *ds, int port, u32 stringset, uint8_t *data) { + int cnt = b53_get_sset_count(ds, port, stringset); + b53_get_strings(ds, port, stringset, data); + bcm_sf2_cfp_get_strings(ds, port, stringset, + data + cnt * ETH_GSTRING_LEN); } static void bcm_sf2_sw_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data) { + int cnt = b53_get_sset_count(ds, port, ETH_SS_STATS); + b53_get_ethtool_stats(ds, port, data); + bcm_sf2_cfp_get_ethtool_stats(ds, port, data + cnt); } static int bcm_sf2_sw_get_sset_count(struct dsa_switch *ds, int port, int sset) { - return b53_get_sset_count(ds, port, sset); + int cnt = b53_get_sset_count(ds, port, sset); + + if (cnt < 0) + return cnt; + + cnt += bcm_sf2_cfp_get_sset_count(ds, port, sset); + + return cnt; } static const struct dsa_switch_ops bcm_sf2_ops = { diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index 83e1d8001447..eb3655bea467 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -213,5 +213,10 @@ int bcm_sf2_set_rxnfc(struct dsa_switch *ds, int port, int bcm_sf2_cfp_rst(struct bcm_sf2_priv *priv); void bcm_sf2_cfp_exit(struct dsa_switch *ds); int bcm_sf2_cfp_resume(struct dsa_switch *ds); +void bcm_sf2_cfp_get_strings(struct dsa_switch *ds, int port, + u32 stringset, uint8_t *data); +void bcm_sf2_cfp_get_ethtool_stats(struct dsa_switch *ds, int port, + uint64_t *data); +int bcm_sf2_cfp_get_sset_count(struct dsa_switch *ds, int port, int sset); #endif /* __BCM_SF2_H */ diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index 6d8059dc77b7..8747d18297fa 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -1201,3 +1201,91 @@ int bcm_sf2_cfp_resume(struct dsa_switch *ds) return ret; } + +static const struct bcm_sf2_cfp_stat { + unsigned int offset; + unsigned int ram_loc; + const char *name; +} bcm_sf2_cfp_stats[] = { + { + .offset = CORE_STAT_GREEN_CNTR, + .ram_loc = GREEN_STAT_RAM, + .name = "Green" + }, + { + .offset = CORE_STAT_YELLOW_CNTR, + .ram_loc = YELLOW_STAT_RAM, + .name = "Yellow" + }, + { + .offset = CORE_STAT_RED_CNTR, + .ram_loc = RED_STAT_RAM, + .name = "Red" + }, +}; + +void bcm_sf2_cfp_get_strings(struct dsa_switch *ds, int port, + u32 stringset, uint8_t *data) +{ + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); + unsigned int s = ARRAY_SIZE(bcm_sf2_cfp_stats); + char buf[ETH_GSTRING_LEN]; + unsigned int i, j, iter; + + if (stringset != ETH_SS_STATS) + return; + + for (i = 1; i < priv->num_cfp_rules; i++) { + for (j = 0; j < s; j++) { + snprintf(buf, sizeof(buf), + "CFP%03d_%sCntr", + i, bcm_sf2_cfp_stats[j].name); + iter = (i - 1) * s + j; + strlcpy(data + iter * ETH_GSTRING_LEN, + buf, ETH_GSTRING_LEN); + } + } +} + +void bcm_sf2_cfp_get_ethtool_stats(struct dsa_switch *ds, int port, + uint64_t *data) +{ + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); + unsigned int s = ARRAY_SIZE(bcm_sf2_cfp_stats); + const struct bcm_sf2_cfp_stat *stat; + unsigned int i, j, iter; + struct cfp_rule *rule; + int ret; + + mutex_lock(&priv->cfp.lock); + for (i = 1; i < priv->num_cfp_rules; i++) { + rule = bcm_sf2_cfp_rule_find(priv, port, i); + if (!rule) + continue; + + for (j = 0; j < s; j++) { + stat = &bcm_sf2_cfp_stats[j]; + + bcm_sf2_cfp_rule_addr_set(priv, i); + ret = bcm_sf2_cfp_op(priv, stat->ram_loc | OP_SEL_READ); + if (ret) + continue; + + iter = (i - 1) * s + j; + data[iter] = core_readl(priv, stat->offset); + } + + } + mutex_unlock(&priv->cfp.lock); +} + +int bcm_sf2_cfp_get_sset_count(struct dsa_switch *ds, int port, int sset) +{ + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); + + if (sset != ETH_SS_STATS) + return 0; + + /* 3 counters per CFP rules */ + return (priv->num_cfp_rules - 1) * ARRAY_SIZE(bcm_sf2_cfp_stats); +} diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h index 0a1e530d52b7..67f056206f37 100644 --- a/drivers/net/dsa/bcm_sf2_regs.h +++ b/drivers/net/dsa/bcm_sf2_regs.h @@ -400,6 +400,10 @@ enum bcm_sf2_reg_offs { #define CORE_RATE_METER6 0x281e0 #define CIR_REF_CNT_MASK 0x7ffff +#define CORE_STAT_GREEN_CNTR 0x28200 +#define CORE_STAT_YELLOW_CNTR 0x28210 +#define CORE_STAT_RED_CNTR 0x28220 + #define CORE_CFP_CTL_REG 0x28400 #define CFP_EN_MAP_MASK 0x1ff From db78ed2737b8607744623ba75bd477da8ad8a3cf Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Feb 2019 12:46:00 -0800 Subject: [PATCH 0856/1436] net: dsa: bcm_sf2: Allow looping back CFP rules When the source and destination port of a CFP rule match, we must set the loopback bit enable to allow that, otherwise the frame is discarded. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2_cfp.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index 8747d18297fa..0b9ca4bdf47e 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -213,6 +213,7 @@ static inline unsigned int bcm_sf2_cfp_rule_size(struct bcm_sf2_priv *priv) static int bcm_sf2_cfp_act_pol_set(struct bcm_sf2_priv *priv, unsigned int rule_index, + int src_port, unsigned int port_num, unsigned int queue_num, bool fwd_map_change) @@ -230,6 +231,10 @@ static int bcm_sf2_cfp_act_pol_set(struct bcm_sf2_priv *priv, else reg = 0; + /* Enable looping back to the original port */ + if (src_port == port_num) + reg |= LOOP_BK_EN; + core_writel(priv, reg, CORE_ACT_POL_DATA0); /* Set classification ID that needs to be put in Broadcom tag */ @@ -443,7 +448,7 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, } /* Insert into Action and policer RAMs now */ - ret = bcm_sf2_cfp_act_pol_set(priv, rule_index, port_num, + ret = bcm_sf2_cfp_act_pol_set(priv, rule_index, port, port_num, queue_num, true); if (ret) goto out_err_flow_rule; @@ -733,7 +738,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, } /* Insert into Action and policer RAMs now */ - ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[0], port_num, + ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[0], port, port_num, queue_num, false); if (ret) goto out_err_flow_rule; @@ -795,7 +800,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, /* Insert into Action and policer RAMs now, set chain ID to * the one we are chained to */ - ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[1], port_num, + ret = bcm_sf2_cfp_act_pol_set(priv, rule_index[1], port, port_num, queue_num, true); if (ret) goto out_err_flow_rule; From 225d9464268599a5b4d094d02ec17808e44c7553 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 7 Feb 2019 14:13:18 +0100 Subject: [PATCH 0857/1436] vsock: cope with memory allocation failure at socket creation time In the unlikely event that the kmalloc call in vmci_transport_socket_init() fails, we end-up calling vmci_transport_destruct() with a NULL vmci_trans() and oopsing. This change addresses the above explicitly checking for zero vmci_trans() at destruction time. Reported-by: Xiumei Mu Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") Signed-off-by: Paolo Abeni Reviewed-by: Stefano Garzarella Reviewed-by: Jorgen Hansen Signed-off-by: David S. Miller --- net/vmw_vsock/vmci_transport.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index c361ce782412..c3d5ab01fba7 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1651,6 +1651,10 @@ static void vmci_transport_cleanup(struct work_struct *work) static void vmci_transport_destruct(struct vsock_sock *vsk) { + /* transport can be NULL if we hit a failure at init() time */ + if (!vmci_trans(vsk)) + return; + /* Ensure that the detach callback doesn't use the sk/vsk * we are about to destruct. */ From ccc8ca9b90acb45a3309f922b2591b07b4e070ec Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Feb 2019 14:52:54 +0100 Subject: [PATCH 0858/1436] net/smc: fix byte_order for rx_curs_confirmed The recent change in the rx_curs_confirmed assignment disregards byte order, which causes problems on little endian architectures. This patch fixes it. Fixes: b8649efad879 ("net/smc: fix sender_free computation") (net-tree) Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 4 +--- net/smc/smc_cdc.h | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index a712c9f8699b..fb07ad8d69a6 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -101,9 +101,7 @@ int smc_cdc_msg_send(struct smc_connection *conn, conn->tx_cdc_seq++; conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; - smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, - &conn->local_tx_ctrl, conn); - smc_curs_copy(&cfed, &((struct smc_host_cdc_msg *)wr_buf)->cons, conn); + smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed); rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); if (!rc) smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn); diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 271e2524dc8f..f1cdde9d4b89 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -211,26 +211,27 @@ static inline int smc_curs_diff_large(unsigned int size, static inline void smc_host_cursor_to_cdc(union smc_cdc_cursor *peer, union smc_host_cursor *local, + union smc_host_cursor *save, struct smc_connection *conn) { - union smc_host_cursor temp; - - smc_curs_copy(&temp, local, conn); - peer->count = htonl(temp.count); - peer->wrap = htons(temp.wrap); + smc_curs_copy(save, local, conn); + peer->count = htonl(save->count); + peer->wrap = htons(save->wrap); /* peer->reserved = htons(0); must be ensured by caller */ } static inline void smc_host_msg_to_cdc(struct smc_cdc_msg *peer, - struct smc_host_cdc_msg *local, - struct smc_connection *conn) + struct smc_connection *conn, + union smc_host_cursor *save) { + struct smc_host_cdc_msg *local = &conn->local_tx_ctrl; + peer->common.type = local->common.type; peer->len = local->len; peer->seqno = htons(local->seqno); peer->token = htonl(local->token); - smc_host_cursor_to_cdc(&peer->prod, &local->prod, conn); - smc_host_cursor_to_cdc(&peer->cons, &local->cons, conn); + smc_host_cursor_to_cdc(&peer->prod, &local->prod, save, conn); + smc_host_cursor_to_cdc(&peer->cons, &local->cons, save, conn); peer->prod_flags = local->prod_flags; peer->conn_state_flags = local->conn_state_flags; } From df9c716deb76642d0077770bca7107a31568c113 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Thu, 7 Feb 2019 06:20:11 -0800 Subject: [PATCH 0859/1436] qed: Add API for SmartAN query. The patch adds driver interface to read the SmartAN capability from management firmware. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 1 + drivers/net/ethernet/qlogic/qed/qed_main.c | 2 ++ drivers/net/ethernet/qlogic/qed/qed_mcp.c | 6 ++++++ drivers/net/ethernet/qlogic/qed/qed_mcp.h | 10 ++++++++++ include/linux/qed/qed_if.h | 1 + 5 files changed, 20 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 417121e74ee9..37edaa847512 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -12796,6 +12796,7 @@ struct public_drv_mb { #define FW_MB_PARAM_GET_PF_RDMA_BOTH 0x3 /* get MFW feature support response */ +#define FW_MB_PARAM_FEATURE_SUPPORT_SMARTLINQ 0x00000001 #define FW_MB_PARAM_FEATURE_SUPPORT_EEE 0x00000002 #define FW_MB_PARAM_FEATURE_SUPPORT_VLINK 0x00010000 diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index b47352643fb5..f164d4acebcb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -281,6 +281,8 @@ int qed_fill_dev_info(struct qed_dev *cdev, if (hw_info->b_wol_support == QED_WOL_SUPPORT_PME) dev_info->wol_support = true; + dev_info->smart_an = qed_mcp_is_smart_an_supported(p_hwfn); + dev_info->abs_pf_id = QED_LEADING_HWFN(cdev)->abs_pf_id; } else { qed_vf_get_fw_version(&cdev->hwfns[0], &dev_info->fw_major, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index bb8541847aa5..cc27fd60d689 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -3654,6 +3654,12 @@ void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock, } } +bool qed_mcp_is_smart_an_supported(struct qed_hwfn *p_hwfn) +{ + return !!(p_hwfn->mcp_info->capabilities & + FW_MB_PARAM_FEATURE_SUPPORT_SMARTLINQ); +} + int qed_mcp_get_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 mcp_resp; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 6e1d72a669ae..2799e6741765 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -1148,6 +1148,16 @@ void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock, struct qed_resc_unlock_params *p_unlock, enum qed_resc_lock resource, bool b_is_permanent); + +/** + * @brief - Return whether management firmware support smart AN + * + * @param p_hwfn + * + * @return bool - true if feature is supported. + */ +bool qed_mcp_is_smart_an_supported(struct qed_hwfn *p_hwfn); + /** * @brief Learn of supported MFW features; To be done during early init * diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 35170f74ed80..f6165d304b4d 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -643,6 +643,7 @@ struct qed_dev_info { u16 mtu; bool wol_support; + bool smart_an; /* MBI version */ u32 mbi_version; From f15cff0438d2b44f532e476fff45672a830eaaa7 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Thu, 7 Feb 2019 06:20:12 -0800 Subject: [PATCH 0860/1436] qede: Add ethtool interface for SmartAN query. The patch adds driver support to query SmartAN capability via ethtool. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 16331c6c6fa7..c6238083e898 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -186,11 +186,13 @@ static const struct { enum { QEDE_PRI_FLAG_CMT, + QEDE_PRI_FLAG_SMART_AN_SUPPORT, /* MFW supports SmartAN */ QEDE_PRI_FLAG_LEN, }; static const char qede_private_arr[QEDE_PRI_FLAG_LEN][ETH_GSTRING_LEN] = { "Coupled-Function", + "SmartAN capable", }; enum qede_ethtool_tests { @@ -404,8 +406,15 @@ static int qede_get_sset_count(struct net_device *dev, int stringset) static u32 qede_get_priv_flags(struct net_device *dev) { struct qede_dev *edev = netdev_priv(dev); + u32 flags = 0; - return (!!(edev->dev_info.common.num_hwfns > 1)) << QEDE_PRI_FLAG_CMT; + if (edev->dev_info.common.num_hwfns > 1) + flags |= BIT(QEDE_PRI_FLAG_CMT); + + if (edev->dev_info.common.smart_an) + flags |= BIT(QEDE_PRI_FLAG_SMART_AN_SUPPORT); + + return flags; } struct qede_link_mode_mapping { From 4154b567b6d15a03cc9ac6c263c261da99b1ba32 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 18:58:56 -0600 Subject: [PATCH 0861/1436] bridge: use struct_size() helper One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; size = sizeof(struct foo) + count * sizeof(struct boo); instance = alloc(size, GFP_KERNEL) Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: size = struct_size(instance, entry, count); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 1fb885a33c66..4a048fd1cbea 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1018,8 +1018,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, if (!nsrcs) return -EINVAL; - grec_len = sizeof(*grec) + - sizeof(struct in6_addr) * ntohs(*nsrcs); + grec_len = struct_size(grec, grec_src, ntohs(*nsrcs)); if (!ipv6_mc_may_pull(skb, len + grec_len)) return -EINVAL; From 8fe5756c73822b2891b9316eae0195f1da2a1bb2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 19:02:52 -0600 Subject: [PATCH 0862/1436] net/sched: use struct_size() helper One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; size = sizeof(struct foo) + count * sizeof(struct boo); instance = alloc(size, GFP_KERNEL) Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: size = struct_size(instance, entry, count); instance = alloc(size, GFP_KERNEL) This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/sched/act_pedit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 2b372a06b432..3663d3b615a4 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -406,7 +406,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, struct tcf_t t; int s; - s = sizeof(*opt) + p->tcfp_nkeys * sizeof(struct tc_pedit_key); + s = struct_size(opt, keys, p->tcfp_nkeys); /* netlink spinlocks held above us - must use ATOMIC */ opt = kzalloc(s, GFP_ATOMIC); From b4ba9354cc303c6ae8b223b335cdecea87ac7201 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 19:10:52 -0600 Subject: [PATCH 0863/1436] mpls_iptunnel: use struct_size() helper One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; instance = alloc(sizeof(struct foo) + count * sizeof(struct boo)); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = alloc(struct_size(instance, entry, count)); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/mpls/mpls_iptunnel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 94f53a9b7d1a..dda8930f20e7 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -183,8 +183,8 @@ static int mpls_build_state(struct nlattr *nla, &n_labels, NULL, extack)) return -EINVAL; - newts = lwtunnel_state_alloc(sizeof(*tun_encap_info) + - n_labels * sizeof(u32)); + newts = lwtunnel_state_alloc(struct_size(tun_encap_info, label, + n_labels)); if (!newts) return -ENOMEM; From 33b363e004fdb6abf12a5b0c51dc2c221d352cac Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 19:16:03 -0600 Subject: [PATCH 0864/1436] net: dsa: use struct_size() in devm_kzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; size = sizeof(struct foo) + count * sizeof(struct boo); instance = alloc(size, GFP_KERNEL) Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = alloc(struct_size(instance, entry, count), GFP_KERNEL) Notice that, in this case, variable size is not necessary, hence it is removed. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index a1917025e155..8c431e0f3627 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -767,11 +767,10 @@ static int dsa_switch_probe(struct dsa_switch *ds) struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n) { - size_t size = sizeof(struct dsa_switch) + n * sizeof(struct dsa_port); struct dsa_switch *ds; int i; - ds = devm_kzalloc(dev, size, GFP_KERNEL); + ds = devm_kzalloc(dev, struct_size(ds, ports, n), GFP_KERNEL); if (!ds) return NULL; From fd6d122678054fb10582143de1922e5dab11b1d8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 21:13:13 -0600 Subject: [PATCH 0865/1436] net: usb: cdc-phonet: use struct_size() in alloc_netdev() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; void *entry[]; }; instance = alloc(sizeof(struct foo) + count * sizeof(void *)); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = alloc(struct_size(instance, entry, count)); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/usb/cdc-phonet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c index 78b16eb9e58c..63aaae487995 100644 --- a/drivers/net/usb/cdc-phonet.c +++ b/drivers/net/usb/cdc-phonet.c @@ -361,8 +361,8 @@ static int usbpn_probe(struct usb_interface *intf, const struct usb_device_id *i else return -EINVAL; - dev = alloc_netdev(sizeof(*pnd) + sizeof(pnd->urbs[0]) * rxq_size, - ifname, NET_NAME_UNKNOWN, usbpn_setup); + dev = alloc_netdev(struct_size(pnd, urbs, rxq_size), ifname, + NET_NAME_UNKNOWN, usbpn_setup); if (!dev) return -ENOMEM; From a3deec5b3f13b9807bbf2522b4779e4c07cbad66 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 21:16:48 -0600 Subject: [PATCH 0866/1436] wan: wanxl: use struct_size() in kzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; size = sizeof(struct foo) + count * sizeof(struct boo); instance = alloc(size, GFP_KERNEL) Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = alloc(struct_size(instance, entry, count), GFP_KERNEL) Notice that, in this case, variable alloc_size is not necessary, hence it is removed. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/wan/wanxl.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c index d573a57bc301..459ce52a0a4d 100644 --- a/drivers/net/wan/wanxl.c +++ b/drivers/net/wan/wanxl.c @@ -565,7 +565,7 @@ static int wanxl_pci_init_one(struct pci_dev *pdev, u32 plx_phy; /* PLX PCI base address */ u32 mem_phy; /* memory PCI base addr */ u8 __iomem *mem; /* memory virtual base addr */ - int i, ports, alloc_size; + int i, ports; #ifndef MODULE pr_info_once("%s\n", version); @@ -601,8 +601,7 @@ static int wanxl_pci_init_one(struct pci_dev *pdev, default: ports = 4; } - alloc_size = sizeof(struct card) + ports * sizeof(struct port); - card = kzalloc(alloc_size, GFP_KERNEL); + card = kzalloc(struct_size(card, ports, ports), GFP_KERNEL); if (card == NULL) { pci_release_regions(pdev); pci_disable_device(pdev); From 13644be211bfd9e4685425c5adf7430a04768a06 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 21:22:17 -0600 Subject: [PATCH 0867/1436] wimax/i2400m: use struct_size() helper One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; void *entry[]; }; size = sizeof(struct foo) + count * sizeof(void *); instance = alloc(size, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: size = struct_size(instance, entry, count); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/wimax/i2400m/rx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wimax/i2400m/rx.c b/drivers/net/wimax/i2400m/rx.c index 0b602951ff6b..d28b96d06919 100644 --- a/drivers/net/wimax/i2400m/rx.c +++ b/drivers/net/wimax/i2400m/rx.c @@ -1260,8 +1260,8 @@ int i2400m_rx(struct i2400m *i2400m, struct sk_buff *skb) goto error_msg_hdr_check; result = -EIO; num_pls = le16_to_cpu(msg_hdr->num_pls); - pl_itr = sizeof(*msg_hdr) + /* Check payload descriptor(s) */ - num_pls * sizeof(msg_hdr->pld[0]); + /* Check payload descriptor(s) */ + pl_itr = struct_size(msg_hdr, pld, num_pls); pl_itr = ALIGN(pl_itr, I2400M_PL_ALIGN); if (pl_itr > skb_len) { /* got all the payload descriptors? */ dev_err(dev, "RX: HW BUG? message too short (%u bytes) for " From 370600afdd2e33665c84d06f34e7c223d5379b4a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 21:29:10 -0600 Subject: [PATCH 0868/1436] bnx2x: Use struct_size() in kzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; size = sizeof(struct foo) + count * sizeof(struct boo); instance = kzalloc(size, GFP_KERNEL) Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kzalloc(struct_size(instance, entry, count), GFP_KERNEL) Notice that, in this case, variable fsz is not necessary, hence it is removed. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index 8e0a317b31f7..a9bdc21873d3 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -1654,13 +1654,9 @@ static int bnx2x_vf_mbx_macvlan_list(struct bnx2x *bp, { int i, j; struct bnx2x_vf_mac_vlan_filters *fl = NULL; - size_t fsz; - fsz = tlv->n_mac_vlan_filters * - sizeof(struct bnx2x_vf_mac_vlan_filter) + - sizeof(struct bnx2x_vf_mac_vlan_filters); - - fl = kzalloc(fsz, GFP_KERNEL); + fl = kzalloc(struct_size(fl, filters, tlv->n_mac_vlan_filters), + GFP_KERNEL); if (!fl) return -ENOMEM; From 9e475293cd40fc9c14de63c53b5754f06007059d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 21:42:41 -0600 Subject: [PATCH 0869/1436] mlxsw: spectrum_router: Use struct_size() in kzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; size = sizeof(struct foo) + count * sizeof(struct boo); instance = kzalloc(size, GFP_KERNEL) Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kzalloc(struct_size(instance, entry, count), GFP_KERNEL) Notice that, in this case, variable alloc_size is not necessary, hence it is removed. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 6754061d9b72..818040ce4d68 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3814,13 +3814,11 @@ mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) struct mlxsw_sp_nexthop_group *nh_grp; struct mlxsw_sp_nexthop *nh; struct fib_nh *fib_nh; - size_t alloc_size; int i; int err; - alloc_size = sizeof(*nh_grp) + - fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop); - nh_grp = kzalloc(alloc_size, GFP_KERNEL); + nh_grp = kzalloc(struct_size(nh_grp, nexthops, fi->fib_nhs), + GFP_KERNEL); if (!nh_grp) return ERR_PTR(-ENOMEM); nh_grp->priv = fi; @@ -5066,13 +5064,11 @@ mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp; struct mlxsw_sp_rt6 *mlxsw_sp_rt6; struct mlxsw_sp_nexthop *nh; - size_t alloc_size; int i = 0; int err; - alloc_size = sizeof(*nh_grp) + - fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop); - nh_grp = kzalloc(alloc_size, GFP_KERNEL); + nh_grp = kzalloc(struct_size(nh_grp, nexthops, fib6_entry->nrt6), + GFP_KERNEL); if (!nh_grp) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&nh_grp->fib_list); From af6f12f22b141d755876ab95635619166b1c574e Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 21:47:25 -0600 Subject: [PATCH 0870/1436] nfp: flower: cmsg: use struct_size() helper One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; void *entry[]; }; size = sizeof(struct foo) + count * sizeof(void *); instance = alloc(size, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = alloc(struct_size(instance, entry, count), GFP_KERNEL); Notice that, in this case, variable size is not necessary, hence it is removed. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/cmsg.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index 56b22ea32474..cf9e1118ee8f 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -45,11 +45,9 @@ nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports) { struct nfp_flower_cmsg_mac_repr *msg; struct sk_buff *skb; - unsigned int size; - size = sizeof(*msg) + num_ports * sizeof(msg->ports[0]); - skb = nfp_flower_cmsg_alloc(app, size, NFP_FLOWER_CMSG_TYPE_MAC_REPR, - GFP_KERNEL); + skb = nfp_flower_cmsg_alloc(app, struct_size(msg, ports, num_ports), + NFP_FLOWER_CMSG_TYPE_MAC_REPR, GFP_KERNEL); if (!skb) return NULL; From 9a00536c38e9df8b6d942316888990d849fe1d01 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 21:55:37 -0600 Subject: [PATCH 0871/1436] fm10k: use struct_size() in kzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; size = sizeof(struct foo) + count * sizeof(struct boo); instance = kzalloc(size, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kzalloc(struct_size(instance, entry, count), GFP_KERNEL); Notice that, in this case, variable size is not necessary, hence it is removed. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 6fd15a734324..5a0419421511 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -1603,14 +1603,12 @@ static int fm10k_alloc_q_vector(struct fm10k_intfc *interface, { struct fm10k_q_vector *q_vector; struct fm10k_ring *ring; - int ring_count, size; + int ring_count; ring_count = txr_count + rxr_count; - size = sizeof(struct fm10k_q_vector) + - (sizeof(struct fm10k_ring) * ring_count); /* allocate q_vector and rings */ - q_vector = kzalloc(size, GFP_KERNEL); + q_vector = kzalloc(struct_size(q_vector, ring, ring_count), GFP_KERNEL); if (!q_vector) return -ENOMEM; From c397ab21ba362d5c5215e53f36685a9fffc45f66 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 7 Feb 2019 20:22:20 +0100 Subject: [PATCH 0872/1436] net: phy: don't double-read link status register if link is up The link status register latches link-down events. Therefore, if link is reported as being up, there's no need for a second read. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy-c45.c | 2 ++ drivers/net/phy/phy_device.c | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 706a92a8c1b2..eff9e5a4d831 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -148,6 +148,8 @@ int genphy_c45_read_link(struct phy_device *phydev) val = phy_read_mmd(phydev, devad, MDIO_STAT1); if (val < 0) return val; + else if (val & MDIO_STAT1_LSTATUS) + continue; } val = phy_read_mmd(phydev, devad, MDIO_STAT1); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index d490cd2a8962..9369e1323c39 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1735,8 +1735,12 @@ int genphy_update_link(struct phy_device *phydev) */ if (!phy_polling_mode(phydev)) { status = phy_read(phydev, MII_BMSR); - if (status < 0) + if (status < 0) { return status; + } else if (status & BMSR_LSTATUS) { + phydev->link = 1; + return 0; + } } /* Read link and autonegotiation status */ From a0feac18b8b590d9b588b4285841bac7c372fd8b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 22:15:40 -0600 Subject: [PATCH 0873/1436] igb: use struct_size() helper One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; size = sizeof(struct foo) + count * sizeof(struct boo); instance = alloc(size, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: size = struct_size(instance, entry, count); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igb/igb_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 6d812e96572d..69b230c53fed 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1189,15 +1189,15 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, { struct igb_q_vector *q_vector; struct igb_ring *ring; - int ring_count, size; + int ring_count; + size_t size; /* igb only supports 1 Tx and/or 1 Rx queue per vector */ if (txr_count > 1 || rxr_count > 1) return -ENOMEM; ring_count = txr_count + rxr_count; - size = sizeof(struct igb_q_vector) + - (sizeof(struct igb_ring) * ring_count); + size = struct_size(q_vector, ring, ring_count); /* allocate q_vector and rings */ q_vector = adapter->q_vector[v_idx]; From 196d7311fab5941b1bc883471c7f46350fa1a797 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 22:19:45 -0600 Subject: [PATCH 0874/1436] igc: Use struct_size() helper One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; size = sizeof(struct foo) + count * sizeof(struct boo); instance = kzalloc(size, GFP_KERNEL) Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kzalloc(struct_size(instance, entry, count), GFP_KERNEL) Notice that, in this case, variable size is not necessary, hence it is removed. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igc/igc_main.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 11c75433fe22..87a11879bf2d 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2958,22 +2958,21 @@ static int igc_alloc_q_vector(struct igc_adapter *adapter, { struct igc_q_vector *q_vector; struct igc_ring *ring; - int ring_count, size; + int ring_count; /* igc only supports 1 Tx and/or 1 Rx queue per vector */ if (txr_count > 1 || rxr_count > 1) return -ENOMEM; ring_count = txr_count + rxr_count; - size = sizeof(struct igc_q_vector) + - (sizeof(struct igc_ring) * ring_count); /* allocate q_vector and rings */ q_vector = adapter->q_vector[v_idx]; if (!q_vector) - q_vector = kzalloc(size, GFP_KERNEL); + q_vector = kzalloc(struct_size(q_vector, ring, ring_count), + GFP_KERNEL); else - memset(q_vector, 0, size); + memset(q_vector, 0, struct_size(q_vector, ring, ring_count)); if (!q_vector) return -ENOMEM; From 439bb9edd430f606cce80b791307190ba23f7266 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 22:22:58 -0600 Subject: [PATCH 0875/1436] ixgbe: Use struct_size() helper One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; size = sizeof(struct foo) + count * sizeof(struct boo); instance = kzalloc(size, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kzalloc(struct_size(instance, entry, count), GFP_KERNEL); Notice that, in this case, variable size is not necessary, hence it is removed. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index 62e6499e4146..cc3196ae5aea 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -836,12 +836,10 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, struct ixgbe_ring *ring; int node = NUMA_NO_NODE; int cpu = -1; - int ring_count, size; + int ring_count; u8 tcs = adapter->hw_tcs; ring_count = txr_count + rxr_count + xdp_count; - size = sizeof(struct ixgbe_q_vector) + - (sizeof(struct ixgbe_ring) * ring_count); /* customize cpu for Flow Director mapping */ if ((tcs <= 1) && !(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) { @@ -855,9 +853,11 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, } /* allocate q_vector and rings */ - q_vector = kzalloc_node(size, GFP_KERNEL, node); + q_vector = kzalloc_node(struct_size(q_vector, ring, ring_count), + GFP_KERNEL, node); if (!q_vector) - q_vector = kzalloc(size, GFP_KERNEL); + q_vector = kzalloc(struct_size(q_vector, ring, ring_count), + GFP_KERNEL); if (!q_vector) return -ENOMEM; From 8b34ec65b3d070c492cc6aca960097ae74d6143a Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 7 Feb 2019 21:46:53 -0700 Subject: [PATCH 0876/1436] ethtool: Remove unnecessary null check in ethtool_rx_flow_rule_create net/core/ethtool.c:3023:19: warning: address of array 'ext_m_spec->h_dest' will always evaluate to 'true' [-Wpointer-bool-conversion] if (ext_m_spec->h_dest) { ~~ ~~~~~~~~~~~~^~~~~~ h_dest is an array, it can't be null so remove this check. Fixes: eca4205f9ec3 ("ethtool: add ethtool_rx_flow_spec to flow_rule structure translator") Link: https://github.com/ClangBuiltLinux/linux/issues/353 Signed-off-by: Nathan Chancellor Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/core/ethtool.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 0fbf39239b29..d2c47cdf25da 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -3020,17 +3020,15 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input) const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext; const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext; - if (ext_m_spec->h_dest) { - memcpy(match->key.eth_addrs.dst, ext_h_spec->h_dest, - ETH_ALEN); - memcpy(match->mask.eth_addrs.dst, ext_m_spec->h_dest, - ETH_ALEN); + memcpy(match->key.eth_addrs.dst, ext_h_spec->h_dest, + ETH_ALEN); + memcpy(match->mask.eth_addrs.dst, ext_m_spec->h_dest, + ETH_ALEN); - match->dissector.used_keys |= - BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS); - match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] = - offsetof(struct ethtool_rx_flow_key, eth_addrs); - } + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS); + match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] = + offsetof(struct ethtool_rx_flow_key, eth_addrs); } act = &flow->rule->action.entries[0]; From 388ca27ffd21c6b31d6378d4b24b7c49066848a0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 8 Feb 2019 15:35:38 +0000 Subject: [PATCH 0877/1436] net: marvell: mvpp2: phylink compliance updates Sven Auhagen reported issues with negotiation on a couple of his platforms using a mixture of SFP and PHYs in various different modes. Debugging to root cause proved difficult, but essentially the problem comes down to the mvpp2 phylink implementation being slightly at odds with what is expected. phylink operates in three modes: phy, fixed-link, and in-band mode. In the first two modes, the expected behaviour from a MAC driver is that phylink resolves the operating mode and passes the mode to the MAC driver for it to program, including when the link should be brought up or taken down. This is basically the same as the libphy approach. This does not negate the requirement to advertise a correct control word for interface modes that have control words where that can be reasonably controlled. The second mode is in-band mode, where the MAC is expected to use the in-band control word to determine the operating mode. The mvneta driver implements the correct pattern required to support this: configure the port interface type separately from the in-band mode(s). This is now specified in the phylink documentation patches. mvpp2 was programming in-band mode for SGMII and the 802.3z modes no what, and avoided forcing the link up in fixed/phy modes. This caused a problem with some boards where the PHY is by default programmed to enter AN bypass mode, the PHY would report that the link was up, but the mvpp2 never completed the exchange of control word. Another issue that mvpp2 has is it sets SGMII AN format control word for both SGMII and 802.3z modes. The format of the control word is defined by MVPP2_GMAC_INBAND_AN_MASK, which should be set for SGMII and clear for 802.3z. Available Marvell documentation for earlier GMAC implementations does not make this clear, but this has been ascertained via extensive testing on earlier GMAC implementations, and then confirmed with a Macchiatobin Single Shot connected to a Clearfog: when MVPP2_GMAC_INBAND_AN_MASK is set, the clearfog does not receive the advertised pause mode settings. Lastly, there is no flow control in the in-band control word in Cisco SGMII, setting the flow control autonegotiation bit even with a PHY that has the Marvell extension to send this information does not result in the flow control being enabled at the MAC. We need to do this manually using the information provided via phylink. Re-code mvpp2's mac_config() and mac_link_up() to follow this pattern. This allows Sven Auhagen's board and Macchiatobin to reliably bring the link up with the 88e1512 PHY with phylink operating in PHY mode with COMPHY built as a module but the rest of the networking built-in, and u-boot having brought up the interface. in-band mode requires an additional patch to resolve another problem. Tested-by: Sven Auhagen Signed-off-by: Russell King Signed-off-by: David S. Miller --- .../net/ethernet/marvell/mvpp2/mvpp2_main.c | 109 +++++++++++------- 1 file changed, 67 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 5c0c26fd8363..f16ba7817f62 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4557,58 +4557,84 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode, an &= ~(MVPP2_GMAC_CONFIG_MII_SPEED | MVPP2_GMAC_CONFIG_GMII_SPEED | MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FC_ADV_EN | MVPP2_GMAC_FC_ADV_ASM_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG | - MVPP2_GMAC_CONFIG_FULL_DUPLEX | MVPP2_GMAC_AN_DUPLEX_EN | - MVPP2_GMAC_FORCE_LINK_DOWN); + MVPP2_GMAC_CONFIG_FULL_DUPLEX | MVPP2_GMAC_AN_DUPLEX_EN); ctrl0 &= ~MVPP2_GMAC_PORT_TYPE_MASK; - ctrl2 &= ~(MVPP2_GMAC_PORT_RESET_MASK | MVPP2_GMAC_PCS_ENABLE_MASK); + ctrl2 &= ~(MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PORT_RESET_MASK | + MVPP2_GMAC_PCS_ENABLE_MASK); + ctrl4 &= ~(MVPP22_CTRL4_RX_FC_EN | MVPP22_CTRL4_TX_FC_EN); + /* Configure port type */ if (phy_interface_mode_is_8023z(state->interface)) { + ctrl2 |= MVPP2_GMAC_PCS_ENABLE_MASK; + ctrl4 &= ~MVPP22_CTRL4_EXT_PIN_GMII_SEL; + ctrl4 |= MVPP22_CTRL4_SYNC_BYPASS_DIS | + MVPP22_CTRL4_DP_CLK_SEL | + MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE; + } else if (state->interface == PHY_INTERFACE_MODE_SGMII) { + ctrl2 |= MVPP2_GMAC_PCS_ENABLE_MASK | MVPP2_GMAC_INBAND_AN_MASK; + ctrl4 &= ~MVPP22_CTRL4_EXT_PIN_GMII_SEL; + ctrl4 |= MVPP22_CTRL4_SYNC_BYPASS_DIS | + MVPP22_CTRL4_DP_CLK_SEL | + MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE; + } else if (phy_interface_mode_is_rgmii(state->interface)) { + ctrl4 &= ~MVPP22_CTRL4_DP_CLK_SEL; + ctrl4 |= MVPP22_CTRL4_EXT_PIN_GMII_SEL | + MVPP22_CTRL4_SYNC_BYPASS_DIS | + MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE; + } + + /* Configure advertisement bits */ + if (phylink_test(state->advertising, Pause)) + an |= MVPP2_GMAC_FC_ADV_EN; + if (phylink_test(state->advertising, Asym_Pause)) + an |= MVPP2_GMAC_FC_ADV_ASM_EN; + + /* Configure negotiation style */ + if (!phylink_autoneg_inband(mode)) { + /* Phy or fixed speed - no in-band AN */ + if (state->duplex) + an |= MVPP2_GMAC_CONFIG_FULL_DUPLEX; + + if (state->speed == SPEED_1000 || state->speed == SPEED_2500) + an |= MVPP2_GMAC_CONFIG_GMII_SPEED; + else if (state->speed == SPEED_100) + an |= MVPP2_GMAC_CONFIG_MII_SPEED; + + if (state->pause & MLO_PAUSE_TX) + ctrl4 |= MVPP22_CTRL4_TX_FC_EN; + if (state->pause & MLO_PAUSE_RX) + ctrl4 |= MVPP22_CTRL4_RX_FC_EN; + } else if (state->interface == PHY_INTERFACE_MODE_SGMII) { + /* SGMII in-band mode receives the speed and duplex from + * the PHY. Flow control information is not received. */ + an &= ~(MVPP2_GMAC_FORCE_LINK_DOWN | MVPP2_GMAC_FORCE_LINK_PASS); + an |= MVPP2_GMAC_IN_BAND_AUTONEG | + MVPP2_GMAC_AN_SPEED_EN | + MVPP2_GMAC_AN_DUPLEX_EN; + + if (state->pause & MLO_PAUSE_TX) + ctrl4 |= MVPP22_CTRL4_TX_FC_EN; + if (state->pause & MLO_PAUSE_RX) + ctrl4 |= MVPP22_CTRL4_RX_FC_EN; + } else if (phy_interface_mode_is_8023z(state->interface)) { /* 1000BaseX and 2500BaseX ports cannot negotiate speed nor can * they negotiate duplex: they are always operating with a fixed * speed of 1000/2500Mbps in full duplex, so force 1000/2500 * speed and full duplex here. */ ctrl0 |= MVPP2_GMAC_PORT_TYPE_MASK; + an &= ~(MVPP2_GMAC_FORCE_LINK_DOWN | MVPP2_GMAC_FORCE_LINK_PASS); an |= MVPP2_GMAC_CONFIG_GMII_SPEED | MVPP2_GMAC_CONFIG_FULL_DUPLEX; - } else if (!phy_interface_mode_is_rgmii(state->interface)) { - an |= MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG; - } - if (state->duplex) - an |= MVPP2_GMAC_CONFIG_FULL_DUPLEX; - if (phylink_test(state->advertising, Pause)) - an |= MVPP2_GMAC_FC_ADV_EN; - if (phylink_test(state->advertising, Asym_Pause)) - an |= MVPP2_GMAC_FC_ADV_ASM_EN; - - if (phy_interface_mode_is_8023z(state->interface) || - state->interface == PHY_INTERFACE_MODE_SGMII) { - an |= MVPP2_GMAC_IN_BAND_AUTONEG; - ctrl2 |= MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PCS_ENABLE_MASK; - - ctrl4 &= ~(MVPP22_CTRL4_EXT_PIN_GMII_SEL | - MVPP22_CTRL4_RX_FC_EN | MVPP22_CTRL4_TX_FC_EN); - ctrl4 |= MVPP22_CTRL4_SYNC_BYPASS_DIS | - MVPP22_CTRL4_DP_CLK_SEL | - MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE; - - if (state->pause & MLO_PAUSE_TX) - ctrl4 |= MVPP22_CTRL4_TX_FC_EN; - if (state->pause & MLO_PAUSE_RX) - ctrl4 |= MVPP22_CTRL4_RX_FC_EN; - } else if (phy_interface_mode_is_rgmii(state->interface)) { - an |= MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS; - - if (state->speed == SPEED_1000) - an |= MVPP2_GMAC_CONFIG_GMII_SPEED; - else if (state->speed == SPEED_100) - an |= MVPP2_GMAC_CONFIG_MII_SPEED; - - ctrl4 &= ~MVPP22_CTRL4_DP_CLK_SEL; - ctrl4 |= MVPP22_CTRL4_EXT_PIN_GMII_SEL | - MVPP22_CTRL4_SYNC_BYPASS_DIS | - MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE; + if (state->pause & MLO_PAUSE_AN && state->an_enabled) { + an |= MVPP2_GMAC_FLOW_CTRL_AUTONEG; + } else { + if (state->pause & MLO_PAUSE_TX) + ctrl4 |= MVPP22_CTRL4_TX_FC_EN; + if (state->pause & MLO_PAUSE_RX) + ctrl4 |= MVPP22_CTRL4_RX_FC_EN; + } } writel(ctrl0, port->base + MVPP2_GMAC_CTRL_0_REG); @@ -4670,8 +4696,7 @@ static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode, interface != PHY_INTERFACE_MODE_10GKR) { val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); val &= ~MVPP2_GMAC_FORCE_LINK_DOWN; - if (phy_interface_mode_is_rgmii(interface)) - val |= MVPP2_GMAC_FORCE_LINK_PASS; + val |= MVPP2_GMAC_FORCE_LINK_PASS; writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG); } From 316734fdcf70900a83065360cff11a5826919067 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 8 Feb 2019 15:35:43 +0000 Subject: [PATCH 0878/1436] net: marvell: mvpp2: fix stuck in-band SGMII negotiation It appears that the mvpp22 can get stuck with SGMII negotiation. The symptoms are that in-band negotiation never completes and the partner (eg, PHY) never reports SGMII link up, or if it supports negotiation bypass, goes into negotiation bypass mode (which will happen when the PHY sees that the MAC is alive but gets no response.) Triggering the PHY end of the link to re-negotiate results in the bypass bit clearing on the PHY, and then re-setting - indicating that the problem is at the mvpp22 GMAC end. Asserting the GMAC reset and de-asserting it resolves the issue. Arrange to assert the GMAC reset at probe time, and deassert it only after we have configured the GMAC for the appropriate mode. This resolves the issue. Tested-by: Sven Auhagen Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index f16ba7817f62..7a60456d91be 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -1377,13 +1377,9 @@ static void mvpp2_port_reset(struct mvpp2_port *port) for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++) mvpp2_read_count(port, &mvpp2_ethtool_regs[i]); - val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) & - ~MVPP2_GMAC_PORT_RESET_MASK; + val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) | + MVPP2_GMAC_PORT_RESET_MASK; writel(val, port->base + MVPP2_GMAC_CTRL_2_REG); - - while (readl(port->base + MVPP2_GMAC_CTRL_2_REG) & - MVPP2_GMAC_PORT_RESET_MASK) - continue; } /* Change maximum receive size of the port */ @@ -4539,12 +4535,15 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode, const struct phylink_link_state *state) { u32 an, ctrl0, ctrl2, ctrl4; + u32 old_ctrl2; an = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); ctrl0 = readl(port->base + MVPP2_GMAC_CTRL_0_REG); ctrl2 = readl(port->base + MVPP2_GMAC_CTRL_2_REG); ctrl4 = readl(port->base + MVPP22_GMAC_CTRL_4_REG); + old_ctrl2 = ctrl2; + /* Force link down */ an &= ~MVPP2_GMAC_FORCE_LINK_PASS; an |= MVPP2_GMAC_FORCE_LINK_DOWN; @@ -4641,6 +4640,12 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode, writel(ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG); writel(ctrl4, port->base + MVPP22_GMAC_CTRL_4_REG); writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG); + + if (old_ctrl2 & MVPP2_GMAC_PORT_RESET_MASK) { + while (readl(port->base + MVPP2_GMAC_CTRL_2_REG) & + MVPP2_GMAC_PORT_RESET_MASK) + continue; + } } static void mvpp2_mac_config(struct net_device *dev, unsigned int mode, From d14e078f23cc9711acd299c91e77076c3f1ddddb Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 8 Feb 2019 15:35:49 +0000 Subject: [PATCH 0879/1436] net: marvell: mvpp2: only reprogram what is necessary on mac_config mac_config() can be called at any point, and the expected behaviour from MAC drivers is to only reprogram when necessary - and certainly avoid taking the link down on every call. Unfortunately, mvpp2 does exactly that - it takes the link down, and reprograms everything, and then releases the forced-link down. This is bad, it can cause the link to bounce: - SFP detects signal, disables LOS indication. - SFP code calls into phylink, calling phylink_sfp_link_up() which triggers a resolve. - phylink_resolve() calls phylink_get_mac_state() and finds the MAC reporting link up. - phylink wants to configure the pause mode on the MAC, so calls phylink_mac_config() - mvpp2 takes the link down temporarily, generating a MAC link down event followed by another MAC link event. - phylink calls mac_link_up() and then processes the MAC link down event. - phylink_resolve() gets called again, registers the link down, and calls mach_link_down() before re-running itself. - phylink_resolve() starts again at step 3 above. This sequence repeats. GMAC versions prior to mvpp2 do not require the link to be taken down except when certain link properties (eg, switching between SGMII and 1000base-X mode, or enabling/disabling in-band negotiation) are changed. Implement this for mvpp2. Tested-by: Sven Auhagen Signed-off-by: Russell King Signed-off-by: David S. Miller --- .../net/ethernet/marvell/mvpp2/mvpp2_main.c | 58 +++++++++++-------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 7a60456d91be..133a6d4f94a1 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4534,29 +4534,21 @@ static void mvpp2_xlg_config(struct mvpp2_port *port, unsigned int mode, static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode, const struct phylink_link_state *state) { - u32 an, ctrl0, ctrl2, ctrl4; - u32 old_ctrl2; + u32 old_an, an; + u32 old_ctrl0, ctrl0; + u32 old_ctrl2, ctrl2; + u32 old_ctrl4, ctrl4; - an = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); - ctrl0 = readl(port->base + MVPP2_GMAC_CTRL_0_REG); - ctrl2 = readl(port->base + MVPP2_GMAC_CTRL_2_REG); - ctrl4 = readl(port->base + MVPP22_GMAC_CTRL_4_REG); - - old_ctrl2 = ctrl2; - - /* Force link down */ - an &= ~MVPP2_GMAC_FORCE_LINK_PASS; - an |= MVPP2_GMAC_FORCE_LINK_DOWN; - writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG); - - /* Set the GMAC in a reset state */ - ctrl2 |= MVPP2_GMAC_PORT_RESET_MASK; - writel(ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG); + old_an = an = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); + old_ctrl0 = ctrl0 = readl(port->base + MVPP2_GMAC_CTRL_0_REG); + old_ctrl2 = ctrl2 = readl(port->base + MVPP2_GMAC_CTRL_2_REG); + old_ctrl4 = ctrl4 = readl(port->base + MVPP22_GMAC_CTRL_4_REG); an &= ~(MVPP2_GMAC_CONFIG_MII_SPEED | MVPP2_GMAC_CONFIG_GMII_SPEED | MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FC_ADV_EN | MVPP2_GMAC_FC_ADV_ASM_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG | - MVPP2_GMAC_CONFIG_FULL_DUPLEX | MVPP2_GMAC_AN_DUPLEX_EN); + MVPP2_GMAC_CONFIG_FULL_DUPLEX | MVPP2_GMAC_AN_DUPLEX_EN | + MVPP2_GMAC_IN_BAND_AUTONEG | MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS); ctrl0 &= ~MVPP2_GMAC_PORT_TYPE_MASK; ctrl2 &= ~(MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PORT_RESET_MASK | MVPP2_GMAC_PCS_ENABLE_MASK); @@ -4623,7 +4615,8 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode, */ ctrl0 |= MVPP2_GMAC_PORT_TYPE_MASK; an &= ~(MVPP2_GMAC_FORCE_LINK_DOWN | MVPP2_GMAC_FORCE_LINK_PASS); - an |= MVPP2_GMAC_CONFIG_GMII_SPEED | + an |= MVPP2_GMAC_IN_BAND_AUTONEG | + MVPP2_GMAC_CONFIG_GMII_SPEED | MVPP2_GMAC_CONFIG_FULL_DUPLEX; if (state->pause & MLO_PAUSE_AN && state->an_enabled) { @@ -4636,10 +4629,29 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode, } } - writel(ctrl0, port->base + MVPP2_GMAC_CTRL_0_REG); - writel(ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG); - writel(ctrl4, port->base + MVPP22_GMAC_CTRL_4_REG); - writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG); + if ((old_ctrl0 ^ ctrl0) & MVPP2_GMAC_PORT_TYPE_MASK || + (old_ctrl2 ^ ctrl2) & MVPP2_GMAC_INBAND_AN_MASK || + (old_an ^ an) & MVPP2_GMAC_IN_BAND_AUTONEG) { + /* Force link down */ + old_an &= ~MVPP2_GMAC_FORCE_LINK_PASS; + old_an |= MVPP2_GMAC_FORCE_LINK_DOWN; + writel(old_an, port->base + MVPP2_GMAC_AUTONEG_CONFIG); + + /* Set the GMAC in a reset state - do this in a way that + * ensures we clear it below. + */ + old_ctrl2 |= MVPP2_GMAC_PORT_RESET_MASK; + writel(old_ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG); + } + + if (old_ctrl0 != ctrl0) + writel(ctrl0, port->base + MVPP2_GMAC_CTRL_0_REG); + if (old_ctrl2 != ctrl2) + writel(ctrl2, port->base + MVPP2_GMAC_CTRL_2_REG); + if (old_ctrl4 != ctrl4) + writel(ctrl4, port->base + MVPP22_GMAC_CTRL_4_REG); + if (old_an != an) + writel(an, port->base + MVPP2_GMAC_AUTONEG_CONFIG); if (old_ctrl2 & MVPP2_GMAC_PORT_RESET_MASK) { while (readl(port->base + MVPP2_GMAC_CTRL_2_REG) & From 417f3d08feffe3be29a5f868bcf817b476539338 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 8 Feb 2019 15:35:54 +0000 Subject: [PATCH 0880/1436] net: marvell: mvpp2: read correct pause bits When reading the pause bits in mac_link_state, mvpp2 was reporting the state of the "active pause" bits, which are set when the MAC is in pause mode. This is not what phylink wants - we want the negotiated pause state. Fix the definition so we read the correct bits. Tested-by: Sven Auhagen Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index 398328f10743..96e3f0669032 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -402,8 +402,8 @@ #define MVPP2_GMAC_STATUS0_GMII_SPEED BIT(1) #define MVPP2_GMAC_STATUS0_MII_SPEED BIT(2) #define MVPP2_GMAC_STATUS0_FULL_DUPLEX BIT(3) -#define MVPP2_GMAC_STATUS0_RX_PAUSE BIT(6) -#define MVPP2_GMAC_STATUS0_TX_PAUSE BIT(7) +#define MVPP2_GMAC_STATUS0_RX_PAUSE BIT(4) +#define MVPP2_GMAC_STATUS0_TX_PAUSE BIT(5) #define MVPP2_GMAC_STATUS0_AN_COMPLETE BIT(11) #define MVPP2_GMAC_PORT_FIFO_CFG_1_REG 0x1c #define MVPP2_GMAC_TX_FIFO_MIN_TH_OFFS 6 From a4650477179017f47706c3cc7641927326bb663c Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 8 Feb 2019 15:35:59 +0000 Subject: [PATCH 0881/1436] net: marvell: mvpp2: fix AN restart phylink already limits which interface modes are able to call the MACs AN restart function, but in any case, the commentry seems incorrect: the AN restart bit does not automatically clear when set. This has been found via manual setting using devmem2, and we can observe that the AN does indeed restart and complete, yet the AN restart bit remains set. Explicitly clear the AN restart bit. Tested-by: Sven Auhagen Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 133a6d4f94a1..8a2dd9104e37 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4497,17 +4497,12 @@ static int mvpp2_phylink_mac_link_state(struct net_device *dev, static void mvpp2_mac_an_restart(struct net_device *dev) { struct mvpp2_port *port = netdev_priv(dev); - u32 val; + u32 val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); - if (port->phy_interface != PHY_INTERFACE_MODE_SGMII) - return; - - val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); - /* The RESTART_AN bit is cleared by the h/w after restarting the AN - * process. - */ - val |= MVPP2_GMAC_IN_BAND_RESTART_AN | MVPP2_GMAC_IN_BAND_AUTONEG; - writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG); + writel(val | MVPP2_GMAC_IN_BAND_RESTART_AN, + port->base + MVPP2_GMAC_AUTONEG_CONFIG); + writel(val & ~MVPP2_GMAC_IN_BAND_RESTART_AN, + port->base + MVPP2_GMAC_AUTONEG_CONFIG); } static void mvpp2_xlg_config(struct mvpp2_port *port, unsigned int mode, From 3b5e74e0afe3382f9354b657714ac40673b7c597 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 8 Feb 2019 19:25:22 +0100 Subject: [PATCH 0882/1436] net: phy: disregard "Clause 22 registers present" bit in get_phy_c45_devs_in_pkg Bit 0 in register 1.5 doesn't represent a device but is a flag that Clause 22 registers are present. Therefore disregard this bit when populating the device list. If code needs this information it should read register 1.5 directly instead of accessing the device list. Because this bit doesn't represent a device don't define a MDIO_MMD_XYZ constant, just define a MDIO_DEVS_XYZ constant for the flag in the device list bitmap. v2: - make masking of bit 0 more explicit - improve commit message Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 3 +++ include/uapi/linux/mdio.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 9369e1323c39..d4fc1fd8af41 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -684,6 +684,9 @@ static int get_phy_c45_devs_in_pkg(struct mii_bus *bus, int addr, int dev_addr, return -EIO; *devices_in_package |= (phy_reg & 0xffff); + /* Bit 0 doesn't represent a device, it indicates c22 regs presence */ + *devices_in_package &= ~BIT(0); + return 0; } diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index 2e6e309f0847..0e012b168e4d 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -115,6 +115,7 @@ /* Device present registers. */ #define MDIO_DEVS_PRESENT(devad) (1 << (devad)) +#define MDIO_DEVS_C22PRESENT MDIO_DEVS_PRESENT(0) #define MDIO_DEVS_PMAPMD MDIO_DEVS_PRESENT(MDIO_MMD_PMAPMD) #define MDIO_DEVS_WIS MDIO_DEVS_PRESENT(MDIO_MMD_WIS) #define MDIO_DEVS_PCS MDIO_DEVS_PRESENT(MDIO_MMD_PCS) From 180cf62cec0418a64dade18b3575047af46c6335 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 9 Feb 2019 00:05:36 +0100 Subject: [PATCH 0883/1436] batman-adv: Fix typo "reseved" -> "reserved" checkpatch.pl complains since commit 45e417022023 ("scripts/spelling.txt: add more spellings to spelling.txt") about an additional spelling mistake in batman-adv:` CHECK: 'reseved' may be misspelled - perhaps 'reserved'? #232: FILE: include/uapi/linux/batadv_packet.h:232: + * @flags: reseved for routing relevant flags - currently always 0 Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batadv_packet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h index 7eb2936a8e22..c99336f4eefe 100644 --- a/include/uapi/linux/batadv_packet.h +++ b/include/uapi/linux/batadv_packet.h @@ -229,7 +229,7 @@ struct batadv_ogm_packet { * @packet_type: batman-adv packet type, part of the general header * @version: batman-adv protocol version, part of the general header * @ttl: time to live for this packet, part of the general header - * @flags: reseved for routing relevant flags - currently always 0 + * @flags: reserved for routing relevant flags - currently always 0 * @seqno: sequence number * @orig: originator mac address * @tvlv_len: length of the appended tvlv buffer (in bytes) From c4a7a8d9bb8f9e712d0e4c90c3d11b4388b621ba Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 12:00:28 +0100 Subject: [PATCH 0884/1436] batman-adv: Move common genl doit code pre/post hooks The commit ff4c92d85c6f ("genetlink: introduce pre_doit/post_doit hooks") intoduced a mechanism to run specific code for doit hooks before/after the hooks are run. Since all doit hooks are requiring the batadv softif, it should be retrieved/freed in these helpers to simplify the code. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/netlink.c | 143 +++++++++++++++++++++++++++------------ 1 file changed, 100 insertions(+), 43 deletions(-) diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 5fe833cc9507..e56c40d1bfe0 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -20,8 +20,10 @@ #include "main.h" #include +#include #include #include +#include #include #include #include @@ -54,6 +56,8 @@ #include "tp_meter.h" #include "translation-table.h" +struct net; + struct genl_family batadv_netlink_family; /* multicast groups */ @@ -61,6 +65,18 @@ enum batadv_netlink_multicast_groups { BATADV_NL_MCGRP_TPMETER, }; +/** + * enum batadv_genl_ops_flags - flags for genl_ops's internal_flags + */ +enum batadv_genl_ops_flags { + /** + * @BATADV_FLAG_NEED_MESH: request requires valid soft interface in + * attribute BATADV_ATTR_MESH_IFINDEX and expects a pointer to it to be + * saved in info->user_ptr[0] + */ + BATADV_FLAG_NEED_MESH = BIT(0), +}; + static const struct genl_multicast_group batadv_netlink_mcgrps[] = { [BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER }, }; @@ -329,40 +345,24 @@ err_genlmsg: static int batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info) { - struct net *net = genl_info_net(info); - struct net_device *soft_iface; - struct batadv_priv *bat_priv; + struct batadv_priv *bat_priv = info->user_ptr[0]; struct sk_buff *msg = NULL; u32 test_length; void *msg_head; - int ifindex; u32 cookie; u8 *dst; int ret; - if (!info->attrs[BATADV_ATTR_MESH_IFINDEX]) - return -EINVAL; - if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS]) return -EINVAL; if (!info->attrs[BATADV_ATTR_TPMETER_TEST_TIME]) return -EINVAL; - ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); - if (!ifindex) - return -EINVAL; - dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]); test_length = nla_get_u32(info->attrs[BATADV_ATTR_TPMETER_TEST_TIME]); - soft_iface = dev_get_by_index(net, ifindex); - if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { - ret = -ENODEV; - goto out; - } - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; @@ -377,15 +377,11 @@ batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info) goto out; } - bat_priv = netdev_priv(soft_iface); batadv_tp_start(bat_priv, dst, test_length, &cookie); ret = batadv_netlink_tp_meter_put(msg, cookie); out: - if (soft_iface) - dev_put(soft_iface); - if (ret) { if (msg) nlmsg_free(msg); @@ -406,38 +402,17 @@ batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info) static int batadv_netlink_tp_meter_cancel(struct sk_buff *skb, struct genl_info *info) { - struct net *net = genl_info_net(info); - struct net_device *soft_iface; - struct batadv_priv *bat_priv; - int ifindex; + struct batadv_priv *bat_priv = info->user_ptr[0]; u8 *dst; int ret = 0; - if (!info->attrs[BATADV_ATTR_MESH_IFINDEX]) - return -EINVAL; - if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS]) return -EINVAL; - ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); - if (!ifindex) - return -EINVAL; - dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]); - soft_iface = dev_get_by_index(net, ifindex); - if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { - ret = -ENODEV; - goto out; - } - - bat_priv = netdev_priv(soft_iface); batadv_tp_stop(bat_priv, dst, BATADV_TP_REASON_CANCEL); -out: - if (soft_iface) - dev_put(soft_iface); - return ret; } @@ -545,6 +520,84 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb) return msg->len; } +/** + * batadv_get_softif_from_info() - Retrieve soft interface from genl attributes + * @net: the applicable net namespace + * @info: receiver information + * + * Return: Pointer to soft interface (with increased refcnt) on success, error + * pointer on error + */ +static struct net_device * +batadv_get_softif_from_info(struct net *net, struct genl_info *info) +{ + struct net_device *soft_iface; + int ifindex; + + if (!info->attrs[BATADV_ATTR_MESH_IFINDEX]) + return ERR_PTR(-EINVAL); + + ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface) + return ERR_PTR(-ENODEV); + + if (!batadv_softif_is_valid(soft_iface)) + goto err_put_softif; + + return soft_iface; + +err_put_softif: + dev_put(soft_iface); + + return ERR_PTR(-EINVAL); +} + +/** + * batadv_pre_doit() - Prepare batman-adv genl doit request + * @ops: requested netlink operation + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct net_device *soft_iface; + struct batadv_priv *bat_priv; + + if (ops->internal_flags & BATADV_FLAG_NEED_MESH) { + soft_iface = batadv_get_softif_from_info(net, info); + if (IS_ERR(soft_iface)) + return PTR_ERR(soft_iface); + + bat_priv = netdev_priv(soft_iface); + info->user_ptr[0] = bat_priv; + } + + return 0; +} + +/** + * batadv_post_doit() - End batman-adv genl doit request + * @ops: requested netlink operation + * @skb: Netlink message with request data + * @info: receiver information + */ +static void batadv_post_doit(const struct genl_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + struct batadv_priv *bat_priv; + + if (ops->internal_flags & BATADV_FLAG_NEED_MESH && info->user_ptr[0]) { + bat_priv = info->user_ptr[0]; + dev_put(bat_priv->soft_iface); + } +} + static const struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_GET_MESH_INFO, @@ -557,12 +610,14 @@ static const struct genl_ops batadv_netlink_ops[] = { .flags = GENL_ADMIN_PERM, .policy = batadv_netlink_policy, .doit = batadv_netlink_tp_meter_start, + .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_TP_METER_CANCEL, .flags = GENL_ADMIN_PERM, .policy = batadv_netlink_policy, .doit = batadv_netlink_tp_meter_cancel, + .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_GET_ROUTING_ALGOS, @@ -639,6 +694,8 @@ struct genl_family batadv_netlink_family __ro_after_init = { .version = 1, .maxattr = BATADV_ATTR_MAX, .netnsok = true, + .pre_doit = batadv_pre_doit, + .post_doit = batadv_post_doit, .module = THIS_MODULE, .ops = batadv_netlink_ops, .n_ops = ARRAY_SIZE(batadv_netlink_ops), From 60040513536097584c3d55b39acdfa7080645d80 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 12:14:56 +0100 Subject: [PATCH 0885/1436] batman-adv: Prepare framework for mesh genl config The batman-adv configuration interface was implemented solely using sysfs. This approach was condemned by non-batadv developers as "huge mistake". Instead a netlink/genl based implementation was suggested. The main objects for this configuration is the mesh/soft-interface object. Its actual object in memory already contains most of the available configuration settings. The genl interface reflects this by allowing to get/set it using the mesh specific commands. The BATADV_CMD_GET_MESH_INFO (or short version BATADV_CMD_GET_MESH) is reused as get command because it already provides the content of other information from the mesh/soft-interface which are not yet configuration specific. The set command BATADV_CMD_SET_MESH will also notify interested userspace listeners of the "config" mcast group using the BATADV_CMD_SET_MESH command message type that settings might have been changed and what the current values are. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 16 ++- net/batman-adv/netlink.c | 180 +++++++++++++++++++------------- 2 files changed, 121 insertions(+), 75 deletions(-) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index a28e76a7e0a2..b3394aac9a88 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -27,6 +27,7 @@ #define BATADV_NL_NAME "batadv" +#define BATADV_NL_MCAST_GROUP_CONFIG "config" #define BATADV_NL_MCAST_GROUP_TPMETER "tpmeter" /** @@ -372,10 +373,14 @@ enum batadv_nl_commands { BATADV_CMD_UNSPEC, /** - * @BATADV_CMD_GET_MESH_INFO: Query basic information about batman-adv - * device + * @BATADV_CMD_GET_MESH: Get attributes from softif/mesh */ - BATADV_CMD_GET_MESH_INFO, + BATADV_CMD_GET_MESH, + + /** + * @BATADV_CMD_GET_MESH_INFO: Alias for @BATADV_CMD_GET_MESH + */ + BATADV_CMD_GET_MESH_INFO = BATADV_CMD_GET_MESH, /** * @BATADV_CMD_TP_METER: Start a tp meter session @@ -443,6 +448,11 @@ enum batadv_nl_commands { */ BATADV_CMD_GET_MCAST_FLAGS, + /** + * @BATADV_CMD_SET_MESH: Set attributes for softif/mesh + */ + BATADV_CMD_SET_MESH, + /* add new commands above here */ /** diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index e56c40d1bfe0..179cc7cfcb70 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -62,6 +62,7 @@ struct genl_family batadv_netlink_family; /* multicast groups */ enum batadv_netlink_multicast_groups { + BATADV_NL_MCGRP_CONFIG, BATADV_NL_MCGRP_TPMETER, }; @@ -78,6 +79,7 @@ enum batadv_genl_ops_flags { }; static const struct genl_multicast_group batadv_netlink_mcgrps[] = { + [BATADV_NL_MCGRP_CONFIG] = { .name = BATADV_NL_MCAST_GROUP_CONFIG }, [BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER }, }; @@ -138,20 +140,29 @@ batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype) } /** - * batadv_netlink_mesh_info_put() - fill in generic information about mesh - * interface - * @msg: netlink message to be sent back - * @soft_iface: interface for which the data should be taken + * batadv_netlink_mesh_fill() - Fill message with mesh attributes + * @msg: Netlink message to dump into + * @bat_priv: the bat priv with all the soft interface information + * @cmd: type of message to generate + * @portid: Port making netlink request + * @seq: sequence number for message + * @flags: Additional flags for message * - * Return: 0 on success, < 0 on error + * Return: 0 on success or negative error number in case of failure */ -static int -batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface) +static int batadv_netlink_mesh_fill(struct sk_buff *msg, + struct batadv_priv *bat_priv, + enum batadv_nl_commands cmd, + u32 portid, u32 seq, int flags) { - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct net_device *soft_iface = bat_priv->soft_iface; struct batadv_hard_iface *primary_if = NULL; struct net_device *hard_iface; - int ret = -ENOBUFS; + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd); + if (!hdr) + return -ENOBUFS; if (nla_put_string(msg, BATADV_ATTR_VERSION, BATADV_SOURCE_VERSION) || nla_put_string(msg, BATADV_ATTR_ALGO_NAME, @@ -162,16 +173,16 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface) soft_iface->dev_addr) || nla_put_u8(msg, BATADV_ATTR_TT_TTVN, (u8)atomic_read(&bat_priv->tt.vn))) - goto out; + goto nla_put_failure; #ifdef CONFIG_BATMAN_ADV_BLA if (nla_put_u16(msg, BATADV_ATTR_BLA_CRC, ntohs(bat_priv->bla.claim_dest.group))) - goto out; + goto nla_put_failure; #endif if (batadv_mcast_mesh_info_put(msg, bat_priv)) - goto out; + goto nla_put_failure; primary_if = batadv_primary_if_get_selected(bat_priv); if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) { @@ -183,77 +194,95 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface) hard_iface->name) || nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN, hard_iface->dev_addr)) - goto out; + goto nla_put_failure; } - ret = 0; - - out: if (primary_if) batadv_hardif_put(primary_if); + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + if (primary_if) + batadv_hardif_put(primary_if); + + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_netlink_notify_mesh() - send softif attributes to listener + * @bat_priv: the bat priv with all the soft interface information + * + * Return: 0 on success, < 0 on error + */ +static int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv) +{ + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = batadv_netlink_mesh_fill(msg, bat_priv, BATADV_CMD_SET_MESH, + 0, 0, 0); + if (ret < 0) { + nlmsg_free(msg); + return ret; + } + + genlmsg_multicast_netns(&batadv_netlink_family, + dev_net(bat_priv->soft_iface), msg, 0, + BATADV_NL_MCGRP_CONFIG, GFP_KERNEL); + + return 0; +} + +/** + * batadv_netlink_get_mesh() - Get softif attributes + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_get_mesh(struct sk_buff *skb, struct genl_info *info) +{ + struct batadv_priv *bat_priv = info->user_ptr[0]; + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = batadv_netlink_mesh_fill(msg, bat_priv, BATADV_CMD_GET_MESH, + info->snd_portid, info->snd_seq, 0); + if (ret < 0) { + nlmsg_free(msg); + return ret; + } + + ret = genlmsg_reply(msg, info); + return ret; } /** - * batadv_netlink_get_mesh_info() - handle incoming BATADV_CMD_GET_MESH_INFO - * netlink request - * @skb: received netlink message + * batadv_netlink_set_mesh() - Set softif attributes + * @skb: Netlink message with request data * @info: receiver information * - * Return: 0 on success, < 0 on error + * Return: 0 on success or negative error number in case of failure */ -static int -batadv_netlink_get_mesh_info(struct sk_buff *skb, struct genl_info *info) +static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) { - struct net *net = genl_info_net(info); - struct net_device *soft_iface; - struct sk_buff *msg = NULL; - void *msg_head; - int ifindex; - int ret; + struct batadv_priv *bat_priv = info->user_ptr[0]; - if (!info->attrs[BATADV_ATTR_MESH_IFINDEX]) - return -EINVAL; + batadv_netlink_notify_mesh(bat_priv); - ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); - if (!ifindex) - return -EINVAL; - - soft_iface = dev_get_by_index(net, ifindex); - if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { - ret = -ENODEV; - goto out; - } - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) { - ret = -ENOMEM; - goto out; - } - - msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq, - &batadv_netlink_family, 0, - BATADV_CMD_GET_MESH_INFO); - if (!msg_head) { - ret = -ENOBUFS; - goto out; - } - - ret = batadv_netlink_mesh_info_put(msg, soft_iface); - - out: - if (soft_iface) - dev_put(soft_iface); - - if (ret) { - if (msg) - nlmsg_free(msg); - return ret; - } - - genlmsg_end(msg, msg_head); - return genlmsg_reply(msg, info); + return 0; } /** @@ -600,10 +629,11 @@ static void batadv_post_doit(const struct genl_ops *ops, struct sk_buff *skb, static const struct genl_ops batadv_netlink_ops[] = { { - .cmd = BATADV_CMD_GET_MESH_INFO, - .flags = GENL_ADMIN_PERM, + .cmd = BATADV_CMD_GET_MESH, + /* can be retrieved by unprivileged users */ .policy = batadv_netlink_policy, - .doit = batadv_netlink_get_mesh_info, + .doit = batadv_netlink_get_mesh, + .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_TP_METER, @@ -685,7 +715,13 @@ static const struct genl_ops batadv_netlink_ops[] = { .policy = batadv_netlink_policy, .dumpit = batadv_mcast_flags_dump, }, - + { + .cmd = BATADV_CMD_SET_MESH, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .doit = batadv_netlink_set_mesh, + .internal_flags = BATADV_FLAG_NEED_MESH, + }, }; struct genl_family batadv_netlink_family __ro_after_init = { From 5c55a40fa801df2d807141319c0fdbb3939c3947 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 12:33:17 +0100 Subject: [PATCH 0886/1436] batman-adv: Prepare framework for hardif genl config The batman-adv configuration interface was implemented solely using sysfs. This approach was condemned by non-batadv developers as "huge mistake". Instead a netlink/genl based implementation was suggested. Beside the mesh/soft-interface specific configuration, the slave/hard-interface have B.A.T.M.A.N. V specific configuration settings. The genl interface reflects this by allowing to get/set it using the hard-interface specific commands. The BATADV_CMD_GET_HARDIFS (or short version BATADV_CMD_GET_HARDIF) is reused as get command because it already allow sto dump the content of other information from the slave/hard-interface which are not yet configuration specific. The set command BATADV_CMD_SET_HARDIF will also notify interested userspace listeners of the "config" mcast group using the BATADV_CMD_SET_HARDIF command message type that settings might have been changed and what the current values are. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 16 ++- net/batman-adv/netlink.c | 240 +++++++++++++++++++++++++++++--- 2 files changed, 233 insertions(+), 23 deletions(-) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index b3394aac9a88..62456a087ef6 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -398,9 +398,15 @@ enum batadv_nl_commands { BATADV_CMD_GET_ROUTING_ALGOS, /** - * @BATADV_CMD_GET_HARDIFS: Query list of hard interfaces + * @BATADV_CMD_GET_HARDIF: Get attributes from a hardif of the + * current softif */ - BATADV_CMD_GET_HARDIFS, + BATADV_CMD_GET_HARDIF, + + /** + * @BATADV_CMD_GET_HARDIFS: Alias for @BATADV_CMD_GET_HARDIF + */ + BATADV_CMD_GET_HARDIFS = BATADV_CMD_GET_HARDIF, /** * @BATADV_CMD_GET_TRANSTABLE_LOCAL: Query list of local translations @@ -453,6 +459,12 @@ enum batadv_nl_commands { */ BATADV_CMD_SET_MESH, + /** + * @BATADV_CMD_SET_HARDIF: Set attributes for hardif of the + * current softif + */ + BATADV_CMD_SET_HARDIF, + /* add new commands above here */ /** diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 179cc7cfcb70..a184e4225fb5 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -76,6 +77,13 @@ enum batadv_genl_ops_flags { * saved in info->user_ptr[0] */ BATADV_FLAG_NEED_MESH = BIT(0), + + /** + * @BATADV_FLAG_NEED_HARDIF: request requires valid hard interface in + * attribute BATADV_ATTR_HARD_IFINDEX and expects a pointer to it to be + * saved in info->user_ptr[1] + */ + BATADV_FLAG_NEED_HARDIF = BIT(1), }; static const struct genl_multicast_group batadv_netlink_mcgrps[] = { @@ -446,29 +454,38 @@ batadv_netlink_tp_meter_cancel(struct sk_buff *skb, struct genl_info *info) } /** - * batadv_netlink_dump_hardif_entry() - Dump one hard interface into a message + * batadv_netlink_hardif_fill() - Fill message with hardif attributes * @msg: Netlink message to dump into + * @bat_priv: the bat priv with all the soft interface information + * @hard_iface: hard interface which was modified + * @cmd: type of message to generate * @portid: Port making netlink request + * @seq: sequence number for message + * @flags: Additional flags for message * @cb: Control block containing additional options - * @hard_iface: Hard interface to dump * - * Return: error code, or 0 on success + * Return: 0 on success or negative error number in case of failure */ -static int -batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, - struct netlink_callback *cb, - struct batadv_hard_iface *hard_iface) +static int batadv_netlink_hardif_fill(struct sk_buff *msg, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface, + enum batadv_nl_commands cmd, + u32 portid, u32 seq, int flags, + struct netlink_callback *cb) { struct net_device *net_dev = hard_iface->net_dev; void *hdr; - hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq, - &batadv_netlink_family, NLM_F_MULTI, - BATADV_CMD_GET_HARDIFS); + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd); if (!hdr) - return -EMSGSIZE; + return -ENOBUFS; - genl_dump_check_consistent(cb, hdr); + if (cb) + genl_dump_check_consistent(cb, hdr); + + if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, + bat_priv->soft_iface->ifindex)) + goto nla_put_failure; if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, net_dev->ifindex) || @@ -486,24 +503,107 @@ batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, genlmsg_end(msg, hdr); return 0; - nla_put_failure: +nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } /** - * batadv_netlink_dump_hardifs() - Dump all hard interface into a messages + * batadv_netlink_notify_hardif() - send hardif attributes to listener + * @bat_priv: the bat priv with all the soft interface information + * @hard_iface: hard interface which was modified + * + * Return: 0 on success, < 0 on error + */ +static int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface) +{ + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = batadv_netlink_hardif_fill(msg, bat_priv, hard_iface, + BATADV_CMD_SET_HARDIF, 0, 0, 0, NULL); + if (ret < 0) { + nlmsg_free(msg); + return ret; + } + + genlmsg_multicast_netns(&batadv_netlink_family, + dev_net(bat_priv->soft_iface), msg, 0, + BATADV_NL_MCGRP_CONFIG, GFP_KERNEL); + + return 0; +} + +/** + * batadv_netlink_get_hardif() - Get hardif attributes + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_get_hardif(struct sk_buff *skb, + struct genl_info *info) +{ + struct batadv_hard_iface *hard_iface = info->user_ptr[1]; + struct batadv_priv *bat_priv = info->user_ptr[0]; + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = batadv_netlink_hardif_fill(msg, bat_priv, hard_iface, + BATADV_CMD_GET_HARDIF, + info->snd_portid, info->snd_seq, 0, + NULL); + if (ret < 0) { + nlmsg_free(msg); + return ret; + } + + ret = genlmsg_reply(msg, info); + + return ret; +} + +/** + * batadv_netlink_set_hardif() - Set hardif attributes + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_set_hardif(struct sk_buff *skb, + struct genl_info *info) +{ + struct batadv_hard_iface *hard_iface = info->user_ptr[1]; + struct batadv_priv *bat_priv = info->user_ptr[0]; + + batadv_netlink_notify_hardif(bat_priv, hard_iface); + + return 0; +} + +/** + * batadv_netlink_dump_hardif() - Dump all hard interface into a messages * @msg: Netlink message to dump into * @cb: Parameters from query * * Return: error code, or length of reply message on success */ static int -batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb) +batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb) { struct net *net = sock_net(cb->skb->sk); struct net_device *soft_iface; struct batadv_hard_iface *hard_iface; + struct batadv_priv *bat_priv; int ifindex; int portid = NETLINK_CB(cb->skb).portid; int skip = cb->args[0]; @@ -523,6 +623,8 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb) return -ENODEV; } + bat_priv = netdev_priv(soft_iface); + rtnl_lock(); cb->seq = batadv_hardif_generation << 1 | 1; @@ -533,8 +635,10 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb) if (i++ < skip) continue; - if (batadv_netlink_dump_hardif_entry(msg, portid, cb, - hard_iface)) { + if (batadv_netlink_hardif_fill(msg, bat_priv, hard_iface, + BATADV_CMD_GET_HARDIF, + portid, cb->nlh->nlmsg_seq, + NLM_F_MULTI, cb)) { i--; break; } @@ -583,6 +687,52 @@ err_put_softif: return ERR_PTR(-EINVAL); } +/** + * batadv_get_hardif_from_info() - Retrieve hardif from genl attributes + * @bat_priv: the bat priv with all the soft interface information + * @net: the applicable net namespace + * @info: receiver information + * + * Return: Pointer to hard interface (with increased refcnt) on success, error + * pointer on error + */ +static struct batadv_hard_iface * +batadv_get_hardif_from_info(struct batadv_priv *bat_priv, struct net *net, + struct genl_info *info) +{ + struct batadv_hard_iface *hard_iface; + struct net_device *hard_dev; + unsigned int hardif_index; + + if (!info->attrs[BATADV_ATTR_HARD_IFINDEX]) + return ERR_PTR(-EINVAL); + + hardif_index = nla_get_u32(info->attrs[BATADV_ATTR_HARD_IFINDEX]); + + hard_dev = dev_get_by_index(net, hardif_index); + if (!hard_dev) + return ERR_PTR(-ENODEV); + + hard_iface = batadv_hardif_get_by_netdev(hard_dev); + if (!hard_iface) + goto err_put_harddev; + + if (hard_iface->soft_iface != bat_priv->soft_iface) + goto err_put_hardif; + + /* hard_dev is referenced by hard_iface and not needed here */ + dev_put(hard_dev); + + return hard_iface; + +err_put_hardif: + batadv_hardif_put(hard_iface); +err_put_harddev: + dev_put(hard_dev); + + return ERR_PTR(-EINVAL); +} + /** * batadv_pre_doit() - Prepare batman-adv genl doit request * @ops: requested netlink operation @@ -595,8 +745,21 @@ static int batadv_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); + struct batadv_hard_iface *hard_iface; + struct batadv_priv *bat_priv = NULL; struct net_device *soft_iface; - struct batadv_priv *bat_priv; + u8 user_ptr1_flags; + u8 mesh_dep_flags; + int ret; + + user_ptr1_flags = BATADV_FLAG_NEED_HARDIF; + if (WARN_ON(hweight8(ops->internal_flags & user_ptr1_flags) > 1)) + return -EINVAL; + + mesh_dep_flags = BATADV_FLAG_NEED_HARDIF; + if (WARN_ON((ops->internal_flags & mesh_dep_flags) && + (~ops->internal_flags & BATADV_FLAG_NEED_MESH))) + return -EINVAL; if (ops->internal_flags & BATADV_FLAG_NEED_MESH) { soft_iface = batadv_get_softif_from_info(net, info); @@ -607,7 +770,23 @@ static int batadv_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, info->user_ptr[0] = bat_priv; } + if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF) { + hard_iface = batadv_get_hardif_from_info(bat_priv, net, info); + if (IS_ERR(hard_iface)) { + ret = PTR_ERR(hard_iface); + goto err_put_softif; + } + + info->user_ptr[1] = hard_iface; + } + return 0; + +err_put_softif: + if (bat_priv) + dev_put(bat_priv->soft_iface); + + return ret; } /** @@ -619,8 +798,16 @@ static int batadv_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, static void batadv_post_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { + struct batadv_hard_iface *hard_iface; struct batadv_priv *bat_priv; + if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF && + info->user_ptr[1]) { + hard_iface = info->user_ptr[1]; + + batadv_hardif_put(hard_iface); + } + if (ops->internal_flags & BATADV_FLAG_NEED_MESH && info->user_ptr[0]) { bat_priv = info->user_ptr[0]; dev_put(bat_priv->soft_iface); @@ -656,10 +843,13 @@ static const struct genl_ops batadv_netlink_ops[] = { .dumpit = batadv_algo_dump, }, { - .cmd = BATADV_CMD_GET_HARDIFS, - .flags = GENL_ADMIN_PERM, + .cmd = BATADV_CMD_GET_HARDIF, + /* can be retrieved by unprivileged users */ .policy = batadv_netlink_policy, - .dumpit = batadv_netlink_dump_hardifs, + .dumpit = batadv_netlink_dump_hardif, + .doit = batadv_netlink_get_hardif, + .internal_flags = BATADV_FLAG_NEED_MESH | + BATADV_FLAG_NEED_HARDIF, }, { .cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL, @@ -722,6 +912,14 @@ static const struct genl_ops batadv_netlink_ops[] = { .doit = batadv_netlink_set_mesh, .internal_flags = BATADV_FLAG_NEED_MESH, }, + { + .cmd = BATADV_CMD_SET_HARDIF, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .doit = batadv_netlink_set_hardif, + .internal_flags = BATADV_FLAG_NEED_MESH | + BATADV_FLAG_NEED_HARDIF, + }, }; struct genl_family batadv_netlink_family __ro_after_init = { From 49e7e37cd98122126e8da58df2fe2261c6e83df2 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 12:41:08 +0100 Subject: [PATCH 0887/1436] batman-adv: Prepare framework for vlan genl config The batman-adv configuration interface was implemented solely using sysfs. This approach was condemned by non-batadv developers as "huge mistake". Instead a netlink/genl based implementation was suggested. Beside the mesh/soft-interface specific configuration, the VLANs on top of the mesh/soft-interface have configuration settings. The genl interface reflects this by allowing to get/set it using the vlan specific commands BATADV_CMD_GET_VLAN/BATADV_CMD_SET_VLAN. The set command BATADV_CMD_SET_MESH will also notify interested userspace listeners of the "config" mcast group using the BATADV_CMD_SET_VLAN command message type that settings might have been changed and what the current values are. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 17 +++ net/batman-adv/netlink.c | 191 +++++++++++++++++++++++++++++++- 2 files changed, 206 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 62456a087ef6..10f3cc34fe16 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -345,6 +345,11 @@ enum batadv_nl_attrs { */ BATADV_ATTR_MCAST_FLAGS_PRIV, + /** + * @BATADV_ATTR_VLANID: VLAN id on top of soft interface + */ + BATADV_ATTR_VLANID, + /* add attributes above here, update the policy in netlink.c */ /** @@ -465,6 +470,18 @@ enum batadv_nl_commands { */ BATADV_CMD_SET_HARDIF, + /** + * @BATADV_CMD_GET_VLAN: Get attributes from a VLAN of the + * current softif + */ + BATADV_CMD_GET_VLAN, + + /** + * @BATADV_CMD_SET_VLAN: Set attributes for VLAN of the + * current softif + */ + BATADV_CMD_SET_VLAN, + /* add new commands above here */ /** diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index a184e4225fb5..d40913bb9031 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -84,6 +85,13 @@ enum batadv_genl_ops_flags { * saved in info->user_ptr[1] */ BATADV_FLAG_NEED_HARDIF = BIT(1), + + /** + * @BATADV_FLAG_NEED_VLAN: request requires valid vlan in + * attribute BATADV_ATTR_VLANID and expects a pointer to it to be + * saved in info->user_ptr[1] + */ + BATADV_FLAG_NEED_VLAN = BIT(2), }; static const struct genl_multicast_group batadv_netlink_mcgrps[] = { @@ -130,6 +138,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_DAT_CACHE_VID] = { .type = NLA_U16 }, [BATADV_ATTR_MCAST_FLAGS] = { .type = NLA_U32 }, [BATADV_ATTR_MCAST_FLAGS_PRIV] = { .type = NLA_U32 }, + [BATADV_ATTR_VLANID] = { .type = NLA_U16 }, }; /** @@ -653,6 +662,123 @@ batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb) return msg->len; } +/** + * batadv_netlink_vlan_fill() - Fill message with vlan attributes + * @msg: Netlink message to dump into + * @bat_priv: the bat priv with all the soft interface information + * @vlan: vlan which was modified + * @cmd: type of message to generate + * @portid: Port making netlink request + * @seq: sequence number for message + * @flags: Additional flags for message + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_vlan_fill(struct sk_buff *msg, + struct batadv_priv *bat_priv, + struct batadv_softif_vlan *vlan, + enum batadv_nl_commands cmd, + u32 portid, u32 seq, int flags) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd); + if (!hdr) + return -ENOBUFS; + + if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, + bat_priv->soft_iface->ifindex)) + goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_VLANID, vlan->vid & VLAN_VID_MASK)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_netlink_notify_vlan() - send vlan attributes to listener + * @bat_priv: the bat priv with all the soft interface information + * @vlan: vlan which was modified + * + * Return: 0 on success, < 0 on error + */ +static int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv, + struct batadv_softif_vlan *vlan) +{ + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = batadv_netlink_vlan_fill(msg, bat_priv, vlan, + BATADV_CMD_SET_VLAN, 0, 0, 0); + if (ret < 0) { + nlmsg_free(msg); + return ret; + } + + genlmsg_multicast_netns(&batadv_netlink_family, + dev_net(bat_priv->soft_iface), msg, 0, + BATADV_NL_MCGRP_CONFIG, GFP_KERNEL); + + return 0; +} + +/** + * batadv_netlink_get_vlan() - Get vlan attributes + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_get_vlan(struct sk_buff *skb, struct genl_info *info) +{ + struct batadv_softif_vlan *vlan = info->user_ptr[1]; + struct batadv_priv *bat_priv = info->user_ptr[0]; + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = batadv_netlink_vlan_fill(msg, bat_priv, vlan, BATADV_CMD_GET_VLAN, + info->snd_portid, info->snd_seq, 0); + if (ret < 0) { + nlmsg_free(msg); + return ret; + } + + ret = genlmsg_reply(msg, info); + + return ret; +} + +/** + * batadv_netlink_set_vlan() - Get vlan attributes + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_set_vlan(struct sk_buff *skb, struct genl_info *info) +{ + struct batadv_softif_vlan *vlan = info->user_ptr[1]; + struct batadv_priv *bat_priv = info->user_ptr[0]; + + batadv_netlink_notify_vlan(bat_priv, vlan); + + return 0; +} + /** * batadv_get_softif_from_info() - Retrieve soft interface from genl attributes * @net: the applicable net namespace @@ -733,6 +859,34 @@ err_put_harddev: return ERR_PTR(-EINVAL); } +/** + * batadv_get_vlan_from_info() - Retrieve vlan from genl attributes + * @bat_priv: the bat priv with all the soft interface information + * @net: the applicable net namespace + * @info: receiver information + * + * Return: Pointer to vlan on success (with increased refcnt), error pointer + * on error + */ +static struct batadv_softif_vlan * +batadv_get_vlan_from_info(struct batadv_priv *bat_priv, struct net *net, + struct genl_info *info) +{ + struct batadv_softif_vlan *vlan; + u16 vid; + + if (!info->attrs[BATADV_ATTR_VLANID]) + return ERR_PTR(-EINVAL); + + vid = nla_get_u16(info->attrs[BATADV_ATTR_VLANID]); + + vlan = batadv_softif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG); + if (!vlan) + return ERR_PTR(-ENOENT); + + return vlan; +} + /** * batadv_pre_doit() - Prepare batman-adv genl doit request * @ops: requested netlink operation @@ -747,16 +901,17 @@ static int batadv_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct net *net = genl_info_net(info); struct batadv_hard_iface *hard_iface; struct batadv_priv *bat_priv = NULL; + struct batadv_softif_vlan *vlan; struct net_device *soft_iface; u8 user_ptr1_flags; u8 mesh_dep_flags; int ret; - user_ptr1_flags = BATADV_FLAG_NEED_HARDIF; + user_ptr1_flags = BATADV_FLAG_NEED_HARDIF | BATADV_FLAG_NEED_VLAN; if (WARN_ON(hweight8(ops->internal_flags & user_ptr1_flags) > 1)) return -EINVAL; - mesh_dep_flags = BATADV_FLAG_NEED_HARDIF; + mesh_dep_flags = BATADV_FLAG_NEED_HARDIF | BATADV_FLAG_NEED_VLAN; if (WARN_ON((ops->internal_flags & mesh_dep_flags) && (~ops->internal_flags & BATADV_FLAG_NEED_MESH))) return -EINVAL; @@ -780,6 +935,16 @@ static int batadv_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, info->user_ptr[1] = hard_iface; } + if (ops->internal_flags & BATADV_FLAG_NEED_VLAN) { + vlan = batadv_get_vlan_from_info(bat_priv, net, info); + if (IS_ERR(vlan)) { + ret = PTR_ERR(vlan); + goto err_put_softif; + } + + info->user_ptr[1] = vlan; + } + return 0; err_put_softif: @@ -799,6 +964,7 @@ static void batadv_post_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { struct batadv_hard_iface *hard_iface; + struct batadv_softif_vlan *vlan; struct batadv_priv *bat_priv; if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF && @@ -808,6 +974,11 @@ static void batadv_post_doit(const struct genl_ops *ops, struct sk_buff *skb, batadv_hardif_put(hard_iface); } + if (ops->internal_flags & BATADV_FLAG_NEED_VLAN && info->user_ptr[1]) { + vlan = info->user_ptr[1]; + batadv_softif_vlan_put(vlan); + } + if (ops->internal_flags & BATADV_FLAG_NEED_MESH && info->user_ptr[0]) { bat_priv = info->user_ptr[0]; dev_put(bat_priv->soft_iface); @@ -920,6 +1091,22 @@ static const struct genl_ops batadv_netlink_ops[] = { .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_HARDIF, }, + { + .cmd = BATADV_CMD_GET_VLAN, + /* can be retrieved by unprivileged users */ + .policy = batadv_netlink_policy, + .doit = batadv_netlink_get_vlan, + .internal_flags = BATADV_FLAG_NEED_MESH | + BATADV_FLAG_NEED_VLAN, + }, + { + .cmd = BATADV_CMD_SET_VLAN, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .doit = batadv_netlink_set_vlan, + .internal_flags = BATADV_FLAG_NEED_MESH | + BATADV_FLAG_NEED_VLAN, + }, }; struct genl_family batadv_netlink_family __ro_after_init = { From 9ab4cee5ced970019a5d0f3a43cf85671ca7b38f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 12:46:14 +0100 Subject: [PATCH 0888/1436] batman-adv: Add aggregated_ogms mesh genl configuration The mesh interface can delay OGM messages to aggregate different ogms together in a single OGM packet. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the BATADV_ATTR_AGGREGATED_OGMS_ENABLED attribute. Setting the u8 to zero will disable this feature and setting it to something else is enabling this feature. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 6 ++++++ net/batman-adv/netlink.c | 12 ++++++++++++ 2 files changed, 18 insertions(+) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 10f3cc34fe16..f8941e80d6b4 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -350,6 +350,12 @@ enum batadv_nl_attrs { */ BATADV_ATTR_VLANID, + /** + * @BATADV_ATTR_AGGREGATED_OGMS_ENABLED: whether the batman protocol + * messages of the mesh interface shall be aggregated or not. + */ + BATADV_ATTR_AGGREGATED_OGMS_ENABLED, + /* add attributes above here, update the policy in netlink.c */ /** diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index d40913bb9031..82665dc2eb2b 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -139,6 +139,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_MCAST_FLAGS] = { .type = NLA_U32 }, [BATADV_ATTR_MCAST_FLAGS_PRIV] = { .type = NLA_U32 }, [BATADV_ATTR_VLANID] = { .type = NLA_U16 }, + [BATADV_ATTR_AGGREGATED_OGMS_ENABLED] = { .type = NLA_U8 }, }; /** @@ -214,6 +215,10 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg, goto nla_put_failure; } + if (nla_put_u8(msg, BATADV_ATTR_AGGREGATED_OGMS_ENABLED, + !!atomic_read(&bat_priv->aggregated_ogms))) + goto nla_put_failure; + if (primary_if) batadv_hardif_put(primary_if); @@ -296,6 +301,13 @@ static int batadv_netlink_get_mesh(struct sk_buff *skb, struct genl_info *info) static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) { struct batadv_priv *bat_priv = info->user_ptr[0]; + struct nlattr *attr; + + if (info->attrs[BATADV_ATTR_AGGREGATED_OGMS_ENABLED]) { + attr = info->attrs[BATADV_ATTR_AGGREGATED_OGMS_ENABLED]; + + atomic_set(&bat_priv->aggregated_ogms, !!nla_get_u8(attr)); + } batadv_netlink_notify_mesh(bat_priv); From e43d16b87dc2cad18799cfd1142f4acae4135ea4 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 12:51:55 +0100 Subject: [PATCH 0889/1436] batman-adv: Add ap_isolation mesh/vlan genl configuration The mesh interface can drop messages between clients to implement a mesh-wide AP isolation. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH and BATADV_CMD_SET_VLAN/BATADV_CMD_GET_VLAN commands allow to set/get the configuration of this feature using the BATADV_ATTR_AP_ISOLATION_ENABLED attribute. Setting the u8 to zero will disable this feature and setting it to something else is enabling this feature. This feature also requires that skbuff which should be handled as isolated are marked. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the mark/mask using the u32 attributes BATADV_ATTR_ISOLATION_MARK and BATADV_ATTR_ISOLATION_MASK. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 19 +++++++ net/batman-adv/netlink.c | 89 +++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index f8941e80d6b4..a4dadafe08dd 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -356,6 +356,25 @@ enum batadv_nl_attrs { */ BATADV_ATTR_AGGREGATED_OGMS_ENABLED, + /** + * @BATADV_ATTR_AP_ISOLATION_ENABLED: whether the data traffic going + * from a wireless client to another wireless client will be silently + * dropped. + */ + BATADV_ATTR_AP_ISOLATION_ENABLED, + + /** + * @BATADV_ATTR_ISOLATION_MARK: the isolation mark which is used to + * classify clients as "isolated" by the Extended Isolation feature. + */ + BATADV_ATTR_ISOLATION_MARK, + + /** + * @BATADV_ATTR_ISOLATION_MASK: the isolation (bit)mask which is used to + * classify clients as "isolated" by the Extended Isolation feature. + */ + BATADV_ATTR_ISOLATION_MASK, + /* add attributes above here, update the policy in netlink.c */ /** diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 82665dc2eb2b..fc80f003ed6e 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -140,6 +140,9 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_MCAST_FLAGS_PRIV] = { .type = NLA_U32 }, [BATADV_ATTR_VLANID] = { .type = NLA_U16 }, [BATADV_ATTR_AGGREGATED_OGMS_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_AP_ISOLATION_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_ISOLATION_MARK] = { .type = NLA_U32 }, + [BATADV_ATTR_ISOLATION_MASK] = { .type = NLA_U32 }, }; /** @@ -157,6 +160,52 @@ batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype) return attr ? nla_get_u32(attr) : 0; } +/** + * batadv_netlink_mesh_fill_ap_isolation() - Add ap_isolation softif attribute + * @msg: Netlink message to dump into + * @bat_priv: the bat priv with all the soft interface information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_mesh_fill_ap_isolation(struct sk_buff *msg, + struct batadv_priv *bat_priv) +{ + struct batadv_softif_vlan *vlan; + u8 ap_isolation; + + vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + if (!vlan) + return 0; + + ap_isolation = atomic_read(&vlan->ap_isolation); + batadv_softif_vlan_put(vlan); + + return nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED, + !!ap_isolation); +} + +/** + * batadv_option_set_ap_isolation() - Set ap_isolation from genl msg + * @attr: parsed BATADV_ATTR_AP_ISOLATION_ENABLED attribute + * @bat_priv: the bat priv with all the soft interface information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_set_mesh_ap_isolation(struct nlattr *attr, + struct batadv_priv *bat_priv) +{ + struct batadv_softif_vlan *vlan; + + vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + if (!vlan) + return -ENOENT; + + atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr)); + batadv_softif_vlan_put(vlan); + + return 0; +} + /** * batadv_netlink_mesh_fill() - Fill message with mesh attributes * @msg: Netlink message to dump into @@ -219,6 +268,17 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg, !!atomic_read(&bat_priv->aggregated_ogms))) goto nla_put_failure; + if (batadv_netlink_mesh_fill_ap_isolation(msg, bat_priv)) + goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_ISOLATION_MARK, + bat_priv->isolation_mark)) + goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_ISOLATION_MASK, + bat_priv->isolation_mark_mask)) + goto nla_put_failure; + if (primary_if) batadv_hardif_put(primary_if); @@ -309,6 +369,24 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) atomic_set(&bat_priv->aggregated_ogms, !!nla_get_u8(attr)); } + if (info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]) { + attr = info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]; + + batadv_netlink_set_mesh_ap_isolation(attr, bat_priv); + } + + if (info->attrs[BATADV_ATTR_ISOLATION_MARK]) { + attr = info->attrs[BATADV_ATTR_ISOLATION_MARK]; + + bat_priv->isolation_mark = nla_get_u32(attr); + } + + if (info->attrs[BATADV_ATTR_ISOLATION_MASK]) { + attr = info->attrs[BATADV_ATTR_ISOLATION_MASK]; + + bat_priv->isolation_mark_mask = nla_get_u32(attr); + } + batadv_netlink_notify_mesh(bat_priv); return 0; @@ -705,6 +783,10 @@ static int batadv_netlink_vlan_fill(struct sk_buff *msg, if (nla_put_u32(msg, BATADV_ATTR_VLANID, vlan->vid & VLAN_VID_MASK)) goto nla_put_failure; + if (nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED, + !!atomic_read(&vlan->ap_isolation))) + goto nla_put_failure; + genlmsg_end(msg, hdr); return 0; @@ -785,6 +867,13 @@ static int batadv_netlink_set_vlan(struct sk_buff *skb, struct genl_info *info) { struct batadv_softif_vlan *vlan = info->user_ptr[1]; struct batadv_priv *bat_priv = info->user_ptr[0]; + struct nlattr *attr; + + if (info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]) { + attr = info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]; + + atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr)); + } batadv_netlink_notify_vlan(bat_priv, vlan); From d7e52506b680826d6ff7ce73e6a90a3b9defc741 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 12:55:44 +0100 Subject: [PATCH 0890/1436] batman-adv: Add bonding mesh genl configuration The mesh interface can use multiple slave/hard-interface ports at the same time to transport the traffic to other nodes. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the BATADV_ATTR_BONDING_ENABLED attribute. Setting the u8 to zero will disable this feature and setting it to something else is enabling this feature. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 6 ++++++ net/batman-adv/netlink.c | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index a4dadafe08dd..f74ff261ec8f 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -375,6 +375,12 @@ enum batadv_nl_attrs { */ BATADV_ATTR_ISOLATION_MASK, + /** + * @BATADV_ATTR_BONDING_ENABLED: whether the data traffic going through + * the mesh will be sent using multiple interfaces at the same time. + */ + BATADV_ATTR_BONDING_ENABLED, + /* add attributes above here, update the policy in netlink.c */ /** diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index fc80f003ed6e..310a2c339fd1 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -143,6 +143,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_AP_ISOLATION_ENABLED] = { .type = NLA_U8 }, [BATADV_ATTR_ISOLATION_MARK] = { .type = NLA_U32 }, [BATADV_ATTR_ISOLATION_MASK] = { .type = NLA_U32 }, + [BATADV_ATTR_BONDING_ENABLED] = { .type = NLA_U8 }, }; /** @@ -279,6 +280,10 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg, bat_priv->isolation_mark_mask)) goto nla_put_failure; + if (nla_put_u8(msg, BATADV_ATTR_BONDING_ENABLED, + !!atomic_read(&bat_priv->bonding))) + goto nla_put_failure; + if (primary_if) batadv_hardif_put(primary_if); @@ -387,6 +392,12 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) bat_priv->isolation_mark_mask = nla_get_u32(attr); } + if (info->attrs[BATADV_ATTR_BONDING_ENABLED]) { + attr = info->attrs[BATADV_ATTR_BONDING_ENABLED]; + + atomic_set(&bat_priv->bonding, !!nla_get_u8(attr)); + } + batadv_netlink_notify_mesh(bat_priv); return 0; From 43ff6105a527aaa1a8e00163b0b0aedbbc0c4522 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:03:39 +0100 Subject: [PATCH 0891/1436] batman-adv: Add bridge_loop_avoidance mesh genl configuration The mesh interface can try to detect loops in the same mesh caused by (indirectly) bridged mesh/soft-interfaces of different nodes. Some of the loops can also be resolved without breaking the mesh. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED attribute. Setting the u8 to zero will disable this feature and setting it to something else is enabling this feature. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 7 +++++++ net/batman-adv/netlink.c | 17 +++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index f74ff261ec8f..3cb35c661056 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -381,6 +381,13 @@ enum batadv_nl_attrs { */ BATADV_ATTR_BONDING_ENABLED, + /** + * @BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED: whether the bridge loop + * avoidance feature is enabled. This feature detects and avoids loops + * between the mesh and devices bridged with the soft interface + */ + BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED, + /* add attributes above here, update the policy in netlink.c */ /** diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 310a2c339fd1..40c940da9498 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -144,6 +144,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_ISOLATION_MARK] = { .type = NLA_U32 }, [BATADV_ATTR_ISOLATION_MASK] = { .type = NLA_U32 }, [BATADV_ATTR_BONDING_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED] = { .type = NLA_U8 }, }; /** @@ -284,6 +285,12 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg, !!atomic_read(&bat_priv->bonding))) goto nla_put_failure; +#ifdef CONFIG_BATMAN_ADV_BLA + if (nla_put_u8(msg, BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED, + !!atomic_read(&bat_priv->bridge_loop_avoidance))) + goto nla_put_failure; +#endif /* CONFIG_BATMAN_ADV_BLA */ + if (primary_if) batadv_hardif_put(primary_if); @@ -398,6 +405,16 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) atomic_set(&bat_priv->bonding, !!nla_get_u8(attr)); } +#ifdef CONFIG_BATMAN_ADV_BLA + if (info->attrs[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED]) { + attr = info->attrs[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED]; + + atomic_set(&bat_priv->bridge_loop_avoidance, + !!nla_get_u8(attr)); + batadv_bla_status_update(bat_priv->soft_iface); + } +#endif /* CONFIG_BATMAN_ADV_BLA */ + batadv_netlink_notify_mesh(bat_priv); return 0; From a1c8de80329609ba68ff860074070efb1e14ade4 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:06:42 +0100 Subject: [PATCH 0892/1436] batman-adv: Add distributed_arp_table mesh genl configuration The mesh interface can use a distributed hash table to answer ARP requests without flooding the request through the whole mesh. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED attribute. Setting the u8 to zero will disable this feature and setting it to something else is enabling this feature. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 8 ++++++++ net/batman-adv/netlink.c | 17 +++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 3cb35c661056..f303a1496476 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -388,6 +388,14 @@ enum batadv_nl_attrs { */ BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED, + /** + * @BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED: whether the distributed + * arp table feature is enabled. This feature uses a distributed hash + * table to answer ARP requests without flooding the request through + * the whole mesh. + */ + BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED, + /* add attributes above here, update the policy in netlink.c */ /** diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 40c940da9498..3028c2a5c782 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -145,6 +145,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_ISOLATION_MASK] = { .type = NLA_U32 }, [BATADV_ATTR_BONDING_ENABLED] = { .type = NLA_U8 }, [BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED] = { .type = NLA_U8 }, }; /** @@ -291,6 +292,12 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg, goto nla_put_failure; #endif /* CONFIG_BATMAN_ADV_BLA */ +#ifdef CONFIG_BATMAN_ADV_DAT + if (nla_put_u8(msg, BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED, + !!atomic_read(&bat_priv->distributed_arp_table))) + goto nla_put_failure; +#endif /* CONFIG_BATMAN_ADV_DAT */ + if (primary_if) batadv_hardif_put(primary_if); @@ -415,6 +422,16 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) } #endif /* CONFIG_BATMAN_ADV_BLA */ +#ifdef CONFIG_BATMAN_ADV_DAT + if (info->attrs[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED]) { + attr = info->attrs[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED]; + + atomic_set(&bat_priv->distributed_arp_table, + !!nla_get_u8(attr)); + batadv_dat_status_update(bat_priv->soft_iface); + } +#endif /* CONFIG_BATMAN_ADV_DAT */ + batadv_netlink_notify_mesh(bat_priv); return 0; From 3e15b06eb7e410ef9f1b9673be094b3e10eacf93 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:09:49 +0100 Subject: [PATCH 0893/1436] batman-adv: Add fragmentation mesh genl configuration The mesh interface can fragment unicast packets when the packet size exceeds the outgoing slave/hard-interface MTU. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the BATADV_ATTR_FRAGMENTATION_ENABLED attribute. Setting the u8 to zero will disable this feature and setting it to something else is enabling this feature. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 7 +++++++ net/batman-adv/netlink.c | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index f303a1496476..847841b8de5d 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -396,6 +396,13 @@ enum batadv_nl_attrs { */ BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED, + /** + * @BATADV_ATTR_FRAGMENTATION_ENABLED: whether the data traffic going + * through the mesh will be fragmented or silently discarded if the + * packet size exceeds the outgoing interface MTU. + */ + BATADV_ATTR_FRAGMENTATION_ENABLED, + /* add attributes above here, update the policy in netlink.c */ /** diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 3028c2a5c782..5b2160d2f482 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -146,6 +146,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_BONDING_ENABLED] = { .type = NLA_U8 }, [BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED] = { .type = NLA_U8 }, [BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_FRAGMENTATION_ENABLED] = { .type = NLA_U8 }, }; /** @@ -298,6 +299,10 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg, goto nla_put_failure; #endif /* CONFIG_BATMAN_ADV_DAT */ + if (nla_put_u8(msg, BATADV_ATTR_FRAGMENTATION_ENABLED, + !!atomic_read(&bat_priv->fragmentation))) + goto nla_put_failure; + if (primary_if) batadv_hardif_put(primary_if); @@ -432,6 +437,13 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) } #endif /* CONFIG_BATMAN_ADV_DAT */ + if (info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED]) { + attr = info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED]; + + atomic_set(&bat_priv->fragmentation, !!nla_get_u8(attr)); + batadv_update_min_mtu(bat_priv->soft_iface); + } + batadv_netlink_notify_mesh(bat_priv); return 0; From e2d0d35b5b0ce420505e88255fd5922ed035bb8d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:15:00 +0100 Subject: [PATCH 0894/1436] batman-adv: Add gateway mesh genl configuration The mesh/soft-interface can optimize the handling of DHCP packets. Instead of flooding them through the whole mesh, it can be forwarded as unicast to a specific gateway server. The originator which injects the packets in the mesh has to select (based on sel_class thresholds) a responsible gateway server. This is done by switching this originator to the gw_mode client. The servers announce their forwarding bandwidth (download/upload) when the gw_mode server was selected. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the attributes: * u8 BATADV_ATTR_GW_MODE (0 == off, 1 == client, 2 == server) * u32 BATADV_ATTR_GW_BANDWIDTH_DOWN (in 100 kbit/s steps) * u32 BATADV_ATTR_GW_BANDWIDTH_UP (in 100 kbit/s steps) * u32 BATADV_ATTR_GW_SEL_CLASS Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 40 ++++++++++++++ net/batman-adv/gateway_client.c | 1 - net/batman-adv/gateway_common.c | 1 + net/batman-adv/gateway_common.h | 6 --- net/batman-adv/netlink.c | 92 +++++++++++++++++++++++++++++++++ net/batman-adv/soft-interface.c | 2 +- net/batman-adv/sysfs.c | 1 + 7 files changed, 135 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 847841b8de5d..165272be6878 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -139,6 +139,20 @@ enum batadv_mcast_flags_priv { BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING = (1 << 4), }; +/** + * enum batadv_gw_modes - gateway mode of node + */ +enum batadv_gw_modes { + /** @BATADV_GW_MODE_OFF: gw mode disabled */ + BATADV_GW_MODE_OFF, + + /** @BATADV_GW_MODE_CLIENT: send DHCP requests to gw servers */ + BATADV_GW_MODE_CLIENT, + + /** @BATADV_GW_MODE_SERVER: announce itself as gatway server */ + BATADV_GW_MODE_SERVER, +}; + /** * enum batadv_nl_attrs - batman-adv netlink attributes */ @@ -403,6 +417,32 @@ enum batadv_nl_attrs { */ BATADV_ATTR_FRAGMENTATION_ENABLED, + /** + * @BATADV_ATTR_GW_BANDWIDTH_DOWN: defines the download bandwidth which + * is propagated by this node if %BATADV_ATTR_GW_BANDWIDTH_MODE was set + * to 'server'. + */ + BATADV_ATTR_GW_BANDWIDTH_DOWN, + + /** + * @BATADV_ATTR_GW_BANDWIDTH_UP: defines the upload bandwidth which + * is propagated by this node if %BATADV_ATTR_GW_BANDWIDTH_MODE was set + * to 'server'. + */ + BATADV_ATTR_GW_BANDWIDTH_UP, + + /** + * @BATADV_ATTR_GW_MODE: defines the state of the gateway features. + * Possible values are specified in enum batadv_gw_modes + */ + BATADV_ATTR_GW_MODE, + + /** + * @BATADV_ATTR_GW_SEL_CLASS: defines the selection criteria this node + * will use to choose a gateway if gw_mode was set to 'client'. + */ + BATADV_ATTR_GW_SEL_CLASS, + /* add attributes above here, update the policy in netlink.c */ /** diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 0a6b9b30eabd..f5811f61aa92 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -47,7 +47,6 @@ #include #include -#include "gateway_common.h" #include "hard-interface.h" #include "log.h" #include "netlink.h" diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 53d03adc9bfc..e064de45e22c 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "gateway_client.h" #include "log.h" diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h index 89bae100a0b0..128467a0fb89 100644 --- a/net/batman-adv/gateway_common.h +++ b/net/batman-adv/gateway_common.h @@ -25,12 +25,6 @@ struct net_device; -enum batadv_gw_modes { - BATADV_GW_MODE_OFF, - BATADV_GW_MODE_CLIENT, - BATADV_GW_MODE_SERVER, -}; - /** * enum batadv_bandwidth_units - bandwidth unit types */ diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 5b2160d2f482..473467afea91 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -51,6 +51,7 @@ #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" #include "gateway_client.h" +#include "gateway_common.h" #include "hard-interface.h" #include "multicast.h" #include "originator.h" @@ -147,6 +148,10 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED] = { .type = NLA_U8 }, [BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED] = { .type = NLA_U8 }, [BATADV_ATTR_FRAGMENTATION_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_GW_BANDWIDTH_DOWN] = { .type = NLA_U32 }, + [BATADV_ATTR_GW_BANDWIDTH_UP] = { .type = NLA_U32 }, + [BATADV_ATTR_GW_MODE] = { .type = NLA_U8 }, + [BATADV_ATTR_GW_SEL_CLASS] = { .type = NLA_U32 }, }; /** @@ -303,6 +308,28 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg, !!atomic_read(&bat_priv->fragmentation))) goto nla_put_failure; + if (nla_put_u32(msg, BATADV_ATTR_GW_BANDWIDTH_DOWN, + atomic_read(&bat_priv->gw.bandwidth_down))) + goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_GW_BANDWIDTH_UP, + atomic_read(&bat_priv->gw.bandwidth_up))) + goto nla_put_failure; + + if (nla_put_u8(msg, BATADV_ATTR_GW_MODE, + atomic_read(&bat_priv->gw.mode))) + goto nla_put_failure; + + if (bat_priv->algo_ops->gw.get_best_gw_node && + bat_priv->algo_ops->gw.is_eligible) { + /* GW selection class is not available if the routing algorithm + * in use does not implement the GW API + */ + if (nla_put_u32(msg, BATADV_ATTR_GW_SEL_CLASS, + atomic_read(&bat_priv->gw.sel_class))) + goto nla_put_failure; + } + if (primary_if) batadv_hardif_put(primary_if); @@ -444,6 +471,71 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) batadv_update_min_mtu(bat_priv->soft_iface); } + if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]) { + attr = info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]; + + atomic_set(&bat_priv->gw.bandwidth_down, nla_get_u32(attr)); + batadv_gw_tvlv_container_update(bat_priv); + } + + if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_UP]) { + attr = info->attrs[BATADV_ATTR_GW_BANDWIDTH_UP]; + + atomic_set(&bat_priv->gw.bandwidth_up, nla_get_u32(attr)); + batadv_gw_tvlv_container_update(bat_priv); + } + + if (info->attrs[BATADV_ATTR_GW_MODE]) { + u8 gw_mode; + + attr = info->attrs[BATADV_ATTR_GW_MODE]; + gw_mode = nla_get_u8(attr); + + if (gw_mode <= BATADV_GW_MODE_SERVER) { + /* Invoking batadv_gw_reselect() is not enough to really + * de-select the current GW. It will only instruct the + * gateway client code to perform a re-election the next + * time that this is needed. + * + * When gw client mode is being switched off the current + * GW must be de-selected explicitly otherwise no GW_ADD + * uevent is thrown on client mode re-activation. This + * is operation is performed in + * batadv_gw_check_client_stop(). + */ + batadv_gw_reselect(bat_priv); + + /* always call batadv_gw_check_client_stop() before + * changing the gateway state + */ + batadv_gw_check_client_stop(bat_priv); + atomic_set(&bat_priv->gw.mode, gw_mode); + batadv_gw_tvlv_container_update(bat_priv); + } + } + + if (info->attrs[BATADV_ATTR_GW_SEL_CLASS] && + bat_priv->algo_ops->gw.get_best_gw_node && + bat_priv->algo_ops->gw.is_eligible) { + /* setting the GW selection class is allowed only if the routing + * algorithm in use implements the GW API + */ + + u32 sel_class_max = 0xffffffffu; + u32 sel_class; + + attr = info->attrs[BATADV_ATTR_GW_SEL_CLASS]; + sel_class = nla_get_u32(attr); + + if (!bat_priv->algo_ops->gw.store_sel_class) + sel_class_max = BATADV_TQ_MAX_VALUE; + + if (sel_class >= 1 && sel_class <= sel_class_max) { + atomic_set(&bat_priv->gw.sel_class, sel_class); + batadv_gw_reselect(bat_priv); + } + } + batadv_netlink_notify_mesh(bat_priv); return 0; diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index b14fb3462af7..12028c287de5 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -50,13 +50,13 @@ #include #include #include +#include #include "bat_algo.h" #include "bridge_loop_avoidance.h" #include "debugfs.h" #include "distributed-arp-table.h" #include "gateway_client.h" -#include "gateway_common.h" #include "hard-interface.h" #include "multicast.h" #include "network-coding.h" diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index e1b816262c53..64fd5932a555 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" From bfc7f1be57b8a5ea738ce5db62b82234e4901abf Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:19:38 +0100 Subject: [PATCH 0895/1436] batman-adv: Add hop_penalty mesh genl configuration The TQ (B.A.T.M.A.N. IV) and throughput values (B.A.T.M.A.N. V) are reduced when they are forwarded. One of the reductions is the penalty for traversing an additional hop. This hop_penalty (0-255) defines the percentage of reduction (0-100%). The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the u8 BATADV_ATTR_HOP_PENALTY attribute. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 6 ++++++ net/batman-adv/netlink.c | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 165272be6878..b37cb923332e 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -443,6 +443,12 @@ enum batadv_nl_attrs { */ BATADV_ATTR_GW_SEL_CLASS, + /** + * @BATADV_ATTR_HOP_PENALTY: defines the penalty which will be applied + * to an originator message's tq-field on every hop. + */ + BATADV_ATTR_HOP_PENALTY, + /* add attributes above here, update the policy in netlink.c */ /** diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 473467afea91..ce6e6f078765 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -152,6 +152,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_GW_BANDWIDTH_UP] = { .type = NLA_U32 }, [BATADV_ATTR_GW_MODE] = { .type = NLA_U8 }, [BATADV_ATTR_GW_SEL_CLASS] = { .type = NLA_U32 }, + [BATADV_ATTR_HOP_PENALTY] = { .type = NLA_U8 }, }; /** @@ -330,6 +331,10 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg, goto nla_put_failure; } + if (nla_put_u8(msg, BATADV_ATTR_HOP_PENALTY, + atomic_read(&bat_priv->hop_penalty))) + goto nla_put_failure; + if (primary_if) batadv_hardif_put(primary_if); @@ -536,6 +541,12 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) } } + if (info->attrs[BATADV_ATTR_HOP_PENALTY]) { + attr = info->attrs[BATADV_ATTR_HOP_PENALTY]; + + atomic_set(&bat_priv->hop_penalty, nla_get_u8(attr)); + } + batadv_netlink_notify_mesh(bat_priv); return 0; From b85bd091098a52f7bf00d2725b536455f82ba0d0 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:22:33 +0100 Subject: [PATCH 0896/1436] batman-adv: Add log_level mesh genl configuration In contrast to other modules, batman-adv allows to set the debug message verbosity per mesh/soft-interface and not per module (via modparam). The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the u32 (bitmask) BATADV_ATTR_LOG_LEVEL attribute. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 6 ++++++ net/batman-adv/netlink.c | 17 +++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index b37cb923332e..6d36e4b47eb4 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -449,6 +449,12 @@ enum batadv_nl_attrs { */ BATADV_ATTR_HOP_PENALTY, + /** + * @BATADV_ATTR_LOG_LEVEL: bitmask with to define which debug messages + * should be send to the debug log/trace ring buffer + */ + BATADV_ATTR_LOG_LEVEL, + /* add attributes above here, update the policy in netlink.c */ /** diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index ce6e6f078765..8c019d46815c 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -53,6 +53,7 @@ #include "gateway_client.h" #include "gateway_common.h" #include "hard-interface.h" +#include "log.h" #include "multicast.h" #include "originator.h" #include "soft-interface.h" @@ -153,6 +154,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_GW_MODE] = { .type = NLA_U8 }, [BATADV_ATTR_GW_SEL_CLASS] = { .type = NLA_U32 }, [BATADV_ATTR_HOP_PENALTY] = { .type = NLA_U8 }, + [BATADV_ATTR_LOG_LEVEL] = { .type = NLA_U32 }, }; /** @@ -335,6 +337,12 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg, atomic_read(&bat_priv->hop_penalty))) goto nla_put_failure; +#ifdef CONFIG_BATMAN_ADV_DEBUG + if (nla_put_u32(msg, BATADV_ATTR_LOG_LEVEL, + atomic_read(&bat_priv->log_level))) + goto nla_put_failure; +#endif /* CONFIG_BATMAN_ADV_DEBUG */ + if (primary_if) batadv_hardif_put(primary_if); @@ -547,6 +555,15 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) atomic_set(&bat_priv->hop_penalty, nla_get_u8(attr)); } +#ifdef CONFIG_BATMAN_ADV_DEBUG + if (info->attrs[BATADV_ATTR_LOG_LEVEL]) { + attr = info->attrs[BATADV_ATTR_LOG_LEVEL]; + + atomic_set(&bat_priv->log_level, + nla_get_u32(attr) & BATADV_DBG_ALL); + } +#endif /* CONFIG_BATMAN_ADV_DEBUG */ + batadv_netlink_notify_mesh(bat_priv); return 0; From f75b56bc91122e2934e2cb458f98727c41d535c7 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:25:05 +0100 Subject: [PATCH 0897/1436] batman-adv: Add multicast forceflood mesh genl configuration The mesh interface can optimize the flooding of multicast packets based on the content of the global translation tables. To disable this behavior and use the broadcast-like flooding of the packets, forceflood has to be enabled. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED attribute. Setting the u8 to zero will disable this feature (allowing multicast optimizations) and setting it to something else is enabling this feature (forcing simple flooding). Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 9 +++++++++ net/batman-adv/netlink.c | 15 +++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 6d36e4b47eb4..38caaaae8a05 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -455,6 +455,15 @@ enum batadv_nl_attrs { */ BATADV_ATTR_LOG_LEVEL, + /** + * @BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED: whether multicast + * optimizations should be replaced by simple broadcast-like flooding + * of multicast packets. If set to non-zero then all nodes in the mesh + * are going to use classic flooding for any multicast packet with no + * optimizations. + */ + BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED, + /* add attributes above here, update the policy in netlink.c */ /** diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 8c019d46815c..475bd15f806c 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -155,6 +155,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_GW_SEL_CLASS] = { .type = NLA_U32 }, [BATADV_ATTR_HOP_PENALTY] = { .type = NLA_U8 }, [BATADV_ATTR_LOG_LEVEL] = { .type = NLA_U32 }, + [BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED] = { .type = NLA_U8 }, }; /** @@ -343,6 +344,12 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg, goto nla_put_failure; #endif /* CONFIG_BATMAN_ADV_DEBUG */ +#ifdef CONFIG_BATMAN_ADV_MCAST + if (nla_put_u8(msg, BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED, + !atomic_read(&bat_priv->multicast_mode))) + goto nla_put_failure; +#endif /* CONFIG_BATMAN_ADV_MCAST */ + if (primary_if) batadv_hardif_put(primary_if); @@ -564,6 +571,14 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) } #endif /* CONFIG_BATMAN_ADV_DEBUG */ +#ifdef CONFIG_BATMAN_ADV_MCAST + if (info->attrs[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED]) { + attr = info->attrs[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED]; + + atomic_set(&bat_priv->multicast_mode, !nla_get_u8(attr)); + } +#endif /* CONFIG_BATMAN_ADV_MCAST */ + batadv_netlink_notify_mesh(bat_priv); return 0; From 6c57cde6800bae2361b8ac14a5924ffc592b3a90 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:26:14 +0100 Subject: [PATCH 0898/1436] batman-adv: Add network_coding mesh genl configuration The mesh interface can use (in an homogeneous mesh) network coding, a mechanism that aims to increase the overall network throughput by fusing multiple packets in one transmission. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the BATADV_ATTR_NETWORK_CODING_ENABLED attribute. Setting the u8 to zero will disable this feature and setting it to something else is enabling this feature. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 7 +++++++ net/batman-adv/netlink.c | 17 +++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 38caaaae8a05..a4239c147bde 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -464,6 +464,13 @@ enum batadv_nl_attrs { */ BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED, + /** + * @BATADV_ATTR_NETWORK_CODING_ENABLED: whether Network Coding (using + * some magic to send fewer wifi packets but still the same content) is + * enabled or not. + */ + BATADV_ATTR_NETWORK_CODING_ENABLED, + /* add attributes above here, update the policy in netlink.c */ /** diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 475bd15f806c..6f01f92e6ab3 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -55,6 +55,7 @@ #include "hard-interface.h" #include "log.h" #include "multicast.h" +#include "network-coding.h" #include "originator.h" #include "soft-interface.h" #include "tp_meter.h" @@ -156,6 +157,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_HOP_PENALTY] = { .type = NLA_U8 }, [BATADV_ATTR_LOG_LEVEL] = { .type = NLA_U32 }, [BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_NETWORK_CODING_ENABLED] = { .type = NLA_U8 }, }; /** @@ -350,6 +352,12 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg, goto nla_put_failure; #endif /* CONFIG_BATMAN_ADV_MCAST */ +#ifdef CONFIG_BATMAN_ADV_NC + if (nla_put_u8(msg, BATADV_ATTR_NETWORK_CODING_ENABLED, + !!atomic_read(&bat_priv->network_coding))) + goto nla_put_failure; +#endif /* CONFIG_BATMAN_ADV_NC */ + if (primary_if) batadv_hardif_put(primary_if); @@ -579,6 +587,15 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) } #endif /* CONFIG_BATMAN_ADV_MCAST */ +#ifdef CONFIG_BATMAN_ADV_NC + if (info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED]) { + attr = info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED]; + + atomic_set(&bat_priv->network_coding, !!nla_get_u8(attr)); + batadv_nc_status_update(bat_priv->soft_iface); + } +#endif /* CONFIG_BATMAN_ADV_NC */ + batadv_netlink_notify_mesh(bat_priv); return 0; From 7b751b39f018a828a04692e199c044087102e96c Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:28:02 +0100 Subject: [PATCH 0899/1436] batman-adv: Add orig_interval mesh genl configuration The OGM packets are transmitted every orig_interval milliseconds. This value can be changed using the configuration interface. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the configuration of this feature using the u32 BATADV_ATTR_ORIG_INTERVAL attribute. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 6 ++++++ net/batman-adv/netlink.c | 17 +++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index a4239c147bde..6bedd4889c37 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -471,6 +471,12 @@ enum batadv_nl_attrs { */ BATADV_ATTR_NETWORK_CODING_ENABLED, + /** + * @BATADV_ATTR_ORIG_INTERVAL: defines the interval in milliseconds in + * which batman sends its protocol messages. + */ + BATADV_ATTR_ORIG_INTERVAL, + /* add attributes above here, update the policy in netlink.c */ /** diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 6f01f92e6ab3..e25c139ba0e1 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -158,6 +158,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_LOG_LEVEL] = { .type = NLA_U32 }, [BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED] = { .type = NLA_U8 }, [BATADV_ATTR_NETWORK_CODING_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_ORIG_INTERVAL] = { .type = NLA_U32 }, }; /** @@ -358,6 +359,10 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg, goto nla_put_failure; #endif /* CONFIG_BATMAN_ADV_NC */ + if (nla_put_u32(msg, BATADV_ATTR_ORIG_INTERVAL, + atomic_read(&bat_priv->orig_interval))) + goto nla_put_failure; + if (primary_if) batadv_hardif_put(primary_if); @@ -596,6 +601,18 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) } #endif /* CONFIG_BATMAN_ADV_NC */ + if (info->attrs[BATADV_ATTR_ORIG_INTERVAL]) { + u32 orig_interval; + + attr = info->attrs[BATADV_ATTR_ORIG_INTERVAL]; + orig_interval = nla_get_u32(attr); + + orig_interval = min_t(u32, orig_interval, INT_MAX); + orig_interval = max_t(u32, orig_interval, 2 * BATADV_JITTER); + + atomic_set(&bat_priv->orig_interval, orig_interval); + } + batadv_netlink_notify_mesh(bat_priv); return 0; From a108008290405545b43b9c7975344bc59af2341b Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:30:04 +0100 Subject: [PATCH 0900/1436] batman-adv: Add elp_interval hardif genl configuration The ELP packets are transmitted every elp_interval milliseconds on an slave/hard-interface. This value can be changed using the configuration interface. The BATADV_CMD_SET_HARDIF/BATADV_CMD_GET_HARDIF commands allow to set/get the configuration of this feature using the u32 BATADV_ATTR_ELP_INTERVAL attribute. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 6 ++++++ net/batman-adv/netlink.c | 17 +++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 6bedd4889c37..f966e497361b 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -477,6 +477,12 @@ enum batadv_nl_attrs { */ BATADV_ATTR_ORIG_INTERVAL, + /** + * @BATADV_ATTR_ELP_INTERVAL: defines the interval in milliseconds in + * which batman emits probing packets for neighbor sensing (ELP). + */ + BATADV_ATTR_ELP_INTERVAL, + /* add attributes above here, update the policy in netlink.c */ /** diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index e25c139ba0e1..09187e6e92a3 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -159,6 +159,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED] = { .type = NLA_U8 }, [BATADV_ATTR_NETWORK_CODING_ENABLED] = { .type = NLA_U8 }, [BATADV_ATTR_ORIG_INTERVAL] = { .type = NLA_U32 }, + [BATADV_ATTR_ELP_INTERVAL] = { .type = NLA_U32 }, }; /** @@ -825,6 +826,12 @@ static int batadv_netlink_hardif_fill(struct sk_buff *msg, goto nla_put_failure; } +#ifdef CONFIG_BATMAN_ADV_BATMAN_V + if (nla_put_u32(msg, BATADV_ATTR_ELP_INTERVAL, + atomic_read(&hard_iface->bat_v.elp_interval))) + goto nla_put_failure; +#endif /* CONFIG_BATMAN_ADV_BATMAN_V */ + genlmsg_end(msg, hdr); return 0; @@ -910,6 +917,16 @@ static int batadv_netlink_set_hardif(struct sk_buff *skb, struct batadv_hard_iface *hard_iface = info->user_ptr[1]; struct batadv_priv *bat_priv = info->user_ptr[0]; +#ifdef CONFIG_BATMAN_ADV_BATMAN_V + struct nlattr *attr; + + if (info->attrs[BATADV_ATTR_ELP_INTERVAL]) { + attr = info->attrs[BATADV_ATTR_ELP_INTERVAL]; + + atomic_set(&hard_iface->bat_v.elp_interval, nla_get_u32(attr)); + } +#endif /* CONFIG_BATMAN_ADV_BATMAN_V */ + batadv_netlink_notify_hardif(bat_priv, hard_iface); return 0; From 9a182242f17c06fad620663e6fdf992e97661e66 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 13:31:23 +0100 Subject: [PATCH 0901/1436] batman-adv: Add throughput_override hardif genl configuration The B.A.T.M.A.N. V implementation tries to estimate the link throughput of an interface to an originator using different automatic methods. It is still possible to overwrite it the link throughput for all reachable originators via this interface. The BATADV_CMD_SET_HARDIF/BATADV_CMD_GET_HARDIF commands allow to set/get the configuration of this feature using the u32 BATADV_ATTR_THROUGHPUT_OVERRIDE attribute. The used unit is in 100 Kbit/s. If the value is set to 0 then batman-adv will try to estimate the throughput by itself. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 8 ++++++++ net/batman-adv/netlink.c | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index f966e497361b..305bf316dd03 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -483,6 +483,14 @@ enum batadv_nl_attrs { */ BATADV_ATTR_ELP_INTERVAL, + /** + * @BATADV_ATTR_THROUGHPUT_OVERRIDE: defines the throughput value to be + * used by B.A.T.M.A.N. V when estimating the link throughput using + * this interface. If the value is set to 0 then batman-adv will try to + * estimate the throughput by itself. + */ + BATADV_ATTR_THROUGHPUT_OVERRIDE, + /* add attributes above here, update the policy in netlink.c */ /** diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 09187e6e92a3..476b4c6017c9 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -160,6 +160,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_NETWORK_CODING_ENABLED] = { .type = NLA_U8 }, [BATADV_ATTR_ORIG_INTERVAL] = { .type = NLA_U32 }, [BATADV_ATTR_ELP_INTERVAL] = { .type = NLA_U32 }, + [BATADV_ATTR_THROUGHPUT_OVERRIDE] = { .type = NLA_U32 }, }; /** @@ -830,6 +831,10 @@ static int batadv_netlink_hardif_fill(struct sk_buff *msg, if (nla_put_u32(msg, BATADV_ATTR_ELP_INTERVAL, atomic_read(&hard_iface->bat_v.elp_interval))) goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_THROUGHPUT_OVERRIDE, + atomic_read(&hard_iface->bat_v.throughput_override))) + goto nla_put_failure; #endif /* CONFIG_BATMAN_ADV_BATMAN_V */ genlmsg_end(msg, hdr); @@ -925,6 +930,13 @@ static int batadv_netlink_set_hardif(struct sk_buff *skb, atomic_set(&hard_iface->bat_v.elp_interval, nla_get_u32(attr)); } + + if (info->attrs[BATADV_ATTR_THROUGHPUT_OVERRIDE]) { + attr = info->attrs[BATADV_ATTR_THROUGHPUT_OVERRIDE]; + + atomic_set(&hard_iface->bat_v.throughput_override, + nla_get_u32(attr)); + } #endif /* CONFIG_BATMAN_ADV_BATMAN_V */ batadv_netlink_notify_hardif(bat_priv, hard_iface); From 7e6f461efe2554e35b740f3faea2994fc9551947 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 23 Nov 2018 16:07:12 +0100 Subject: [PATCH 0902/1436] batman-adv: Trigger genl notification on sysfs config change The generic netlink code is expected to trigger notification messages when configuration might have been changed. But the configuration of batman-adv is most of the time still done using sysfs. So the sysfs interface should also trigger the corresponding netlink messages via the "config" multicast group. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/netlink.c | 10 +++---- net/batman-adv/netlink.h | 6 ++++ net/batman-adv/sysfs.c | 63 +++++++++++++++++++++++++++++++++------- 3 files changed, 63 insertions(+), 16 deletions(-) diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 476b4c6017c9..67a58da2e6a0 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -385,7 +385,7 @@ nla_put_failure: * * Return: 0 on success, < 0 on error */ -static int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv) +int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv) { struct sk_buff *msg; int ret; @@ -852,8 +852,8 @@ nla_put_failure: * * Return: 0 on success, < 0 on error */ -static int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv, - struct batadv_hard_iface *hard_iface) +int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface) { struct sk_buff *msg; int ret; @@ -1057,8 +1057,8 @@ nla_put_failure: * * Return: 0 on success, < 0 on error */ -static int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv, - struct batadv_softif_vlan *vlan) +int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv, + struct batadv_softif_vlan *vlan) { struct sk_buff *msg; int ret; diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h index 216484b8b82d..7273368544fc 100644 --- a/net/batman-adv/netlink.h +++ b/net/batman-adv/netlink.h @@ -34,6 +34,12 @@ int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst, u8 result, u32 test_time, u64 total_bytes, u32 cookie); +int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv); +int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface); +int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv, + struct batadv_softif_vlan *vlan); + extern struct genl_family batadv_netlink_family; #endif /* _NET_BATMAN_ADV_NETLINK_H_ */ diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 64fd5932a555..0b4b3fb778a6 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -48,6 +48,7 @@ #include "gateway_common.h" #include "hard-interface.h" #include "log.h" +#include "netlink.h" #include "network-coding.h" #include "soft-interface.h" @@ -154,9 +155,14 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ { \ struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ struct batadv_priv *bat_priv = netdev_priv(net_dev); \ + ssize_t length; \ \ - return __batadv_store_bool_attr(buff, count, _post_func, attr, \ - &bat_priv->_name, net_dev); \ + length = __batadv_store_bool_attr(buff, count, _post_func, attr,\ + &bat_priv->_name, net_dev); \ + \ + batadv_netlink_notify_mesh(bat_priv); \ + \ + return length; \ } #define BATADV_ATTR_SIF_SHOW_BOOL(_name) \ @@ -186,11 +192,16 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ { \ struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ struct batadv_priv *bat_priv = netdev_priv(net_dev); \ + ssize_t length; \ \ - return __batadv_store_uint_attr(buff, count, _min, _max, \ - _post_func, attr, \ - &bat_priv->_var, net_dev, \ - NULL); \ + length = __batadv_store_uint_attr(buff, count, _min, _max, \ + _post_func, attr, \ + &bat_priv->_var, net_dev, \ + NULL); \ + \ + batadv_netlink_notify_mesh(bat_priv); \ + \ + return length; \ } #define BATADV_ATTR_SIF_SHOW_UINT(_name, _var) \ @@ -223,6 +234,11 @@ ssize_t batadv_store_vlan_##_name(struct kobject *kobj, \ attr, &vlan->_name, \ bat_priv->soft_iface); \ \ + if (vlan->vid) \ + batadv_netlink_notify_vlan(bat_priv, vlan); \ + else \ + batadv_netlink_notify_mesh(bat_priv); \ + \ batadv_softif_vlan_put(vlan); \ return res; \ } @@ -256,6 +272,7 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ { \ struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ struct batadv_hard_iface *hard_iface; \ + struct batadv_priv *bat_priv; \ ssize_t length; \ \ hard_iface = batadv_hardif_get_by_netdev(net_dev); \ @@ -268,6 +285,11 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ hard_iface->soft_iface, \ net_dev); \ \ + if (hard_iface->soft_iface) { \ + bat_priv = netdev_priv(hard_iface->soft_iface); \ + batadv_netlink_notify_hardif(bat_priv, hard_iface); \ + } \ + \ batadv_hardif_put(hard_iface); \ return length; \ } @@ -537,6 +559,9 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj, batadv_gw_check_client_stop(bat_priv); atomic_set(&bat_priv->gw.mode, (unsigned int)gw_mode_tmp); batadv_gw_tvlv_container_update(bat_priv); + + batadv_netlink_notify_mesh(bat_priv); + return count; } @@ -563,6 +588,7 @@ static ssize_t batadv_store_gw_sel_class(struct kobject *kobj, size_t count) { struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + ssize_t length; /* setting the GW selection class is allowed only if the routing * algorithm in use implements the GW API @@ -578,10 +604,14 @@ static ssize_t batadv_store_gw_sel_class(struct kobject *kobj, return bat_priv->algo_ops->gw.store_sel_class(bat_priv, buff, count); - return __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE, - batadv_post_gw_reselect, attr, - &bat_priv->gw.sel_class, - bat_priv->soft_iface, NULL); + length = __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE, + batadv_post_gw_reselect, attr, + &bat_priv->gw.sel_class, + bat_priv->soft_iface, NULL); + + batadv_netlink_notify_mesh(bat_priv); + + return length; } static ssize_t batadv_show_gw_bwidth(struct kobject *kobj, @@ -601,12 +631,18 @@ static ssize_t batadv_store_gw_bwidth(struct kobject *kobj, struct attribute *attr, char *buff, size_t count) { + struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); struct net_device *net_dev = batadv_kobj_to_netdev(kobj); + ssize_t length; if (buff[count - 1] == '\n') buff[count - 1] = '\0'; - return batadv_gw_bandwidth_set(net_dev, buff, count); + length = batadv_gw_bandwidth_set(net_dev, buff, count); + + batadv_netlink_notify_mesh(bat_priv); + + return length; } /** @@ -674,6 +710,8 @@ static ssize_t batadv_store_isolation_mark(struct kobject *kobj, "New skb mark for extended isolation: %#.8x/%#.8x\n", bat_priv->isolation_mark, bat_priv->isolation_mark_mask); + batadv_netlink_notify_mesh(bat_priv); + return count; } @@ -1078,6 +1116,7 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, struct attribute *attr, char *buff, size_t count) { + struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); struct net_device *net_dev = batadv_kobj_to_netdev(kobj); struct batadv_hard_iface *hard_iface; u32 tp_override; @@ -1108,6 +1147,8 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, atomic_set(&hard_iface->bat_v.throughput_override, tp_override); + batadv_netlink_notify_hardif(bat_priv, hard_iface); + out: batadv_hardif_put(hard_iface); return count; From 628442880af8c201d307a45f3862a7a17df8a189 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 5 Feb 2019 13:52:26 -0800 Subject: [PATCH 0903/1436] Input: cap11xx - switch to using set_brightness_blocking() Updating LED state requires access to regmap and therefore we may sleep, so we could not do that directly form set_brightness() method. Historically we used private work to adjust the brightness, but with the introduction of set_brightness_blocking() we no longer need it. As a bonus, not having our own work item means we do not have use-after-free issue as we neglected to cancel outstanding work on driver unbind. Reported-by: Sven Van Asbroeck Reviewed-by: Sven Van Asbroeck Acked-by: Jacek Anaszewski Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/cap11xx.c | 39 +++++++++++--------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/drivers/input/keyboard/cap11xx.c b/drivers/input/keyboard/cap11xx.c index 312916f99597..73686c2460ce 100644 --- a/drivers/input/keyboard/cap11xx.c +++ b/drivers/input/keyboard/cap11xx.c @@ -75,9 +75,7 @@ struct cap11xx_led { struct cap11xx_priv *priv; struct led_classdev cdev; - struct work_struct work; u32 reg; - enum led_brightness new_brightness; }; #endif @@ -233,30 +231,21 @@ static void cap11xx_input_close(struct input_dev *idev) } #ifdef CONFIG_LEDS_CLASS -static void cap11xx_led_work(struct work_struct *work) -{ - struct cap11xx_led *led = container_of(work, struct cap11xx_led, work); - struct cap11xx_priv *priv = led->priv; - int value = led->new_brightness; - - /* - * All LEDs share the same duty cycle as this is a HW limitation. - * Brightness levels per LED are either 0 (OFF) and 1 (ON). - */ - regmap_update_bits(priv->regmap, CAP11XX_REG_LED_OUTPUT_CONTROL, - BIT(led->reg), value ? BIT(led->reg) : 0); -} - -static void cap11xx_led_set(struct led_classdev *cdev, - enum led_brightness value) +static int cap11xx_led_set(struct led_classdev *cdev, + enum led_brightness value) { struct cap11xx_led *led = container_of(cdev, struct cap11xx_led, cdev); + struct cap11xx_priv *priv = led->priv; - if (led->new_brightness == value) - return; - - led->new_brightness = value; - schedule_work(&led->work); + /* + * All LEDs share the same duty cycle as this is a HW + * limitation. Brightness levels per LED are either + * 0 (OFF) and 1 (ON). + */ + return regmap_update_bits(priv->regmap, + CAP11XX_REG_LED_OUTPUT_CONTROL, + BIT(led->reg), + value ? BIT(led->reg) : 0); } static int cap11xx_init_leds(struct device *dev, @@ -299,7 +288,7 @@ static int cap11xx_init_leds(struct device *dev, led->cdev.default_trigger = of_get_property(child, "linux,default-trigger", NULL); led->cdev.flags = 0; - led->cdev.brightness_set = cap11xx_led_set; + led->cdev.brightness_set_blocking = cap11xx_led_set; led->cdev.max_brightness = 1; led->cdev.brightness = LED_OFF; @@ -312,8 +301,6 @@ static int cap11xx_init_leds(struct device *dev, led->reg = reg; led->priv = priv; - INIT_WORK(&led->work, cap11xx_led_work); - error = devm_led_classdev_register(dev, &led->cdev); if (error) { of_node_put(child); From 33a841ce5cef4ca6c18ad333248b6d273f54c839 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 7 Feb 2019 14:22:42 -0800 Subject: [PATCH 0904/1436] Input: ps2-gpio - flush TX work when closing port To ensure that TX work is not running after serio port has been torn down, let's flush it when closing the port. Reported-by: Sven Van Asbroeck Acked-by: Danilo Krummrich Reviewed-by: Sven Van Asbroeck Signed-off-by: Dmitry Torokhov --- drivers/input/serio/ps2-gpio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/serio/ps2-gpio.c b/drivers/input/serio/ps2-gpio.c index c62cceb97bb1..5e8d8384aa2a 100644 --- a/drivers/input/serio/ps2-gpio.c +++ b/drivers/input/serio/ps2-gpio.c @@ -76,6 +76,7 @@ static void ps2_gpio_close(struct serio *serio) { struct ps2_gpio_data *drvdata = serio->port_data; + flush_delayed_work(&drvdata->tx_work); disable_irq(drvdata->irq); } From a342083abe576db43594a32d458a61fa81f7cb32 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 7 Feb 2019 14:39:40 -0800 Subject: [PATCH 0905/1436] Input: matrix_keypad - use flush_delayed_work() We should be using flush_delayed_work() instead of flush_work() in matrix_keypad_stop() to ensure that we are not missing work that is scheduled but not yet put in the workqueue (i.e. its delay timer has not expired yet). Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/matrix_keypad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c index 403452ef00e6..3d1cb7bf5e35 100644 --- a/drivers/input/keyboard/matrix_keypad.c +++ b/drivers/input/keyboard/matrix_keypad.c @@ -222,7 +222,7 @@ static void matrix_keypad_stop(struct input_dev *dev) keypad->stopped = true; spin_unlock_irq(&keypad->lock); - flush_work(&keypad->work.work); + flush_delayed_work(&keypad->work); /* * matrix_keypad_scan() will leave IRQs enabled; * we should disable them now. From 8497ded2d16caabad1d9be7e7ec9b085e48ddfe4 Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Sat, 9 Feb 2019 07:53:28 +0000 Subject: [PATCH 0906/1436] net/tls: Disable async decrytion for tls1.3 Function tls_sw_recvmsg() dequeues multiple records from stream parser and decrypts them. In case the decryption is done by async accelerator, the records may get submitted for decryption while the previous ones may not have been decryted yet. For tls1.3, the record type is known only after decryption. Therefore, for tls1.3, tls_sw_recvmsg() may submit records for decryption even if it gets 'handshake' records after 'data' records. These intermediate 'handshake' records may do a key updation. By the time new keys are given to ktls by userspace, it is possible that ktls has already submitted some records i(which are encrypted with new keys) for decryption using old keys. This would lead to decrypt failure. Therefore, async decryption of records should be disabled for tls1.3. Fixes: 130b392c6cd6b ("net: tls: Add tls 1.3 support") Signed-off-by: Vakul Garg Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 8051a9164139..fe8c287cbaa1 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2215,8 +2215,12 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) if (sw_ctx_rx) { tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv); - sw_ctx_rx->async_capable = - tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; + + if (crypto_info->version == TLS_1_3_VERSION) + sw_ctx_rx->async_capable = false; + else + sw_ctx_rx->async_capable = + tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; /* Set up strparser */ memset(&cb, 0, sizeof(cb)); From 50684da7427b3c36ce0ccf89814df28d9dbe3fe4 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 9 Feb 2019 09:46:53 +0100 Subject: [PATCH 0907/1436] net: phy: remove unneeded masking of PHY register read results PHY registers are only 16 bits wide, therefore, if the read was successful, there's no need to mask out the higher 16 bits. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index d4fc1fd8af41..31f9e7c491a1 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -676,13 +676,13 @@ static int get_phy_c45_devs_in_pkg(struct mii_bus *bus, int addr, int dev_addr, phy_reg = mdiobus_read(bus, addr, reg_addr); if (phy_reg < 0) return -EIO; - *devices_in_package = (phy_reg & 0xffff) << 16; + *devices_in_package = phy_reg << 16; reg_addr = MII_ADDR_C45 | dev_addr << 16 | MDIO_DEVS1; phy_reg = mdiobus_read(bus, addr, reg_addr); if (phy_reg < 0) return -EIO; - *devices_in_package |= (phy_reg & 0xffff); + *devices_in_package |= phy_reg; /* Bit 0 doesn't represent a device, it indicates c22 regs presence */ *devices_in_package &= ~BIT(0); @@ -746,13 +746,13 @@ static int get_phy_c45_ids(struct mii_bus *bus, int addr, u32 *phy_id, phy_reg = mdiobus_read(bus, addr, reg_addr); if (phy_reg < 0) return -EIO; - c45_ids->device_ids[i] = (phy_reg & 0xffff) << 16; + c45_ids->device_ids[i] = phy_reg << 16; reg_addr = MII_ADDR_C45 | i << 16 | MII_PHYSID2; phy_reg = mdiobus_read(bus, addr, reg_addr); if (phy_reg < 0) return -EIO; - c45_ids->device_ids[i] |= (phy_reg & 0xffff); + c45_ids->device_ids[i] |= phy_reg; } *phy_id = 0; return 0; @@ -789,14 +789,14 @@ static int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id, return (phy_reg == -EIO || phy_reg == -ENODEV) ? -ENODEV : -EIO; } - *phy_id = (phy_reg & 0xffff) << 16; + *phy_id = phy_reg << 16; /* Grab the bits from PHYIR2, and put them in the lower half */ phy_reg = mdiobus_read(bus, addr, MII_PHYSID2); if (phy_reg < 0) return -EIO; - *phy_id |= (phy_reg & 0xffff); + *phy_id |= phy_reg; return 0; } From 92ed2eb7f4b73e9944b432e5c8c65ec3ec00eb9d Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 9 Feb 2019 14:46:30 +0100 Subject: [PATCH 0908/1436] net: phy: probe the PHY before determining the supported features We will soon support asking the PHY at runtime to determine what features it supports, rather than forcing it to be compile time. But we should probe the PHY first. So probe the phy driver earlier. Signed-off-by: Andrew Lunn Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 31f9e7c491a1..92b7a71df0ac 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2220,6 +2220,18 @@ static int phy_probe(struct device *dev) mutex_lock(&phydev->lock); + if (phydev->drv->probe) { + /* Deassert the reset signal */ + phy_device_reset(phydev, 0); + + err = phydev->drv->probe(phydev); + if (err) { + /* Assert the reset signal */ + phy_device_reset(phydev, 1); + goto out; + } + } + /* Start out supporting everything. Eventually, * a controller will attach, and may modify one * or both of these values @@ -2267,17 +2279,7 @@ static int phy_probe(struct device *dev) /* Set the state to READY by default */ phydev->state = PHY_READY; - if (phydev->drv->probe) { - /* Deassert the reset signal */ - phy_device_reset(phydev, 0); - - err = phydev->drv->probe(phydev); - if (err) { - /* Assert the reset signal */ - phy_device_reset(phydev, 1); - } - } - +out: mutex_unlock(&phydev->lock); return err; From efbdfdc29bdd4dbf79ad4bddc8f7a5ac62c66bfe Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 9 Feb 2019 15:24:47 +0100 Subject: [PATCH 0909/1436] net: phy: Add support for asking the PHY its abilities Add support for runtime determination of what the PHY supports, by adding a new function to the phy driver. The get_features call should set the phydev->supported member with the features the PHY supports. It is only called if phydrv->features is NULL. This requires minor changes to pause. The PHY driver should not set pause abilities, except for when it has odd cause capabilities, e.g. pause cannot be disabled. With this change, phydev->supported already contains the drivers abilities, including pause. So rather than considering phydrv->features, look at the phydev->supported, and enable pause if neither of the pause bits are already set. Signed-off-by: Andrew Lunn [hkallweit1@gmail.com: fixed small checkpatch complaint in one comment] Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 31 +++++++++++++++---------------- include/linux/phy.h | 6 ++++++ 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 92b7a71df0ac..8573d17ece0f 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2236,7 +2236,14 @@ static int phy_probe(struct device *dev) * a controller will attach, and may modify one * or both of these values */ - linkmode_copy(phydev->supported, phydrv->features); + if (phydrv->features) { + linkmode_copy(phydev->supported, phydrv->features); + } else { + err = phydrv->get_features(phydev); + if (err) + goto out; + } + of_set_phy_supported(phydev); linkmode_copy(phydev->advertising, phydev->supported); @@ -2256,20 +2263,8 @@ static int phy_probe(struct device *dev) * (e.g. hardware erratum) where the driver wants to set only one * of these bits. */ - if (test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydrv->features) || - test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydrv->features)) { - linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phydev->supported); - linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydev->supported); - if (test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydrv->features)) - linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phydev->supported); - if (test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydrv->features)) - linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydev->supported); - } else { + if (!test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported) && + !test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->supported)) { linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported); linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, @@ -2315,7 +2310,11 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner) { int retval; - if (WARN_ON(!new_driver->features)) { + /* Either the features are hard coded, or dynamically + * determine. It cannot be both or neither + */ + if (WARN_ON((!new_driver->features && !new_driver->get_features) || + (new_driver->features && new_driver->get_features))) { pr_err("%s: Driver features are missing\n", new_driver->name); return -EINVAL; } diff --git a/include/linux/phy.h b/include/linux/phy.h index f41bf651f6a0..d2ffae992e4a 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -502,6 +502,12 @@ struct phy_driver { */ int (*probe)(struct phy_device *phydev); + /* + * Probe the hardware to determine what abilities it has. + * Should only set phydev->supported. + */ + int (*get_features)(struct phy_device *phydev); + /* PHY Power Management */ int (*suspend)(struct phy_device *phydev); int (*resume)(struct phy_device *phydev); From e240b7dbb73c4d5a2997356992a3cfe3ff99951a Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 9 Feb 2019 16:06:51 +0000 Subject: [PATCH 0910/1436] net: marvell: mvpp2: clear flow control modes in 10G mode When mvpp2 configures the flow control modes in mvpp2_xlg_config() for 10G mode, it only ever set the flow control enable bits. There is no mechanism to clear these bits, which means that userspace is unable to use standard APIs to disable flow control (the only way is to poke the register directly.) Fix the missing bit clearance to allow flow control to be disabled. This means that, by default, as there is no negotiation in 10G modes with mvpp2, flow control is now disabled rather than being rx-only. Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 8a2dd9104e37..eef8833e5aae 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4515,8 +4515,13 @@ static void mvpp2_xlg_config(struct mvpp2_port *port, unsigned int mode, if (state->pause & MLO_PAUSE_TX) ctrl0 |= MVPP22_XLG_CTRL0_TX_FLOW_CTRL_EN; + else + ctrl0 &= ~MVPP22_XLG_CTRL0_TX_FLOW_CTRL_EN; + if (state->pause & MLO_PAUSE_RX) ctrl0 |= MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN; + else + ctrl0 &= ~MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN; ctrl4 &= ~MVPP22_XLG_CTRL4_MACMODSELECT_GMAC; ctrl4 |= MVPP22_XLG_CTRL4_FWD_FC | MVPP22_XLG_CTRL4_FWD_PFC | From 132c4e9e6ac5d97185b03529a062bdf9d14dcb9d Mon Sep 17 00:00:00 2001 From: yupeng Date: Sat, 9 Feb 2019 14:46:18 -0800 Subject: [PATCH 0911/1436] add snmp counter document add document for tcp retransmission, tcp fast open, syn cookies, challenge ack, prune and several general counters Signed-off-by: yupeng Signed-off-by: David S. Miller --- Documentation/networking/snmp_counter.rst | 184 +++++++++++++++++++++- 1 file changed, 181 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/snmp_counter.rst b/Documentation/networking/snmp_counter.rst index c5642f430d2e..52b026be028f 100644 --- a/Documentation/networking/snmp_counter.rst +++ b/Documentation/networking/snmp_counter.rst @@ -367,16 +367,19 @@ to the accept queue. TCP Fast Open ============= * TcpEstabResets + Defined in `RFC1213 tcpEstabResets`_. .. _RFC1213 tcpEstabResets: https://tools.ietf.org/html/rfc1213#page-48 * TcpAttemptFails + Defined in `RFC1213 tcpAttemptFails`_. .. _RFC1213 tcpAttemptFails: https://tools.ietf.org/html/rfc1213#page-48 * TcpOutRsts + Defined in `RFC1213 tcpOutRsts`_. The RFC says this counter indicates the 'segments sent containing the RST flag', but in linux kernel, this couner indicates the segments kerenl tried to send. The sending @@ -384,6 +387,30 @@ process might be failed due to some errors (e.g. memory alloc failed). .. _RFC1213 tcpOutRsts: https://tools.ietf.org/html/rfc1213#page-52 +* TcpExtTCPSpuriousRtxHostQueues + +When the TCP stack wants to retransmit a packet, and finds that packet +is not lost in the network, but the packet is not sent yet, the TCP +stack would give up the retransmission and update this counter. It +might happen if a packet stays too long time in a qdisc or driver +queue. + +* TcpEstabResets + +The socket receives a RST packet in Establish or CloseWait state. + +* TcpExtTCPKeepAlive + +This counter indicates many keepalive packets were sent. The keepalive +won't be enabled by default. A userspace program could enable it by +setting the SO_KEEPALIVE socket option. + +* TcpExtTCPSpuriousRTOs + +The spurious retransmission timeout detected by the `F-RTO`_ +algorithm. + +.. _F-RTO: https://tools.ietf.org/html/rfc5682 TCP Fast Path ============ @@ -609,6 +636,29 @@ packet yet, the sender would know packet 4 is out of order. The TCP stack of kernel will increase TcpExtTCPSACKReorder for both of the above scenarios. +* TcpExtTCPSlowStartRetrans + +The TCP stack wants to retransmit a packet and the congestion control +state is 'Loss'. + +* TcpExtTCPFastRetrans + +The TCP stack wants to retransmit a packet and the congestion control +state is not 'Loss'. + +* TcpExtTCPLostRetransmit + +A SACK points out that a retransmission packet is lost again. + +* TcpExtTCPRetransFail + +The TCP stack tries to deliver a retransmission packet to lower layers +but the lower layers return an error. + +* TcpExtTCPSynRetrans + +The TCP stack retransmits a SYN packet. + DSACK ===== The DSACK is defined in `RFC2883`_. The receiver uses DSACK to report @@ -790,8 +840,9 @@ unacknowledged number (more strict than `RFC 5961 section 5.2`_). .. _RFC 5961 section 5.2: https://tools.ietf.org/html/rfc5961#page-11 TCP receive window -================= +================== * TcpExtTCPWantZeroWindowAdv + Depending on current memory usage, the TCP stack tries to set receive window to zero. But the receive window might still be a no-zero value. For example, if the previous window size is 10, and the TCP @@ -799,14 +850,16 @@ stack receives 3 bytes, the current window size would be 7 even if the window size calculated by the memory usage is zero. * TcpExtTCPToZeroWindowAdv + The TCP receive window is set to zero from a no-zero value. * TcpExtTCPFromZeroWindowAdv + The TCP receive window is set to no-zero value from zero. Delayed ACK -========== +=========== The TCP Delayed ACK is a technique which is used for reducing the packet count in the network. For more details, please refer the `Delayed ACK wiki`_ @@ -814,10 +867,12 @@ packet count in the network. For more details, please refer the .. _Delayed ACK wiki: https://en.wikipedia.org/wiki/TCP_delayed_acknowledgment * TcpExtDelayedACKs + A delayed ACK timer expires. The TCP stack will send a pure ACK packet and exit the delayed ACK mode. * TcpExtDelayedACKLocked + A delayed ACK timer expires, but the TCP stack can't send an ACK immediately due to the socket is locked by a userspace program. The TCP stack will send a pure ACK later (after the userspace program @@ -826,24 +881,147 @@ TCP stack will also update TcpExtDelayedACKs and exit the delayed ACK mode. * TcpExtDelayedACKLost + It will be updated when the TCP stack receives a packet which has been ACKed. A Delayed ACK loss might cause this issue, but it would also be triggered by other reasons, such as a packet is duplicated in the network. Tail Loss Probe (TLP) -=================== +===================== TLP is an algorithm which is used to detect TCP packet loss. For more details, please refer the `TLP paper`_. .. _TLP paper: https://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01 * TcpExtTCPLossProbes + A TLP probe packet is sent. * TcpExtTCPLossProbeRecovery + A packet loss is detected and recovered by TLP. +TCP Fast Open +============= +TCP Fast Open is a technology which allows data transfer before the +3-way handshake complete. Please refer the `TCP Fast Open wiki`_ for a +general description. + +.. _TCP Fast Open wiki: https://en.wikipedia.org/wiki/TCP_Fast_Open + +* TcpExtTCPFastOpenActive + +When the TCP stack receives an ACK packet in the SYN-SENT status, and +the ACK packet acknowledges the data in the SYN packet, the TCP stack +understand the TFO cookie is accepted by the other side, then it +updates this counter. + +* TcpExtTCPFastOpenActiveFail + +This counter indicates that the TCP stack initiated a TCP Fast Open, +but it failed. This counter would be updated in three scenarios: (1) +the other side doesn't acknowledge the data in the SYN packet. (2) The +SYN packet which has the TFO cookie is timeout at least once. (3) +after the 3-way handshake, the retransmission timeout happens +net.ipv4.tcp_retries1 times, because some middle-boxes may black-hole +fast open after the handshake. + +* TcpExtTCPFastOpenPassive + +This counter indicates how many times the TCP stack accepts the fast +open request. + +* TcpExtTCPFastOpenPassiveFail + +This counter indicates how many times the TCP stack rejects the fast +open request. It is caused by either the TFO cookie is invalid or the +TCP stack finds an error during the socket creating process. + +* TcpExtTCPFastOpenListenOverflow + +When the pending fast open request number is larger than +fastopenq->max_qlen, the TCP stack will reject the fast open request +and update this counter. When this counter is updated, the TCP stack +won't update TcpExtTCPFastOpenPassive or +TcpExtTCPFastOpenPassiveFail. The fastopenq->max_qlen is set by the +TCP_FASTOPEN socket operation and it could not be larger than +net.core.somaxconn. For example: + +setsockopt(sfd, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)); + +* TcpExtTCPFastOpenCookieReqd + +This counter indicates how many times a client wants to request a TFO +cookie. + +SYN cookies +=========== +SYN cookies are used to mitigate SYN flood, for details, please refer +the `SYN cookies wiki`_. + +.. _SYN cookies wiki: https://en.wikipedia.org/wiki/SYN_cookies + +* TcpExtSyncookiesSent + +It indicates how many SYN cookies are sent. + +* TcpExtSyncookiesRecv + +How many reply packets of the SYN cookies the TCP stack receives. + +* TcpExtSyncookiesFailed + +The MSS decoded from the SYN cookie is invalid. When this counter is +updated, the received packet won't be treated as a SYN cookie and the +TcpExtSyncookiesRecv counter wont be updated. + +Challenge ACK +============= +For details of challenge ACK, please refer the explaination of +TcpExtTCPACKSkippedChallenge. + +* TcpExtTCPChallengeACK + +The number of challenge acks sent. + +* TcpExtTCPSYNChallenge + +The number of challenge acks sent in response to SYN packets. After +updates this counter, the TCP stack might send a challenge ACK and +update the TcpExtTCPChallengeACK counter, or it might also skip to +send the challenge and update the TcpExtTCPACKSkippedChallenge. + +prune +===== +When a socket is under memory pressure, the TCP stack will try to +reclaim memory from the receiving queue and out of order queue. One of +the reclaiming method is 'collapse', which means allocate a big sbk, +copy the contiguous skbs to the single big skb, and free these +contiguous skbs. + +* TcpExtPruneCalled + +The TCP stack tries to reclaim memory for a socket. After updates this +counter, the TCP stack will try to collapse the out of order queue and +the receiving queue. If the memory is still not enough, the TCP stack +will try to discard packets from the out of order queue (and update the +TcpExtOfoPruned counter) + +* TcpExtOfoPruned + +The TCP stack tries to discard packet on the out of order queue. + +* TcpExtRcvPruned + +After 'collapse' and discard packets from the out of order queue, if +the actually used memory is still larger than the max allowed memory, +this counter will be updated. It means the 'prune' fails. + +* TcpExtTCPRcvCollapsed + +This counter indicates how many skbs are freed during 'collapse'. + examples ======== From ec26016b953778c1727040adb98a0132b9162334 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 8 Feb 2019 22:25:44 +0100 Subject: [PATCH 0912/1436] net: dsa: mv88e6xxx: SERDES support 2500BaseT via external PHY By using an external PHY, ports 9 and 10 can support 2500BaseT. So set this link mode in the mask when validating. Signed-off-by: Andrew Lunn Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 669a7f13fdce..39c780363764 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -647,8 +647,10 @@ static void mv88e6390_phylink_validate(struct mv88e6xxx_chip *chip, int port, unsigned long *mask, struct phylink_link_state *state) { - if (port >= 9) + if (port >= 9) { phylink_set(mask, 2500baseX_Full); + phylink_set(mask, 2500baseT_Full); + } /* No ethtool bits for 200Mbps */ phylink_set(mask, 1000baseT_Full); From 99c864667c9fe3d1113a2b906aa942d01c87e996 Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Fri, 8 Feb 2019 22:12:23 +0100 Subject: [PATCH 0913/1436] net: phy: aquantia: add support for AQCS109 Add support for the AQCS109. From software point of view, it should be almost equivalent to AQR107. v2: - make Nikita the author - document what I changed Signed-off-by: Nikita Yushchenko Signed-off-by: Andrew Lunn [hkallweit1@gmail.com: use PHY_ID_MATCH_MODEL mascro] Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/aquantia.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/phy/aquantia.c b/drivers/net/phy/aquantia.c index 482004efa62d..0f772a47a18e 100644 --- a/drivers/net/phy/aquantia.c +++ b/drivers/net/phy/aquantia.c @@ -17,6 +17,7 @@ #define PHY_ID_AQR105 0x03a1b4a2 #define PHY_ID_AQR106 0x03a1b4d0 #define PHY_ID_AQR107 0x03a1b4e0 +#define PHY_ID_AQCS109 0x03a1b5c2 #define PHY_ID_AQR405 0x03a1b4b0 #define MDIO_AN_TX_VEND_STATUS1 0xc800 @@ -202,6 +203,16 @@ static struct phy_driver aqr_driver[] = { .ack_interrupt = aqr_ack_interrupt, .read_status = aqr_read_status, }, +{ + PHY_ID_MATCH_MODEL(PHY_ID_AQCS109), + .name = "Aquantia AQCS109", + .features = PHY_10GBIT_FULL_FEATURES, + .aneg_done = genphy_c45_aneg_done, + .config_aneg = aqr_config_aneg, + .config_intr = aqr_config_intr, + .ack_interrupt = aqr_ack_interrupt, + .read_status = aqr_read_status, +}, { PHY_ID_MATCH_MODEL(PHY_ID_AQR405), .name = "Aquantia AQR405", @@ -222,6 +233,7 @@ static struct mdio_device_id __maybe_unused aqr_tbl[] = { { PHY_ID_MATCH_MODEL(PHY_ID_AQR105) }, { PHY_ID_MATCH_MODEL(PHY_ID_AQR106) }, { PHY_ID_MATCH_MODEL(PHY_ID_AQR107) }, + { PHY_ID_MATCH_MODEL(PHY_ID_AQCS109) }, { PHY_ID_MATCH_MODEL(PHY_ID_AQR405) }, { } }; From d623876646be119439999a229a2c3ce30fd197fb Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 8 Feb 2019 22:25:54 -0800 Subject: [PATCH 0914/1436] bpf: Fix narrow load on a bpf_sock returned from sk_lookup() By adding this test to test_verifier: { "reference tracking: access sk->src_ip4 (narrow load)", .insns = { BPF_SK_LOOKUP, BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, src_ip4) + 2), BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), BPF_EMIT_CALL(BPF_FUNC_sk_release), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, }, The above test loads 2 bytes from sk->src_ip4 where sk is obtained by bpf_sk_lookup_tcp(). It hits an internal verifier error from convert_ctx_accesses(): [root@arch-fb-vm1 bpf]# ./test_verifier 665 665 Failed to load prog 'Invalid argument'! 0: (b7) r2 = 0 1: (63) *(u32 *)(r10 -8) = r2 2: (7b) *(u64 *)(r10 -16) = r2 3: (7b) *(u64 *)(r10 -24) = r2 4: (7b) *(u64 *)(r10 -32) = r2 5: (7b) *(u64 *)(r10 -40) = r2 6: (7b) *(u64 *)(r10 -48) = r2 7: (bf) r2 = r10 8: (07) r2 += -48 9: (b7) r3 = 36 10: (b7) r4 = 0 11: (b7) r5 = 0 12: (85) call bpf_sk_lookup_tcp#84 13: (bf) r6 = r0 14: (15) if r0 == 0x0 goto pc+3 R0=sock(id=1,off=0,imm=0) R6=sock(id=1,off=0,imm=0) R10=fp0,call_-1 fp-8=????0000 fp-16=0000mmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm refs=1 15: (69) r2 = *(u16 *)(r0 +26) 16: (bf) r1 = r6 17: (85) call bpf_sk_release#86 18: (95) exit from 14 to 18: safe processed 20 insns (limit 131072), stack depth 48 bpf verifier is misconfigured Summary: 0 PASSED, 0 SKIPPED, 1 FAILED The bpf_sock_is_valid_access() is expecting src_ip4 can be narrowly loaded (meaning load any 1 or 2 bytes of the src_ip4) by marking info->ctx_field_size. However, this marked ctx_field_size is not used. This patch fixes it. Due to the recent refactoring in test_verifier, this new test will be added to the bpf-next branch (together with the bpf_tcp_sock patchset) to avoid merge conflict. Fixes: c64b7983288e ("bpf: Add PTR_TO_SOCKET verifier type") Cc: Joe Stringer Signed-off-by: Martin KaFai Lau Acked-by: Joe Stringer Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 56674a7c3778..8f295b790297 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1617,12 +1617,13 @@ static int check_flow_keys_access(struct bpf_verifier_env *env, int off, return 0; } -static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off, - int size, enum bpf_access_type t) +static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, + u32 regno, int off, int size, + enum bpf_access_type t) { struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *reg = ®s[regno]; - struct bpf_insn_access_aux info; + struct bpf_insn_access_aux info = {}; if (reg->smin_value < 0) { verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", @@ -1636,6 +1637,8 @@ static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off, return -EACCES; } + env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; + return 0; } @@ -2032,7 +2035,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn verbose(env, "cannot write into socket\n"); return -EACCES; } - err = check_sock_access(env, regno, off, size, t); + err = check_sock_access(env, insn_idx, regno, off, size, t); if (!err && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); } else { From 20e55bc17dd01f13cec0eb17e76e9511b23963ef Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Sun, 10 Feb 2019 08:40:56 +0100 Subject: [PATCH 0915/1436] x86/mm: Make set_pmd_at() paravirt aware set_pmd_at() calls native_set_pmd() unconditionally on x86. This was fine as long as only huge page entries were written via set_pmd_at(), as Xen pv guests don't support those. Commit 2c91bd4a4e2e53 ("mm: speed up mremap by 20x on large regions") introduced a usage of set_pmd_at() possible on pv guests, leading to failures like: BUG: unable to handle kernel paging request at ffff888023e26778 #PF error: [PROT] [WRITE] RIP: e030:move_page_tables+0x7c1/0xae0 move_vma.isra.3+0xd1/0x2d0 __se_sys_mremap+0x3c6/0x5b0 do_syscall_64+0x49/0x100 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Make set_pmd_at() paravirt aware by just letting it use set_pmd(). Fixes: 2c91bd4a4e2e53 ("mm: speed up mremap by 20x on large regions") Reported-by: Sander Eikelenboom Signed-off-by: Juergen Gross Signed-off-by: Thomas Gleixner Cc: xen-devel@lists.xenproject.org Cc: boris.ostrovsky@oracle.com Cc: sstabellini@kernel.org Cc: hpa@zytor.com Cc: bp@alien8.de Cc: torvalds@linux-foundation.org Link: https://lkml.kernel.org/r/20190210074056.11842-1-jgross@suse.com --- arch/x86/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 40616e805292..2779ace16d23 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1065,7 +1065,7 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { - native_set_pmd(pmdp, pmd); + set_pmd(pmdp, pmd); } static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, From 7fd56e0260a22c0cfaf9adb94a2427b76e239dd0 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Wed, 6 Feb 2019 12:01:16 -0200 Subject: [PATCH 0916/1436] drm/vkms: Fix license inconsistent Fixes license inconsistent related to the VKMS driver and remove the redundant boilerplate comment. Fixes: 854502fa0a38 ("drm/vkms: Add basic CRTC initialization") Cc: stable@vger.kernel.org Signed-off-by: Rodrigo Siqueira Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20190206140116.7qvy2lpwbcd7wds6@smtp.gmail.com --- drivers/gpu/drm/vkms/vkms_crc.c | 3 ++- drivers/gpu/drm/vkms/vkms_crtc.c | 8 +------- drivers/gpu/drm/vkms/vkms_drv.c | 7 +------ drivers/gpu/drm/vkms/vkms_drv.h | 2 ++ drivers/gpu/drm/vkms/vkms_gem.c | 8 +------- drivers/gpu/drm/vkms/vkms_output.c | 8 +------- drivers/gpu/drm/vkms/vkms_plane.c | 8 +------- 7 files changed, 9 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/vkms/vkms_crc.c b/drivers/gpu/drm/vkms/vkms_crc.c index 9d9e8146db90..d7b409a3c0f8 100644 --- a/drivers/gpu/drm/vkms/vkms_crc.c +++ b/drivers/gpu/drm/vkms/vkms_crc.c @@ -1,4 +1,5 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0+ + #include "vkms_drv.h" #include #include diff --git a/drivers/gpu/drm/vkms/vkms_crtc.c b/drivers/gpu/drm/vkms/vkms_crtc.c index 177bbcb38306..eb56ee893761 100644 --- a/drivers/gpu/drm/vkms/vkms_crtc.c +++ b/drivers/gpu/drm/vkms/vkms_crtc.c @@ -1,10 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ +// SPDX-License-Identifier: GPL-2.0+ #include "vkms_drv.h" #include diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c index 83087877565c..7dcbecb5fac2 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.c +++ b/drivers/gpu/drm/vkms/vkms_drv.c @@ -1,9 +1,4 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ +// SPDX-License-Identifier: GPL-2.0+ /** * DOC: vkms (Virtual Kernel Modesetting) diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h index e4469cd3d254..81f1cfbeb936 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.h +++ b/drivers/gpu/drm/vkms/vkms_drv.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + #ifndef _VKMS_DRV_H_ #define _VKMS_DRV_H_ diff --git a/drivers/gpu/drm/vkms/vkms_gem.c b/drivers/gpu/drm/vkms/vkms_gem.c index 80311daed47a..138b0bb325cf 100644 --- a/drivers/gpu/drm/vkms/vkms_gem.c +++ b/drivers/gpu/drm/vkms/vkms_gem.c @@ -1,10 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ +// SPDX-License-Identifier: GPL-2.0+ #include diff --git a/drivers/gpu/drm/vkms/vkms_output.c b/drivers/gpu/drm/vkms/vkms_output.c index 271a0eb9042c..4173e4f48334 100644 --- a/drivers/gpu/drm/vkms/vkms_output.c +++ b/drivers/gpu/drm/vkms/vkms_output.c @@ -1,10 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ +// SPDX-License-Identifier: GPL-2.0+ #include "vkms_drv.h" #include diff --git a/drivers/gpu/drm/vkms/vkms_plane.c b/drivers/gpu/drm/vkms/vkms_plane.c index 418817600ad1..0e67d2d42f0c 100644 --- a/drivers/gpu/drm/vkms/vkms_plane.c +++ b/drivers/gpu/drm/vkms/vkms_plane.c @@ -1,10 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ +// SPDX-License-Identifier: GPL-2.0+ #include "vkms_drv.h" #include From 257eeded20b34219d5484cfc415b3e39093f37b8 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 10 Feb 2019 14:24:59 +0200 Subject: [PATCH 0917/1436] net: Move all TC actions identifiers to one place Move all the TC identifiers to one place, to the same enum that defines the identifier of police action. This makes it easier choose numbers for new actions since they are now defined in one place. We preserve the original values for binary compatibility. New IDs should be added inside the enum. Signed-off-by: Eli Cohen Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 43 +++++++++++++++++++++-- include/uapi/linux/tc_act/tc_bpf.h | 2 -- include/uapi/linux/tc_act/tc_connmark.h | 2 -- include/uapi/linux/tc_act/tc_csum.h | 2 -- include/uapi/linux/tc_act/tc_gact.h | 1 - include/uapi/linux/tc_act/tc_ife.h | 1 - include/uapi/linux/tc_act/tc_ipt.h | 3 -- include/uapi/linux/tc_act/tc_mirred.h | 1 - include/uapi/linux/tc_act/tc_nat.h | 2 -- include/uapi/linux/tc_act/tc_pedit.h | 2 -- include/uapi/linux/tc_act/tc_sample.h | 2 -- include/uapi/linux/tc_act/tc_skbedit.h | 2 -- include/uapi/linux/tc_act/tc_skbmod.h | 2 -- include/uapi/linux/tc_act/tc_tunnel_key.h | 2 -- include/uapi/linux/tc_act/tc_vlan.h | 2 -- net/sched/act_simple.c | 2 -- tools/include/uapi/linux/tc_act/tc_bpf.h | 2 -- 17 files changed, 40 insertions(+), 33 deletions(-) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 02ac251be8c4..7ab55f97e7c4 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -63,12 +63,49 @@ enum { #define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2) #define TC_ACT_EXT_OPCODE_MAX TC_ACT_GOTO_CHAIN +/* These macros are put here for binary compatibility with userspace apps that + * make use of them. For kernel code and new userspace apps, use the TCA_ID_* + * versions. + */ +#define TCA_ACT_GACT 5 +#define TCA_ACT_IPT 6 +#define TCA_ACT_PEDIT 7 +#define TCA_ACT_MIRRED 8 +#define TCA_ACT_NAT 9 +#define TCA_ACT_XT 10 +#define TCA_ACT_SKBEDIT 11 +#define TCA_ACT_VLAN 12 +#define TCA_ACT_BPF 13 +#define TCA_ACT_CONNMARK 14 +#define TCA_ACT_SKBMOD 15 +#define TCA_ACT_CSUM 16 +#define TCA_ACT_TUNNEL_KEY 17 +#define TCA_ACT_SIMP 22 +#define TCA_ACT_IFE 25 +#define TCA_ACT_SAMPLE 26 + /* Action type identifiers*/ enum { - TCA_ID_UNSPEC=0, - TCA_ID_POLICE=1, + TCA_ID_UNSPEC = 0, + TCA_ID_POLICE = 1, + TCA_ID_GACT = TCA_ACT_GACT, + TCA_ID_IPT = TCA_ACT_IPT, + TCA_ID_PEDIT = TCA_ACT_PEDIT, + TCA_ID_MIRRED = TCA_ACT_MIRRED, + TCA_ID_NAT = TCA_ACT_NAT, + TCA_ID_XT = TCA_ACT_XT, + TCA_ID_SKBEDIT = TCA_ACT_SKBEDIT, + TCA_ID_VLAN = TCA_ACT_VLAN, + TCA_ID_BPF = TCA_ACT_BPF, + TCA_ID_CONNMARK = TCA_ACT_CONNMARK, + TCA_ID_SKBMOD = TCA_ACT_SKBMOD, + TCA_ID_CSUM = TCA_ACT_CSUM, + TCA_ID_TUNNEL_KEY = TCA_ACT_TUNNEL_KEY, + TCA_ID_SIMP = TCA_ACT_SIMP, + TCA_ID_IFE = TCA_ACT_IFE, + TCA_ID_SAMPLE = TCA_ACT_SAMPLE, /* other actions go here */ - __TCA_ID_MAX=255 + __TCA_ID_MAX = 255 }; #define TCA_ID_MAX __TCA_ID_MAX diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h index 6e89a5df49a4..653c4f94f76e 100644 --- a/include/uapi/linux/tc_act/tc_bpf.h +++ b/include/uapi/linux/tc_act/tc_bpf.h @@ -13,8 +13,6 @@ #include -#define TCA_ACT_BPF 13 - struct tc_act_bpf { tc_gen; }; diff --git a/include/uapi/linux/tc_act/tc_connmark.h b/include/uapi/linux/tc_act/tc_connmark.h index 80caa47b1933..9f8f6f709feb 100644 --- a/include/uapi/linux/tc_act/tc_connmark.h +++ b/include/uapi/linux/tc_act/tc_connmark.h @@ -5,8 +5,6 @@ #include #include -#define TCA_ACT_CONNMARK 14 - struct tc_connmark { tc_gen; __u16 zone; diff --git a/include/uapi/linux/tc_act/tc_csum.h b/include/uapi/linux/tc_act/tc_csum.h index 0ecf4d29e2f3..94b2044929de 100644 --- a/include/uapi/linux/tc_act/tc_csum.h +++ b/include/uapi/linux/tc_act/tc_csum.h @@ -5,8 +5,6 @@ #include #include -#define TCA_ACT_CSUM 16 - enum { TCA_CSUM_UNSPEC, TCA_CSUM_PARMS, diff --git a/include/uapi/linux/tc_act/tc_gact.h b/include/uapi/linux/tc_act/tc_gact.h index 94273c3b81b0..37e5392e02c7 100644 --- a/include/uapi/linux/tc_act/tc_gact.h +++ b/include/uapi/linux/tc_act/tc_gact.h @@ -5,7 +5,6 @@ #include #include -#define TCA_ACT_GACT 5 struct tc_gact { tc_gen; diff --git a/include/uapi/linux/tc_act/tc_ife.h b/include/uapi/linux/tc_act/tc_ife.h index 2f48490ef386..8c401f185675 100644 --- a/include/uapi/linux/tc_act/tc_ife.h +++ b/include/uapi/linux/tc_act/tc_ife.h @@ -6,7 +6,6 @@ #include #include -#define TCA_ACT_IFE 25 /* Flag bits for now just encoding/decoding; mutually exclusive */ #define IFE_ENCODE 1 #define IFE_DECODE 0 diff --git a/include/uapi/linux/tc_act/tc_ipt.h b/include/uapi/linux/tc_act/tc_ipt.h index b743c8bddd13..c48d7da6750d 100644 --- a/include/uapi/linux/tc_act/tc_ipt.h +++ b/include/uapi/linux/tc_act/tc_ipt.h @@ -4,9 +4,6 @@ #include -#define TCA_ACT_IPT 6 -#define TCA_ACT_XT 10 - enum { TCA_IPT_UNSPEC, TCA_IPT_TABLE, diff --git a/include/uapi/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h index 5dd671cf5776..2500a0005d05 100644 --- a/include/uapi/linux/tc_act/tc_mirred.h +++ b/include/uapi/linux/tc_act/tc_mirred.h @@ -5,7 +5,6 @@ #include #include -#define TCA_ACT_MIRRED 8 #define TCA_EGRESS_REDIR 1 /* packet redirect to EGRESS*/ #define TCA_EGRESS_MIRROR 2 /* mirror packet to EGRESS */ #define TCA_INGRESS_REDIR 3 /* packet redirect to INGRESS*/ diff --git a/include/uapi/linux/tc_act/tc_nat.h b/include/uapi/linux/tc_act/tc_nat.h index 086be842587b..21399c2c6130 100644 --- a/include/uapi/linux/tc_act/tc_nat.h +++ b/include/uapi/linux/tc_act/tc_nat.h @@ -5,8 +5,6 @@ #include #include -#define TCA_ACT_NAT 9 - enum { TCA_NAT_UNSPEC, TCA_NAT_PARMS, diff --git a/include/uapi/linux/tc_act/tc_pedit.h b/include/uapi/linux/tc_act/tc_pedit.h index 24ec792dacc1..f3e61b04fa01 100644 --- a/include/uapi/linux/tc_act/tc_pedit.h +++ b/include/uapi/linux/tc_act/tc_pedit.h @@ -5,8 +5,6 @@ #include #include -#define TCA_ACT_PEDIT 7 - enum { TCA_PEDIT_UNSPEC, TCA_PEDIT_TM, diff --git a/include/uapi/linux/tc_act/tc_sample.h b/include/uapi/linux/tc_act/tc_sample.h index bd7e9f03abd2..fee1bcc20793 100644 --- a/include/uapi/linux/tc_act/tc_sample.h +++ b/include/uapi/linux/tc_act/tc_sample.h @@ -6,8 +6,6 @@ #include #include -#define TCA_ACT_SAMPLE 26 - struct tc_sample { tc_gen; }; diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h index 6de6071ebed6..800e93377218 100644 --- a/include/uapi/linux/tc_act/tc_skbedit.h +++ b/include/uapi/linux/tc_act/tc_skbedit.h @@ -23,8 +23,6 @@ #include -#define TCA_ACT_SKBEDIT 11 - #define SKBEDIT_F_PRIORITY 0x1 #define SKBEDIT_F_QUEUE_MAPPING 0x2 #define SKBEDIT_F_MARK 0x4 diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h index 38c072f66f2f..c525b3503797 100644 --- a/include/uapi/linux/tc_act/tc_skbmod.h +++ b/include/uapi/linux/tc_act/tc_skbmod.h @@ -13,8 +13,6 @@ #include -#define TCA_ACT_SKBMOD 15 - #define SKBMOD_F_DMAC 0x1 #define SKBMOD_F_SMAC 0x2 #define SKBMOD_F_ETYPE 0x4 diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h index be384d63e1b5..41c8b462c177 100644 --- a/include/uapi/linux/tc_act/tc_tunnel_key.h +++ b/include/uapi/linux/tc_act/tc_tunnel_key.h @@ -14,8 +14,6 @@ #include -#define TCA_ACT_TUNNEL_KEY 17 - #define TCA_TUNNEL_KEY_ACT_SET 1 #define TCA_TUNNEL_KEY_ACT_RELEASE 2 diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h index 0d7b5fd6605b..168995b54a70 100644 --- a/include/uapi/linux/tc_act/tc_vlan.h +++ b/include/uapi/linux/tc_act/tc_vlan.h @@ -13,8 +13,6 @@ #include -#define TCA_ACT_VLAN 12 - #define TCA_VLAN_ACT_POP 1 #define TCA_VLAN_ACT_PUSH 2 #define TCA_VLAN_ACT_MODIFY 3 diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 902957beceb3..b2b16d440154 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -19,8 +19,6 @@ #include #include -#define TCA_ACT_SIMP 22 - #include #include diff --git a/tools/include/uapi/linux/tc_act/tc_bpf.h b/tools/include/uapi/linux/tc_act/tc_bpf.h index 6e89a5df49a4..653c4f94f76e 100644 --- a/tools/include/uapi/linux/tc_act/tc_bpf.h +++ b/tools/include/uapi/linux/tc_act/tc_bpf.h @@ -13,8 +13,6 @@ #include -#define TCA_ACT_BPF 13 - struct tc_act_bpf { tc_gen; }; From eddd2cf195d6fb5e4bbc91a0fe4be55110f559ab Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 10 Feb 2019 14:25:00 +0200 Subject: [PATCH 0918/1436] net: Change TCA_ACT_* to TCA_ID_* to match that of TCA_ID_POLICE Modify the kernel users of the TCA_ACT_* macros to use TCA_ID_*. For example, use TCA_ID_GACT instead of TCA_ACT_GACT. This will align with TCA_ID_POLICE and also differentiates these identifier, used in struct tc_action_ops type field, from other macros starting with TCA_ACT_. To make things clearer, we name the enum defining the TCA_ID_* identifiers and also change the "type" field of struct tc_action to id. Signed-off-by: Eli Cohen Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/act_api.h | 2 +- include/net/tc_act/tc_csum.h | 2 +- include/net/tc_act/tc_gact.h | 2 +- include/net/tc_act/tc_mirred.h | 4 ++-- include/net/tc_act/tc_pedit.h | 2 +- include/net/tc_act/tc_sample.h | 2 +- include/net/tc_act/tc_skbedit.h | 2 +- include/net/tc_act/tc_tunnel_key.h | 4 ++-- include/net/tc_act/tc_vlan.h | 2 +- include/uapi/linux/pkt_cls.h | 2 +- net/sched/act_api.c | 2 +- net/sched/act_bpf.c | 2 +- net/sched/act_connmark.c | 2 +- net/sched/act_csum.c | 2 +- net/sched/act_gact.c | 2 +- net/sched/act_ife.c | 2 +- net/sched/act_ipt.c | 4 ++-- net/sched/act_mirred.c | 2 +- net/sched/act_nat.c | 2 +- net/sched/act_pedit.c | 2 +- net/sched/act_police.c | 2 +- net/sched/act_sample.c | 2 +- net/sched/act_simple.c | 2 +- net/sched/act_skbedit.c | 2 +- net/sched/act_skbmod.c | 2 +- net/sched/act_tunnel_key.c | 2 +- net/sched/act_vlan.c | 2 +- 27 files changed, 30 insertions(+), 30 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index dbc795ec659e..c745e9ccfab2 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -80,7 +80,7 @@ static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm) struct tc_action_ops { struct list_head head; char kind[IFNAMSIZ]; - __u32 type; /* TBD to match kind */ + enum tca_id id; /* identifier should match kind */ size_t size; struct module *owner; int (*act)(struct sk_buff *, const struct tc_action *, diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h index 32d2454c0479..68269e4581b7 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -21,7 +21,7 @@ struct tcf_csum { static inline bool is_tcf_csum(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT - if (a->ops && a->ops->type == TCA_ACT_CSUM) + if (a->ops && a->ops->id == TCA_ID_CSUM) return true; #endif return false; diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index ef8dd0db70ce..ee8d005f56fc 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -22,7 +22,7 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act, #ifdef CONFIG_NET_CLS_ACT struct tcf_gact *gact; - if (a->ops && a->ops->type != TCA_ACT_GACT) + if (a->ops && a->ops->id != TCA_ID_GACT) return false; gact = to_gact(a); diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index a2e9cbca5c9e..c757585a05b0 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -17,7 +17,7 @@ struct tcf_mirred { static inline bool is_tcf_mirred_egress_redirect(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT - if (a->ops && a->ops->type == TCA_ACT_MIRRED) + if (a->ops && a->ops->id == TCA_ID_MIRRED) return to_mirred(a)->tcfm_eaction == TCA_EGRESS_REDIR; #endif return false; @@ -26,7 +26,7 @@ static inline bool is_tcf_mirred_egress_redirect(const struct tc_action *a) static inline bool is_tcf_mirred_egress_mirror(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT - if (a->ops && a->ops->type == TCA_ACT_MIRRED) + if (a->ops && a->ops->id == TCA_ID_MIRRED) return to_mirred(a)->tcfm_eaction == TCA_EGRESS_MIRROR; #endif return false; diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index fac3ad4a86de..748cf87a4d7e 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -23,7 +23,7 @@ struct tcf_pedit { static inline bool is_tcf_pedit(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT - if (a->ops && a->ops->type == TCA_ACT_PEDIT) + if (a->ops && a->ops->id == TCA_ID_PEDIT) return true; #endif return false; diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h index 01dbfea32672..0a559d4b6f0f 100644 --- a/include/net/tc_act/tc_sample.h +++ b/include/net/tc_act/tc_sample.h @@ -20,7 +20,7 @@ struct tcf_sample { static inline bool is_tcf_sample(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT - return a->ops && a->ops->type == TCA_ACT_SAMPLE; + return a->ops && a->ops->id == TCA_ID_SAMPLE; #else return false; #endif diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index 911bbac838a2..85c5c4756d92 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -44,7 +44,7 @@ static inline bool is_tcf_skbedit_mark(const struct tc_action *a) #ifdef CONFIG_NET_CLS_ACT u32 flags; - if (a->ops && a->ops->type == TCA_ACT_SKBEDIT) { + if (a->ops && a->ops->id == TCA_ID_SKBEDIT) { rcu_read_lock(); flags = rcu_dereference(to_skbedit(a)->params)->flags; rcu_read_unlock(); diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index 46b8c7f1c8d5..23d5b8b19f3e 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -34,7 +34,7 @@ static inline bool is_tcf_tunnel_set(const struct tc_action *a) struct tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); - if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY) + if (a->ops && a->ops->id == TCA_ID_TUNNEL_KEY) return params->tcft_action == TCA_TUNNEL_KEY_ACT_SET; #endif return false; @@ -46,7 +46,7 @@ static inline bool is_tcf_tunnel_release(const struct tc_action *a) struct tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); - if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY) + if (a->ops && a->ops->id == TCA_ID_TUNNEL_KEY) return params->tcft_action == TCA_TUNNEL_KEY_ACT_RELEASE; #endif return false; diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index 22ae260d6869..fe39ed502bef 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -30,7 +30,7 @@ struct tcf_vlan { static inline bool is_tcf_vlan(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT - if (a->ops && a->ops->type == TCA_ACT_VLAN) + if (a->ops && a->ops->id == TCA_ID_VLAN) return true; #endif return false; diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 7ab55f97e7c4..51a0496f78ea 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -85,7 +85,7 @@ enum { #define TCA_ACT_SAMPLE 26 /* Action type identifiers*/ -enum { +enum tca_id { TCA_ID_UNSPEC = 0, TCA_ID_POLICE = 1, TCA_ID_GACT = TCA_ACT_GACT, diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d4b8355737d8..aecf1bf233c8 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -543,7 +543,7 @@ int tcf_register_action(struct tc_action_ops *act, write_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { - if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) { + if (act->id == a->id || (strcmp(act->kind, a->kind) == 0)) { write_unlock(&act_mod_lock); unregister_pernet_subsys(ops); return -EEXIST; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index c7633843e223..aa5c38d11a30 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -396,7 +396,7 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_bpf_ops __read_mostly = { .kind = "bpf", - .type = TCA_ACT_BPF, + .id = TCA_ID_BPF, .owner = THIS_MODULE, .act = tcf_bpf_act, .dump = tcf_bpf_dump, diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 8475913f2070..5d24993cccfe 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -204,7 +204,7 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_connmark_ops = { .kind = "connmark", - .type = TCA_ACT_CONNMARK, + .id = TCA_ID_CONNMARK, .owner = THIS_MODULE, .act = tcf_connmark_act, .dump = tcf_connmark_dump, diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 3dc25b7806d7..945fb34ae721 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -660,7 +660,7 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act) static struct tc_action_ops act_csum_ops = { .kind = "csum", - .type = TCA_ACT_CSUM, + .id = TCA_ID_CSUM, .owner = THIS_MODULE, .act = tcf_csum_act, .dump = tcf_csum_dump, diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index b61c20ebb314..93da0004e9f4 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -253,7 +253,7 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act) static struct tc_action_ops act_gact_ops = { .kind = "gact", - .type = TCA_ACT_GACT, + .id = TCA_ID_GACT, .owner = THIS_MODULE, .act = tcf_gact_act, .stats_update = tcf_gact_stats_update, diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 30b63fa23ee2..9b1f2b3990ee 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -864,7 +864,7 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_ife_ops = { .kind = "ife", - .type = TCA_ACT_IFE, + .id = TCA_ID_IFE, .owner = THIS_MODULE, .act = tcf_ife_act, .dump = tcf_ife_dump, diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 8af6c11d2482..1bad190710ad 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -338,7 +338,7 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_ipt_ops = { .kind = "ipt", - .type = TCA_ACT_IPT, + .id = TCA_ID_IPT, .owner = THIS_MODULE, .act = tcf_ipt_act, .dump = tcf_ipt_dump, @@ -387,7 +387,7 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_xt_ops = { .kind = "xt", - .type = TCA_ACT_XT, + .id = TCA_ID_XT, .owner = THIS_MODULE, .act = tcf_ipt_act, .dump = tcf_ipt_dump, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index c8cf4d10c435..6692fd054617 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -400,7 +400,7 @@ static void tcf_mirred_put_dev(struct net_device *dev) static struct tc_action_ops act_mirred_ops = { .kind = "mirred", - .type = TCA_ACT_MIRRED, + .id = TCA_ID_MIRRED, .owner = THIS_MODULE, .act = tcf_mirred_act, .stats_update = tcf_stats_update, diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index c5c1e23add77..543eab9193f1 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -304,7 +304,7 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_nat_ops = { .kind = "nat", - .type = TCA_ACT_NAT, + .id = TCA_ID_NAT, .owner = THIS_MODULE, .act = tcf_nat_act, .dump = tcf_nat_dump, diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 3663d3b615a4..a80373878df7 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -470,7 +470,7 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_pedit_ops = { .kind = "pedit", - .type = TCA_ACT_PEDIT, + .id = TCA_ID_PEDIT, .owner = THIS_MODULE, .act = tcf_pedit_act, .dump = tcf_pedit_dump, diff --git a/net/sched/act_police.c b/net/sched/act_police.c index ec8ec55e0fe8..8271a6263824 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -366,7 +366,7 @@ MODULE_LICENSE("GPL"); static struct tc_action_ops act_police_ops = { .kind = "police", - .type = TCA_ID_POLICE, + .id = TCA_ID_POLICE, .owner = THIS_MODULE, .act = tcf_police_act, .dump = tcf_police_dump, diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 1a0c682fd734..203e399e5c85 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -233,7 +233,7 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_sample_ops = { .kind = "sample", - .type = TCA_ACT_SAMPLE, + .id = TCA_ID_SAMPLE, .owner = THIS_MODULE, .act = tcf_sample_act, .dump = tcf_sample_dump, diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index b2b16d440154..d54cb608dbaf 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -195,7 +195,7 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_simp_ops = { .kind = "simple", - .type = TCA_ACT_SIMP, + .id = TCA_ID_SIMP, .owner = THIS_MODULE, .act = tcf_simp_act, .dump = tcf_simp_dump, diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 64dba3708fce..39f8a67ea940 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -305,7 +305,7 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_skbedit_ops = { .kind = "skbedit", - .type = TCA_ACT_SKBEDIT, + .id = TCA_ID_SKBEDIT, .owner = THIS_MODULE, .act = tcf_skbedit_act, .dump = tcf_skbedit_dump, diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 59710a183bd3..7bac1d78e7a3 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -260,7 +260,7 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_skbmod_ops = { .kind = "skbmod", - .type = TCA_ACT_SKBMOD, + .id = TCA_ACT_SKBMOD, .owner = THIS_MODULE, .act = tcf_skbmod_act, .dump = tcf_skbmod_dump, diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 8b43fe0130f7..9104b8e36482 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -563,7 +563,7 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_tunnel_key_ops = { .kind = "tunnel_key", - .type = TCA_ACT_TUNNEL_KEY, + .id = TCA_ID_TUNNEL_KEY, .owner = THIS_MODULE, .act = tunnel_key_act, .dump = tunnel_key_dump, diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 93fdaf707313..ac0061599225 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -297,7 +297,7 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_vlan_ops = { .kind = "vlan", - .type = TCA_ACT_VLAN, + .id = TCA_ID_VLAN, .owner = THIS_MODULE, .act = tcf_vlan_act, .dump = tcf_vlan_dump, From b8554d4f7288f86fb278e0bc7b5b19579bf16b69 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 10 Feb 2019 19:57:56 +0100 Subject: [PATCH 0919/1436] net: phy: add register modifying helpers returning 1 on change When modifying registers there are scenarios where we need to know whether the register content actually changed. This patch adds new helpers to not break users of the current ones, phy_modify() etc. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy-core.c | 129 ++++++++++++++++++++++++++++++++++--- include/linux/phy.h | 12 +++- 2 files changed, 129 insertions(+), 12 deletions(-) diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 7d6aad287f84..cdea028d1328 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -531,7 +531,7 @@ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val) EXPORT_SYMBOL(phy_write_mmd); /** - * __phy_modify() - Convenience function for modifying a PHY register + * __phy_modify_changed() - Convenience function for modifying a PHY register * @phydev: a pointer to a &struct phy_device * @regnum: register number * @mask: bit mask of bits to clear @@ -539,16 +539,69 @@ EXPORT_SYMBOL(phy_write_mmd); * * Unlocked helper function which allows a PHY register to be modified as * new register value = (old register value & ~mask) | set + * + * Returns negative errno, 0 if there was no change, and 1 in case of change */ -int __phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set) +int __phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask, + u16 set) { - int ret; + int new, ret; ret = __phy_read(phydev, regnum); if (ret < 0) return ret; - ret = __phy_write(phydev, regnum, (ret & ~mask) | set); + new = (ret & ~mask) | set; + if (new == ret) + return 0; + + ret = __phy_write(phydev, regnum, new); + + return ret < 0 ? ret : 1; +} +EXPORT_SYMBOL_GPL(__phy_modify_changed); + +/** + * phy_modify_changed - Function for modifying a PHY register + * @phydev: the phy_device struct + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: new value of bits set in mask to write to @regnum + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + * + * Returns negative errno, 0 if there was no change, and 1 in case of change + */ +int phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask, u16 set) +{ + int ret; + + mutex_lock(&phydev->mdio.bus->mdio_lock); + ret = __phy_modify_changed(phydev, regnum, mask, set); + mutex_unlock(&phydev->mdio.bus->mdio_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(phy_modify_changed); + +/** + * __phy_modify - Convenience function for modifying a PHY register + * @phydev: the phy_device struct + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: new value of bits set in mask to write to @regnum + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + */ +int __phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set) +{ + int ret; + + ret = __phy_modify_changed(phydev, regnum, mask, set); return ret < 0 ? ret : 0; } @@ -578,7 +631,7 @@ int phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set) EXPORT_SYMBOL_GPL(phy_modify); /** - * __phy_modify_mmd - Convenience function for modifying a register on MMD + * __phy_modify_mmd_changed - Function for modifying a register on MMD * @phydev: the phy_device struct * @devad: the MMD containing register to modify * @regnum: register number to modify @@ -587,17 +640,73 @@ EXPORT_SYMBOL_GPL(phy_modify); * * Unlocked helper function which allows a MMD register to be modified as * new register value = (old register value & ~mask) | set + * + * Returns negative errno, 0 if there was no change, and 1 in case of change + */ +int __phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set) +{ + int new, ret; + + ret = __phy_read_mmd(phydev, devad, regnum); + if (ret < 0) + return ret; + + new = (ret & ~mask) | set; + if (new == ret) + return 0; + + ret = __phy_write_mmd(phydev, devad, regnum, new); + + return ret < 0 ? ret : 1; +} +EXPORT_SYMBOL_GPL(__phy_modify_mmd_changed); + +/** + * phy_modify_mmd_changed - Function for modifying a register on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: new value of bits set in mask to write to @regnum + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + * + * Returns negative errno, 0 if there was no change, and 1 in case of change + */ +int phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set) +{ + int ret; + + mutex_lock(&phydev->mdio.bus->mdio_lock); + ret = __phy_modify_mmd_changed(phydev, devad, regnum, mask, set); + mutex_unlock(&phydev->mdio.bus->mdio_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(phy_modify_mmd_changed); + +/** + * __phy_modify_mmd - Convenience function for modifying a register on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: new value of bits set in mask to write to @regnum + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. */ int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 mask, u16 set) { int ret; - ret = __phy_read_mmd(phydev, devad, regnum); - if (ret < 0) - return ret; - - ret = __phy_write_mmd(phydev, devad, regnum, (ret & ~mask) | set); + ret = __phy_modify_mmd_changed(phydev, devad, regnum, mask, set); return ret < 0 ? ret : 0; } diff --git a/include/linux/phy.h b/include/linux/phy.h index d2ffae992e4a..378da9a6165e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -799,13 +799,21 @@ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); */ int __phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); +int __phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask, + u16 set); +int phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask, + u16 set); int __phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set); int phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set); +int __phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set); +int phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set); int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, - u16 mask, u16 set); + u16 mask, u16 set); int phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, - u16 mask, u16 set); + u16 mask, u16 set); /** * __phy_set_bits - Convenience function for setting bits in a PHY register From b06d8e5a5dcc21063a0a84658b7106f40aa400e4 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 10 Feb 2019 19:58:49 +0100 Subject: [PATCH 0920/1436] net: phy: marvell10g: fix usage of new MMD modifying helpers When replacing mv3310_modify() with phy_modify_mmd() we missed that they behave differently, mv3310_modify() returns 1 on a changed register value whilst phy_modify_mmd() returns 0. Fix this by replacing phy_modify_mmd() with phy_modify_mmd_changed() where needed. Fixes: b52c018ddccf ("net: phy: make use of new MMD accessors") Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 96a79c6c7810..08362dc657cd 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -141,10 +141,9 @@ static int mv3310_hwmon_config(struct phy_device *phydev, bool enable) return ret; val = enable ? MV_V2_TEMP_CTRL_SAMPLE : MV_V2_TEMP_CTRL_DISABLE; - ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, MV_V2_TEMP_CTRL, - MV_V2_TEMP_CTRL_MASK, val); - return ret < 0 ? ret : 0; + return phy_modify_mmd(phydev, MDIO_MMD_VEND2, MV_V2_TEMP_CTRL, + MV_V2_TEMP_CTRL_MASK, val); } static void mv3310_hwmon_disable(void *data) @@ -345,7 +344,7 @@ static int mv3310_config_aneg(struct phy_device *phydev) linkmode_and(phydev->advertising, phydev->advertising, phydev->supported); - ret = phy_modify_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE, + ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE, ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM, linkmode_adv_to_mii_adv_t(phydev->advertising)); @@ -355,7 +354,7 @@ static int mv3310_config_aneg(struct phy_device *phydev) changed = true; reg = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising); - ret = phy_modify_mmd(phydev, MDIO_MMD_AN, MV_AN_CTRL1000, + ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MV_AN_CTRL1000, ADVERTISE_1000FULL | ADVERTISE_1000HALF, reg); if (ret < 0) return ret; @@ -369,8 +368,8 @@ static int mv3310_config_aneg(struct phy_device *phydev) else reg = 0; - ret = phy_modify_mmd(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL, - MDIO_AN_10GBT_CTRL_ADV10G, reg); + ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL, + MDIO_AN_10GBT_CTRL_ADV10G, reg); if (ret < 0) return ret; if (ret > 0) From 4f9744ed3c285fd2d2cf7f9a43c00c03604a2515 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 10 Feb 2019 19:59:57 +0100 Subject: [PATCH 0921/1436] net: phy: use phy_modify_changed in genphy_config_advert Use phy_modify_changed() to simplify the code. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 43 +++++++++++++----------------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 8573d17ece0f..3d14e48aebc5 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1516,7 +1516,7 @@ EXPORT_SYMBOL(phy_reset_after_clk_enable); static int genphy_config_advert(struct phy_device *phydev) { u32 advertise; - int oldadv, adv, bmsr; + int bmsr, adv; int err, changed = 0; /* Only allow advertising what this PHY supports */ @@ -1529,22 +1529,14 @@ static int genphy_config_advert(struct phy_device *phydev) phydev->advertising); /* Setup standard advertisement */ - adv = phy_read(phydev, MII_ADVERTISE); - if (adv < 0) - return adv; - - oldadv = adv; - adv &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP | - ADVERTISE_PAUSE_ASYM); - adv |= ethtool_adv_to_mii_adv_t(advertise); - - if (adv != oldadv) { - err = phy_write(phydev, MII_ADVERTISE, adv); - - if (err < 0) - return err; + err = phy_modify_changed(phydev, MII_ADVERTISE, + ADVERTISE_ALL | ADVERTISE_100BASE4 | + ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM, + ethtool_adv_to_mii_adv_t(advertise)); + if (err < 0) + return err; + if (err > 0) changed = 1; - } bmsr = phy_read(phydev, MII_BMSR); if (bmsr < 0) @@ -1558,25 +1550,20 @@ static int genphy_config_advert(struct phy_device *phydev) return changed; /* Configure gigabit if it's supported */ - adv = phy_read(phydev, MII_CTRL1000); - if (adv < 0) - return adv; - - oldadv = adv; - adv &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF); - + adv = 0; if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, phydev->supported) || linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, phydev->supported)) - adv |= ethtool_adv_to_mii_ctrl1000_t(advertise); + adv = ethtool_adv_to_mii_ctrl1000_t(advertise); - if (adv != oldadv) - changed = 1; - - err = phy_write(phydev, MII_CTRL1000, adv); + err = phy_modify_changed(phydev, MII_CTRL1000, + ADVERTISE_1000FULL | ADVERTISE_1000HALF, + adv); if (err < 0) return err; + if (err > 0) + changed = 1; return changed; } From 978749617bc0b5355e659cbeb855f7bcdc3b40ea Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 10 Feb 2019 15:26:07 +0100 Subject: [PATCH 0922/1436] Revert "r8169: remove unneeded mmiowb barriers" This reverts commit bd7153bd83b806bfcc2e79b7a6f43aa653d06ef3. There doesn't seem to be anything wrong with this patch, it's just reverted to get a stable baseline again. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index abb94c543aa2..bba806ce57d3 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -1286,11 +1286,13 @@ static u16 rtl_get_events(struct rtl8169_private *tp) static void rtl_ack_events(struct rtl8169_private *tp, u16 bits) { RTL_W16(tp, IntrStatus, bits); + mmiowb(); } static void rtl_irq_disable(struct rtl8169_private *tp) { RTL_W16(tp, IntrMask, 0); + mmiowb(); } #define RTL_EVENT_NAPI_RX (RxOK | RxErr) @@ -6130,8 +6132,10 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, if (unlikely(stop_queue)) netif_stop_queue(dev); - if (__netdev_sent_queue(dev, skb->len, skb->xmit_more)) + if (__netdev_sent_queue(dev, skb->len, skb->xmit_more)) { RTL_W8(tp, TxPoll, NPQ); + mmiowb(); + } if (unlikely(stop_queue)) { /* Sync with rtl_tx: @@ -6483,7 +6487,9 @@ static int rtl8169_poll(struct napi_struct *napi, int budget) if (work_done < budget) { napi_complete_done(napi, work_done); + rtl_irq_enable(tp); + mmiowb(); } return work_done; From 0255d5927c0f2484de2649c7eb69e4fc459f0130 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 10 Feb 2019 15:28:04 +0100 Subject: [PATCH 0923/1436] Revert "r8169: make use of xmit_more and __netdev_sent_queue" This reverts commit 2e6eedb4813e34d8d84ac0eb3afb668966f3f356. Sander reported a regression causing a kernel panic[1], therefore let's revert this commit. [1] https://marc.info/?t=154965066400001&r=1&w=2 Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index bba806ce57d3..6e36b88ca7c9 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -6074,7 +6074,6 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, struct device *d = tp_to_dev(tp); dma_addr_t mapping; u32 opts[2], len; - bool stop_queue; int frags; if (unlikely(!rtl_tx_slots_avail(tp, skb_shinfo(skb)->nr_frags))) { @@ -6116,6 +6115,8 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, txd->opts2 = cpu_to_le32(opts[1]); + netdev_sent_queue(dev, skb->len); + skb_tx_timestamp(skb); /* Force memory writes to complete before releasing descriptor */ @@ -6128,16 +6129,16 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, tp->cur_tx += frags + 1; - stop_queue = !rtl_tx_slots_avail(tp, MAX_SKB_FRAGS); - if (unlikely(stop_queue)) + RTL_W8(tp, TxPoll, NPQ); + + mmiowb(); + + if (!rtl_tx_slots_avail(tp, MAX_SKB_FRAGS)) { + /* Avoid wrongly optimistic queue wake-up: rtl_tx thread must + * not miss a ring update when it notices a stopped queue. + */ + smp_wmb(); netif_stop_queue(dev); - - if (__netdev_sent_queue(dev, skb->len, skb->xmit_more)) { - RTL_W8(tp, TxPoll, NPQ); - mmiowb(); - } - - if (unlikely(stop_queue)) { /* Sync with rtl_tx: * - publish queue status and cur_tx ring index (write barrier) * - refresh dirty_tx ring index (read barrier). From d13937116f1e82bf508a6325111b322c30c85eb9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 10 Feb 2019 14:42:20 -0800 Subject: [PATCH 0924/1436] Linux 5.0-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3142e67d03f1..86cf35d1d79d 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Shy Crocodile # *DOCUMENTATION* From 842fc0f5dc5c9f9bd91f891554996d903c40cf35 Mon Sep 17 00:00:00 2001 From: Bob Tracy Date: Mon, 21 Jan 2019 21:09:14 -0800 Subject: [PATCH 0925/1436] tools uapi: fix Alpha support Cc: stable@vger.kernel.org # v4.18+ Signed-off-by: Bob Tracy Signed-off-by: Matt Turner --- tools/include/uapi/asm/bitsperlong.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/uapi/asm/bitsperlong.h b/tools/include/uapi/asm/bitsperlong.h index fd92ce8388fc..57aaeaf8e192 100644 --- a/tools/include/uapi/asm/bitsperlong.h +++ b/tools/include/uapi/asm/bitsperlong.h @@ -15,6 +15,8 @@ #include "../../arch/ia64/include/uapi/asm/bitsperlong.h" #elif defined(__riscv) #include "../../arch/riscv/include/uapi/asm/bitsperlong.h" +#elif defined(__alpha__) +#include "../../arch/alpha/include/uapi/asm/bitsperlong.h" #else #include #endif From 5f4566498dee5e38e36a015a968c22ed21568f0b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 8 Feb 2019 22:25:54 -0800 Subject: [PATCH 0926/1436] bpf: Fix narrow load on a bpf_sock returned from sk_lookup() By adding this test to test_verifier: { "reference tracking: access sk->src_ip4 (narrow load)", .insns = { BPF_SK_LOOKUP, BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, src_ip4) + 2), BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), BPF_EMIT_CALL(BPF_FUNC_sk_release), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, }, The above test loads 2 bytes from sk->src_ip4 where sk is obtained by bpf_sk_lookup_tcp(). It hits an internal verifier error from convert_ctx_accesses(): [root@arch-fb-vm1 bpf]# ./test_verifier 665 665 Failed to load prog 'Invalid argument'! 0: (b7) r2 = 0 1: (63) *(u32 *)(r10 -8) = r2 2: (7b) *(u64 *)(r10 -16) = r2 3: (7b) *(u64 *)(r10 -24) = r2 4: (7b) *(u64 *)(r10 -32) = r2 5: (7b) *(u64 *)(r10 -40) = r2 6: (7b) *(u64 *)(r10 -48) = r2 7: (bf) r2 = r10 8: (07) r2 += -48 9: (b7) r3 = 36 10: (b7) r4 = 0 11: (b7) r5 = 0 12: (85) call bpf_sk_lookup_tcp#84 13: (bf) r6 = r0 14: (15) if r0 == 0x0 goto pc+3 R0=sock(id=1,off=0,imm=0) R6=sock(id=1,off=0,imm=0) R10=fp0,call_-1 fp-8=????0000 fp-16=0000mmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm refs=1 15: (69) r2 = *(u16 *)(r0 +26) 16: (bf) r1 = r6 17: (85) call bpf_sk_release#86 18: (95) exit from 14 to 18: safe processed 20 insns (limit 131072), stack depth 48 bpf verifier is misconfigured Summary: 0 PASSED, 0 SKIPPED, 1 FAILED The bpf_sock_is_valid_access() is expecting src_ip4 can be narrowly loaded (meaning load any 1 or 2 bytes of the src_ip4) by marking info->ctx_field_size. However, this marked ctx_field_size is not used. This patch fixes it. Due to the recent refactoring in test_verifier, this new test will be added to the bpf-next branch (together with the bpf_tcp_sock patchset) to avoid merge conflict. Fixes: c64b7983288e ("bpf: Add PTR_TO_SOCKET verifier type") Cc: Joe Stringer Signed-off-by: Martin KaFai Lau Acked-by: Joe Stringer Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b63bc77af2d1..516dfc6d78de 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1640,12 +1640,13 @@ static int check_flow_keys_access(struct bpf_verifier_env *env, int off, return 0; } -static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off, - int size, enum bpf_access_type t) +static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, + u32 regno, int off, int size, + enum bpf_access_type t) { struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *reg = ®s[regno]; - struct bpf_insn_access_aux info; + struct bpf_insn_access_aux info = {}; if (reg->smin_value < 0) { verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", @@ -1659,6 +1660,8 @@ static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off, return -EACCES; } + env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; + return 0; } @@ -2055,7 +2058,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn verbose(env, "cannot write into socket\n"); return -EACCES; } - err = check_sock_access(env, regno, off, size, t); + err = check_sock_access(env, insn_idx, regno, off, size, t); if (!err && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); } else { From 46f8bc92758c6259bcf945e9216098661c1587cd Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Sat, 9 Feb 2019 23:22:20 -0800 Subject: [PATCH 0927/1436] bpf: Add a bpf_sock pointer to __sk_buff and a bpf_sk_fullsock helper In kernel, it is common to check "skb->sk && sk_fullsock(skb->sk)" before accessing the fields in sock. For example, in __netdev_pick_tx: static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { /* ... */ struct sock *sk = skb->sk; if (queue_index != new_index && sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) sk_tx_queue_set(sk, new_index); /* ... */ return queue_index; } This patch adds a "struct bpf_sock *sk" pointer to the "struct __sk_buff" where a few of the convert_ctx_access() in filter.c has already been accessing the skb->sk sock_common's fields, e.g. sock_ops_convert_ctx_access(). "__sk_buff->sk" is a PTR_TO_SOCK_COMMON_OR_NULL in the verifier. Some of the fileds in "bpf_sock" will not be directly accessible through the "__sk_buff->sk" pointer. It is limited by the new "bpf_sock_common_is_valid_access()". e.g. The existing "type", "protocol", "mark" and "priority" in bpf_sock are not allowed. The newly added "struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)" can be used to get a sk with all accessible fields in "bpf_sock". This helper is added to both cg_skb and sched_(cls|act). int cg_skb_foo(struct __sk_buff *skb) { struct bpf_sock *sk; sk = skb->sk; if (!sk) return 1; sk = bpf_sk_fullsock(sk); if (!sk) return 1; if (sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP) return 1; /* some_traffic_shaping(); */ return 1; } (1) The sk is read only (2) There is no new "struct bpf_sock_common" introduced. (3) Future kernel sock's members could be added to bpf_sock only instead of repeatedly adding at multiple places like currently in bpf_sock_ops_md, bpf_sock_addr_md, sk_reuseport_md...etc. (4) After "sk = skb->sk", the reg holding sk is in type PTR_TO_SOCK_COMMON_OR_NULL. (5) After bpf_sk_fullsock(), the return type will be in type PTR_TO_SOCKET_OR_NULL which is the same as the return type of bpf_sk_lookup_xxx(). However, bpf_sk_fullsock() does not take refcnt. The acquire_reference_state() is only depending on the return type now. To avoid it, a new is_acquire_function() is checked before calling acquire_reference_state(). (6) The WARN_ON in "release_reference_state()" is no longer an internal verifier bug. When reg->id is not found in state->refs[], it means the bpf_prog does something wrong like "bpf_sk_release(bpf_sk_fullsock(skb->sk))" where reference has never been acquired by calling "bpf_sk_fullsock(skb->sk)". A -EINVAL and a verbose are done instead of WARN_ON. A test is added to the test_verifier in a later patch. Since the WARN_ON in "release_reference_state()" is no longer needed, "__release_reference_state()" is folded into "release_reference_state()" also. Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 12 ++++ include/uapi/linux/bpf.h | 12 +++- kernel/bpf/verifier.c | 132 +++++++++++++++++++++++++++------------ net/core/filter.c | 42 +++++++++++++ 4 files changed, 157 insertions(+), 41 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bd169a7bcc93..a60463b45b54 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -194,6 +194,7 @@ enum bpf_arg_type { ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ + ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ }; /* type of values returned from helper functions */ @@ -256,6 +257,8 @@ enum bpf_reg_type { PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */ PTR_TO_SOCKET, /* reg points to struct bpf_sock */ PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */ + PTR_TO_SOCK_COMMON, /* reg points to sock_common */ + PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ }; /* The information passed from prog-specific *_is_valid_access @@ -920,6 +923,9 @@ void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); #if defined(CONFIG_NET) +bool bpf_sock_common_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info); bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info); u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, @@ -928,6 +934,12 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, struct bpf_prog *prog, u32 *target_size); #else +static inline bool bpf_sock_common_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + return false; +} static inline bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1777fa0c61e4..5d79cba74ddc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2329,6 +2329,14 @@ union bpf_attr { * "**y**". * Return * 0 + * + * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) + * Description + * This helper gets a **struct bpf_sock** pointer such + * that all the fields in bpf_sock can be accessed. + * Return + * A **struct bpf_sock** pointer on success, or NULL in + * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2425,7 +2433,8 @@ union bpf_attr { FN(msg_pop_data), \ FN(rc_pointer_rel), \ FN(spin_lock), \ - FN(spin_unlock), + FN(spin_unlock), \ + FN(sk_fullsock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2545,6 +2554,7 @@ struct __sk_buff { __u64 tstamp; __u32 wire_len; __u32 gso_segs; + __bpf_md_ptr(struct bpf_sock *, sk); }; struct bpf_tunnel_key { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 516dfc6d78de..b755d55a3791 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -331,10 +331,17 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type) type == PTR_TO_PACKET_META; } +static bool type_is_sk_pointer(enum bpf_reg_type type) +{ + return type == PTR_TO_SOCKET || + type == PTR_TO_SOCK_COMMON; +} + static bool reg_type_may_be_null(enum bpf_reg_type type) { return type == PTR_TO_MAP_VALUE_OR_NULL || - type == PTR_TO_SOCKET_OR_NULL; + type == PTR_TO_SOCKET_OR_NULL || + type == PTR_TO_SOCK_COMMON_OR_NULL; } static bool type_is_refcounted(enum bpf_reg_type type) @@ -377,6 +384,12 @@ static bool is_release_function(enum bpf_func_id func_id) return func_id == BPF_FUNC_sk_release; } +static bool is_acquire_function(enum bpf_func_id func_id) +{ + return func_id == BPF_FUNC_sk_lookup_tcp || + func_id == BPF_FUNC_sk_lookup_udp; +} + /* string representation of 'enum bpf_reg_type' */ static const char * const reg_type_str[] = { [NOT_INIT] = "?", @@ -392,6 +405,8 @@ static const char * const reg_type_str[] = { [PTR_TO_FLOW_KEYS] = "flow_keys", [PTR_TO_SOCKET] = "sock", [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", + [PTR_TO_SOCK_COMMON] = "sock_common", + [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", }; static char slot_type_char[] = { @@ -618,13 +633,10 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) } /* release function corresponding to acquire_reference_state(). Idempotent. */ -static int __release_reference_state(struct bpf_func_state *state, int ptr_id) +static int release_reference_state(struct bpf_func_state *state, int ptr_id) { int i, last_idx; - if (!ptr_id) - return -EFAULT; - last_idx = state->acquired_refs - 1; for (i = 0; i < state->acquired_refs; i++) { if (state->refs[i].id == ptr_id) { @@ -636,21 +648,7 @@ static int __release_reference_state(struct bpf_func_state *state, int ptr_id) return 0; } } - return -EFAULT; -} - -/* variation on the above for cases where we expect that there must be an - * outstanding reference for the specified ptr_id. - */ -static int release_reference_state(struct bpf_verifier_env *env, int ptr_id) -{ - struct bpf_func_state *state = cur_func(env); - int err; - - err = __release_reference_state(state, ptr_id); - if (WARN_ON_ONCE(err != 0)) - verbose(env, "verifier internal error: can't release reference\n"); - return err; + return -EINVAL; } static int transfer_reference_state(struct bpf_func_state *dst, @@ -1209,6 +1207,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case CONST_PTR_TO_MAP: case PTR_TO_SOCKET: case PTR_TO_SOCKET_OR_NULL: + case PTR_TO_SOCK_COMMON: + case PTR_TO_SOCK_COMMON_OR_NULL: return true; default: return false; @@ -1647,6 +1647,7 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *reg = ®s[regno]; struct bpf_insn_access_aux info = {}; + bool valid; if (reg->smin_value < 0) { verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", @@ -1654,15 +1655,28 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, return -EACCES; } - if (!bpf_sock_is_valid_access(off, size, t, &info)) { - verbose(env, "invalid bpf_sock access off=%d size=%d\n", - off, size); - return -EACCES; + switch (reg->type) { + case PTR_TO_SOCK_COMMON: + valid = bpf_sock_common_is_valid_access(off, size, t, &info); + break; + case PTR_TO_SOCKET: + valid = bpf_sock_is_valid_access(off, size, t, &info); + break; + default: + valid = false; } - env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; - return 0; + if (valid) { + env->insn_aux_data[insn_idx].ctx_field_size = + info.ctx_field_size; + return 0; + } + + verbose(env, "R%d invalid %s access off=%d size=%d\n", + regno, reg_type_str[reg->type], off, size); + + return -EACCES; } static bool __is_pointer_value(bool allow_ptr_leaks, @@ -1688,8 +1702,14 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) { const struct bpf_reg_state *reg = reg_state(env, regno); - return reg->type == PTR_TO_CTX || - reg->type == PTR_TO_SOCKET; + return reg->type == PTR_TO_CTX; +} + +static bool is_sk_reg(struct bpf_verifier_env *env, int regno) +{ + const struct bpf_reg_state *reg = reg_state(env, regno); + + return type_is_sk_pointer(reg->type); } static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) @@ -1800,6 +1820,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, case PTR_TO_SOCKET: pointer_desc = "sock "; break; + case PTR_TO_SOCK_COMMON: + pointer_desc = "sock_common "; + break; default: break; } @@ -2003,11 +2026,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn * PTR_TO_PACKET[_META,_END]. In the latter * case, we know the offset is zero. */ - if (reg_type == SCALAR_VALUE) + if (reg_type == SCALAR_VALUE) { mark_reg_unknown(env, regs, value_regno); - else + } else { mark_reg_known_zero(env, regs, value_regno); + if (reg_type_may_be_null(reg_type)) + regs[value_regno].id = ++env->id_gen; + } regs[value_regno].type = reg_type; } @@ -2053,9 +2079,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn err = check_flow_keys_access(env, off, size); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); - } else if (reg->type == PTR_TO_SOCKET) { + } else if (type_is_sk_pointer(reg->type)) { if (t == BPF_WRITE) { - verbose(env, "cannot write into socket\n"); + verbose(env, "R%d cannot write into %s\n", + regno, reg_type_str[reg->type]); return -EACCES; } err = check_sock_access(env, insn_idx, regno, off, size, t); @@ -2102,7 +2129,8 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins if (is_ctx_reg(env, insn->dst_reg) || is_pkt_reg(env, insn->dst_reg) || - is_flow_key_reg(env, insn->dst_reg)) { + is_flow_key_reg(env, insn->dst_reg) || + is_sk_reg(env, insn->dst_reg)) { verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", insn->dst_reg, reg_type_str[reg_state(env, insn->dst_reg)->type]); @@ -2369,6 +2397,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, err = check_ctx_reg(env, reg, regno); if (err < 0) return err; + } else if (arg_type == ARG_PTR_TO_SOCK_COMMON) { + expected_type = PTR_TO_SOCK_COMMON; + /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */ + if (!type_is_sk_pointer(type)) + goto err_type; } else if (arg_type == ARG_PTR_TO_SOCKET) { expected_type = PTR_TO_SOCKET; if (type != expected_type) @@ -2783,7 +2816,7 @@ static int release_reference(struct bpf_verifier_env *env, for (i = 0; i <= vstate->curframe; i++) release_reg_references(env, vstate->frame[i], meta->ptr_id); - return release_reference_state(env, meta->ptr_id); + return release_reference_state(cur_func(env), meta->ptr_id); } static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, @@ -3049,8 +3082,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn } } else if (is_release_function(func_id)) { err = release_reference(env, &meta); - if (err) + if (err) { + verbose(env, "func %s#%d reference has not been acquired before\n", + func_id_name(func_id), func_id); return err; + } } regs = cur_regs(env); @@ -3099,12 +3135,19 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn regs[BPF_REG_0].id = ++env->id_gen; } } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { - int id = acquire_reference_state(env, insn_idx); - if (id < 0) - return id; mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; - regs[BPF_REG_0].id = id; + if (is_acquire_function(func_id)) { + int id = acquire_reference_state(env, insn_idx); + + if (id < 0) + return id; + /* For release_reference() */ + regs[BPF_REG_0].id = id; + } else { + /* For mark_ptr_or_null_reg() */ + regs[BPF_REG_0].id = ++env->id_gen; + } } else { verbose(env, "unknown return type %d of func %s#%d\n", fn->ret_type, func_id_name(func_id), func_id); @@ -3364,6 +3407,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, case PTR_TO_PACKET_END: case PTR_TO_SOCKET: case PTR_TO_SOCKET_OR_NULL: + case PTR_TO_SOCK_COMMON: + case PTR_TO_SOCK_COMMON_OR_NULL: verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str[ptr_reg->type]); return -EACCES; @@ -4597,6 +4642,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, } } else if (reg->type == PTR_TO_SOCKET_OR_NULL) { reg->type = PTR_TO_SOCKET; + } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) { + reg->type = PTR_TO_SOCK_COMMON; } if (is_null || !(reg_is_refcounted(reg) || reg_may_point_to_spin_lock(reg))) { @@ -4621,7 +4668,7 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, int i, j; if (reg_is_refcounted_or_null(®s[regno]) && is_null) - __release_reference_state(state, id); + release_reference_state(state, id); for (i = 0; i < MAX_BPF_REG; i++) mark_ptr_or_null_reg(state, ®s[i], id, is_null); @@ -5790,6 +5837,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, case PTR_TO_FLOW_KEYS: case PTR_TO_SOCKET: case PTR_TO_SOCKET_OR_NULL: + case PTR_TO_SOCK_COMMON: + case PTR_TO_SOCK_COMMON_OR_NULL: /* Only valid matches are exact, which memcmp() above * would have accepted */ @@ -6110,6 +6159,8 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type) case PTR_TO_CTX: case PTR_TO_SOCKET: case PTR_TO_SOCKET_OR_NULL: + case PTR_TO_SOCK_COMMON: + case PTR_TO_SOCK_COMMON_OR_NULL: return false; default: return true; @@ -7112,6 +7163,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) convert_ctx_access = ops->convert_ctx_access; break; case PTR_TO_SOCKET: + case PTR_TO_SOCK_COMMON: convert_ctx_access = bpf_sock_convert_ctx_access; break; default: diff --git a/net/core/filter.c b/net/core/filter.c index 3a49f68eda10..401d2e0aebf8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1793,6 +1793,20 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk) +{ + sk = sk_to_full_sk(sk); + + return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL; +} + +static const struct bpf_func_proto bpf_sk_fullsock_proto = { + .func = bpf_sk_fullsock, + .gpl_only = false, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, +}; + static inline int sk_skb_try_make_writable(struct sk_buff *skb, unsigned int write_len) { @@ -5406,6 +5420,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) switch (func_id) { case BPF_FUNC_get_local_storage: return &bpf_get_local_storage_proto; + case BPF_FUNC_sk_fullsock: + return &bpf_sk_fullsock_proto; default: return sk_filter_func_proto(func_id, prog); } @@ -5477,6 +5493,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_socket_uid_proto; case BPF_FUNC_fib_lookup: return &bpf_skb_fib_lookup_proto; + case BPF_FUNC_sk_fullsock: + return &bpf_sk_fullsock_proto; #ifdef CONFIG_XFRM case BPF_FUNC_skb_get_xfrm_state: return &bpf_skb_get_xfrm_state_proto; @@ -5764,6 +5782,11 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type if (size != sizeof(__u64)) return false; break; + case offsetof(struct __sk_buff, sk): + if (type == BPF_WRITE || size != sizeof(__u64)) + return false; + info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL; + break; default: /* Only narrow read access allowed for now. */ if (type == BPF_WRITE) { @@ -5950,6 +5973,18 @@ static bool __sock_filter_check_size(int off, int size, return size == size_default; } +bool bpf_sock_common_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + switch (off) { + case bpf_ctx_range_till(struct bpf_sock, type, priority): + return false; + default: + return bpf_sock_is_valid_access(off, size, type, info); + } +} + bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { @@ -6748,6 +6783,13 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, off += offsetof(struct qdisc_skb_cb, pkt_len); *target_size = 4; *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off); + break; + + case offsetof(struct __sk_buff, sk): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, sk)); + break; } return insn - insn_buf; From aa65d6960a98fc15a96ce361b26e9fd55c9bccc5 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Sat, 9 Feb 2019 23:22:21 -0800 Subject: [PATCH 0928/1436] bpf: Add state, dst_ip4, dst_ip6 and dst_port to bpf_sock This patch adds "state", "dst_ip4", "dst_ip6" and "dst_port" to the bpf_sock. The userspace has already been using "state", e.g. inet_diag (ss -t) and getsockopt(TCP_INFO). This patch also allows narrow load on the following existing fields: "family", "type", "protocol" and "src_port". Unlike IP address, the load offset is resticted to the first byte for them but it can be relaxed later if there is a use case. This patch also folds __sock_filter_check_size() into bpf_sock_is_valid_access() since it is not called by any where else. All bpf_sock checking is in one place. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 17 ++++--- net/core/filter.c | 99 +++++++++++++++++++++++++++++++--------- 2 files changed, 85 insertions(+), 31 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5d79cba74ddc..d8f91777c5b6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2606,15 +2606,14 @@ struct bpf_sock { __u32 protocol; __u32 mark; __u32 priority; - __u32 src_ip4; /* Allows 1,2,4-byte read. - * Stored in network byte order. - */ - __u32 src_ip6[4]; /* Allows 1,2,4-byte read. - * Stored in network byte order. - */ - __u32 src_port; /* Allows 4-byte read. - * Stored in host byte order - */ + /* IP address also allows 1 and 2 bytes access */ + __u32 src_ip4; + __u32 src_ip6[4]; + __u32 src_port; /* host byte order */ + __u32 dst_port; /* network byte order */ + __u32 dst_ip4; + __u32 dst_ip6[4]; + __u32 state; }; struct bpf_sock_tuple { diff --git a/net/core/filter.c b/net/core/filter.c index 401d2e0aebf8..01bb64bf2b5e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5958,21 +5958,6 @@ full_access: return true; } -static bool __sock_filter_check_size(int off, int size, - struct bpf_insn_access_aux *info) -{ - const int size_default = sizeof(__u32); - - switch (off) { - case bpf_ctx_range(struct bpf_sock, src_ip4): - case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): - bpf_ctx_record_field_size(info, size_default); - return bpf_ctx_narrow_access_ok(off, size, size_default); - } - - return size == size_default; -} - bool bpf_sock_common_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) @@ -5988,13 +5973,29 @@ bool bpf_sock_common_is_valid_access(int off, int size, bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { + const int size_default = sizeof(__u32); + if (off < 0 || off >= sizeof(struct bpf_sock)) return false; if (off % size != 0) return false; - if (!__sock_filter_check_size(off, size, info)) - return false; - return true; + + switch (off) { + case offsetof(struct bpf_sock, state): + case offsetof(struct bpf_sock, family): + case offsetof(struct bpf_sock, type): + case offsetof(struct bpf_sock, protocol): + case offsetof(struct bpf_sock, dst_port): + case offsetof(struct bpf_sock, src_port): + case bpf_ctx_range(struct bpf_sock, src_ip4): + case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): + case bpf_ctx_range(struct bpf_sock, dst_ip4): + case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): + bpf_ctx_record_field_size(info, size_default); + return bpf_ctx_narrow_access_ok(off, size, size_default); + } + + return size == size_default; } static bool sock_filter_is_valid_access(int off, int size, @@ -6838,24 +6839,32 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, break; case offsetof(struct bpf_sock, family): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2); - - *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, - offsetof(struct sock, sk_family)); + *insn++ = BPF_LDX_MEM( + BPF_FIELD_SIZEOF(struct sock_common, skc_family), + si->dst_reg, si->src_reg, + bpf_target_off(struct sock_common, + skc_family, + FIELD_SIZEOF(struct sock_common, + skc_family), + target_size)); break; case offsetof(struct bpf_sock, type): + BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sock, __sk_flags_offset)); *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK); *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT); + *target_size = 2; break; case offsetof(struct bpf_sock, protocol): + BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sock, __sk_flags_offset)); *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT); + *target_size = 1; break; case offsetof(struct bpf_sock, src_ip4): @@ -6867,6 +6876,15 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, target_size)); break; + case offsetof(struct bpf_sock, dst_ip4): + *insn++ = BPF_LDX_MEM( + BPF_SIZE(si->code), si->dst_reg, si->src_reg, + bpf_target_off(struct sock_common, skc_daddr, + FIELD_SIZEOF(struct sock_common, + skc_daddr), + target_size)); + break; + case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): #if IS_ENABLED(CONFIG_IPV6) off = si->off; @@ -6885,6 +6903,23 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, #endif break; + case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): +#if IS_ENABLED(CONFIG_IPV6) + off = si->off; + off -= offsetof(struct bpf_sock, dst_ip6[0]); + *insn++ = BPF_LDX_MEM( + BPF_SIZE(si->code), si->dst_reg, si->src_reg, + bpf_target_off(struct sock_common, + skc_v6_daddr.s6_addr32[0], + FIELD_SIZEOF(struct sock_common, + skc_v6_daddr.s6_addr32[0]), + target_size) + off); +#else + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); + *target_size = 4; +#endif + break; + case offsetof(struct bpf_sock, src_port): *insn++ = BPF_LDX_MEM( BPF_FIELD_SIZEOF(struct sock_common, skc_num), @@ -6894,6 +6929,26 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, skc_num), target_size)); break; + + case offsetof(struct bpf_sock, dst_port): + *insn++ = BPF_LDX_MEM( + BPF_FIELD_SIZEOF(struct sock_common, skc_dport), + si->dst_reg, si->src_reg, + bpf_target_off(struct sock_common, skc_dport, + FIELD_SIZEOF(struct sock_common, + skc_dport), + target_size)); + break; + + case offsetof(struct bpf_sock, state): + *insn++ = BPF_LDX_MEM( + BPF_FIELD_SIZEOF(struct sock_common, skc_state), + si->dst_reg, si->src_reg, + bpf_target_off(struct sock_common, skc_state, + FIELD_SIZEOF(struct sock_common, + skc_state), + target_size)); + break; } return insn - insn_buf; From 9b1f3d6e5af295a72deb5e3f04db07a6a58be72e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Sat, 9 Feb 2019 23:22:23 -0800 Subject: [PATCH 0929/1436] bpf: Refactor sock_ops_convert_ctx_access The next patch will introduce a new "struct bpf_tcp_sock" which exposes the same tcp_sock's fields already exposed in "struct bpf_sock_ops". This patch refactor the existing convert_ctx_access() codes for "struct bpf_sock_ops" to get them ready to be reused for "struct bpf_tcp_sock". The "rtt_min" is not refactored in this patch because its handling is different from other fields. The SOCK_OPS_GET_TCP_SOCK_FIELD is new. All other SOCK_OPS_XXX_FIELD changes are code move only. Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- net/core/filter.c | 287 ++++++++++++++++++++-------------------------- 1 file changed, 127 insertions(+), 160 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 01bb64bf2b5e..c0d7b9ef279f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5030,6 +5030,54 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = { }; #endif /* CONFIG_IPV6_SEG6_BPF */ +#define CONVERT_COMMON_TCP_SOCK_FIELDS(md_type, CONVERT) \ +do { \ + switch (si->off) { \ + case offsetof(md_type, snd_cwnd): \ + CONVERT(snd_cwnd); break; \ + case offsetof(md_type, srtt_us): \ + CONVERT(srtt_us); break; \ + case offsetof(md_type, snd_ssthresh): \ + CONVERT(snd_ssthresh); break; \ + case offsetof(md_type, rcv_nxt): \ + CONVERT(rcv_nxt); break; \ + case offsetof(md_type, snd_nxt): \ + CONVERT(snd_nxt); break; \ + case offsetof(md_type, snd_una): \ + CONVERT(snd_una); break; \ + case offsetof(md_type, mss_cache): \ + CONVERT(mss_cache); break; \ + case offsetof(md_type, ecn_flags): \ + CONVERT(ecn_flags); break; \ + case offsetof(md_type, rate_delivered): \ + CONVERT(rate_delivered); break; \ + case offsetof(md_type, rate_interval_us): \ + CONVERT(rate_interval_us); break; \ + case offsetof(md_type, packets_out): \ + CONVERT(packets_out); break; \ + case offsetof(md_type, retrans_out): \ + CONVERT(retrans_out); break; \ + case offsetof(md_type, total_retrans): \ + CONVERT(total_retrans); break; \ + case offsetof(md_type, segs_in): \ + CONVERT(segs_in); break; \ + case offsetof(md_type, data_segs_in): \ + CONVERT(data_segs_in); break; \ + case offsetof(md_type, segs_out): \ + CONVERT(segs_out); break; \ + case offsetof(md_type, data_segs_out): \ + CONVERT(data_segs_out); break; \ + case offsetof(md_type, lost_out): \ + CONVERT(lost_out); break; \ + case offsetof(md_type, sacked_out): \ + CONVERT(sacked_out); break; \ + case offsetof(md_type, bytes_received): \ + CONVERT(bytes_received); break; \ + case offsetof(md_type, bytes_acked): \ + CONVERT(bytes_acked); break; \ + } \ +} while (0) + #ifdef CONFIG_INET static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, int dif, int sdif, u8 family, u8 proto) @@ -7196,6 +7244,85 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, struct bpf_insn *insn = insn_buf; int off; +/* Helper macro for adding read access to tcp_sock or sock fields. */ +#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ + do { \ + BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \ + FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, \ + is_fullsock), \ + si->dst_reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + is_fullsock)); \ + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, sk),\ + si->dst_reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, sk));\ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \ + OBJ_FIELD), \ + si->dst_reg, si->dst_reg, \ + offsetof(OBJ, OBJ_FIELD)); \ + } while (0) + +#define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \ + SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock) + +/* Helper macro for adding write access to tcp_sock or sock fields. + * The macro is called with two registers, dst_reg which contains a pointer + * to ctx (context) and src_reg which contains the value that should be + * stored. However, we need an additional register since we cannot overwrite + * dst_reg because it may be used later in the program. + * Instead we "borrow" one of the other register. We first save its value + * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore + * it at the end of the macro. + */ +#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ + do { \ + int reg = BPF_REG_9; \ + BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \ + FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \ + if (si->dst_reg == reg || si->src_reg == reg) \ + reg--; \ + if (si->dst_reg == reg || si->src_reg == reg) \ + reg--; \ + *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, \ + is_fullsock), \ + reg, si->dst_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + is_fullsock)); \ + *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, sk),\ + reg, si->dst_reg, \ + offsetof(struct bpf_sock_ops_kern, sk));\ + *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \ + reg, si->src_reg, \ + offsetof(OBJ, OBJ_FIELD)); \ + *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + } while (0) + +#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \ + do { \ + if (TYPE == BPF_WRITE) \ + SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ + else \ + SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ + } while (0) + + CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_sock_ops, + SOCK_OPS_GET_TCP_SOCK_FIELD); + + if (insn > insn_buf) + return insn - insn_buf; + switch (si->off) { case offsetof(struct bpf_sock_ops, op) ... offsetof(struct bpf_sock_ops, replylong[3]): @@ -7353,175 +7480,15 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, FIELD_SIZEOF(struct minmax_sample, t)); break; -/* Helper macro for adding read access to tcp_sock or sock fields. */ -#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ - do { \ - BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \ - FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \ - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ - struct bpf_sock_ops_kern, \ - is_fullsock), \ - si->dst_reg, si->src_reg, \ - offsetof(struct bpf_sock_ops_kern, \ - is_fullsock)); \ - *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \ - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ - struct bpf_sock_ops_kern, sk),\ - si->dst_reg, si->src_reg, \ - offsetof(struct bpf_sock_ops_kern, sk));\ - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \ - OBJ_FIELD), \ - si->dst_reg, si->dst_reg, \ - offsetof(OBJ, OBJ_FIELD)); \ - } while (0) - -/* Helper macro for adding write access to tcp_sock or sock fields. - * The macro is called with two registers, dst_reg which contains a pointer - * to ctx (context) and src_reg which contains the value that should be - * stored. However, we need an additional register since we cannot overwrite - * dst_reg because it may be used later in the program. - * Instead we "borrow" one of the other register. We first save its value - * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore - * it at the end of the macro. - */ -#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ - do { \ - int reg = BPF_REG_9; \ - BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \ - FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \ - if (si->dst_reg == reg || si->src_reg == reg) \ - reg--; \ - if (si->dst_reg == reg || si->src_reg == reg) \ - reg--; \ - *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \ - offsetof(struct bpf_sock_ops_kern, \ - temp)); \ - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ - struct bpf_sock_ops_kern, \ - is_fullsock), \ - reg, si->dst_reg, \ - offsetof(struct bpf_sock_ops_kern, \ - is_fullsock)); \ - *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \ - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ - struct bpf_sock_ops_kern, sk),\ - reg, si->dst_reg, \ - offsetof(struct bpf_sock_ops_kern, sk));\ - *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \ - reg, si->src_reg, \ - offsetof(OBJ, OBJ_FIELD)); \ - *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \ - offsetof(struct bpf_sock_ops_kern, \ - temp)); \ - } while (0) - -#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \ - do { \ - if (TYPE == BPF_WRITE) \ - SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ - else \ - SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ - } while (0) - - case offsetof(struct bpf_sock_ops, snd_cwnd): - SOCK_OPS_GET_FIELD(snd_cwnd, snd_cwnd, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, srtt_us): - SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock); - break; - case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags): SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags, struct tcp_sock); break; - case offsetof(struct bpf_sock_ops, snd_ssthresh): - SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, rcv_nxt): - SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, snd_nxt): - SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, snd_una): - SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, mss_cache): - SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, ecn_flags): - SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, rate_delivered): - SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered, - struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, rate_interval_us): - SOCK_OPS_GET_FIELD(rate_interval_us, rate_interval_us, - struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, packets_out): - SOCK_OPS_GET_FIELD(packets_out, packets_out, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, retrans_out): - SOCK_OPS_GET_FIELD(retrans_out, retrans_out, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, total_retrans): - SOCK_OPS_GET_FIELD(total_retrans, total_retrans, - struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, segs_in): - SOCK_OPS_GET_FIELD(segs_in, segs_in, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, data_segs_in): - SOCK_OPS_GET_FIELD(data_segs_in, data_segs_in, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, segs_out): - SOCK_OPS_GET_FIELD(segs_out, segs_out, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, data_segs_out): - SOCK_OPS_GET_FIELD(data_segs_out, data_segs_out, - struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, lost_out): - SOCK_OPS_GET_FIELD(lost_out, lost_out, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, sacked_out): - SOCK_OPS_GET_FIELD(sacked_out, sacked_out, struct tcp_sock); - break; - case offsetof(struct bpf_sock_ops, sk_txhash): SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash, struct sock, type); break; - - case offsetof(struct bpf_sock_ops, bytes_received): - SOCK_OPS_GET_FIELD(bytes_received, bytes_received, - struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, bytes_acked): - SOCK_OPS_GET_FIELD(bytes_acked, bytes_acked, struct tcp_sock); - break; - } return insn - insn_buf; } From 655a51e536c09d15ffa3603b1b6fce2b45b85a1f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Sat, 9 Feb 2019 23:22:24 -0800 Subject: [PATCH 0930/1436] bpf: Add struct bpf_tcp_sock and BPF_FUNC_tcp_sock This patch adds a helper function BPF_FUNC_tcp_sock and it is currently available for cg_skb and sched_(cls|act): struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk); int cg_skb_foo(struct __sk_buff *skb) { struct bpf_tcp_sock *tp; struct bpf_sock *sk; __u32 snd_cwnd; sk = skb->sk; if (!sk) return 1; tp = bpf_tcp_sock(sk); if (!tp) return 1; snd_cwnd = tp->snd_cwnd; /* ... */ return 1; } A 'struct bpf_tcp_sock' is also added to the uapi bpf.h to provide read-only access. bpf_tcp_sock has all the existing tcp_sock's fields that has already been exposed by the bpf_sock_ops. i.e. no new tcp_sock's fields are exposed in bpf.h. This helper returns a pointer to the tcp_sock. If it is not a tcp_sock or it cannot be traced back to a tcp_sock by sk_to_full_sk(), it returns NULL. Hence, the caller needs to check for NULL before accessing it. The current use case is to expose members from tcp_sock to allow a cg_skb_bpf_prog to provide per cgroup traffic policing/shaping. Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 30 +++++++++++++++ include/uapi/linux/bpf.h | 51 +++++++++++++++++++++++++- kernel/bpf/verifier.c | 31 +++++++++++++++- net/core/filter.c | 79 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 188 insertions(+), 3 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a60463b45b54..7f58828755fd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -204,6 +204,7 @@ enum bpf_return_type { RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ + RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -259,6 +260,8 @@ enum bpf_reg_type { PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */ PTR_TO_SOCK_COMMON, /* reg points to sock_common */ PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ + PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ + PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ }; /* The information passed from prog-specific *_is_valid_access @@ -956,4 +959,31 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, } #endif +#ifdef CONFIG_INET +bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info); + +u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size); +#else +static inline bool bpf_tcp_sock_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + return false; +} + +static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size) +{ + return 0; +} +#endif /* CONFIG_INET */ + #endif /* _LINUX_BPF_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d8f91777c5b6..25c8c0e62ecf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2337,6 +2337,15 @@ union bpf_attr { * Return * A **struct bpf_sock** pointer on success, or NULL in * case of failure. + * + * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) + * Description + * This helper gets a **struct bpf_tcp_sock** pointer from a + * **struct bpf_sock** pointer. + * + * Return + * A **struct bpf_tcp_sock** pointer on success, or NULL in + * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2434,7 +2443,8 @@ union bpf_attr { FN(rc_pointer_rel), \ FN(spin_lock), \ FN(spin_unlock), \ - FN(sk_fullsock), + FN(sk_fullsock), \ + FN(tcp_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2616,6 +2626,45 @@ struct bpf_sock { __u32 state; }; +struct bpf_tcp_sock { + __u32 snd_cwnd; /* Sending congestion window */ + __u32 srtt_us; /* smoothed round trip time << 3 in usecs */ + __u32 rtt_min; + __u32 snd_ssthresh; /* Slow start size threshold */ + __u32 rcv_nxt; /* What we want to receive next */ + __u32 snd_nxt; /* Next sequence we send */ + __u32 snd_una; /* First byte we want an ack for */ + __u32 mss_cache; /* Cached effective mss, not including SACKS */ + __u32 ecn_flags; /* ECN status bits. */ + __u32 rate_delivered; /* saved rate sample: packets delivered */ + __u32 rate_interval_us; /* saved rate sample: time elapsed */ + __u32 packets_out; /* Packets which are "in flight" */ + __u32 retrans_out; /* Retransmitted packets out */ + __u32 total_retrans; /* Total retransmits for entire connection */ + __u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn + * total number of segments in. + */ + __u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn + * total number of data segments in. + */ + __u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut + * The total number of segments sent. + */ + __u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut + * total number of data segments sent. + */ + __u32 lost_out; /* Lost packets */ + __u32 sacked_out; /* SACK'd packets */ + __u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived + * sum(delta(rcv_nxt)), or how many bytes + * were acked. + */ + __u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked + * sum(delta(snd_una)), or how many bytes + * were acked. + */ +}; + struct bpf_sock_tuple { union { struct { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b755d55a3791..1b9496c41383 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -334,14 +334,16 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type) static bool type_is_sk_pointer(enum bpf_reg_type type) { return type == PTR_TO_SOCKET || - type == PTR_TO_SOCK_COMMON; + type == PTR_TO_SOCK_COMMON || + type == PTR_TO_TCP_SOCK; } static bool reg_type_may_be_null(enum bpf_reg_type type) { return type == PTR_TO_MAP_VALUE_OR_NULL || type == PTR_TO_SOCKET_OR_NULL || - type == PTR_TO_SOCK_COMMON_OR_NULL; + type == PTR_TO_SOCK_COMMON_OR_NULL || + type == PTR_TO_TCP_SOCK_OR_NULL; } static bool type_is_refcounted(enum bpf_reg_type type) @@ -407,6 +409,8 @@ static const char * const reg_type_str[] = { [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", [PTR_TO_SOCK_COMMON] = "sock_common", [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", + [PTR_TO_TCP_SOCK] = "tcp_sock", + [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", }; static char slot_type_char[] = { @@ -1209,6 +1213,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: case PTR_TO_SOCK_COMMON_OR_NULL: + case PTR_TO_TCP_SOCK: + case PTR_TO_TCP_SOCK_OR_NULL: return true; default: return false; @@ -1662,6 +1668,9 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, case PTR_TO_SOCKET: valid = bpf_sock_is_valid_access(off, size, t, &info); break; + case PTR_TO_TCP_SOCK: + valid = bpf_tcp_sock_is_valid_access(off, size, t, &info); + break; default: valid = false; } @@ -1823,6 +1832,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, case PTR_TO_SOCK_COMMON: pointer_desc = "sock_common "; break; + case PTR_TO_TCP_SOCK: + pointer_desc = "tcp_sock "; + break; default: break; } @@ -3148,6 +3160,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn /* For mark_ptr_or_null_reg() */ regs[BPF_REG_0].id = ++env->id_gen; } + } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; + regs[BPF_REG_0].id = ++env->id_gen; } else { verbose(env, "unknown return type %d of func %s#%d\n", fn->ret_type, func_id_name(func_id), func_id); @@ -3409,6 +3425,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: case PTR_TO_SOCK_COMMON_OR_NULL: + case PTR_TO_TCP_SOCK: + case PTR_TO_TCP_SOCK_OR_NULL: verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str[ptr_reg->type]); return -EACCES; @@ -4644,6 +4662,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, reg->type = PTR_TO_SOCKET; } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) { reg->type = PTR_TO_SOCK_COMMON; + } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) { + reg->type = PTR_TO_TCP_SOCK; } if (is_null || !(reg_is_refcounted(reg) || reg_may_point_to_spin_lock(reg))) { @@ -5839,6 +5859,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: case PTR_TO_SOCK_COMMON_OR_NULL: + case PTR_TO_TCP_SOCK: + case PTR_TO_TCP_SOCK_OR_NULL: /* Only valid matches are exact, which memcmp() above * would have accepted */ @@ -6161,6 +6183,8 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type) case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: case PTR_TO_SOCK_COMMON_OR_NULL: + case PTR_TO_TCP_SOCK: + case PTR_TO_TCP_SOCK_OR_NULL: return false; default: return true; @@ -7166,6 +7190,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) case PTR_TO_SOCK_COMMON: convert_ctx_access = bpf_sock_convert_ctx_access; break; + case PTR_TO_TCP_SOCK: + convert_ctx_access = bpf_tcp_sock_convert_ctx_access; + break; default: continue; } diff --git a/net/core/filter.c b/net/core/filter.c index c0d7b9ef279f..353735575204 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5315,6 +5315,79 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { .arg5_type = ARG_ANYTHING, }; +bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, bytes_acked)) + return false; + + if (off % size != 0) + return false; + + switch (off) { + case offsetof(struct bpf_tcp_sock, bytes_received): + case offsetof(struct bpf_tcp_sock, bytes_acked): + return size == sizeof(__u64); + default: + return size == sizeof(__u32); + } +} + +u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, u32 *target_size) +{ + struct bpf_insn *insn = insn_buf; + +#define BPF_TCP_SOCK_GET_COMMON(FIELD) \ + do { \ + BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) > \ + FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\ + si->dst_reg, si->src_reg, \ + offsetof(struct tcp_sock, FIELD)); \ + } while (0) + + CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_tcp_sock, + BPF_TCP_SOCK_GET_COMMON); + + if (insn > insn_buf) + return insn - insn_buf; + + switch (si->off) { + case offsetof(struct bpf_tcp_sock, rtt_min): + BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) != + sizeof(struct minmax)); + BUILD_BUG_ON(sizeof(struct minmax) < + sizeof(struct minmax_sample)); + + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct tcp_sock, rtt_min) + + offsetof(struct minmax_sample, v)); + break; + } + + return insn - insn_buf; +} + +BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) +{ + sk = sk_to_full_sk(sk); + + if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) + return (unsigned long)sk; + + return (unsigned long)NULL; +} + +static const struct bpf_func_proto bpf_tcp_sock_proto = { + .func = bpf_tcp_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, +}; + #endif /* CONFIG_INET */ bool bpf_helper_changes_pkt_data(void *func) @@ -5470,6 +5543,10 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_local_storage_proto; case BPF_FUNC_sk_fullsock: return &bpf_sk_fullsock_proto; +#ifdef CONFIG_INET + case BPF_FUNC_tcp_sock: + return &bpf_tcp_sock_proto; +#endif default: return sk_filter_func_proto(func_id, prog); } @@ -5560,6 +5637,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_lookup_udp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; + case BPF_FUNC_tcp_sock: + return &bpf_tcp_sock_proto; #endif default: return bpf_base_func_proto(func_id); From 281f9e7572075879c0af4be792eaf31c17e7f894 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Sat, 9 Feb 2019 23:22:25 -0800 Subject: [PATCH 0931/1436] bpf: Sync bpf.h to tools/ This patch sync the uapi bpf.h to tools/. Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 72 ++++++++++++++++++++++++++++++---- 1 file changed, 65 insertions(+), 7 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1777fa0c61e4..25c8c0e62ecf 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2329,6 +2329,23 @@ union bpf_attr { * "**y**". * Return * 0 + * + * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) + * Description + * This helper gets a **struct bpf_sock** pointer such + * that all the fields in bpf_sock can be accessed. + * Return + * A **struct bpf_sock** pointer on success, or NULL in + * case of failure. + * + * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) + * Description + * This helper gets a **struct bpf_tcp_sock** pointer from a + * **struct bpf_sock** pointer. + * + * Return + * A **struct bpf_tcp_sock** pointer on success, or NULL in + * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2425,7 +2442,9 @@ union bpf_attr { FN(msg_pop_data), \ FN(rc_pointer_rel), \ FN(spin_lock), \ - FN(spin_unlock), + FN(spin_unlock), \ + FN(sk_fullsock), \ + FN(tcp_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2545,6 +2564,7 @@ struct __sk_buff { __u64 tstamp; __u32 wire_len; __u32 gso_segs; + __bpf_md_ptr(struct bpf_sock *, sk); }; struct bpf_tunnel_key { @@ -2596,14 +2616,52 @@ struct bpf_sock { __u32 protocol; __u32 mark; __u32 priority; - __u32 src_ip4; /* Allows 1,2,4-byte read. - * Stored in network byte order. + /* IP address also allows 1 and 2 bytes access */ + __u32 src_ip4; + __u32 src_ip6[4]; + __u32 src_port; /* host byte order */ + __u32 dst_port; /* network byte order */ + __u32 dst_ip4; + __u32 dst_ip6[4]; + __u32 state; +}; + +struct bpf_tcp_sock { + __u32 snd_cwnd; /* Sending congestion window */ + __u32 srtt_us; /* smoothed round trip time << 3 in usecs */ + __u32 rtt_min; + __u32 snd_ssthresh; /* Slow start size threshold */ + __u32 rcv_nxt; /* What we want to receive next */ + __u32 snd_nxt; /* Next sequence we send */ + __u32 snd_una; /* First byte we want an ack for */ + __u32 mss_cache; /* Cached effective mss, not including SACKS */ + __u32 ecn_flags; /* ECN status bits. */ + __u32 rate_delivered; /* saved rate sample: packets delivered */ + __u32 rate_interval_us; /* saved rate sample: time elapsed */ + __u32 packets_out; /* Packets which are "in flight" */ + __u32 retrans_out; /* Retransmitted packets out */ + __u32 total_retrans; /* Total retransmits for entire connection */ + __u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn + * total number of segments in. */ - __u32 src_ip6[4]; /* Allows 1,2,4-byte read. - * Stored in network byte order. + __u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn + * total number of data segments in. */ - __u32 src_port; /* Allows 4-byte read. - * Stored in host byte order + __u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut + * The total number of segments sent. + */ + __u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut + * total number of data segments sent. + */ + __u32 lost_out; /* Lost packets */ + __u32 sacked_out; /* SACK'd packets */ + __u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived + * sum(delta(rcv_nxt)), or how many bytes + * were acked. + */ + __u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked + * sum(delta(snd_una)), or how many bytes + * were acked. */ }; From fb47d1d931f8419645db15ef5fc0dc7a857c8f4e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Sat, 9 Feb 2019 23:22:26 -0800 Subject: [PATCH 0932/1436] bpf: Add skb->sk, bpf_sk_fullsock and bpf_tcp_sock tests to test_verifer This patch tests accessing the skb->sk and the new helpers, bpf_sk_fullsock and bpf_tcp_sock. The errstr of some existing "reference tracking" tests is changed with s/bpf_sock/sock/ and s/socket/sock/ where "sock" is from the verifier's reg_type_str[]. Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_util.h | 9 + .../selftests/bpf/verifier/ref_tracking.c | 4 +- tools/testing/selftests/bpf/verifier/sock.c | 384 ++++++++++++++++++ tools/testing/selftests/bpf/verifier/unpriv.c | 2 +- 4 files changed, 396 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/bpf/verifier/sock.c diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index 315a44fa32af..197347031038 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -48,4 +48,13 @@ static inline unsigned int bpf_num_possible_cpus(void) # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif +#ifndef sizeof_field +#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) +#endif + +#ifndef offsetofend +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) +#endif + #endif /* __BPF_UTIL__ */ diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index dc2cc823df2b..3ed3593bd8b6 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -547,7 +547,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "cannot write into socket", + .errstr = "cannot write into sock", .result = REJECT, }, { @@ -562,7 +562,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "invalid bpf_sock access off=0 size=8", + .errstr = "invalid sock access off=0 size=8", .result = REJECT, }, { diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c new file mode 100644 index 000000000000..0ddfdf76aba5 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -0,0 +1,384 @@ +{ + "skb->sk: no NULL check", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid mem access 'sock_common_or_null'", +}, +{ + "skb->sk: sk->family [non fullsock field]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sock, family)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "skb->sk: sk->type [fullsock field]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sock, type)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid sock_common access", +}, +{ + "bpf_sk_fullsock(skb->sk): no !skb->sk check", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "type=sock_common_or_null expected=sock_common", +}, +{ + "sk_fullsock(skb->sk): no NULL check on ret", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid mem access 'sock_or_null'", +}, +{ + "sk_fullsock(skb->sk): sk->type [fullsock field]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->family [non fullsock field]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, family)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->state [narrow load]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, state)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->dst_port [narrow load]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid sock access", +}, +{ + "sk_fullsock(skb->sk): sk->dst_ip6 [load 2nd byte]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_ip6[0]) + 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->type [narrow load]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->protocol [narrow load]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, protocol)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): beyond last field", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, state)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid sock access", +}, +{ + "bpf_tcp_sock(skb->sk): no !skb->sk check", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "type=sock_common_or_null expected=sock_common", +}, +{ + "bpf_tcp_sock(skb->sk): no NULL check on ret", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid mem access 'tcp_sock_or_null'", +}, +{ + "bpf_tcp_sock(skb->sk): tp->snd_cwnd", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "bpf_tcp_sock(skb->sk): tp->bytes_acked", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, bytes_acked)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "bpf_tcp_sock(skb->sk): beyond last field", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_tcp_sock, bytes_acked)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid tcp_sock access", +}, +{ + "bpf_tcp_sock(bpf_sk_fullsock(skb->sk)): tp->snd_cwnd", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "bpf_sk_release(skb->sk)", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "type=sock_common expected=sock", +}, +{ + "bpf_sk_release(bpf_sk_fullsock(skb->sk))", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "reference has not been acquired before", +}, +{ + "bpf_sk_release(bpf_tcp_sock(skb->sk))", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "type=tcp_sock expected=sock", +}, diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index 3e046695fad7..dbaf5be947b2 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -365,7 +365,7 @@ }, .result = REJECT, //.errstr = "same insn cannot be used with different pointers", - .errstr = "cannot write into socket", + .errstr = "cannot write into sock", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { From e0b27b3f97b8fce620331baad563833617c1f303 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Sat, 9 Feb 2019 23:22:28 -0800 Subject: [PATCH 0933/1436] bpf: Add test_sock_fields for skb->sk and bpf_tcp_sock This patch adds a C program to show the usage on skb->sk and bpf_tcp_sock. Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 6 +- tools/testing/selftests/bpf/bpf_helpers.h | 4 + .../testing/selftests/bpf/test_sock_fields.c | 327 ++++++++++++++++++ .../selftests/bpf/test_sock_fields_kern.c | 152 ++++++++ 4 files changed, 487 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/test_sock_fields.c create mode 100644 tools/testing/selftests/bpf/test_sock_fields_kern.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 383d2ff13fc7..c7e1e3255448 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -23,7 +23,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \ test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \ - test_netcnt test_tcpnotify_user + test_netcnt test_tcpnotify_user test_sock_fields BPF_OBJ_FILES = \ test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ @@ -35,7 +35,8 @@ BPF_OBJ_FILES = \ sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_xdp_vlan.o \ - xdp_dummy.o test_map_in_map.o test_spin_lock.o test_map_lock.o + xdp_dummy.o test_map_in_map.o test_spin_lock.o test_map_lock.o \ + test_sock_fields_kern.o # Objects are built with default compilation flags and with sub-register # code-gen enabled. @@ -111,6 +112,7 @@ $(OUTPUT)/test_progs: trace_helpers.c $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c $(OUTPUT)/test_netcnt: cgroup_helpers.c +$(OUTPUT)/test_sock_fields: cgroup_helpers.c .PHONY: force diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 6a0ce0f055c5..d9999f1ed1d2 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -176,6 +176,10 @@ static void (*bpf_spin_lock)(struct bpf_spin_lock *lock) = (void *) BPF_FUNC_spin_lock; static void (*bpf_spin_unlock)(struct bpf_spin_lock *lock) = (void *) BPF_FUNC_spin_unlock; +static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) = + (void *) BPF_FUNC_sk_fullsock; +static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) = + (void *) BPF_FUNC_tcp_sock; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c new file mode 100644 index 000000000000..9bb58369b481 --- /dev/null +++ b/tools/testing/selftests/bpf/test_sock_fields.c @@ -0,0 +1,327 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "cgroup_helpers.h" + +enum bpf_array_idx { + SRV_IDX, + CLI_IDX, + __NR_BPF_ARRAY_IDX, +}; + +#define CHECK(condition, tag, format...) ({ \ + int __ret = !!(condition); \ + if (__ret) { \ + printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \ + printf(format); \ + printf("\n"); \ + exit(-1); \ + } \ +}) + +#define TEST_CGROUP "/test-bpf-sock-fields" +#define DATA "Hello BPF!" +#define DATA_LEN sizeof(DATA) + +static struct sockaddr_in6 srv_sa6, cli_sa6; +static int linum_map_fd; +static int addr_map_fd; +static int tp_map_fd; +static int sk_map_fd; +static __u32 srv_idx = SRV_IDX; +static __u32 cli_idx = CLI_IDX; + +static void init_loopback6(struct sockaddr_in6 *sa6) +{ + memset(sa6, 0, sizeof(*sa6)); + sa6->sin6_family = AF_INET6; + sa6->sin6_addr = in6addr_loopback; +} + +static void print_sk(const struct bpf_sock *sk) +{ + char src_ip4[24], dst_ip4[24]; + char src_ip6[64], dst_ip6[64]; + + inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4)); + inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6)); + inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4)); + inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6)); + + printf("state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u " + "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u " + "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n", + sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol, + sk->mark, sk->priority, + sk->src_ip4, src_ip4, + sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3], + src_ip6, sk->src_port, + sk->dst_ip4, dst_ip4, + sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3], + dst_ip6, ntohs(sk->dst_port)); +} + +static void print_tp(const struct bpf_tcp_sock *tp) +{ + printf("snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u " + "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u " + "rate_delivered:%u rate_interval_us:%u packets_out:%u " + "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u " + "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u " + "bytes_received:%llu bytes_acked:%llu\n", + tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh, + tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache, + tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us, + tp->packets_out, tp->retrans_out, tp->total_retrans, + tp->segs_in, tp->data_segs_in, tp->segs_out, + tp->data_segs_out, tp->lost_out, tp->sacked_out, + tp->bytes_received, tp->bytes_acked); +} + +static void check_result(void) +{ + struct bpf_tcp_sock srv_tp, cli_tp; + struct bpf_sock srv_sk, cli_sk; + __u32 linum, idx0 = 0; + int err; + + err = bpf_map_lookup_elem(linum_map_fd, &idx0, &linum); + CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", + "err:%d errno:%d", err, errno); + + err = bpf_map_lookup_elem(sk_map_fd, &srv_idx, &srv_sk); + CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &srv_idx)", + "err:%d errno:%d", err, errno); + err = bpf_map_lookup_elem(tp_map_fd, &srv_idx, &srv_tp); + CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &srv_idx)", + "err:%d errno:%d", err, errno); + + err = bpf_map_lookup_elem(sk_map_fd, &cli_idx, &cli_sk); + CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &cli_idx)", + "err:%d errno:%d", err, errno); + err = bpf_map_lookup_elem(tp_map_fd, &cli_idx, &cli_tp); + CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &cli_idx)", + "err:%d errno:%d", err, errno); + + printf("srv_sk: "); + print_sk(&srv_sk); + printf("\n"); + + printf("cli_sk: "); + print_sk(&cli_sk); + printf("\n"); + + printf("srv_tp: "); + print_tp(&srv_tp); + printf("\n"); + + printf("cli_tp: "); + print_tp(&cli_tp); + printf("\n"); + + CHECK(srv_sk.state == 10 || + !srv_sk.state || + srv_sk.family != AF_INET6 || + srv_sk.protocol != IPPROTO_TCP || + memcmp(srv_sk.src_ip6, &in6addr_loopback, + sizeof(srv_sk.src_ip6)) || + memcmp(srv_sk.dst_ip6, &in6addr_loopback, + sizeof(srv_sk.dst_ip6)) || + srv_sk.src_port != ntohs(srv_sa6.sin6_port) || + srv_sk.dst_port != cli_sa6.sin6_port, + "Unexpected srv_sk", "Check srv_sk output. linum:%u", linum); + + CHECK(cli_sk.state == 10 || + !cli_sk.state || + cli_sk.family != AF_INET6 || + cli_sk.protocol != IPPROTO_TCP || + memcmp(cli_sk.src_ip6, &in6addr_loopback, + sizeof(cli_sk.src_ip6)) || + memcmp(cli_sk.dst_ip6, &in6addr_loopback, + sizeof(cli_sk.dst_ip6)) || + cli_sk.src_port != ntohs(cli_sa6.sin6_port) || + cli_sk.dst_port != srv_sa6.sin6_port, + "Unexpected cli_sk", "Check cli_sk output. linum:%u", linum); + + CHECK(srv_tp.data_segs_out != 1 || + srv_tp.data_segs_in || + srv_tp.snd_cwnd != 10 || + srv_tp.total_retrans || + srv_tp.bytes_acked != DATA_LEN, + "Unexpected srv_tp", "Check srv_tp output. linum:%u", linum); + + CHECK(cli_tp.data_segs_out || + cli_tp.data_segs_in != 1 || + cli_tp.snd_cwnd != 10 || + cli_tp.total_retrans || + cli_tp.bytes_received != DATA_LEN, + "Unexpected cli_tp", "Check cli_tp output. linum:%u", linum); +} + +static void test(void) +{ + int listen_fd, cli_fd, accept_fd, epfd, err; + struct epoll_event ev; + socklen_t addrlen; + + addrlen = sizeof(struct sockaddr_in6); + ev.events = EPOLLIN; + + epfd = epoll_create(1); + CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d", epfd, errno); + + /* Prepare listen_fd */ + listen_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); + CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d", + listen_fd, errno); + + init_loopback6(&srv_sa6); + err = bind(listen_fd, (struct sockaddr *)&srv_sa6, sizeof(srv_sa6)); + CHECK(err, "bind(listen_fd)", "err:%d errno:%d", err, errno); + + err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); + CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d", err, errno); + + err = listen(listen_fd, 1); + CHECK(err, "listen(listen_fd)", "err:%d errno:%d", err, errno); + + /* Prepare cli_fd */ + cli_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); + CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d", cli_fd, errno); + + init_loopback6(&cli_sa6); + err = bind(cli_fd, (struct sockaddr *)&cli_sa6, sizeof(cli_sa6)); + CHECK(err, "bind(cli_fd)", "err:%d errno:%d", err, errno); + + err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen); + CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d", + err, errno); + + /* Update addr_map with srv_sa6 and cli_sa6 */ + err = bpf_map_update_elem(addr_map_fd, &srv_idx, &srv_sa6, 0); + CHECK(err, "map_update", "err:%d errno:%d", err, errno); + + err = bpf_map_update_elem(addr_map_fd, &cli_idx, &cli_sa6, 0); + CHECK(err, "map_update", "err:%d errno:%d", err, errno); + + /* Connect from cli_sa6 to srv_sa6 */ + err = connect(cli_fd, (struct sockaddr *)&srv_sa6, addrlen); + printf("srv_sa6.sin6_port:%u cli_sa6.sin6_port:%u\n\n", + ntohs(srv_sa6.sin6_port), ntohs(cli_sa6.sin6_port)); + CHECK(err && errno != EINPROGRESS, + "connect(cli_fd)", "err:%d errno:%d", err, errno); + + ev.data.fd = listen_fd; + err = epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev); + CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", "err:%d errno:%d", + err, errno); + + /* Accept the connection */ + /* Have some timeout in accept(listen_fd). Just in case. */ + err = epoll_wait(epfd, &ev, 1, 1000); + CHECK(err != 1 || ev.data.fd != listen_fd, + "epoll_wait(listen_fd)", + "err:%d errno:%d ev.data.fd:%d listen_fd:%d", + err, errno, ev.data.fd, listen_fd); + + accept_fd = accept(listen_fd, NULL, NULL); + CHECK(accept_fd == -1, "accept(listen_fd)", "accept_fd:%d errno:%d", + accept_fd, errno); + close(listen_fd); + + /* Send some data from accept_fd to cli_fd */ + err = send(accept_fd, DATA, DATA_LEN, 0); + CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d", + err, errno); + + /* Have some timeout in recv(cli_fd). Just in case. */ + ev.data.fd = cli_fd; + err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev); + CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d", + err, errno); + + err = epoll_wait(epfd, &ev, 1, 1000); + CHECK(err != 1 || ev.data.fd != cli_fd, + "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d", + err, errno, ev.data.fd, cli_fd); + + err = recv(cli_fd, NULL, 0, MSG_TRUNC); + CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno); + + close(epfd); + close(accept_fd); + close(cli_fd); + + check_result(); +} + +int main(int argc, char **argv) +{ + struct bpf_prog_load_attr attr = { + .file = "test_sock_fields_kern.o", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .expected_attach_type = BPF_CGROUP_INET_EGRESS, + }; + int cgroup_fd, prog_fd, err; + struct bpf_object *obj; + struct bpf_map *map; + + err = setup_cgroup_environment(); + CHECK(err, "setup_cgroup_environment()", "err:%d errno:%d", + err, errno); + + atexit(cleanup_cgroup_environment); + + /* Create a cgroup, get fd, and join it */ + cgroup_fd = create_and_get_cgroup(TEST_CGROUP); + CHECK(cgroup_fd == -1, "create_and_get_cgroup()", + "cgroup_fd:%d errno:%d", cgroup_fd, errno); + + err = join_cgroup(TEST_CGROUP); + CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno); + + err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); + CHECK(err, "bpf_prog_load_xattr()", "err:%d", err); + + err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0); + CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)", + "err:%d errno%d", err, errno); + close(cgroup_fd); + + map = bpf_object__find_map_by_name(obj, "addr_map"); + CHECK(!map, "cannot find addr_map", "(null)"); + addr_map_fd = bpf_map__fd(map); + + map = bpf_object__find_map_by_name(obj, "sock_result_map"); + CHECK(!map, "cannot find sock_result_map", "(null)"); + sk_map_fd = bpf_map__fd(map); + + map = bpf_object__find_map_by_name(obj, "tcp_sock_result_map"); + CHECK(!map, "cannot find tcp_sock_result_map", "(null)"); + tp_map_fd = bpf_map__fd(map); + + map = bpf_object__find_map_by_name(obj, "linum_map"); + CHECK(!map, "cannot find linum_map", "(null)"); + linum_map_fd = bpf_map__fd(map); + + test(); + + bpf_object__close(obj); + cleanup_cgroup_environment(); + + printf("PASS\n"); + + return 0; +} diff --git a/tools/testing/selftests/bpf/test_sock_fields_kern.c b/tools/testing/selftests/bpf/test_sock_fields_kern.c new file mode 100644 index 000000000000..de1a43e8f610 --- /dev/null +++ b/tools/testing/selftests/bpf/test_sock_fields_kern.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include +#include +#include + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +enum bpf_array_idx { + SRV_IDX, + CLI_IDX, + __NR_BPF_ARRAY_IDX, +}; + +struct bpf_map_def SEC("maps") addr_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct sockaddr_in6), + .max_entries = __NR_BPF_ARRAY_IDX, +}; + +struct bpf_map_def SEC("maps") sock_result_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct bpf_sock), + .max_entries = __NR_BPF_ARRAY_IDX, +}; + +struct bpf_map_def SEC("maps") tcp_sock_result_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct bpf_tcp_sock), + .max_entries = __NR_BPF_ARRAY_IDX, +}; + +struct bpf_map_def SEC("maps") linum_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 1, +}; + +static bool is_loopback6(__u32 *a6) +{ + return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1); +} + +static void skcpy(struct bpf_sock *dst, + const struct bpf_sock *src) +{ + dst->bound_dev_if = src->bound_dev_if; + dst->family = src->family; + dst->type = src->type; + dst->protocol = src->protocol; + dst->mark = src->mark; + dst->priority = src->priority; + dst->src_ip4 = src->src_ip4; + dst->src_ip6[0] = src->src_ip6[0]; + dst->src_ip6[1] = src->src_ip6[1]; + dst->src_ip6[2] = src->src_ip6[2]; + dst->src_ip6[3] = src->src_ip6[3]; + dst->src_port = src->src_port; + dst->dst_ip4 = src->dst_ip4; + dst->dst_ip6[0] = src->dst_ip6[0]; + dst->dst_ip6[1] = src->dst_ip6[1]; + dst->dst_ip6[2] = src->dst_ip6[2]; + dst->dst_ip6[3] = src->dst_ip6[3]; + dst->dst_port = src->dst_port; + dst->state = src->state; +} + +static void tpcpy(struct bpf_tcp_sock *dst, + const struct bpf_tcp_sock *src) +{ + dst->snd_cwnd = src->snd_cwnd; + dst->srtt_us = src->srtt_us; + dst->rtt_min = src->rtt_min; + dst->snd_ssthresh = src->snd_ssthresh; + dst->rcv_nxt = src->rcv_nxt; + dst->snd_nxt = src->snd_nxt; + dst->snd_una = src->snd_una; + dst->mss_cache = src->mss_cache; + dst->ecn_flags = src->ecn_flags; + dst->rate_delivered = src->rate_delivered; + dst->rate_interval_us = src->rate_interval_us; + dst->packets_out = src->packets_out; + dst->retrans_out = src->retrans_out; + dst->total_retrans = src->total_retrans; + dst->segs_in = src->segs_in; + dst->data_segs_in = src->data_segs_in; + dst->segs_out = src->segs_out; + dst->data_segs_out = src->data_segs_out; + dst->lost_out = src->lost_out; + dst->sacked_out = src->sacked_out; + dst->bytes_received = src->bytes_received; + dst->bytes_acked = src->bytes_acked; +} + +#define RETURN { \ + linum = __LINE__; \ + bpf_map_update_elem(&linum_map, &idx0, &linum, 0); \ + return 1; \ +} + +SEC("cgroup_skb/egress") +int read_sock_fields(struct __sk_buff *skb) +{ + __u32 srv_idx = SRV_IDX, cli_idx = CLI_IDX, idx; + struct sockaddr_in6 *srv_sa6, *cli_sa6; + struct bpf_tcp_sock *tp, *tp_ret; + struct bpf_sock *sk, *sk_ret; + __u32 linum, idx0 = 0; + + sk = skb->sk; + if (!sk || sk->state == 10) + RETURN; + + sk = bpf_sk_fullsock(sk); + if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP || + !is_loopback6(sk->src_ip6)) + RETURN; + + tp = bpf_tcp_sock(sk); + if (!tp) + RETURN; + + srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx); + cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx); + if (!srv_sa6 || !cli_sa6) + RETURN; + + if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port)) + idx = srv_idx; + else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port)) + idx = cli_idx; + else + RETURN; + + sk_ret = bpf_map_lookup_elem(&sock_result_map, &idx); + tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &idx); + if (!sk_ret || !tp_ret) + RETURN; + + skcpy(sk_ret, sk); + tpcpy(tp_ret, tp); + + RETURN; +} + +char _license[] SEC("license") = "GPL"; From b90efd2258749e04e1b3f71ef0d716f2ac2337e0 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 7 Feb 2019 14:54:16 -0500 Subject: [PATCH 0934/1436] bpf: only adjust gso_size on bytestream protocols bpf_skb_change_proto and bpf_skb_adjust_room change skb header length. For GSO packets they adjust gso_size to maintain the same MTU. The gso size can only be safely adjusted on bytestream protocols. Commit d02f51cbcf12 ("bpf: fix bpf_skb_adjust_net/bpf_skb_proto_xlat to deal with gso sctp skbs") excluded SKB_GSO_SCTP. Since then type SKB_GSO_UDP_L4 has been added, whose contents are one gso_size unit per datagram. Also exclude these. Move from a blacklist to a whitelist check to future proof against additional such new GSO types, e.g., for fraglist based GRO. Fixes: bec1f6f69736 ("udp: generate gso with UDP_SEGMENT") Signed-off-by: Willem de Bruijn Signed-off-by: Alexei Starovoitov --- include/linux/skbuff.h | 6 ++++++ net/core/filter.c | 12 ++++-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 95d25b010a25..5a7a8b93a5ab 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4212,6 +4212,12 @@ static inline bool skb_is_gso_sctp(const struct sk_buff *skb) return skb_shinfo(skb)->gso_type & SKB_GSO_SCTP; } +static inline bool skb_is_gso_tcp(const struct sk_buff *skb) +{ + return skb_is_gso(skb) && + skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6); +} + static inline void skb_gso_reset(struct sk_buff *skb) { skb_shinfo(skb)->gso_size = 0; diff --git a/net/core/filter.c b/net/core/filter.c index 7a54dc11ac2d..f7d0004fc160 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2789,8 +2789,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; - /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */ - if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + if (!skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_cow(skb, len_diff); @@ -2831,8 +2830,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; - /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */ - if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + if (!skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_unclone(skb, GFP_ATOMIC); @@ -2957,8 +2955,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; - /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */ - if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + if (!skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_cow(skb, len_diff); @@ -2987,8 +2984,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; - /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */ - if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + if (!skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_unclone(skb, GFP_ATOMIC); From e6762c8bcf982821935a2b1cb33cf8335d0eefae Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 8 Feb 2019 14:13:50 +0100 Subject: [PATCH 0935/1436] xsk: add missing smp_rmb() in xsk_mmap All the setup code in AF_XDP is protected by a mutex with the exception of the mmap code that cannot use it. To make sure that a process banging on the mmap call at the same time as another process is setting up the socket, smp_wmb() calls were added in the umem registration code and the queue creation code, so that the published structures that xsk_mmap needs would be consistent. However, the corresponding smp_rmb() calls were not added to the xsk_mmap code. This patch adds these calls. Fixes: 37b076933a8e3 ("xsk: add missing write- and data-dependency barrier") Fixes: c0c77d8fb787c ("xsk: add user memory registration support sockopt") Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov --- net/xdp/xsk.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index a03268454a27..45f3b528dc09 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -669,6 +669,8 @@ static int xsk_mmap(struct file *file, struct socket *sock, if (!umem) return -EINVAL; + /* Matches the smp_wmb() in XDP_UMEM_REG */ + smp_rmb(); if (offset == XDP_UMEM_PGOFF_FILL_RING) q = READ_ONCE(umem->fq); else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING) @@ -678,6 +680,8 @@ static int xsk_mmap(struct file *file, struct socket *sock, if (!q) return -EINVAL; + /* Matches the smp_wmb() in xsk_init_queue */ + smp_rmb(); qpg = virt_to_head_page(q->ring); if (size > (PAGE_SIZE << compound_order(qpg))) return -EINVAL; From bfc913682464f45bc4d6044084e370f9048de9d5 Mon Sep 17 00:00:00 2001 From: Meelis Roos Date: Fri, 12 Oct 2018 12:27:51 +0300 Subject: [PATCH 0936/1436] alpha: Fix Eiger NR_IRQS to 128 Eiger machine vector definition has nr_irqs 128, and working 2.6.26 boot shows SCSI getting IRQ-s 64 and 65. Current kernel boot fails because Symbios SCSI fails to request IRQ-s and does not find the disks. It has been broken at least since 3.18 - the earliest I could test with my gcc-5. The headers have moved around and possibly another order of defines has worked in the past - but since 128 seems to be correct and used, fix arch/alpha/include/asm/irq.h to have NR_IRQS=128 for Eiger. This fixes 4.19-rc7 boot on my Force Flexor A264 (Eiger subarch). Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Meelis Roos Signed-off-by: Matt Turner --- arch/alpha/include/asm/irq.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/alpha/include/asm/irq.h b/arch/alpha/include/asm/irq.h index 4d17cacd1462..432402c8e47f 100644 --- a/arch/alpha/include/asm/irq.h +++ b/arch/alpha/include/asm/irq.h @@ -56,15 +56,15 @@ #elif defined(CONFIG_ALPHA_DP264) || \ defined(CONFIG_ALPHA_LYNX) || \ - defined(CONFIG_ALPHA_SHARK) || \ - defined(CONFIG_ALPHA_EIGER) + defined(CONFIG_ALPHA_SHARK) # define NR_IRQS 64 #elif defined(CONFIG_ALPHA_TITAN) #define NR_IRQS 80 #elif defined(CONFIG_ALPHA_RAWHIDE) || \ - defined(CONFIG_ALPHA_TAKARA) + defined(CONFIG_ALPHA_TAKARA) || \ + defined(CONFIG_ALPHA_EIGER) # define NR_IRQS 128 #elif defined(CONFIG_ALPHA_WILDFIRE) From 491af60ffb848b59e82f7c9145833222e0bf27a5 Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Mon, 31 Dec 2018 11:53:55 +0000 Subject: [PATCH 0937/1436] alpha: fix page fault handling for r16-r18 targets Fix page fault handling code to fixup r16-r18 registers. Before the patch code had off-by-two registers bug. This bug caused overwriting of ps,pc,gp registers instead of fixing intended r16,r17,r18 (see `struct pt_regs`). More details: Initially Dmitry noticed a kernel bug as a failure on strace test suite. Test passes unmapped userspace pointer to io_submit: ```c #include #include #include #include int main(void) { unsigned long ctx = 0; if (syscall(__NR_io_setup, 1, &ctx)) err(1, "io_setup"); const size_t page_size = sysconf(_SC_PAGESIZE); const size_t size = page_size * 2; void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (MAP_FAILED == ptr) err(1, "mmap(%zu)", size); if (munmap(ptr, size)) err(1, "munmap"); syscall(__NR_io_submit, ctx, 1, ptr + page_size); syscall(__NR_io_destroy, ctx); return 0; } ``` Running this test causes kernel to crash when handling page fault: ``` Unable to handle kernel paging request at virtual address ffffffffffff9468 CPU 3 aio(26027): Oops 0 pc = [] ra = [] ps = 0000 Not tainted pc is at sys_io_submit+0x108/0x200 ra is at sys_io_submit+0x6c/0x200 v0 = fffffc00c58e6300 t0 = fffffffffffffff2 t1 = 000002000025e000 t2 = fffffc01f159fef8 t3 = fffffc0001009640 t4 = fffffc0000e0f6e0 t5 = 0000020001002e9e t6 = 4c41564e49452031 t7 = fffffc01f159c000 s0 = 0000000000000002 s1 = 000002000025e000 s2 = 0000000000000000 s3 = 0000000000000000 s4 = 0000000000000000 s5 = fffffffffffffff2 s6 = fffffc00c58e6300 a0 = fffffc00c58e6300 a1 = 0000000000000000 a2 = 000002000025e000 a3 = 00000200001ac260 a4 = 00000200001ac1e8 a5 = 0000000000000001 t8 = 0000000000000008 t9 = 000000011f8bce30 t10= 00000200001ac440 t11= 0000000000000000 pv = fffffc00006fd320 at = 0000000000000000 gp = 0000000000000000 sp = 00000000265fd174 Disabling lock debugging due to kernel taint Trace: [] entSys+0xa4/0xc0 ``` Here `gp` has invalid value. `gp is s overwritten by a fixup for the following page fault handler in `io_submit` syscall handler: ``` __se_sys_io_submit ... ldq a1,0(t1) bne t0,4280 <__se_sys_io_submit+0x180> ``` After a page fault `t0` should contain -EFALUT and `a1` is 0. Instead `gp` was overwritten in place of `a1`. This happens due to a off-by-two bug in `dpf_reg()` for `r16-r18` (aka `a0-a2`). I think the bug went unnoticed for a long time as `gp` is one of scratch registers. Any kernel function call would re-calculate `gp`. Dmitry tracked down the bug origin back to 2.1.32 kernel version where trap_a{0,1,2} fields were inserted into struct pt_regs. And even before that `dpf_reg()` contained off-by-one error. Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: linux-alpha@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reported-and-reviewed-by: "Dmitry V. Levin" Cc: stable@vger.kernel.org # v2.1.32+ Bug: https://bugs.gentoo.org/672040 Signed-off-by: Sergei Trofimovich Signed-off-by: Matt Turner --- arch/alpha/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index d73dc473fbb9..188fc9256baf 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -78,7 +78,7 @@ __load_new_mm_context(struct mm_struct *next_mm) /* Macro for exception fixup code to access integer registers. */ #define dpf_reg(r) \ (((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-16 : \ - (r) <= 18 ? (r)+8 : (r)-10]) + (r) <= 18 ? (r)+10 : (r)-10]) asmlinkage void do_page_fault(unsigned long address, unsigned long mmcsr, From 81ec3f3c4c4d78f2d3b6689c9816bfbdf7417dbb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 4 Feb 2019 13:35:32 +0100 Subject: [PATCH 0938/1436] perf/x86: Add check_period PMU callback Vince (and later on Ravi) reported crashes in the BTS code during fuzzing with the following backtrace: general protection fault: 0000 [#1] SMP PTI ... RIP: 0010:perf_prepare_sample+0x8f/0x510 ... Call Trace: ? intel_pmu_drain_bts_buffer+0x194/0x230 intel_pmu_drain_bts_buffer+0x160/0x230 ? tick_nohz_irq_exit+0x31/0x40 ? smp_call_function_single_interrupt+0x48/0xe0 ? call_function_single_interrupt+0xf/0x20 ? call_function_single_interrupt+0xa/0x20 ? x86_schedule_events+0x1a0/0x2f0 ? x86_pmu_commit_txn+0xb4/0x100 ? find_busiest_group+0x47/0x5d0 ? perf_event_set_state.part.42+0x12/0x50 ? perf_mux_hrtimer_restart+0x40/0xb0 intel_pmu_disable_event+0xae/0x100 ? intel_pmu_disable_event+0xae/0x100 x86_pmu_stop+0x7a/0xb0 x86_pmu_del+0x57/0x120 event_sched_out.isra.101+0x83/0x180 group_sched_out.part.103+0x57/0xe0 ctx_sched_out+0x188/0x240 ctx_resched+0xa8/0xd0 __perf_event_enable+0x193/0x1e0 event_function+0x8e/0xc0 remote_function+0x41/0x50 flush_smp_call_function_queue+0x68/0x100 generic_smp_call_function_single_interrupt+0x13/0x30 smp_call_function_single_interrupt+0x3e/0xe0 call_function_single_interrupt+0xf/0x20 The reason is that while event init code does several checks for BTS events and prevents several unwanted config bits for BTS event (like precise_ip), the PERF_EVENT_IOC_PERIOD allows to create BTS event without those checks being done. Following sequence will cause the crash: If we create an 'almost' BTS event with precise_ip and callchains, and it into a BTS event it will crash the perf_prepare_sample() function because precise_ip events are expected to come in with callchain data initialized, but that's not the case for intel_pmu_drain_bts_buffer() caller. Adding a check_period callback to be called before the period is changed via PERF_EVENT_IOC_PERIOD. It will deny the change if the event would become BTS. Plus adding also the limit_period check as well. Reported-by: Vince Weaver Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Naveen N. Rao Cc: Ravi Bangoria Cc: Stephane Eranian Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20190204123532.GA4794@krava Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 14 ++++++++++++++ arch/x86/events/intel/core.c | 9 +++++++++ arch/x86/events/perf_event.h | 16 ++++++++++++++-- include/linux/perf_event.h | 5 +++++ kernel/events/core.c | 16 ++++++++++++++++ 5 files changed, 58 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 374a19712e20..b684f0294f35 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2278,6 +2278,19 @@ void perf_check_microcode(void) x86_pmu.check_microcode(); } +static int x86_pmu_check_period(struct perf_event *event, u64 value) +{ + if (x86_pmu.check_period && x86_pmu.check_period(event, value)) + return -EINVAL; + + if (value && x86_pmu.limit_period) { + if (x86_pmu.limit_period(event, value) > value) + return -EINVAL; + } + + return 0; +} + static struct pmu pmu = { .pmu_enable = x86_pmu_enable, .pmu_disable = x86_pmu_disable, @@ -2302,6 +2315,7 @@ static struct pmu pmu = { .event_idx = x86_pmu_event_idx, .sched_task = x86_pmu_sched_task, .task_ctx_size = sizeof(struct x86_perf_task_context), + .check_period = x86_pmu_check_period, }; void arch_perf_update_userpage(struct perf_event *event, diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index daafb893449b..730978dff63f 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3587,6 +3587,11 @@ static void intel_pmu_sched_task(struct perf_event_context *ctx, intel_pmu_lbr_sched_task(ctx, sched_in); } +static int intel_pmu_check_period(struct perf_event *event, u64 value) +{ + return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0; +} + PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); PMU_FORMAT_ATTR(ldlat, "config1:0-15"); @@ -3667,6 +3672,8 @@ static __initconst const struct x86_pmu core_pmu = { .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, .cpu_dead = intel_pmu_cpu_dead, + + .check_period = intel_pmu_check_period, }; static struct attribute *intel_pmu_attrs[]; @@ -3711,6 +3718,8 @@ static __initconst const struct x86_pmu intel_pmu = { .guest_get_msrs = intel_guest_get_msrs, .sched_task = intel_pmu_sched_task, + + .check_period = intel_pmu_check_period, }; static __init void intel_clovertown_quirk(void) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 78d7b7031bfc..d46fd6754d92 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -646,6 +646,11 @@ struct x86_pmu { * Intel host/guest support (KVM) */ struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); + + /* + * Check period value for PERF_EVENT_IOC_PERIOD ioctl. + */ + int (*check_period) (struct perf_event *event, u64 period); }; struct x86_perf_task_context { @@ -857,7 +862,7 @@ static inline int amd_pmu_init(void) #ifdef CONFIG_CPU_SUP_INTEL -static inline bool intel_pmu_has_bts(struct perf_event *event) +static inline bool intel_pmu_has_bts_period(struct perf_event *event, u64 period) { struct hw_perf_event *hwc = &event->hw; unsigned int hw_event, bts_event; @@ -868,7 +873,14 @@ static inline bool intel_pmu_has_bts(struct perf_event *event) hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); - return hw_event == bts_event && hwc->sample_period == 1; + return hw_event == bts_event && period == 1; +} + +static inline bool intel_pmu_has_bts(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + return intel_pmu_has_bts_period(event, hwc->sample_period); } int intel_pmu_save_and_restart(struct perf_event *event); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1d5c551a5add..e1a051724f7e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -447,6 +447,11 @@ struct pmu { * Filter events for PMU-specific reasons. */ int (*filter_match) (struct perf_event *event); /* optional */ + + /* + * Check period value for PERF_EVENT_IOC_PERIOD ioctl. + */ + int (*check_period) (struct perf_event *event, u64 value); /* optional */ }; enum perf_addr_filter_action_t { diff --git a/kernel/events/core.c b/kernel/events/core.c index e5ede6918050..26d6edab051a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4963,6 +4963,11 @@ static void __perf_event_period(struct perf_event *event, } } +static int perf_event_check_period(struct perf_event *event, u64 value) +{ + return event->pmu->check_period(event, value); +} + static int perf_event_period(struct perf_event *event, u64 __user *arg) { u64 value; @@ -4979,6 +4984,9 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) if (event->attr.freq && value > sysctl_perf_event_sample_rate) return -EINVAL; + if (perf_event_check_period(event, value)) + return -EINVAL; + event_function_call(event, __perf_event_period, &value); return 0; @@ -9391,6 +9399,11 @@ static int perf_pmu_nop_int(struct pmu *pmu) return 0; } +static int perf_event_nop_int(struct perf_event *event, u64 value) +{ + return 0; +} + static DEFINE_PER_CPU(unsigned int, nop_txn_flags); static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags) @@ -9691,6 +9704,9 @@ got_cpu_context: pmu->pmu_disable = perf_pmu_nop_void; } + if (!pmu->check_period) + pmu->check_period = perf_event_nop_int; + if (!pmu->event_idx) pmu->event_idx = perf_event_idx_default; From 4926b51bfaa6d36bd6f398fb7698679d3962e19d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Feb 2019 13:17:12 +0200 Subject: [PATCH 0939/1436] mac80211: call drv_ibss_join() on restart If a driver does any significant activity in its ibss_join method, then it will very well expect that to be called during restart, before any stations are added. Do that. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/util.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index d0eb38b890aa..ba950ae974fc 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -5,7 +5,7 @@ * Copyright 2007 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -2146,6 +2146,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: break; + case NL80211_IFTYPE_ADHOC: + if (sdata->vif.bss_conf.ibss_joined) + WARN_ON(drv_join_ibss(local, sdata)); + /* fall through */ default: ieee80211_reconfig_stations(sdata); /* fall through */ From ea18709a6f102f975178c21aa7fc4b69eeba1424 Mon Sep 17 00:00:00 2001 From: Aviya Erenfeld Date: Wed, 6 Feb 2019 13:17:08 +0200 Subject: [PATCH 0940/1436] nl80211: Fix FTM per burst maximum value Fix FTM per burst maximum value from 15 to 31 (The maximal bits that represents that number in the frame is 5 hence a maximal value of 31) Signed-off-by: Aviya Erenfeld Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 74150ad95823..d91a408db113 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -250,7 +250,7 @@ nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = { [NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION] = NLA_POLICY_MAX(NLA_U8, 15), [NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST] = - NLA_POLICY_MAX(NLA_U8, 15), + NLA_POLICY_MAX(NLA_U8, 31), [NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES] = { .type = NLA_U8 }, [NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI] = { .type = NLA_FLAG }, [NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC] = { .type = NLA_FLAG }, From ff1bab1ba19165d4402447dd39abae9e21880ebf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Feb 2019 13:17:07 +0200 Subject: [PATCH 0941/1436] cfg80211: pmsr: record netlink port ID Without recording the netlink port ID, we cannot return the results or complete messages to userspace, nor will we be able to abort if the socket is closed, so clearly we need to fill the value. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/wireless/pmsr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index 78c3f5633692..0216ab555249 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -271,6 +271,7 @@ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info) req->n_peers = count; req->cookie = cfg80211_assign_cookie(rdev); + req->nl_portid = info->snd_portid; err = rdev_start_pmsr(rdev, wdev, req); if (err) From 1fc9b7253382ce1a83d9a3e63e88d656eb63f263 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Feb 2019 13:17:14 +0200 Subject: [PATCH 0942/1436] cfg80211: prevent speculation on cfg80211_classify8021d() return It's possible that the caller of cfg80211_classify8021d() uses the value to index an array, like mac80211 in ieee80211_downgrade_queue(). Prevent speculation on the return value. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/wireless/util.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index cd48cdd582c0..ec30e3732c7b 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -5,7 +5,7 @@ * Copyright 2007-2009 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2019 Intel Corporation */ #include #include @@ -19,6 +19,7 @@ #include #include #include +#include #include "core.h" #include "rdev-ops.h" @@ -715,20 +716,25 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, { unsigned int dscp; unsigned char vlan_priority; + unsigned int ret; /* skb->priority values from 256->263 are magic values to * directly indicate a specific 802.1d priority. This is used * to allow 802.1d priority to be passed directly in from VLAN * tags, etc. */ - if (skb->priority >= 256 && skb->priority <= 263) - return skb->priority - 256; + if (skb->priority >= 256 && skb->priority <= 263) { + ret = skb->priority - 256; + goto out; + } if (skb_vlan_tag_present(skb)) { vlan_priority = (skb_vlan_tag_get(skb) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; - if (vlan_priority > 0) - return vlan_priority; + if (vlan_priority > 0) { + ret = vlan_priority; + goto out; + } } switch (skb->protocol) { @@ -747,8 +753,9 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, if (!mpls) return 0; - return (ntohl(mpls->entry) & MPLS_LS_TC_MASK) + ret = (ntohl(mpls->entry) & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT; + goto out; } case htons(ETH_P_80221): /* 802.21 is always network control traffic */ @@ -761,18 +768,24 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, unsigned int i, tmp_dscp = dscp >> 2; for (i = 0; i < qos_map->num_des; i++) { - if (tmp_dscp == qos_map->dscp_exception[i].dscp) - return qos_map->dscp_exception[i].up; + if (tmp_dscp == qos_map->dscp_exception[i].dscp) { + ret = qos_map->dscp_exception[i].up; + goto out; + } } for (i = 0; i < 8; i++) { if (tmp_dscp >= qos_map->up[i].low && - tmp_dscp <= qos_map->up[i].high) - return i; + tmp_dscp <= qos_map->up[i].high) { + ret = i; + goto out; + } } } - return dscp >> 5; + ret = dscp >> 5; +out: + return array_index_nospec(ret, IEEE80211_NUM_TIDS); } EXPORT_SYMBOL(cfg80211_classify8021d); From 6157ca0d6bfe437691b1e98a62e2efe12b6714da Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 6 Feb 2019 13:17:21 +0200 Subject: [PATCH 0943/1436] mac80211: Fix Tx aggregation session tear down with ITXQs When mac80211 requests the low level driver to stop an ongoing Tx aggregation, the low level driver is expected to call ieee80211_stop_tx_ba_cb_irqsafe() to indicate that it is ready to stop the session. The callback in turn schedules a worker to complete the session tear down, which in turn also handles the relevant state for the intermediate Tx queue. However, as this flow in asynchronous, the intermediate queue should be stopped and not continue servicing frames, as in such a case frames that are dequeued would be marked as part of an aggregation, although the aggregation is already been stopped. Fix this by stopping the intermediate Tx queue, before calling the low level driver to stop the Tx aggregation. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/agg-tx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 69e831bc317b..54821fb1a960 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -8,7 +8,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -366,6 +366,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state); + ieee80211_agg_stop_txq(sta, tid); + spin_unlock_bh(&sta->lock); ht_dbg(sta->sdata, "Tx BA session stop requested for %pM tid %u\n", From 3defaf2f15b2bfd86c6664181ac009e91985f8ac Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sun, 10 Feb 2019 12:52:35 -0800 Subject: [PATCH 0944/1436] bpf: fix lockdep false positive in stackmap Lockdep warns about false positive: [ 11.211460] ------------[ cut here ]------------ [ 11.211936] DEBUG_LOCKS_WARN_ON(depth <= 0) [ 11.211985] WARNING: CPU: 0 PID: 141 at ../kernel/locking/lockdep.c:3592 lock_release+0x1ad/0x280 [ 11.213134] Modules linked in: [ 11.214954] RIP: 0010:lock_release+0x1ad/0x280 [ 11.223508] Call Trace: [ 11.223705] [ 11.223874] ? __local_bh_enable+0x7a/0x80 [ 11.224199] up_read+0x1c/0xa0 [ 11.224446] do_up_read+0x12/0x20 [ 11.224713] irq_work_run_list+0x43/0x70 [ 11.225030] irq_work_run+0x26/0x50 [ 11.225310] smp_irq_work_interrupt+0x57/0x1f0 [ 11.225662] irq_work_interrupt+0xf/0x20 since rw_semaphore is released in a different task vs task that locked the sema. It is expected behavior. Fix the warning with up_read_non_owner() and rwsem_release() annotation. Fixes: bae77c5eb5b2 ("bpf: enable stackmap with build_id in nmi context") Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/stackmap.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index d43b14535827..950ab2f28922 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -44,7 +44,7 @@ static void do_up_read(struct irq_work *entry) struct stack_map_irq_work *work; work = container_of(entry, struct stack_map_irq_work, irq_work); - up_read(work->sem); + up_read_non_owner(work->sem); work->sem = NULL; } @@ -338,6 +338,12 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, } else { work->sem = ¤t->mm->mmap_sem; irq_work_queue(&work->irq_work); + /* + * The irq_work will release the mmap_sem with + * up_read_non_owner(). The rwsem_release() is called + * here to release the lock from lockdep's perspective. + */ + rwsem_release(¤t->mm->mmap_sem.dep_map, 1, _RET_IP_); } } From 9659c1af451a92533d2d8dfcecd1a848a08a883b Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Mon, 17 Dec 2018 14:13:47 -0800 Subject: [PATCH 0945/1436] drm/i915/icl: combo port vswing programming changes per BSPEC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In August 2018 the BSPEC changed the ICL port programming sequence to closely resemble earlier gen programming sequence. Restrict combo phy to HBR max rate unless eDP panel is connected to port. v2: remove debug code that Imre found v3: simplify translation table if-else v4: edp translation table now based on link rate and low_swing v5: Misc review comments + r-b BSpec: 21257 Cc: Ville Syrjälä Cc: Imre Deak Cc: Rodrigo Vivi Reviewed-by: Imre Deak Signed-off-by: Clint Taylor Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/1545084827-5776-1-git-send-email-clinton.a.taylor@intel.com (cherry picked from commit b265a2a6255f581258ccfdccbd2efca51a142fe2) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_reg.h | 4 + drivers/gpu/drm/i915/intel_ddi.c | 238 +++++++++++-------------------- drivers/gpu/drm/i915/intel_dp.c | 4 +- 3 files changed, 93 insertions(+), 153 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0a7d60509ca7..29acdb96160d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1866,6 +1866,10 @@ enum i915_power_well_id { #define CNL_PORT_TX_DW7_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP((port), 7)) #define CNL_PORT_TX_DW7_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0((port), 7)) +#define ICL_PORT_TX_DW7_AUX(port) _MMIO(_ICL_PORT_TX_DW_AUX(7, port)) +#define ICL_PORT_TX_DW7_GRP(port) _MMIO(_ICL_PORT_TX_DW_GRP(7, port)) +#define ICL_PORT_TX_DW7_LN0(port) _MMIO(_ICL_PORT_TX_DW_LN(7, 0, port)) +#define ICL_PORT_TX_DW7_LN(port, ln) _MMIO(_ICL_PORT_TX_DW_LN(7, ln, port)) #define N_SCALAR(x) ((x) << 24) #define N_SCALAR_MASK (0x7F << 24) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 4079050f9d6c..7edce1b7b348 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -494,103 +494,58 @@ static const struct cnl_ddi_buf_trans cnl_ddi_translations_edp_1_05V[] = { { 0x2, 0x7F, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ }; -struct icl_combo_phy_ddi_buf_trans { - u32 dw2_swing_select; - u32 dw2_swing_scalar; - u32 dw4_scaling; +/* icl_combo_phy_ddi_translations */ +static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hbr2[] = { + /* NT mV Trans mV db */ + { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ + { 0xA, 0x4F, 0x37, 0x00, 0x08 }, /* 350 500 3.1 */ + { 0xC, 0x71, 0x2F, 0x00, 0x10 }, /* 350 700 6.0 */ + { 0x6, 0x7F, 0x2B, 0x00, 0x14 }, /* 350 900 8.2 */ + { 0xA, 0x4C, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ + { 0xC, 0x73, 0x34, 0x00, 0x0B }, /* 500 700 2.9 */ + { 0x6, 0x7F, 0x2F, 0x00, 0x10 }, /* 500 900 5.1 */ + { 0xC, 0x6C, 0x3C, 0x00, 0x03 }, /* 650 700 0.6 */ + { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 900 3.5 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ }; -/* Voltage Swing Programming for VccIO 0.85V for DP */ -static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hdmi_0_85V[] = { - /* Voltage mV db */ - { 0x2, 0x98, 0x0018 }, /* 400 0.0 */ - { 0x2, 0x98, 0x3015 }, /* 400 3.5 */ - { 0x2, 0x98, 0x6012 }, /* 400 6.0 */ - { 0x2, 0x98, 0x900F }, /* 400 9.5 */ - { 0xB, 0x70, 0x0018 }, /* 600 0.0 */ - { 0xB, 0x70, 0x3015 }, /* 600 3.5 */ - { 0xB, 0x70, 0x6012 }, /* 600 6.0 */ - { 0x5, 0x00, 0x0018 }, /* 800 0.0 */ - { 0x5, 0x00, 0x3015 }, /* 800 3.5 */ - { 0x6, 0x98, 0x0018 }, /* 1200 0.0 */ +static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_edp_hbr2[] = { + /* NT mV Trans mV db */ + { 0x0, 0x7F, 0x3F, 0x00, 0x00 }, /* 200 200 0.0 */ + { 0x8, 0x7F, 0x38, 0x00, 0x07 }, /* 200 250 1.9 */ + { 0x1, 0x7F, 0x33, 0x00, 0x0C }, /* 200 300 3.5 */ + { 0x9, 0x7F, 0x31, 0x00, 0x0E }, /* 200 350 4.9 */ + { 0x8, 0x7F, 0x3F, 0x00, 0x00 }, /* 250 250 0.0 */ + { 0x1, 0x7F, 0x38, 0x00, 0x07 }, /* 250 300 1.6 */ + { 0x9, 0x7F, 0x35, 0x00, 0x0A }, /* 250 350 2.9 */ + { 0x1, 0x7F, 0x3F, 0x00, 0x00 }, /* 300 300 0.0 */ + { 0x9, 0x7F, 0x38, 0x00, 0x07 }, /* 300 350 1.3 */ + { 0x9, 0x7F, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ }; -/* FIXME - After table is updated in Bspec */ -/* Voltage Swing Programming for VccIO 0.85V for eDP */ -static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_edp_0_85V[] = { - /* Voltage mV db */ - { 0x0, 0x00, 0x00 }, /* 200 0.0 */ - { 0x0, 0x00, 0x00 }, /* 200 1.5 */ - { 0x0, 0x00, 0x00 }, /* 200 4.0 */ - { 0x0, 0x00, 0x00 }, /* 200 6.0 */ - { 0x0, 0x00, 0x00 }, /* 250 0.0 */ - { 0x0, 0x00, 0x00 }, /* 250 1.5 */ - { 0x0, 0x00, 0x00 }, /* 250 4.0 */ - { 0x0, 0x00, 0x00 }, /* 300 0.0 */ - { 0x0, 0x00, 0x00 }, /* 300 1.5 */ - { 0x0, 0x00, 0x00 }, /* 350 0.0 */ +static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_edp_hbr3[] = { + /* NT mV Trans mV db */ + { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ + { 0xA, 0x4F, 0x37, 0x00, 0x08 }, /* 350 500 3.1 */ + { 0xC, 0x71, 0x2F, 0x00, 0x10 }, /* 350 700 6.0 */ + { 0x6, 0x7F, 0x2B, 0x00, 0x14 }, /* 350 900 8.2 */ + { 0xA, 0x4C, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ + { 0xC, 0x73, 0x34, 0x00, 0x0B }, /* 500 700 2.9 */ + { 0x6, 0x7F, 0x2F, 0x00, 0x10 }, /* 500 900 5.1 */ + { 0xC, 0x6C, 0x3C, 0x00, 0x03 }, /* 650 700 0.6 */ + { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 900 3.5 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ }; -/* Voltage Swing Programming for VccIO 0.95V for DP */ -static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hdmi_0_95V[] = { - /* Voltage mV db */ - { 0x2, 0x98, 0x0018 }, /* 400 0.0 */ - { 0x2, 0x98, 0x3015 }, /* 400 3.5 */ - { 0x2, 0x98, 0x6012 }, /* 400 6.0 */ - { 0x2, 0x98, 0x900F }, /* 400 9.5 */ - { 0x4, 0x98, 0x0018 }, /* 600 0.0 */ - { 0x4, 0x98, 0x3015 }, /* 600 3.5 */ - { 0x4, 0x98, 0x6012 }, /* 600 6.0 */ - { 0x5, 0x76, 0x0018 }, /* 800 0.0 */ - { 0x5, 0x76, 0x3015 }, /* 800 3.5 */ - { 0x6, 0x98, 0x0018 }, /* 1200 0.0 */ -}; - -/* FIXME - After table is updated in Bspec */ -/* Voltage Swing Programming for VccIO 0.95V for eDP */ -static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_edp_0_95V[] = { - /* Voltage mV db */ - { 0x0, 0x00, 0x00 }, /* 200 0.0 */ - { 0x0, 0x00, 0x00 }, /* 200 1.5 */ - { 0x0, 0x00, 0x00 }, /* 200 4.0 */ - { 0x0, 0x00, 0x00 }, /* 200 6.0 */ - { 0x0, 0x00, 0x00 }, /* 250 0.0 */ - { 0x0, 0x00, 0x00 }, /* 250 1.5 */ - { 0x0, 0x00, 0x00 }, /* 250 4.0 */ - { 0x0, 0x00, 0x00 }, /* 300 0.0 */ - { 0x0, 0x00, 0x00 }, /* 300 1.5 */ - { 0x0, 0x00, 0x00 }, /* 350 0.0 */ -}; - -/* Voltage Swing Programming for VccIO 1.05V for DP */ -static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hdmi_1_05V[] = { - /* Voltage mV db */ - { 0x2, 0x98, 0x0018 }, /* 400 0.0 */ - { 0x2, 0x98, 0x3015 }, /* 400 3.5 */ - { 0x2, 0x98, 0x6012 }, /* 400 6.0 */ - { 0x2, 0x98, 0x900F }, /* 400 9.5 */ - { 0x4, 0x98, 0x0018 }, /* 600 0.0 */ - { 0x4, 0x98, 0x3015 }, /* 600 3.5 */ - { 0x4, 0x98, 0x6012 }, /* 600 6.0 */ - { 0x5, 0x71, 0x0018 }, /* 800 0.0 */ - { 0x5, 0x71, 0x3015 }, /* 800 3.5 */ - { 0x6, 0x98, 0x0018 }, /* 1200 0.0 */ -}; - -/* FIXME - After table is updated in Bspec */ -/* Voltage Swing Programming for VccIO 1.05V for eDP */ -static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_edp_1_05V[] = { - /* Voltage mV db */ - { 0x0, 0x00, 0x00 }, /* 200 0.0 */ - { 0x0, 0x00, 0x00 }, /* 200 1.5 */ - { 0x0, 0x00, 0x00 }, /* 200 4.0 */ - { 0x0, 0x00, 0x00 }, /* 200 6.0 */ - { 0x0, 0x00, 0x00 }, /* 250 0.0 */ - { 0x0, 0x00, 0x00 }, /* 250 1.5 */ - { 0x0, 0x00, 0x00 }, /* 250 4.0 */ - { 0x0, 0x00, 0x00 }, /* 300 0.0 */ - { 0x0, 0x00, 0x00 }, /* 300 1.5 */ - { 0x0, 0x00, 0x00 }, /* 350 0.0 */ +static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_hdmi[] = { + /* NT mV Trans mV db */ + { 0xA, 0x60, 0x3F, 0x00, 0x00 }, /* 450 450 0.0 */ + { 0xB, 0x73, 0x36, 0x00, 0x09 }, /* 450 650 3.2 */ + { 0x6, 0x7F, 0x31, 0x00, 0x0E }, /* 450 850 5.5 */ + { 0xB, 0x73, 0x3F, 0x00, 0x00 }, /* 650 650 0.0 ALS */ + { 0x6, 0x7F, 0x37, 0x00, 0x08 }, /* 650 850 2.3 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 850 850 0.0 */ + { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 850 3.0 */ }; struct icl_mg_phy_ddi_buf_trans { @@ -871,43 +826,23 @@ cnl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) } } -static const struct icl_combo_phy_ddi_buf_trans * +static const struct cnl_ddi_buf_trans * icl_get_combo_buf_trans(struct drm_i915_private *dev_priv, enum port port, - int type, int *n_entries) + int type, int rate, int *n_entries) { - u32 voltage = I915_READ(ICL_PORT_COMP_DW3(port)) & VOLTAGE_INFO_MASK; - - if (type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.low_vswing) { - switch (voltage) { - case VOLTAGE_INFO_0_85V: - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_0_85V); - return icl_combo_phy_ddi_translations_edp_0_85V; - case VOLTAGE_INFO_0_95V: - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_0_95V); - return icl_combo_phy_ddi_translations_edp_0_95V; - case VOLTAGE_INFO_1_05V: - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_1_05V); - return icl_combo_phy_ddi_translations_edp_1_05V; - default: - MISSING_CASE(voltage); - return NULL; - } - } else { - switch (voltage) { - case VOLTAGE_INFO_0_85V: - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_0_85V); - return icl_combo_phy_ddi_translations_dp_hdmi_0_85V; - case VOLTAGE_INFO_0_95V: - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_0_95V); - return icl_combo_phy_ddi_translations_dp_hdmi_0_95V; - case VOLTAGE_INFO_1_05V: - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_1_05V); - return icl_combo_phy_ddi_translations_dp_hdmi_1_05V; - default: - MISSING_CASE(voltage); - return NULL; - } + if (type == INTEL_OUTPUT_HDMI) { + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_hdmi); + return icl_combo_phy_ddi_translations_hdmi; + } else if (rate > 540000 && type == INTEL_OUTPUT_EDP) { + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr3); + return icl_combo_phy_ddi_translations_edp_hbr3; + } else if (type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.low_vswing) { + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr2); + return icl_combo_phy_ddi_translations_edp_hbr2; } + + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hbr2); + return icl_combo_phy_ddi_translations_dp_hbr2; } static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port) @@ -918,8 +853,8 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por if (IS_ICELAKE(dev_priv)) { if (intel_port_is_combophy(dev_priv, port)) - icl_get_combo_buf_trans(dev_priv, port, - INTEL_OUTPUT_HDMI, &n_entries); + icl_get_combo_buf_trans(dev_priv, port, INTEL_OUTPUT_HDMI, + 0, &n_entries); else n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations); default_entry = n_entries - 1; @@ -2275,13 +2210,14 @@ static void bxt_ddi_vswing_sequence(struct intel_encoder *encoder, u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); enum port port = encoder->port; int n_entries; if (IS_ICELAKE(dev_priv)) { if (intel_port_is_combophy(dev_priv, port)) icl_get_combo_buf_trans(dev_priv, port, encoder->type, - &n_entries); + intel_dp->link_rate, &n_entries); else n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations); } else if (IS_CANNONLAKE(dev_priv)) { @@ -2462,14 +2398,15 @@ static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, } static void icl_ddi_combo_vswing_program(struct drm_i915_private *dev_priv, - u32 level, enum port port, int type) + u32 level, enum port port, int type, + int rate) { - const struct icl_combo_phy_ddi_buf_trans *ddi_translations = NULL; + const struct cnl_ddi_buf_trans *ddi_translations = NULL; u32 n_entries, val; int ln; ddi_translations = icl_get_combo_buf_trans(dev_priv, port, type, - &n_entries); + rate, &n_entries); if (!ddi_translations) return; @@ -2478,34 +2415,23 @@ static void icl_ddi_combo_vswing_program(struct drm_i915_private *dev_priv, level = n_entries - 1; } - /* Set PORT_TX_DW5 Rterm Sel to 110b. */ + /* Set PORT_TX_DW5 */ val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); - val &= ~RTERM_SELECT_MASK; + val &= ~(SCALING_MODE_SEL_MASK | RTERM_SELECT_MASK | + TAP2_DISABLE | TAP3_DISABLE); + val |= SCALING_MODE_SEL(0x2); val |= RTERM_SELECT(0x6); - I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); - - /* Program PORT_TX_DW5 */ - val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); - /* Set DisableTap2 and DisableTap3 if MIPI DSI - * Clear DisableTap2 and DisableTap3 for all other Ports - */ - if (type == INTEL_OUTPUT_DSI) { - val |= TAP2_DISABLE; - val |= TAP3_DISABLE; - } else { - val &= ~TAP2_DISABLE; - val &= ~TAP3_DISABLE; - } + val |= TAP3_DISABLE; I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); /* Program PORT_TX_DW2 */ val = I915_READ(ICL_PORT_TX_DW2_LN0(port)); val &= ~(SWING_SEL_LOWER_MASK | SWING_SEL_UPPER_MASK | RCOMP_SCALAR_MASK); - val |= SWING_SEL_UPPER(ddi_translations[level].dw2_swing_select); - val |= SWING_SEL_LOWER(ddi_translations[level].dw2_swing_select); + val |= SWING_SEL_UPPER(ddi_translations[level].dw2_swing_sel); + val |= SWING_SEL_LOWER(ddi_translations[level].dw2_swing_sel); /* Program Rcomp scalar for every table entry */ - val |= RCOMP_SCALAR(ddi_translations[level].dw2_swing_scalar); + val |= RCOMP_SCALAR(0x98); I915_WRITE(ICL_PORT_TX_DW2_GRP(port), val); /* Program PORT_TX_DW4 */ @@ -2514,9 +2440,17 @@ static void icl_ddi_combo_vswing_program(struct drm_i915_private *dev_priv, val = I915_READ(ICL_PORT_TX_DW4_LN(port, ln)); val &= ~(POST_CURSOR_1_MASK | POST_CURSOR_2_MASK | CURSOR_COEFF_MASK); - val |= ddi_translations[level].dw4_scaling; + val |= POST_CURSOR_1(ddi_translations[level].dw4_post_cursor_1); + val |= POST_CURSOR_2(ddi_translations[level].dw4_post_cursor_2); + val |= CURSOR_COEFF(ddi_translations[level].dw4_cursor_coeff); I915_WRITE(ICL_PORT_TX_DW4_LN(port, ln), val); } + + /* Program PORT_TX_DW7 */ + val = I915_READ(ICL_PORT_TX_DW7_LN0(port)); + val &= ~N_SCALAR_MASK; + val |= N_SCALAR(ddi_translations[level].dw7_n_scalar); + I915_WRITE(ICL_PORT_TX_DW7_GRP(port), val); } static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder, @@ -2581,7 +2515,7 @@ static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder, I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); /* 5. Program swing and de-emphasis */ - icl_ddi_combo_vswing_program(dev_priv, level, port, type); + icl_ddi_combo_vswing_program(dev_priv, level, port, type, rate); /* 6. Set training enable to trigger update */ val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index fdd2cbc56fa3..22a74608c6e4 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -304,9 +304,11 @@ static int cnl_max_source_rate(struct intel_dp *intel_dp) static int icl_max_source_rate(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); enum port port = dig_port->base.port; - if (port == PORT_B) + if (intel_port_is_combophy(dev_priv, port) && + !intel_dp_is_edp(intel_dp)) return 540000; return 810000; From 3b91a93597327f682a1c4392350ced99a44840b7 Mon Sep 17 00:00:00 2001 From: Aditya Swarup Date: Thu, 10 Jan 2019 15:08:44 -0800 Subject: [PATCH 0946/1436] drm/i915/cnl: Fix CNL macros for Voltage Swing programming CNL macros for register groups CNL_PORT_TX_DW2_* / CNL_PORT_TX_DW5_* are configured incorrectly wrt definition of _CNL_PORT_TX_DW_GRP. v2: Jani suggested to keep the macros organized semantically i.e., by function, secondarily by port/pipe/transcoder.->(dw, port) Fixes: 4e53840fdfdd ("drm/i915/icl: Introduce new macros to get combophy registers") Cc: Clint Taylor Cc: Imre Deak Cc: Jani Nikula Signed-off-by: Aditya Swarup Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190110230844.9213-1-aditya.swarup@intel.com (cherry picked from commit b14c06ec024947eaa35212f2380e90233d5092e0) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_reg.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 29acdb96160d..067054cf4a86 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1790,7 +1790,7 @@ enum i915_power_well_id { #define _CNL_PORT_TX_C_LN0_OFFSET 0x162C40 #define _CNL_PORT_TX_D_LN0_OFFSET 0x162E40 #define _CNL_PORT_TX_F_LN0_OFFSET 0x162840 -#define _CNL_PORT_TX_DW_GRP(port, dw) (_PICK((port), \ +#define _CNL_PORT_TX_DW_GRP(dw, port) (_PICK((port), \ _CNL_PORT_TX_AE_GRP_OFFSET, \ _CNL_PORT_TX_B_GRP_OFFSET, \ _CNL_PORT_TX_B_GRP_OFFSET, \ @@ -1798,7 +1798,7 @@ enum i915_power_well_id { _CNL_PORT_TX_AE_GRP_OFFSET, \ _CNL_PORT_TX_F_GRP_OFFSET) + \ 4 * (dw)) -#define _CNL_PORT_TX_DW_LN0(port, dw) (_PICK((port), \ +#define _CNL_PORT_TX_DW_LN0(dw, port) (_PICK((port), \ _CNL_PORT_TX_AE_LN0_OFFSET, \ _CNL_PORT_TX_B_LN0_OFFSET, \ _CNL_PORT_TX_B_LN0_OFFSET, \ @@ -1834,9 +1834,9 @@ enum i915_power_well_id { #define _CNL_PORT_TX_DW4_LN0_AE 0x162450 #define _CNL_PORT_TX_DW4_LN1_AE 0x1624D0 -#define CNL_PORT_TX_DW4_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP((port), 4)) -#define CNL_PORT_TX_DW4_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0((port), 4)) -#define CNL_PORT_TX_DW4_LN(port, ln) _MMIO(_CNL_PORT_TX_DW_LN0((port), 4) + \ +#define CNL_PORT_TX_DW4_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP(4, (port))) +#define CNL_PORT_TX_DW4_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0(4, (port))) +#define CNL_PORT_TX_DW4_LN(port, ln) _MMIO(_CNL_PORT_TX_DW_LN0(4, (port)) + \ ((ln) * (_CNL_PORT_TX_DW4_LN1_AE - \ _CNL_PORT_TX_DW4_LN0_AE))) #define ICL_PORT_TX_DW4_AUX(port) _MMIO(_ICL_PORT_TX_DW_AUX(4, port)) @@ -1864,8 +1864,8 @@ enum i915_power_well_id { #define RTERM_SELECT(x) ((x) << 3) #define RTERM_SELECT_MASK (0x7 << 3) -#define CNL_PORT_TX_DW7_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP((port), 7)) -#define CNL_PORT_TX_DW7_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0((port), 7)) +#define CNL_PORT_TX_DW7_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP(7, (port))) +#define CNL_PORT_TX_DW7_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0(7, (port))) #define ICL_PORT_TX_DW7_AUX(port) _MMIO(_ICL_PORT_TX_DW_AUX(7, port)) #define ICL_PORT_TX_DW7_GRP(port) _MMIO(_ICL_PORT_TX_DW_GRP(7, port)) #define ICL_PORT_TX_DW7_LN0(port) _MMIO(_ICL_PORT_TX_DW_LN(7, 0, port)) From c4c07b4d6fa1f11880eab8e076d3d060ef3f55fc Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 6 Feb 2019 22:56:15 +0100 Subject: [PATCH 0947/1436] netfilter: nf_nat_snmp_basic: add missing length checks in ASN.1 cbs The generic ASN.1 decoder infrastructure doesn't guarantee that callbacks will get as much data as they expect; callbacks have to check the `datalen` parameter before looking at `data`. Make sure that snmp_version() and snmp_helper() don't read/write beyond the end of the packet data. (Also move the assignment to `pdata` down below the check to make it clear that it isn't necessarily a pointer we can use before the `datalen` check.) Fixes: cc2d58634e0f ("netfilter: nf_nat_snmp_basic: use asn1 decoder library") Signed-off-by: Jann Horn Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_nat_snmp_basic_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c index a0aa13bcabda..0a8a60c1bf9a 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c @@ -105,6 +105,8 @@ static void fast_csum(struct snmp_ctx *ctx, unsigned char offset) int snmp_version(void *context, size_t hdrlen, unsigned char tag, const void *data, size_t datalen) { + if (datalen != 1) + return -EINVAL; if (*(unsigned char *)data > 1) return -ENOTSUPP; return 1; @@ -114,8 +116,11 @@ int snmp_helper(void *context, size_t hdrlen, unsigned char tag, const void *data, size_t datalen) { struct snmp_ctx *ctx = (struct snmp_ctx *)context; - __be32 *pdata = (__be32 *)data; + __be32 *pdata; + if (datalen != 4) + return -EINVAL; + pdata = (__be32 *)data; if (*pdata == ctx->from) { pr_debug("%s: %pI4 to %pI4\n", __func__, (void *)&ctx->from, (void *)&ctx->to); From 8303b7e8f018724a2cd7752eb29c2801fa8c4067 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 8 Feb 2019 16:39:52 +0100 Subject: [PATCH 0948/1436] netfilter: nat: fix spurious connection timeouts Sander Eikelenboom bisected a NAT related regression down to the l4proto->manip_pkt indirection removal. I forgot that ICMP(v6) errors (e.g. PKTTOOBIG) can be set as related to the existing conntrack entry. Therefore, when passing the skb to nf_nat_ipv4/6_manip_pkt(), that ended up calling the wrong l4 manip function, as tuple->dst.protonum is the original flows l4 protocol (TCP, UDP, etc). Set the dst protocol field to ICMP(v6), we already have a private copy of the tuple due to the inversion of src/dst. Reported-by: Sander Eikelenboom Tested-by: Sander Eikelenboom Fixes: faec18dbb0405 ("netfilter: nat: remove l4proto->manip_pkt") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 1 + net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 2687db015b6f..fa2ba7c500e4 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -215,6 +215,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, /* Change outer to look like the reply to an incoming packet */ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + target.dst.protonum = IPPROTO_ICMP; if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip)) return 0; diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 23022447eb49..7a41ee3c11b4 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -226,6 +226,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, } nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + target.dst.protonum = IPPROTO_ICMPV6; if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip)) return 0; From ff0c129d3b5ecb3df7c8f5e2236582bf745b6c5f Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 8 Feb 2019 10:52:07 -0500 Subject: [PATCH 0949/1436] dm crypt: don't overallocate the integrity tag space bio_sectors() returns the value in the units of 512-byte sectors (no matter what the real sector size of the device). dm-crypt multiplies bio_sectors() by on_disk_tag_size to calculate the space allocated for integrity tags. If dm-crypt is running with sector size larger than 512b, it allocates more data than is needed. Device Mapper trims the extra space when passing the bio to dm-integrity, so this bug didn't result in any visible misbehavior. But it must be fixed to avoid wasteful memory allocation for the block integrity payload. Fixes: ef43aa38063a6 ("dm crypt: add cryptographic data integrity protection (authenticated encryption)") Cc: stable@vger.kernel.org # 4.12+ Reported-by: Milan Broz Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 47d4e0d30bf0..dd538e6b2748 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -932,7 +932,7 @@ static int dm_crypt_integrity_io_alloc(struct dm_crypt_io *io, struct bio *bio) if (IS_ERR(bip)) return PTR_ERR(bip); - tag_len = io->cc->on_disk_tag_size * bio_sectors(bio); + tag_len = io->cc->on_disk_tag_size * (bio_sectors(bio) >> io->cc->sector_shift); bip->bip_iter.bi_size = tag_len; bip->bip_iter.bi_sector = io->cc->start + io->sector; From 1413d9af241c3c3b36319213b725a9e95be3935f Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 7 Feb 2019 15:15:42 -0500 Subject: [PATCH 0950/1436] Documentation: Fix grammatical error in sysctl/fs.txt & clarify negative dentry Fix a grammatical error in the dentry-state text and clarify the usage of negative dentries. Fixes: af0c9af1b3f66 ("fs/dcache: Track & report number of negative dentries") Signed-off-by: Waiman Long Signed-off-by: Linus Torvalds --- Documentation/sysctl/fs.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 58649bd4fcfc..ebc679bcb2dc 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -80,7 +80,9 @@ nonzero when shrink_dcache_pages() has been called and the dcache isn't pruned yet. nr_negative shows the number of unused dentries that are also -negative dentries which do not mapped to actual files. +negative dentries which do not map to any files. Instead, +they help speeding up rejection of non-existing files provided +by the users. ============================================================== From e451eb51068496054d071af10b3530af4002a4f4 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 11 Feb 2019 08:15:29 -0800 Subject: [PATCH 0951/1436] xsk: share the mmap_sem for page pinning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Holding mmap_sem exclusively for a gup() is an overkill. Lets share the lock and replace the gup call for gup_longterm(), as it is better suited for the lifetime of the pinning. Fixes: c0c77d8fb787 ("xsk: add user memory registration support sockopt") Signed-off-by: Davidlohr Bueso Cc: David S. Miller Cc: Bjorn Topel Cc: Magnus Karlsson CC: netdev@vger.kernel.org Acked-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xdp_umem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index d4de871e7d4d..597866e7c441 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -259,10 +259,10 @@ static int xdp_umem_pin_pages(struct xdp_umem *umem) if (!umem->pgs) return -ENOMEM; - down_write(¤t->mm->mmap_sem); - npgs = get_user_pages(umem->address, umem->npgs, - gup_flags, &umem->pgs[0], NULL); - up_write(¤t->mm->mmap_sem); + down_read(¤t->mm->mmap_sem); + npgs = get_user_pages_longterm(umem->address, umem->npgs, + gup_flags, &umem->pgs[0], NULL); + up_read(¤t->mm->mmap_sem); if (npgs != umem->npgs) { if (npgs >= 0) { From 989723b00b7fecad9acad55b151c73ccf2eac3ca Mon Sep 17 00:00:00 2001 From: Jouke Witteveen Date: Thu, 7 Feb 2019 17:14:32 +0100 Subject: [PATCH 0952/1436] Documentation: bring operstate documentation up-to-date Netlink has moved from bitmasks to group numbers long ago. Signed-off-by: Jouke Witteveen Signed-off-by: David S. Miller --- Documentation/networking/operstates.txt | 14 ++++++++------ net/sched/sch_generic.c | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/Documentation/networking/operstates.txt b/Documentation/networking/operstates.txt index 355c6d8ef8ad..b203d1334822 100644 --- a/Documentation/networking/operstates.txt +++ b/Documentation/networking/operstates.txt @@ -22,8 +22,9 @@ and changeable from userspace under certain rules. 2. Querying from userspace Both admin and operational state can be queried via the netlink -operation RTM_GETLINK. It is also possible to subscribe to RTMGRP_LINK -to be notified of updates. This is important for setting from userspace. +operation RTM_GETLINK. It is also possible to subscribe to RTNLGRP_LINK +to be notified of updates while the interface is admin up. This is +important for setting from userspace. These values contain interface state: @@ -101,8 +102,9 @@ because some driver controlled protocol establishment has to complete. Corresponding functions are netif_dormant_on() to set the flag, netif_dormant_off() to clear it and netif_dormant() to query. -On device allocation, networking core sets the flags equivalent to -netif_carrier_ok() and !netif_dormant(). +On device allocation, both flags __LINK_STATE_NOCARRIER and +__LINK_STATE_DORMANT are cleared, so the effective state is equivalent +to netif_carrier_ok() and !netif_dormant(). Whenever the driver CHANGES one of these flags, a workqueue event is @@ -133,11 +135,11 @@ netif_carrier_ok() && !netif_dormant() is set by the driver. Afterwards, the userspace application can set IFLA_OPERSTATE to IF_OPER_DORMANT or IF_OPER_UP as long as the driver does not set netif_carrier_off() or netif_dormant_on(). Changes made by userspace -are multicasted on the netlink group RTMGRP_LINK. +are multicasted on the netlink group RTNLGRP_LINK. So basically a 802.1X supplicant interacts with the kernel like this: --subscribe to RTMGRP_LINK +-subscribe to RTNLGRP_LINK -set IFLA_LINKMODE to 1 via RTM_SETLINK -query RTM_GETLINK once to get initial state -if initial flags are not (IFF_LOWER_UP && !IFF_DORMANT), wait until diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 66ba2ce2320f..968a85fe4d4a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -500,7 +500,7 @@ static void dev_watchdog_down(struct net_device *dev) * netif_carrier_on - set carrier * @dev: network device * - * Device has detected that carrier. + * Device has detected acquisition of carrier. */ void netif_carrier_on(struct net_device *dev) { From 4179cb5a4c924cd233eaadd081882425bc98f44e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Feb 2019 12:27:38 -0800 Subject: [PATCH 0953/1436] vxlan: test dev->flags & IFF_UP before calling netif_rx() netif_rx() must be called under a strict contract. At device dismantle phase, core networking clears IFF_UP and flush_all_backlogs() is called after rcu grace period to make sure no incoming packet might be in a cpu backlog and still referencing the device. Most drivers call netif_rx() from their interrupt handler, and since the interrupts are disabled at device dismantle, netif_rx() does not have to check dev->flags & IFF_UP Virtual drivers do not have this guarantee, and must therefore make the check themselves. Otherwise we risk use-after-free and/or crashes. Note this patch also fixes a small issue that came with commit ce6502a8f957 ("vxlan: fix a use after free in vxlan_encap_bypass"), since the dev->stats.rx_dropped change was done on the wrong device. Fixes: d342894c5d2f ("vxlan: virtual extensible lan") Fixes: ce6502a8f957 ("vxlan: fix a use after free in vxlan_encap_bypass") Signed-off-by: Eric Dumazet Cc: Petr Machata Cc: Ido Schimmel Cc: Roopa Prabhu Cc: Stefano Brivio Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 5209ee9aac47..2aae11feff0c 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2219,7 +2219,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, struct pcpu_sw_netstats *tx_stats, *rx_stats; union vxlan_addr loopback; union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip; - struct net_device *dev = skb->dev; + struct net_device *dev; int len = skb->len; tx_stats = this_cpu_ptr(src_vxlan->dev->tstats); @@ -2239,9 +2239,15 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, #endif } + rcu_read_lock(); + dev = skb->dev; + if (unlikely(!(dev->flags & IFF_UP))) { + kfree_skb(skb); + goto drop; + } + if (dst_vxlan->cfg.flags & VXLAN_F_LEARN) - vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source, 0, - vni); + vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni); u64_stats_update_begin(&tx_stats->syncp); tx_stats->tx_packets++; @@ -2254,8 +2260,10 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, rx_stats->rx_bytes += len; u64_stats_update_end(&rx_stats->syncp); } else { +drop: dev->stats.rx_dropped++; } + rcu_read_unlock(); } static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, From cbd72b485214f085a022b56a3051524577d891fe Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Thu, 7 Feb 2019 20:05:58 -0800 Subject: [PATCH 0954/1436] net: dsa: microchip: add switch offload forwarding support The flag offload_fwd_mark is set as the switch can forward frames by itself. This can be considered a fix to a problem introduced in commit c2e866911e254067 where the port membership are not set in sync. The flag offload_fwd_mark just needs to be set in tag_ksz.c to prevent the software bridge from forwarding duplicate multicast frames. Fixes: c2e866911e254067 ("microchip: break KSZ9477 DSA driver into two files") Signed-off-by: Tristram Ha Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz9477.c | 7 ++++--- net/dsa/tag_ksz.c | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 89ed059bb576..674d77e1b029 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -397,6 +397,7 @@ static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port, struct ksz_port *p = &dev->ports[port]; u8 data; int member = -1; + int forward = dev->member; ksz_pread8(dev, port, P_STP_CTRL, &data); data &= ~(PORT_TX_ENABLE | PORT_RX_ENABLE | PORT_LEARN_DISABLE); @@ -464,10 +465,10 @@ static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port, } /* When topology has changed the function ksz_update_port_member - * should be called to modify port forwarding behavior. However - * as the offload_fwd_mark indication cannot be reported here - * the switch forwarding function is not enabled. + * should be called to modify port forwarding behavior. */ + if (forward != dev->member) + ksz_update_port_member(dev, port); } static void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port) diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index da71b9e2af52..927e9c86f745 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -67,6 +67,8 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, pskb_trim_rcsum(skb, skb->len - len); + skb->offload_fwd_mark = true; + return skb; } From f6675872db57305fa957021efc788f9983ed3b67 Mon Sep 17 00:00:00 2001 From: Andreas Ziegler Date: Wed, 6 Feb 2019 20:00:13 +0100 Subject: [PATCH 0955/1436] tracing: probeevent: Correctly update remaining space in dynamic area Commit 9178412ddf5a ("tracing: probeevent: Return consumed bytes of dynamic area") improved the string fetching mechanism by returning the number of required bytes after copying the argument to the dynamic area. However, this return value is now only used to increment the pointer inside the dynamic area but misses updating the 'maxlen' variable which indicates the remaining space in the dynamic area. This means that fetch_store_string() always reads the *total* size of the dynamic area from the data_loc pointer instead of the *remaining* size (and passes it along to strncpy_from_{user,unsafe}) even if we're already about to copy data into the middle of the dynamic area. Link: http://lkml.kernel.org/r/20190206190013.16405-1-andreas.ziegler@fau.de Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 9178412ddf5a ("tracing: probeevent: Return consumed bytes of dynamic area") Acked-by: Masami Hiramatsu Signed-off-by: Andreas Ziegler Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_probe_tmpl.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index 5c56afc17cf8..4737bb8c07a3 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h @@ -180,10 +180,12 @@ store_trace_args(void *data, struct trace_probe *tp, struct pt_regs *regs, if (unlikely(arg->dynamic)) *dl = make_data_loc(maxlen, dyndata - base); ret = process_fetch_insn(arg->code, regs, dl, base); - if (unlikely(ret < 0 && arg->dynamic)) + if (unlikely(ret < 0 && arg->dynamic)) { *dl = make_data_loc(0, dyndata - base); - else + } else { dyndata += ret; + maxlen -= ret; + } } } From cf657d22ee1f0e887326a92169f2e28dc932fd10 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Feb 2019 12:41:05 -0800 Subject: [PATCH 0956/1436] net/x25: do not hold the cpu too long in x25_new_lci() Due to quadratic behavior of x25_new_lci(), syzbot was able to trigger an rcu stall. Fix this by not blocking BH for the whole duration of the function, and inserting a reschedule point when possible. If we care enough, using a bitmap could get rid of the quadratic behavior. syzbot report : rcu: INFO: rcu_preempt self-detected stall on CPU rcu: 0-...!: (10500 ticks this GP) idle=4fa/1/0x4000000000000002 softirq=283376/283376 fqs=0 rcu: (t=10501 jiffies g=383105 q=136) rcu: rcu_preempt kthread starved for 10502 jiffies! g383105 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x402 ->cpu=0 rcu: RCU grace-period kthread stack dump: rcu_preempt I28928 10 2 0x80000000 Call Trace: context_switch kernel/sched/core.c:2844 [inline] __schedule+0x817/0x1cc0 kernel/sched/core.c:3485 schedule+0x92/0x180 kernel/sched/core.c:3529 schedule_timeout+0x4db/0xfd0 kernel/time/timer.c:1803 rcu_gp_fqs_loop kernel/rcu/tree.c:1948 [inline] rcu_gp_kthread+0x956/0x17a0 kernel/rcu/tree.c:2105 kthread+0x357/0x430 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 NMI backtrace for cpu 0 CPU: 0 PID: 8759 Comm: syz-executor2 Not tainted 5.0.0-rc4+ #51 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 nmi_cpu_backtrace.cold+0x63/0xa4 lib/nmi_backtrace.c:101 nmi_trigger_cpumask_backtrace+0x1be/0x236 lib/nmi_backtrace.c:62 arch_trigger_cpumask_backtrace+0x14/0x20 arch/x86/kernel/apic/hw_nmi.c:38 trigger_single_cpu_backtrace include/linux/nmi.h:164 [inline] rcu_dump_cpu_stacks+0x183/0x1cf kernel/rcu/tree.c:1211 print_cpu_stall kernel/rcu/tree.c:1348 [inline] check_cpu_stall kernel/rcu/tree.c:1422 [inline] rcu_pending kernel/rcu/tree.c:3018 [inline] rcu_check_callbacks.cold+0x500/0xa4a kernel/rcu/tree.c:2521 update_process_times+0x32/0x80 kernel/time/timer.c:1635 tick_sched_handle+0xa2/0x190 kernel/time/tick-sched.c:161 tick_sched_timer+0x47/0x130 kernel/time/tick-sched.c:1271 __run_hrtimer kernel/time/hrtimer.c:1389 [inline] __hrtimer_run_queues+0x33e/0xde0 kernel/time/hrtimer.c:1451 hrtimer_interrupt+0x314/0x770 kernel/time/hrtimer.c:1509 local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1035 [inline] smp_apic_timer_interrupt+0x120/0x570 arch/x86/kernel/apic/apic.c:1060 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:807 RIP: 0010:__read_once_size include/linux/compiler.h:193 [inline] RIP: 0010:queued_write_lock_slowpath+0x13e/0x290 kernel/locking/qrwlock.c:86 Code: 00 00 fc ff df 4c 8d 2c 01 41 83 c7 03 41 0f b6 45 00 41 38 c7 7c 08 84 c0 0f 85 0c 01 00 00 8b 03 3d 00 01 00 00 74 1a f3 90 <41> 0f b6 55 00 41 38 d7 7c eb 84 d2 74 e7 48 89 df e8 6c 0f 4f 00 RSP: 0018:ffff88805f117bd8 EFLAGS: 00000206 ORIG_RAX: ffffffffffffff13 RAX: 0000000000000300 RBX: ffffffff89413ba0 RCX: 1ffffffff1282774 RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffff89413ba0 RBP: ffff88805f117c70 R08: 1ffffffff1282774 R09: fffffbfff1282775 R10: fffffbfff1282774 R11: ffffffff89413ba3 R12: 00000000000000ff R13: fffffbfff1282774 R14: 1ffff1100be22f7d R15: 0000000000000003 queued_write_lock include/asm-generic/qrwlock.h:104 [inline] do_raw_write_lock+0x1d6/0x290 kernel/locking/spinlock_debug.c:203 __raw_write_lock_bh include/linux/rwlock_api_smp.h:204 [inline] _raw_write_lock_bh+0x3b/0x50 kernel/locking/spinlock.c:312 x25_insert_socket+0x21/0xe0 net/x25/af_x25.c:267 x25_bind+0x273/0x340 net/x25/af_x25.c:705 __sys_bind+0x23f/0x290 net/socket.c:1505 __do_sys_bind net/socket.c:1516 [inline] __se_sys_bind net/socket.c:1514 [inline] __x64_sys_bind+0x73/0xb0 net/socket.c:1514 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457e39 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fafccd0dc78 EFLAGS: 00000246 ORIG_RAX: 0000000000000031 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457e39 RDX: 0000000000000012 RSI: 0000000020000240 RDI: 0000000000000004 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fafccd0e6d4 R13: 00000000004bdf8b R14: 00000000004ce4b8 R15: 00000000ffffffff Sending NMI from CPU 0 to CPUs 1: NMI backtrace for cpu 1 CPU: 1 PID: 8752 Comm: syz-executor4 Not tainted 5.0.0-rc4+ #51 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__x25_find_socket+0x78/0x120 net/x25/af_x25.c:328 Code: 89 f8 48 c1 e8 03 80 3c 18 00 0f 85 a6 00 00 00 4d 8b 64 24 68 4d 85 e4 74 7f e8 03 97 3d fb 49 83 ec 68 74 74 e8 f8 96 3d fb <49> 8d bc 24 88 04 00 00 48 89 f8 48 c1 e8 03 0f b6 04 18 84 c0 74 RSP: 0018:ffff8880639efc58 EFLAGS: 00000246 RAX: 0000000000040000 RBX: dffffc0000000000 RCX: ffffc9000e677000 RDX: 0000000000040000 RSI: ffffffff863244b8 RDI: ffff88806a764628 RBP: ffff8880639efc80 R08: ffff8880a80d05c0 R09: fffffbfff1282775 R10: fffffbfff1282774 R11: ffffffff89413ba3 R12: ffff88806a7645c0 R13: 0000000000000001 R14: ffff88809f29ac00 R15: 0000000000000000 FS: 00007fe8d0c58700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b32823000 CR3: 00000000672eb000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: x25_new_lci net/x25/af_x25.c:357 [inline] x25_connect+0x374/0xdf0 net/x25/af_x25.c:786 __sys_connect+0x266/0x330 net/socket.c:1686 __do_sys_connect net/socket.c:1697 [inline] __se_sys_connect net/socket.c:1694 [inline] __x64_sys_connect+0x73/0xb0 net/socket.c:1694 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457e39 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fe8d0c57c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457e39 RDX: 0000000000000012 RSI: 0000000020000200 RDI: 0000000000000004 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe8d0c586d4 R13: 00000000004be378 R14: 00000000004ceb00 R15: 00000000ffffffff Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Andrew Hendry Cc: linux-x25@vger.kernel.org Signed-off-by: David S. Miller --- net/x25/af_x25.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 5121729b8b63..ec3a828672ef 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -352,17 +352,15 @@ static unsigned int x25_new_lci(struct x25_neigh *nb) unsigned int lci = 1; struct sock *sk; - read_lock_bh(&x25_list_lock); - - while ((sk = __x25_find_socket(lci, nb)) != NULL) { + while ((sk = x25_find_socket(lci, nb)) != NULL) { sock_put(sk); if (++lci == 4096) { lci = 0; break; } + cond_resched(); } - read_unlock_bh(&x25_list_lock); return lci; } From b904aada8033749558e3aaeb574a2200bf0458f0 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 8 Feb 2019 15:15:00 -0600 Subject: [PATCH 0957/1436] devlink: Add WARN_ON to catch errors of not cleaning devlink objects Add WARN_ON to make sure that all sub objects of a devlink device are cleanedup before freeing the devlink device. This helps to catch any driver bugs. Signed-off-by: Parav Pandit Signed-off-by: David S. Miller --- net/core/devlink.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/core/devlink.c b/net/core/devlink.c index e6a015b8ac9b..0de1edb65c24 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -5241,6 +5241,14 @@ EXPORT_SYMBOL_GPL(devlink_unregister); */ void devlink_free(struct devlink *devlink) { + WARN_ON(!list_empty(&devlink->reporter_list)); + WARN_ON(!list_empty(&devlink->region_list)); + WARN_ON(!list_empty(&devlink->param_list)); + WARN_ON(!list_empty(&devlink->resource_list)); + WARN_ON(!list_empty(&devlink->dpipe_table_list)); + WARN_ON(!list_empty(&devlink->sb_list)); + WARN_ON(!list_empty(&devlink->port_list)); + kfree(devlink); } EXPORT_SYMBOL_GPL(devlink_free); From 83cd203084f68707839c00df120eed367d49e311 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 6 Feb 2019 10:29:17 -0800 Subject: [PATCH 0958/1436] Input: qt2160 - switch to using brightness_set_blocking() Now that LEDs core allows "blocking" flavor of "set brightness" method we can use it and get rid of private work items. Reviewed-by: Sven Van Asbroeck Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/qt2160.c | 81 ++++++++++++++------------------- 1 file changed, 33 insertions(+), 48 deletions(-) diff --git a/drivers/input/keyboard/qt2160.c b/drivers/input/keyboard/qt2160.c index 43b86482dda0..d466bc07aebb 100644 --- a/drivers/input/keyboard/qt2160.c +++ b/drivers/input/keyboard/qt2160.c @@ -58,10 +58,9 @@ static unsigned char qt2160_key2code[] = { struct qt2160_led { struct qt2160_data *qt2160; struct led_classdev cdev; - struct work_struct work; char name[32]; int id; - enum led_brightness new_brightness; + enum led_brightness brightness; }; #endif @@ -74,7 +73,6 @@ struct qt2160_data { u16 key_matrix; #ifdef CONFIG_LEDS_CLASS struct qt2160_led leds[QT2160_NUM_LEDS_X]; - struct mutex led_lock; #endif }; @@ -83,46 +81,39 @@ static int qt2160_write(struct i2c_client *client, u8 reg, u8 data); #ifdef CONFIG_LEDS_CLASS -static void qt2160_led_work(struct work_struct *work) -{ - struct qt2160_led *led = container_of(work, struct qt2160_led, work); - struct qt2160_data *qt2160 = led->qt2160; - struct i2c_client *client = qt2160->client; - int value = led->new_brightness; - u32 drive, pwmen; - - mutex_lock(&qt2160->led_lock); - - drive = qt2160_read(client, QT2160_CMD_DRIVE_X); - pwmen = qt2160_read(client, QT2160_CMD_PWMEN_X); - if (value != LED_OFF) { - drive |= (1 << led->id); - pwmen |= (1 << led->id); - - } else { - drive &= ~(1 << led->id); - pwmen &= ~(1 << led->id); - } - qt2160_write(client, QT2160_CMD_DRIVE_X, drive); - qt2160_write(client, QT2160_CMD_PWMEN_X, pwmen); - - /* - * Changing this register will change the brightness - * of every LED in the qt2160. It's a HW limitation. - */ - if (value != LED_OFF) - qt2160_write(client, QT2160_CMD_PWM_DUTY, value); - - mutex_unlock(&qt2160->led_lock); -} - -static void qt2160_led_set(struct led_classdev *cdev, - enum led_brightness value) +static int qt2160_led_set(struct led_classdev *cdev, + enum led_brightness value) { struct qt2160_led *led = container_of(cdev, struct qt2160_led, cdev); + struct qt2160_data *qt2160 = led->qt2160; + struct i2c_client *client = qt2160->client; + u32 drive, pwmen; - led->new_brightness = value; - schedule_work(&led->work); + if (value != led->brightness) { + drive = qt2160_read(client, QT2160_CMD_DRIVE_X); + pwmen = qt2160_read(client, QT2160_CMD_PWMEN_X); + if (value != LED_OFF) { + drive |= BIT(led->id); + pwmen |= BIT(led->id); + + } else { + drive &= ~BIT(led->id); + pwmen &= ~BIT(led->id); + } + qt2160_write(client, QT2160_CMD_DRIVE_X, drive); + qt2160_write(client, QT2160_CMD_PWMEN_X, pwmen); + + /* + * Changing this register will change the brightness + * of every LED in the qt2160. It's a HW limitation. + */ + if (value != LED_OFF) + qt2160_write(client, QT2160_CMD_PWM_DUTY, value); + + led->brightness = value; + } + + return 0; } #endif /* CONFIG_LEDS_CLASS */ @@ -293,20 +284,16 @@ static int qt2160_register_leds(struct qt2160_data *qt2160) int ret; int i; - mutex_init(&qt2160->led_lock); - for (i = 0; i < QT2160_NUM_LEDS_X; i++) { struct qt2160_led *led = &qt2160->leds[i]; snprintf(led->name, sizeof(led->name), "qt2160:x%d", i); led->cdev.name = led->name; - led->cdev.brightness_set = qt2160_led_set; + led->cdev.brightness_set_blocking = qt2160_led_set; led->cdev.brightness = LED_OFF; led->id = i; led->qt2160 = qt2160; - INIT_WORK(&led->work, qt2160_led_work); - ret = led_classdev_register(&client->dev, &led->cdev); if (ret < 0) return ret; @@ -324,10 +311,8 @@ static void qt2160_unregister_leds(struct qt2160_data *qt2160) { int i; - for (i = 0; i < QT2160_NUM_LEDS_X; i++) { + for (i = 0; i < QT2160_NUM_LEDS_X; i++) led_classdev_unregister(&qt2160->leds[i].cdev); - cancel_work_sync(&qt2160->leds[i].work); - } } #else From f420c54e4b12c1361c6ed313002ee7bd7ac58362 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 11 Feb 2019 14:32:40 -0800 Subject: [PATCH 0959/1436] Revert "Input: elan_i2c - add ACPI ID for touchpad in ASUS Aspire F5-573G" This reverts commit 7db54c89f0b30a101584e09d3729144e6170059d as it breaks Acer Aspire V-371 and other devices. According to Elan: "Acer Aspire F5-573G is MS Precision touchpad which should use hid multitouch driver. ELAN0501 should not be added in elan_i2c." Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202503 Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index f322a1768fbb..a94b6494e71a 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1336,7 +1336,6 @@ MODULE_DEVICE_TABLE(i2c, elan_id); static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0000", 0 }, { "ELAN0100", 0 }, - { "ELAN0501", 0 }, { "ELAN0600", 0 }, { "ELAN0602", 0 }, { "ELAN0605", 0 }, From 1fbf1252df0e4212e17f98130cb5372b3bc019b0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 6 Feb 2019 15:00:19 -0800 Subject: [PATCH 0960/1436] mlx5: use RCU lock in mlx5_eq_cq_get() mlx5_eq_cq_get() is called in IRQ handler, the spinlock inside gets a lot of contentions when we test some heavy workload with 60 RX queues and 80 CPU's, and it is clearly shown in the flame graph. In fact, radix_tree_lookup() is perfectly fine with RCU read lock, we don't have to take a spinlock on this hot path. This is pretty much similar to commit 291c566a2891 ("net/mlx4_core: Fix racy CQ (Completion Queue) free"). Slow paths are still serialized with the spinlock, and with synchronize_irq() it should be safe to just move the fast path to RCU read lock. This patch itself reduces the latency by about 50% for our memcached workload on a 4.14 kernel we test. In upstream, as pointed out by Saeed, this spinlock gets some rework in commit 02d92f790364 ("net/mlx5: CQ Database per EQ"), so the difference could be smaller. Cc: Saeed Mahameed Cc: Tariq Toukan Acked-by: Saeed Mahameed Signed-off-by: Cong Wang Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index ee04aab65a9f..7092457705a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -114,11 +114,11 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) struct mlx5_cq_table *table = &eq->cq_table; struct mlx5_core_cq *cq = NULL; - spin_lock(&table->lock); + rcu_read_lock(); cq = radix_tree_lookup(&table->tree, cqn); if (likely(cq)) mlx5_cq_hold(cq); - spin_unlock(&table->lock); + rcu_read_unlock(); return cq; } @@ -371,9 +371,9 @@ int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) struct mlx5_cq_table *table = &eq->cq_table; int err; - spin_lock_irq(&table->lock); + spin_lock(&table->lock); err = radix_tree_insert(&table->tree, cq->cqn, cq); - spin_unlock_irq(&table->lock); + spin_unlock(&table->lock); return err; } @@ -383,9 +383,9 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) struct mlx5_cq_table *table = &eq->cq_table; struct mlx5_core_cq *tmp; - spin_lock_irq(&table->lock); + spin_lock(&table->lock); tmp = radix_tree_delete(&table->tree, cq->cqn); - spin_unlock_irq(&table->lock); + spin_unlock(&table->lock); if (!tmp) { mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn); From e3613bb8afc2a9474c9214d65c8326c5ac02135e Mon Sep 17 00:00:00 2001 From: Stefan O'Rear Date: Sun, 16 Dec 2018 13:03:36 -0500 Subject: [PATCH 0961/1436] riscv: Add pte bit to distinguish swap from invalid Previously, invalid PTEs and swap PTEs had the same binary representation, causing errors when attempting to unmap PROT_NONE mappings, including implicit unmap on exit. Typical error: swap_info_get: Bad swap file entry 40000000007a9879 BUG: Bad page map in process a.out pte:3d4c3cc0 pmd:3e521401 Cc: stable@vger.kernel.org Signed-off-by: Stefan O'Rear Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable-bits.h | 6 ++++++ arch/riscv/include/asm/pgtable.h | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h index 2fa2942be221..470755cb7558 100644 --- a/arch/riscv/include/asm/pgtable-bits.h +++ b/arch/riscv/include/asm/pgtable-bits.h @@ -35,6 +35,12 @@ #define _PAGE_SPECIAL _PAGE_SOFT #define _PAGE_TABLE _PAGE_PRESENT +/* + * _PAGE_PROT_NONE is set on not-present pages (and ignored by the hardware) to + * distinguish them from swapped out pages + */ +#define _PAGE_PROT_NONE _PAGE_READ + #define _PAGE_PFN_SHIFT 10 /* Set of bits to preserve across pte_modify() */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 16301966d65b..a8179a8c1491 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -44,7 +44,7 @@ /* Page protection bits */ #define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER) -#define PAGE_NONE __pgprot(0) +#define PAGE_NONE __pgprot(_PAGE_PROT_NONE) #define PAGE_READ __pgprot(_PAGE_BASE | _PAGE_READ) #define PAGE_WRITE __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_WRITE) #define PAGE_EXEC __pgprot(_PAGE_BASE | _PAGE_EXEC) @@ -98,7 +98,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; static inline int pmd_present(pmd_t pmd) { - return (pmd_val(pmd) & _PAGE_PRESENT); + return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE)); } static inline int pmd_none(pmd_t pmd) @@ -178,7 +178,7 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long addr) static inline int pte_present(pte_t pte) { - return (pte_val(pte) & _PAGE_PRESENT); + return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE)); } static inline int pte_none(pte_t pte) @@ -380,7 +380,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, * * Format of swap PTE: * bit 0: _PAGE_PRESENT (zero) - * bit 1: reserved for future use (zero) + * bit 1: _PAGE_PROT_NONE (zero) * bits 2 to 6: swap type * bits 7 to XLEN-1: swap offset */ From 41fb9d54f12b87fb1f670653e95d34668a08e8ee Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Fri, 8 Feb 2019 09:11:08 -0800 Subject: [PATCH 0962/1436] Revert "RISC-V: Make BSS section as the last section in vmlinux.lds.S" At least BBL relies on the flat binaries containing all the bytes in the actual image to exist in the file. Before this revert the flat images dropped the trailing zeros, which caused BBL to put its copy of the device tree where Linux thought the BSS was, which wreaks all sorts of havoc. Manifesting the bug is a bit subtle because BBL aligns everything to 2MiB page boundaries, but with large enough kernels you're almost certain to get bitten by the bug. While moving the sections around isn't a great long-term fix, it will at least avoid producing broken images. This reverts commit 22e6a2e14cb8ebcae059488cf24e778e4058c2bf. Signed-off-by: Palmer Dabbelt Reviewed-by: Christoph Hellwig --- arch/riscv/kernel/vmlinux.lds.S | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 1e1395d63dab..65df1dfdc303 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -18,8 +18,6 @@ #include #include -#define MAX_BYTES_PER_LONG 0x10 - OUTPUT_ARCH(riscv) ENTRY(_start) @@ -76,6 +74,8 @@ SECTIONS *(.sbss*) } + BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0) + EXCEPTION_TABLE(0x10) NOTES @@ -83,10 +83,6 @@ SECTIONS *(.rel.dyn*) } - BSS_SECTION(MAX_BYTES_PER_LONG, - MAX_BYTES_PER_LONG, - MAX_BYTES_PER_LONG) - _end = .; STABS_DEBUG From 8d29d16d21342a0c86405d46de0c4ac5daf1760f Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Sun, 10 Feb 2019 11:58:29 -0800 Subject: [PATCH 0963/1436] netfilter: compat: initialize all fields in xt_init If a non zero value happens to be in xt[NFPROTO_BRIDGE].cur at init time, the following panic can be caused by running % ebtables -t broute -F BROUTING from a 32-bit user level on a 64-bit kernel. This patch replaces kmalloc_array with kcalloc when allocating xt. [ 474.680846] BUG: unable to handle kernel paging request at 0000000009600920 [ 474.687869] PGD 2037006067 P4D 2037006067 PUD 2038938067 PMD 0 [ 474.693838] Oops: 0000 [#1] SMP [ 474.697055] CPU: 9 PID: 4662 Comm: ebtables Kdump: loaded Not tainted 4.19.17-11302235.AroraKernelnext.fc18.x86_64 #1 [ 474.707721] Hardware name: Supermicro X9DRT/X9DRT, BIOS 3.0 06/28/2013 [ 474.714313] RIP: 0010:xt_compat_calc_jump+0x2f/0x63 [x_tables] [ 474.720201] Code: 40 0f b6 ff 55 31 c0 48 6b ff 70 48 03 3d dc 45 00 00 48 89 e5 8b 4f 6c 4c 8b 47 60 ff c9 39 c8 7f 2f 8d 14 08 d1 fa 48 63 fa <41> 39 34 f8 4c 8d 0c fd 00 00 00 00 73 05 8d 42 01 eb e1 76 05 8d [ 474.739023] RSP: 0018:ffffc9000943fc58 EFLAGS: 00010207 [ 474.744296] RAX: 0000000000000000 RBX: ffffc90006465000 RCX: 0000000002580249 [ 474.751485] RDX: 00000000012c0124 RSI: fffffffff7be17e9 RDI: 00000000012c0124 [ 474.758670] RBP: ffffc9000943fc58 R08: 0000000000000000 R09: ffffffff8117cf8f [ 474.765855] R10: ffffc90006477000 R11: 0000000000000000 R12: 0000000000000001 [ 474.773048] R13: 0000000000000000 R14: ffffc9000943fcb8 R15: ffffc9000943fcb8 [ 474.780234] FS: 0000000000000000(0000) GS:ffff88a03f840000(0063) knlGS:00000000f7ac7700 [ 474.788612] CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 [ 474.794632] CR2: 0000000009600920 CR3: 0000002037422006 CR4: 00000000000606e0 [ 474.802052] Call Trace: [ 474.804789] compat_do_replace+0x1fb/0x2a3 [ebtables] [ 474.810105] compat_do_ebt_set_ctl+0x69/0xe6 [ebtables] [ 474.815605] ? try_module_get+0x37/0x42 [ 474.819716] compat_nf_setsockopt+0x4f/0x6d [ 474.824172] compat_ip_setsockopt+0x7e/0x8c [ 474.828641] compat_raw_setsockopt+0x16/0x3a [ 474.833220] compat_sock_common_setsockopt+0x1d/0x24 [ 474.838458] __compat_sys_setsockopt+0x17e/0x1b1 [ 474.843343] ? __check_object_size+0x76/0x19a [ 474.847960] __ia32_compat_sys_socketcall+0x1cb/0x25b [ 474.853276] do_fast_syscall_32+0xaf/0xf6 [ 474.857548] entry_SYSENTER_compat+0x6b/0x7a Signed-off-by: Francesco Ruggeri Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index aecadd471e1d..13e1ac333fa4 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1899,7 +1899,7 @@ static int __init xt_init(void) seqcount_init(&per_cpu(xt_recseq, i)); } - xt = kmalloc_array(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL); + xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL); if (!xt) return -ENOMEM; From 48ab807c792fa9074a46cfdc837023fadb46fb73 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 7 Feb 2019 13:13:11 +0000 Subject: [PATCH 0964/1436] netfilter: conntrack: fix indentation issue A statement in an if block is not indented correctly. Fix this. Signed-off-by: Colin Ian King Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 8071bb04a849..349b42a65c8a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2675,7 +2675,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb, ret = ctnetlink_dump_tuples_ip(skb, &m); if (ret >= 0) { l4proto = nf_ct_l4proto_find(tuple->dst.protonum); - ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto); + ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto); } rcu_read_unlock(); From 6fde9df6b76eaf2395d158628d5f915857981a44 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 18:44:56 -0600 Subject: [PATCH 0965/1436] ipvs: Use struct_size() helper One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; size = sizeof(struct foo) + count * sizeof(struct boo); instance = alloc(size, GFP_KERNEL) Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: size = struct_size(instance, entry, count); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 432141f04af3..446beeb5e7b2 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2722,8 +2722,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) int size; get = (struct ip_vs_get_services *)arg; - size = sizeof(*get) + - sizeof(struct ip_vs_service_entry) * get->num_services; + size = struct_size(get, entrytable, get->num_services); if (*len != size) { pr_err("length: %u != %u\n", *len, size); ret = -EINVAL; @@ -2764,8 +2763,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) int size; get = (struct ip_vs_get_dests *)arg; - size = sizeof(*get) + - sizeof(struct ip_vs_dest_entry) * get->num_dests; + size = struct_size(get, entrytable, get->num_dests); if (*len != size) { pr_err("length: %u != %u\n", *len, size); ret = -EINVAL; From 6ca64ef37da930ad0fd0f2c3dac1a4607e0af494 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 18:56:08 -0600 Subject: [PATCH 0966/1436] netfilter: xt_recent: Use struct_size() in kvzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; void *entry[]; }; size = sizeof(struct foo) + count * sizeof(void *); instance = alloc(size, GFP_KERNEL) Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: size = struct_size(instance, entry, count); instance = alloc(size, GFP_KERNEL) Notice that, in this case, variable sz is not necessary, hence it is removed. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_recent.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index f44de4bc2100..1664d2ec8b2f 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -337,7 +337,6 @@ static int recent_mt_check(const struct xt_mtchk_param *par, unsigned int nstamp_mask; unsigned int i; int ret = -EINVAL; - size_t sz; net_get_random_once(&hash_rnd, sizeof(hash_rnd)); @@ -387,8 +386,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par, goto out; } - sz = sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size; - t = kvzalloc(sz, GFP_KERNEL); + t = kvzalloc(struct_size(t, iphash, ip_list_hash_size), GFP_KERNEL); if (t == NULL) { ret = -ENOMEM; goto out; From 7384b538d3aed2ed49d3575483d17aeee790fb06 Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Mon, 11 Feb 2019 09:18:28 +0700 Subject: [PATCH 0967/1436] tipc: fix skb may be leaky in tipc_link_input When we free skb at tipc_data_input, we return a 'false' boolean. Then, skb passed to subcalling tipc_link_input in tipc_link_rcv, 1303 int tipc_link_rcv: ... 1354 if (!tipc_data_input(l, skb, l->inputq)) 1355 rc |= tipc_link_input(l, skb, l->inputq); Fix it by simple changing to a 'true' boolean when skb is being free-ed. Then, tipc_link_rcv will bypassed to subcalling tipc_link_input as above condition. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller --- net/tipc/link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 2792a3cae682..7c70034b1073 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1145,7 +1145,7 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, default: pr_warn("Dropping received illegal msg type\n"); kfree_skb(skb); - return false; + return true; }; } From aef1897cd36dcf5e296f1d2bae7e0d268561b685 Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Tue, 12 Feb 2019 09:56:25 +0800 Subject: [PATCH 0968/1436] blk-mq: insert rq with DONTPREP to hctx dispatch list when requeue When requeue, if RQF_DONTPREP, rq has contained some driver specific data, so insert it to hctx dispatch list to avoid any merge. Take scsi as example, here is the trace event log (no io scheduler, because RQF_STARTED would prevent merging), kworker/0:1H-339 [000] ...1 2037.209289: block_rq_insert: 8,0 R 4096 () 32768 + 8 [kworker/0:1H] scsi_inert_test-1987 [000] .... 2037.220465: block_bio_queue: 8,0 R 32776 + 8 [scsi_inert_test] scsi_inert_test-1987 [000] ...2 2037.220466: block_bio_backmerge: 8,0 R 32776 + 8 [scsi_inert_test] kworker/0:1H-339 [000] .... 2047.220913: block_rq_issue: 8,0 R 8192 () 32768 + 16 [kworker/0:1H] scsi_inert_test-1996 [000] ..s1 2047.221007: block_rq_complete: 8,0 R () 32768 + 8 [0] scsi_inert_test-1996 [000] .Ns1 2047.221045: block_rq_requeue: 8,0 R () 32776 + 8 [0] kworker/0:1H-339 [000] ...1 2047.221054: block_rq_insert: 8,0 R 4096 () 32776 + 8 [kworker/0:1H] kworker/0:1H-339 [000] ...1 2047.221056: block_rq_issue: 8,0 R 4096 () 32776 + 8 [kworker/0:1H] scsi_inert_test-1986 [000] ..s1 2047.221119: block_rq_complete: 8,0 R () 32776 + 8 [0] (32768 + 8) was requeued by scsi_queue_insert and had RQF_DONTPREP. Then it was merged with (32776 + 8) and issued. Due to RQF_DONTPREP, the sdb only contained the part of (32768 + 8), then only that part was completed. The lucky thing was that scsi_io_completion detected it and requeued the remaining part. So we didn't get corrupted data. However, the requeue of (32776 + 8) is not expected. Suggested-by: Jens Axboe Signed-off-by: Jianchao Wang Signed-off-by: Jens Axboe --- block/blk-mq.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 8f5b533764ca..9437a5eb07cf 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -737,12 +737,20 @@ static void blk_mq_requeue_work(struct work_struct *work) spin_unlock_irq(&q->requeue_lock); list_for_each_entry_safe(rq, next, &rq_list, queuelist) { - if (!(rq->rq_flags & RQF_SOFTBARRIER)) + if (!(rq->rq_flags & (RQF_SOFTBARRIER | RQF_DONTPREP))) continue; rq->rq_flags &= ~RQF_SOFTBARRIER; list_del_init(&rq->queuelist); - blk_mq_sched_insert_request(rq, true, false, false); + /* + * If RQF_DONTPREP, rq has contained some driver specific + * data, so insert it to hctx dispatch list to avoid any + * merge. + */ + if (rq->rq_flags & RQF_DONTPREP) + blk_mq_request_bypass_insert(rq, false); + else + blk_mq_sched_insert_request(rq, true, false, false); } while (!list_empty(&rq_list)) { From 1727a9dce6775a4ea77a611e9ea51cac1d093519 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Mon, 11 Feb 2019 12:01:18 +0000 Subject: [PATCH 0969/1436] selftests: bpf: add "alu32" to .gitignore "alu32" is a build dir and contains various files for BPF sub-register code-gen testing. This patch tells git to ignore it. Suggested-by: Yonghong Song Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index dd093bd91aa9..e47168d1257d 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -29,3 +29,4 @@ test_netcnt test_section_names test_tcpnotify_user test_libbpf +alu32 From 4836b4637ef080c2764c44ee40ed354cdb991d79 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Mon, 11 Feb 2019 12:01:19 +0000 Subject: [PATCH 0970/1436] selftests: bpf: extend sub-register mode compilation to all bpf object files At the moment, we only do extra sub-register mode compilation on bpf object files used by "test_progs". These object files are really loaded and executed. This patch further extends sub-register mode compilation to all bpf object files, even those without corresponding runtime tests. Because this could help testing LLVM sub-register code-gen, kernel bpf selftest has much more C testcases with reasonable size and complexity compared with LLVM testsuite which only contains unit tests. There were some file duplication inside BPF_OBJ_FILES_DUAL_COMPILE which is removed now. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index c7e1e3255448..f2c11474f762 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -36,20 +36,14 @@ BPF_OBJ_FILES = \ get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_xdp_vlan.o \ xdp_dummy.o test_map_in_map.o test_spin_lock.o test_map_lock.o \ - test_sock_fields_kern.o - -# Objects are built with default compilation flags and with sub-register -# code-gen enabled. -BPF_OBJ_FILES_DUAL_COMPILE = \ - test_pkt_access.o test_pkt_access.o test_xdp.o test_adjust_tail.o \ - test_l4lb.o test_l4lb_noinline.o test_xdp_noinline.o test_tcp_estats.o \ + test_pkt_access.o test_xdp.o test_adjust_tail.o test_l4lb.o \ + test_l4lb_noinline.o test_xdp_noinline.o test_tcp_estats.o \ test_obj_id.o test_pkt_md_access.o test_tracepoint.o \ - test_stacktrace_map.o test_stacktrace_map.o test_stacktrace_build_id.o \ - test_stacktrace_build_id.o test_get_stack_rawtp.o \ - test_get_stack_rawtp.o test_tracepoint.o test_sk_lookup_kern.o \ - test_queue_map.o test_stack_map.o + test_stacktrace_map.o test_stacktrace_build_id.o \ + test_get_stack_rawtp.o test_sk_lookup_kern.o test_queue_map.o \ + test_stack_map.o test_sock_fields_kern.o -TEST_GEN_FILES = $(BPF_OBJ_FILES) $(BPF_OBJ_FILES_DUAL_COMPILE) +TEST_GEN_FILES = $(BPF_OBJ_FILES) # Also test sub-register code-gen if LLVM + kernel both has eBPF v3 processor # support which is the first version to contain both ALU32 and JMP32 @@ -59,7 +53,7 @@ SUBREG_CODEGEN := $(shell echo "int cal(int a) { return a > 0; }" | \ $(LLC) -mattr=+alu32 -mcpu=probe 2>&1 | \ grep 'if w') ifneq ($(SUBREG_CODEGEN),) -TEST_GEN_FILES += $(patsubst %.o,alu32/%.o, $(BPF_OBJ_FILES_DUAL_COMPILE)) +TEST_GEN_FILES += $(patsubst %.o,alu32/%.o, $(BPF_OBJ_FILES)) endif # Order correspond to 'make run_tests' order From bd4aed0ee73ca873bef3cb3ec746dd796f03df28 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Mon, 11 Feb 2019 12:01:20 +0000 Subject: [PATCH 0971/1436] selftests: bpf: centre kernel bpf objects under new subdir "progs" At the moment, all kernel bpf objects are listed under BPF_OBJ_FILES. Listing them manually sometimes causing patch conflict when people are adding new testcases simultaneously. It is better to centre all the related source files under a subdir "progs", then auto-generate the object file list. Suggested-by: Alexei Starovoitov Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 26 ++++--------------- .../selftests/bpf/{ => progs}/bpf_flow.c | 0 .../selftests/bpf/{ => progs}/connect4_prog.c | 0 .../selftests/bpf/{ => progs}/connect6_prog.c | 0 .../selftests/bpf/{ => progs}/dev_cgroup.c | 0 .../bpf/{ => progs}/get_cgroup_id_kern.c | 0 .../selftests/bpf/{ => progs}/netcnt_prog.c | 0 .../bpf/{ => progs}/sample_map_ret0.c | 0 .../selftests/bpf/{ => progs}/sample_ret0.c | 0 .../selftests/bpf/{ => progs}/sendmsg4_prog.c | 0 .../selftests/bpf/{ => progs}/sendmsg6_prog.c | 0 .../bpf/{ => progs}/socket_cookie_prog.c | 0 .../bpf/{ => progs}/sockmap_parse_prog.c | 0 .../bpf/{ => progs}/sockmap_tcp_msg_prog.c | 0 .../bpf/{ => progs}/sockmap_verdict_prog.c | 0 .../bpf/{ => progs}/test_adjust_tail.c | 0 .../bpf/{ => progs}/test_btf_haskv.c | 0 .../selftests/bpf/{ => progs}/test_btf_nokv.c | 0 .../bpf/{ => progs}/test_get_stack_rawtp.c | 0 .../selftests/bpf/{ => progs}/test_l4lb.c | 0 .../bpf/{ => progs}/test_l4lb_noinline.c | 0 .../bpf/{ => progs}/test_lirc_mode2_kern.c | 0 .../bpf/{ => progs}/test_lwt_seg6local.c | 0 .../bpf/{ => progs}/test_map_in_map.c | 0 .../selftests/bpf/{ => progs}/test_map_lock.c | 0 .../selftests/bpf/{ => progs}/test_obj_id.c | 0 .../bpf/{ => progs}/test_pkt_access.c | 0 .../bpf/{ => progs}/test_pkt_md_access.c | 0 .../bpf/{ => progs}/test_queue_map.c | 0 .../{ => progs}/test_select_reuseport_kern.c | 0 .../bpf/{ => progs}/test_sk_lookup_kern.c | 0 .../bpf/{ => progs}/test_skb_cgroup_id_kern.c | 0 .../bpf/{ => progs}/test_sock_fields_kern.c | 0 .../bpf/{ => progs}/test_sockhash_kern.c | 0 .../bpf/{ => progs}/test_sockmap_kern.c | 0 .../bpf/{ => progs}/test_spin_lock.c | 0 .../bpf/{ => progs}/test_stack_map.c | 0 .../{ => progs}/test_stacktrace_build_id.c | 0 .../bpf/{ => progs}/test_stacktrace_map.c | 0 .../bpf/{ => progs}/test_tcp_estats.c | 0 .../bpf/{ => progs}/test_tcpbpf_kern.c | 0 .../bpf/{ => progs}/test_tcpnotify_kern.c | 0 .../bpf/{ => progs}/test_tracepoint.c | 0 .../bpf/{ => progs}/test_tunnel_kern.c | 0 .../selftests/bpf/{ => progs}/test_xdp.c | 0 .../selftests/bpf/{ => progs}/test_xdp_meta.c | 0 .../bpf/{ => progs}/test_xdp_noinline.c | 0 .../bpf/{ => progs}/test_xdp_redirect.c | 0 .../selftests/bpf/{ => progs}/test_xdp_vlan.c | 0 .../selftests/bpf/{ => progs}/xdp_dummy.c | 0 50 files changed, 5 insertions(+), 21 deletions(-) rename tools/testing/selftests/bpf/{ => progs}/bpf_flow.c (100%) rename tools/testing/selftests/bpf/{ => progs}/connect4_prog.c (100%) rename tools/testing/selftests/bpf/{ => progs}/connect6_prog.c (100%) rename tools/testing/selftests/bpf/{ => progs}/dev_cgroup.c (100%) rename tools/testing/selftests/bpf/{ => progs}/get_cgroup_id_kern.c (100%) rename tools/testing/selftests/bpf/{ => progs}/netcnt_prog.c (100%) rename tools/testing/selftests/bpf/{ => progs}/sample_map_ret0.c (100%) rename tools/testing/selftests/bpf/{ => progs}/sample_ret0.c (100%) rename tools/testing/selftests/bpf/{ => progs}/sendmsg4_prog.c (100%) rename tools/testing/selftests/bpf/{ => progs}/sendmsg6_prog.c (100%) rename tools/testing/selftests/bpf/{ => progs}/socket_cookie_prog.c (100%) rename tools/testing/selftests/bpf/{ => progs}/sockmap_parse_prog.c (100%) rename tools/testing/selftests/bpf/{ => progs}/sockmap_tcp_msg_prog.c (100%) rename tools/testing/selftests/bpf/{ => progs}/sockmap_verdict_prog.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_adjust_tail.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_btf_haskv.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_btf_nokv.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_get_stack_rawtp.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_l4lb.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_l4lb_noinline.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_lirc_mode2_kern.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_lwt_seg6local.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_map_in_map.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_map_lock.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_obj_id.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_pkt_access.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_pkt_md_access.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_queue_map.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_select_reuseport_kern.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_sk_lookup_kern.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_skb_cgroup_id_kern.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_sock_fields_kern.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_sockhash_kern.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_sockmap_kern.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_spin_lock.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_stack_map.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_stacktrace_build_id.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_stacktrace_map.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_tcp_estats.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_tcpbpf_kern.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_tcpnotify_kern.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_tracepoint.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_tunnel_kern.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_xdp.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_xdp_meta.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_xdp_noinline.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_xdp_redirect.c (100%) rename tools/testing/selftests/bpf/{ => progs}/test_xdp_vlan.c (100%) rename tools/testing/selftests/bpf/{ => progs}/xdp_dummy.c (100%) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f2c11474f762..575746e63544 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -25,24 +25,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \ test_netcnt test_tcpnotify_user test_sock_fields -BPF_OBJ_FILES = \ - test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ - sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o \ - test_tcpnotify_kern.o sample_map_ret0.o test_tcpbpf_kern.o \ - sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o \ - test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o \ - test_tunnel_kern.o test_sockhash_kern.o test_lwt_seg6local.o \ - sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ - get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ - test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_xdp_vlan.o \ - xdp_dummy.o test_map_in_map.o test_spin_lock.o test_map_lock.o \ - test_pkt_access.o test_xdp.o test_adjust_tail.o test_l4lb.o \ - test_l4lb_noinline.o test_xdp_noinline.o test_tcp_estats.o \ - test_obj_id.o test_pkt_md_access.o test_tracepoint.o \ - test_stacktrace_map.o test_stacktrace_build_id.o \ - test_get_stack_rawtp.o test_sk_lookup_kern.o test_queue_map.o \ - test_stack_map.o test_sock_fields_kern.o - +BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c))) TEST_GEN_FILES = $(BPF_OBJ_FILES) # Also test sub-register code-gen if LLVM + kernel both has eBPF v3 processor @@ -184,7 +167,8 @@ $(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(ALU32_BUILD_DIR) \ $(CC) $(CFLAGS) -o $(ALU32_BUILD_DIR)/test_progs_32 $< \ trace_helpers.c $(OUTPUT)/libbpf.a $(LDLIBS) -$(ALU32_BUILD_DIR)/%.o: %.c $(ALU32_BUILD_DIR) $(ALU32_BUILD_DIR)/test_progs_32 +$(ALU32_BUILD_DIR)/%.o: progs/%.c $(ALU32_BUILD_DIR) \ + $(ALU32_BUILD_DIR)/test_progs_32 $(CLANG) $(CLANG_FLAGS) \ -O2 -target bpf -emit-llvm -c $< -o - | \ $(LLC) -march=bpf -mattr=+alu32 -mcpu=$(CPU) $(LLC_FLAGS) \ @@ -196,7 +180,7 @@ endif # Have one program compiled without "-target bpf" to test whether libbpf loads # it successfully -$(OUTPUT)/test_xdp.o: test_xdp.c +$(OUTPUT)/test_xdp.o: progs/test_xdp.c $(CLANG) $(CLANG_FLAGS) \ -O2 -emit-llvm -c $< -o - | \ $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ @@ -204,7 +188,7 @@ ifeq ($(DWARF2BTF),y) $(BTF_PAHOLE) -J $@ endif -$(OUTPUT)/%.o: %.c +$(OUTPUT)/%.o: progs/%.c $(CLANG) $(CLANG_FLAGS) \ -O2 -target bpf -emit-llvm -c $< -o - | \ $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c similarity index 100% rename from tools/testing/selftests/bpf/bpf_flow.c rename to tools/testing/selftests/bpf/progs/bpf_flow.c diff --git a/tools/testing/selftests/bpf/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c similarity index 100% rename from tools/testing/selftests/bpf/connect4_prog.c rename to tools/testing/selftests/bpf/progs/connect4_prog.c diff --git a/tools/testing/selftests/bpf/connect6_prog.c b/tools/testing/selftests/bpf/progs/connect6_prog.c similarity index 100% rename from tools/testing/selftests/bpf/connect6_prog.c rename to tools/testing/selftests/bpf/progs/connect6_prog.c diff --git a/tools/testing/selftests/bpf/dev_cgroup.c b/tools/testing/selftests/bpf/progs/dev_cgroup.c similarity index 100% rename from tools/testing/selftests/bpf/dev_cgroup.c rename to tools/testing/selftests/bpf/progs/dev_cgroup.c diff --git a/tools/testing/selftests/bpf/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c similarity index 100% rename from tools/testing/selftests/bpf/get_cgroup_id_kern.c rename to tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c diff --git a/tools/testing/selftests/bpf/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c similarity index 100% rename from tools/testing/selftests/bpf/netcnt_prog.c rename to tools/testing/selftests/bpf/progs/netcnt_prog.c diff --git a/tools/testing/selftests/bpf/sample_map_ret0.c b/tools/testing/selftests/bpf/progs/sample_map_ret0.c similarity index 100% rename from tools/testing/selftests/bpf/sample_map_ret0.c rename to tools/testing/selftests/bpf/progs/sample_map_ret0.c diff --git a/tools/testing/selftests/bpf/sample_ret0.c b/tools/testing/selftests/bpf/progs/sample_ret0.c similarity index 100% rename from tools/testing/selftests/bpf/sample_ret0.c rename to tools/testing/selftests/bpf/progs/sample_ret0.c diff --git a/tools/testing/selftests/bpf/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c similarity index 100% rename from tools/testing/selftests/bpf/sendmsg4_prog.c rename to tools/testing/selftests/bpf/progs/sendmsg4_prog.c diff --git a/tools/testing/selftests/bpf/sendmsg6_prog.c b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c similarity index 100% rename from tools/testing/selftests/bpf/sendmsg6_prog.c rename to tools/testing/selftests/bpf/progs/sendmsg6_prog.c diff --git a/tools/testing/selftests/bpf/socket_cookie_prog.c b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c similarity index 100% rename from tools/testing/selftests/bpf/socket_cookie_prog.c rename to tools/testing/selftests/bpf/progs/socket_cookie_prog.c diff --git a/tools/testing/selftests/bpf/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c similarity index 100% rename from tools/testing/selftests/bpf/sockmap_parse_prog.c rename to tools/testing/selftests/bpf/progs/sockmap_parse_prog.c diff --git a/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c similarity index 100% rename from tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c rename to tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c similarity index 100% rename from tools/testing/selftests/bpf/sockmap_verdict_prog.c rename to tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c diff --git a/tools/testing/selftests/bpf/test_adjust_tail.c b/tools/testing/selftests/bpf/progs/test_adjust_tail.c similarity index 100% rename from tools/testing/selftests/bpf/test_adjust_tail.c rename to tools/testing/selftests/bpf/progs/test_adjust_tail.c diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c similarity index 100% rename from tools/testing/selftests/bpf/test_btf_haskv.c rename to tools/testing/selftests/bpf/progs/test_btf_haskv.c diff --git a/tools/testing/selftests/bpf/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c similarity index 100% rename from tools/testing/selftests/bpf/test_btf_nokv.c rename to tools/testing/selftests/bpf/progs/test_btf_nokv.c diff --git a/tools/testing/selftests/bpf/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c similarity index 100% rename from tools/testing/selftests/bpf/test_get_stack_rawtp.c rename to tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c diff --git a/tools/testing/selftests/bpf/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c similarity index 100% rename from tools/testing/selftests/bpf/test_l4lb.c rename to tools/testing/selftests/bpf/progs/test_l4lb.c diff --git a/tools/testing/selftests/bpf/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c similarity index 100% rename from tools/testing/selftests/bpf/test_l4lb_noinline.c rename to tools/testing/selftests/bpf/progs/test_l4lb_noinline.c diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_kern.c b/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c similarity index 100% rename from tools/testing/selftests/bpf/test_lirc_mode2_kern.c rename to tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c similarity index 100% rename from tools/testing/selftests/bpf/test_lwt_seg6local.c rename to tools/testing/selftests/bpf/progs/test_lwt_seg6local.c diff --git a/tools/testing/selftests/bpf/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c similarity index 100% rename from tools/testing/selftests/bpf/test_map_in_map.c rename to tools/testing/selftests/bpf/progs/test_map_in_map.c diff --git a/tools/testing/selftests/bpf/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c similarity index 100% rename from tools/testing/selftests/bpf/test_map_lock.c rename to tools/testing/selftests/bpf/progs/test_map_lock.c diff --git a/tools/testing/selftests/bpf/test_obj_id.c b/tools/testing/selftests/bpf/progs/test_obj_id.c similarity index 100% rename from tools/testing/selftests/bpf/test_obj_id.c rename to tools/testing/selftests/bpf/progs/test_obj_id.c diff --git a/tools/testing/selftests/bpf/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c similarity index 100% rename from tools/testing/selftests/bpf/test_pkt_access.c rename to tools/testing/selftests/bpf/progs/test_pkt_access.c diff --git a/tools/testing/selftests/bpf/test_pkt_md_access.c b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c similarity index 100% rename from tools/testing/selftests/bpf/test_pkt_md_access.c rename to tools/testing/selftests/bpf/progs/test_pkt_md_access.c diff --git a/tools/testing/selftests/bpf/test_queue_map.c b/tools/testing/selftests/bpf/progs/test_queue_map.c similarity index 100% rename from tools/testing/selftests/bpf/test_queue_map.c rename to tools/testing/selftests/bpf/progs/test_queue_map.c diff --git a/tools/testing/selftests/bpf/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c similarity index 100% rename from tools/testing/selftests/bpf/test_select_reuseport_kern.c rename to tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c diff --git a/tools/testing/selftests/bpf/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c similarity index 100% rename from tools/testing/selftests/bpf/test_sk_lookup_kern.c rename to tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c similarity index 100% rename from tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c rename to tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c diff --git a/tools/testing/selftests/bpf/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c similarity index 100% rename from tools/testing/selftests/bpf/test_sock_fields_kern.c rename to tools/testing/selftests/bpf/progs/test_sock_fields_kern.c diff --git a/tools/testing/selftests/bpf/test_sockhash_kern.c b/tools/testing/selftests/bpf/progs/test_sockhash_kern.c similarity index 100% rename from tools/testing/selftests/bpf/test_sockhash_kern.c rename to tools/testing/selftests/bpf/progs/test_sockhash_kern.c diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.c b/tools/testing/selftests/bpf/progs/test_sockmap_kern.c similarity index 100% rename from tools/testing/selftests/bpf/test_sockmap_kern.c rename to tools/testing/selftests/bpf/progs/test_sockmap_kern.c diff --git a/tools/testing/selftests/bpf/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c similarity index 100% rename from tools/testing/selftests/bpf/test_spin_lock.c rename to tools/testing/selftests/bpf/progs/test_spin_lock.c diff --git a/tools/testing/selftests/bpf/test_stack_map.c b/tools/testing/selftests/bpf/progs/test_stack_map.c similarity index 100% rename from tools/testing/selftests/bpf/test_stack_map.c rename to tools/testing/selftests/bpf/progs/test_stack_map.c diff --git a/tools/testing/selftests/bpf/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c similarity index 100% rename from tools/testing/selftests/bpf/test_stacktrace_build_id.c rename to tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c diff --git a/tools/testing/selftests/bpf/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c similarity index 100% rename from tools/testing/selftests/bpf/test_stacktrace_map.c rename to tools/testing/selftests/bpf/progs/test_stacktrace_map.c diff --git a/tools/testing/selftests/bpf/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c similarity index 100% rename from tools/testing/selftests/bpf/test_tcp_estats.c rename to tools/testing/selftests/bpf/progs/test_tcp_estats.c diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c similarity index 100% rename from tools/testing/selftests/bpf/test_tcpbpf_kern.c rename to tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c diff --git a/tools/testing/selftests/bpf/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c similarity index 100% rename from tools/testing/selftests/bpf/test_tcpnotify_kern.c rename to tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c diff --git a/tools/testing/selftests/bpf/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c similarity index 100% rename from tools/testing/selftests/bpf/test_tracepoint.c rename to tools/testing/selftests/bpf/progs/test_tracepoint.c diff --git a/tools/testing/selftests/bpf/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c similarity index 100% rename from tools/testing/selftests/bpf/test_tunnel_kern.c rename to tools/testing/selftests/bpf/progs/test_tunnel_kern.c diff --git a/tools/testing/selftests/bpf/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c similarity index 100% rename from tools/testing/selftests/bpf/test_xdp.c rename to tools/testing/selftests/bpf/progs/test_xdp.c diff --git a/tools/testing/selftests/bpf/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c similarity index 100% rename from tools/testing/selftests/bpf/test_xdp_meta.c rename to tools/testing/selftests/bpf/progs/test_xdp_meta.c diff --git a/tools/testing/selftests/bpf/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c similarity index 100% rename from tools/testing/selftests/bpf/test_xdp_noinline.c rename to tools/testing/selftests/bpf/progs/test_xdp_noinline.c diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c similarity index 100% rename from tools/testing/selftests/bpf/test_xdp_redirect.c rename to tools/testing/selftests/bpf/progs/test_xdp_redirect.c diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c similarity index 100% rename from tools/testing/selftests/bpf/test_xdp_vlan.c rename to tools/testing/selftests/bpf/progs/test_xdp_vlan.c diff --git a/tools/testing/selftests/bpf/xdp_dummy.c b/tools/testing/selftests/bpf/progs/xdp_dummy.c similarity index 100% rename from tools/testing/selftests/bpf/xdp_dummy.c rename to tools/testing/selftests/bpf/progs/xdp_dummy.c From 64e39ee2c84bd8eac1bd3ea370c4ada5163befac Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Mon, 11 Feb 2019 12:01:21 +0000 Subject: [PATCH 0972/1436] selftests: bpf: relax sub-register mode compilation criteria Sub-register mode compilation was enabled only when there are eBPF "v3" processor supports at both compilation time inside LLVM and runtime inside kernel. Given separation betwen build and test server could be often, this patch removes the runtime support criteria. Suggested-by: Alexei Starovoitov Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 575746e63544..c3edf47da05d 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -28,12 +28,11 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c))) TEST_GEN_FILES = $(BPF_OBJ_FILES) -# Also test sub-register code-gen if LLVM + kernel both has eBPF v3 processor -# support which is the first version to contain both ALU32 and JMP32 -# instructions. +# Also test sub-register code-gen if LLVM has eBPF v3 processor support which +# contains both ALU32 and JMP32 instructions. SUBREG_CODEGEN := $(shell echo "int cal(int a) { return a > 0; }" | \ $(CLANG) -target bpf -O2 -emit-llvm -S -x c - -o - | \ - $(LLC) -mattr=+alu32 -mcpu=probe 2>&1 | \ + $(LLC) -mattr=+alu32 -mcpu=v3 2>&1 | \ grep 'if w') ifneq ($(SUBREG_CODEGEN),) TEST_GEN_FILES += $(patsubst %.o,alu32/%.o, $(BPF_OBJ_FILES)) From e75913c93f7cd5f338ab373c34c93a655bd309cb Mon Sep 17 00:00:00 2001 From: Zhiqiang Liu Date: Mon, 11 Feb 2019 10:57:46 +0800 Subject: [PATCH 0973/1436] net: fix IPv6 prefix route residue Follow those steps: # ip addr add 2001:123::1/32 dev eth0 # ip addr add 2001:123:456::2/64 dev eth0 # ip addr del 2001:123::1/32 dev eth0 # ip addr del 2001:123:456::2/64 dev eth0 and then prefix route of 2001:123::1/32 will still exist. This is because ipv6_prefix_equal in check_cleanup_prefix_route func does not check whether two IPv6 addresses have the same prefix length. If the prefix of one address starts with another shorter address prefix, even though their prefix lengths are different, the return value of ipv6_prefix_equal is true. Here I add a check of whether two addresses have the same prefix to decide whether their prefixes are equal. Fixes: 5b84efecb7d9 ("ipv6 addrconf: don't cleanup prefix route for IFA_F_NOPREFIXROUTE") Signed-off-by: Zhiqiang Liu Reported-by: Wenhao Zhang Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 84c358804355..72ffd3d760ff 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1165,7 +1165,8 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires) list_for_each_entry(ifa, &idev->addr_list, if_list) { if (ifa == ifp) continue; - if (!ipv6_prefix_equal(&ifa->addr, &ifp->addr, + if (ifa->prefix_len != ifp->prefix_len || + !ipv6_prefix_equal(&ifa->addr, &ifp->addr, ifp->prefix_len)) continue; if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE)) From 3ceb745baa4c90eb34dee983053fcbd2d8825a20 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 10 Feb 2019 19:35:27 -0800 Subject: [PATCH 0974/1436] devlink: fix condition for compat device info We need the port to be both ethernet and have the rigth netdev, not one or the other. Fixes: ddb6e99e2db1 ("ethtool: add compat for devlink info") Signed-off-by: Jakub Kicinski Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 0de1edb65c24..61fab0dc0166 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -6393,7 +6393,7 @@ void devlink_compat_running_version(struct net_device *dev, list_for_each_entry(devlink, &devlink_list, list) { mutex_lock(&devlink->lock); list_for_each_entry(devlink_port, &devlink->port_list, list) { - if (devlink_port->type == DEVLINK_PORT_TYPE_ETH || + if (devlink_port->type == DEVLINK_PORT_TYPE_ETH && devlink_port->type_dev == dev) { __devlink_compat_running_version(devlink, buf, len); From 68750561dd5622307bbf0c77bc4f80e7b2dfce0c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 10 Feb 2019 19:35:28 -0800 Subject: [PATCH 0975/1436] devlink: don't allocate attrs on the stack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Number of devlink attributes has grown over 128, causing the following warning: ../net/core/devlink.c: In function ‘devlink_nl_cmd_region_read_dumpit’: ../net/core/devlink.c:3740:1: warning: the frame size of 1064 bytes is larger than 1024 bytes [-Wframe-larger-than=] } ^ Since the number of attributes is only going to grow allocate the array dynamically. Signed-off-by: Jakub Kicinski Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 61fab0dc0166..ec02459eea94 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3629,26 +3629,30 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { u64 ret_offset, start_offset, end_offset = 0; - struct nlattr *attrs[DEVLINK_ATTR_MAX + 1]; const struct genl_ops *ops = cb->data; struct devlink_region *region; struct nlattr *chunks_attr; const char *region_name; struct devlink *devlink; + struct nlattr **attrs; bool dump = true; void *hdr; int err; start_offset = *((u64 *)&cb->args[0]); + attrs = kmalloc_array(DEVLINK_ATTR_MAX + 1, sizeof(*attrs), GFP_KERNEL); + if (!attrs) + return -ENOMEM; + err = nlmsg_parse(cb->nlh, GENL_HDRLEN + devlink_nl_family.hdrsize, attrs, DEVLINK_ATTR_MAX, ops->policy, cb->extack); if (err) - goto out; + goto out_free; devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs); if (IS_ERR(devlink)) - goto out; + goto out_free; mutex_lock(&devlink_mutex); mutex_lock(&devlink->lock); @@ -3710,6 +3714,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, genlmsg_end(skb, hdr); mutex_unlock(&devlink->lock); mutex_unlock(&devlink_mutex); + kfree(attrs); return skb->len; @@ -3718,7 +3723,8 @@ nla_put_failure: out_unlock: mutex_unlock(&devlink->lock); mutex_unlock(&devlink_mutex); -out: +out_free: + kfree(attrs); return 0; } From 14fd1901e718138b22ae7cbd8995bfdeb4df578f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 10 Feb 2019 19:35:29 -0800 Subject: [PATCH 0976/1436] devlink: add a generic board.manufacture version name At Jiri's suggestion add a generic "board.manufacture" version identifier. Signed-off-by: Jakub Kicinski Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- Documentation/networking/devlink-info-versions.rst | 5 +++++ include/net/devlink.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/Documentation/networking/devlink-info-versions.rst b/Documentation/networking/devlink-info-versions.rst index 7d4ecf6b6f34..c79ad8593383 100644 --- a/Documentation/networking/devlink-info-versions.rst +++ b/Documentation/networking/devlink-info-versions.rst @@ -14,6 +14,11 @@ board.rev Board design revision. +board.manufacture +================= + +An identifier of the company or the facility which produced the part. + fw.mgmt ======= diff --git a/include/net/devlink.h b/include/net/devlink.h index 2b384a38911b..07660fe4c0e3 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -435,6 +435,8 @@ enum devlink_param_wol_types { #define DEVLINK_INFO_VERSION_GENERIC_BOARD_ID "board.id" /* Revision of board design */ #define DEVLINK_INFO_VERSION_GENERIC_BOARD_REV "board.rev" +/* Maker of the board */ +#define DEVLINK_INFO_VERSION_GENERIC_BOARD_MANUFACTURE "board.manufacture" /* Control processor FW version */ #define DEVLINK_INFO_VERSION_GENERIC_FW_MGMT "fw.mgmt" From 05fe4ab75cc42c17c934755d6b5e5d34f0540578 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 10 Feb 2019 19:35:30 -0800 Subject: [PATCH 0977/1436] nfp: devlink: use the generic manufacture identifier instead of vendor Vendor may sound ambiguous, let's rename the fab string to "board.manufacture" (which was just added as a generic identifier). Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index dddbb0575be9..bf4e124dbdd2 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -178,7 +178,7 @@ static const struct nfp_devlink_versions_simple { } nfp_devlink_versions_hwinfo[] = { { DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, "assembly.partno", }, { DEVLINK_INFO_VERSION_GENERIC_BOARD_REV, "assembly.revision", }, - { "board.vendor", /* fab */ "assembly.vendor", }, + { DEVLINK_INFO_VERSION_GENERIC_BOARD_MANUFACTURE, "assembly.vendor", }, { "board.model", /* code name */ "assembly.model", }, }; From 1f5cf1036cbe4df7f078312561b5fefb2594a3bd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 10 Feb 2019 19:35:31 -0800 Subject: [PATCH 0978/1436] nfp: devlink: include vendor/product info in serial number The manufacturing team requests we include vendor and product in the serial number field, as the serial number itself is not unique across manufacturing facilities and products. Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/netronome/nfp/nfp_devlink.c | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index bf4e124dbdd2..080a301f379e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -258,18 +258,33 @@ nfp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, struct netlink_ext_ack *extack) { struct nfp_pf *pf = devlink_priv(devlink); + const char *sn, *vendor, *part; struct nfp_nsp *nsp; char *buf = NULL; - const char *sn; int err; err = devlink_info_driver_name_put(req, "nfp"); if (err) return err; + vendor = nfp_hwinfo_lookup(pf->hwinfo, "assembly.vendor"); + part = nfp_hwinfo_lookup(pf->hwinfo, "assembly.partno"); sn = nfp_hwinfo_lookup(pf->hwinfo, "assembly.serial"); - if (sn) { - err = devlink_info_serial_number_put(req, sn); + if (vendor && part && sn) { + char *buf; + + buf = kmalloc(strlen(vendor) + strlen(part) + strlen(sn) + 1, + GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf[0] = '\0'; + strcat(buf, vendor); + strcat(buf, part); + strcat(buf, sn); + + err = devlink_info_serial_number_put(req, buf); + kfree(buf); if (err) return err; } From 91986ee166cf0816ae92668476ea7872d51b0c6e Mon Sep 17 00:00:00 2001 From: Tuong Lien Date: Mon, 11 Feb 2019 13:29:43 +0700 Subject: [PATCH 0979/1436] tipc: fix link session and re-establish issues When a link endpoint is re-created (e.g. after a node reboot or interface reset), the link session number is varied by random, the peer endpoint will be synced with this new session number before the link is re-established. However, there is a shortcoming in this mechanism that can lead to the link never re-established or faced with a failure then. It happens when the peer endpoint is ready in ESTABLISHING state, the 'peer_session' as well as the 'in_session' flag have been set, but suddenly this link endpoint leaves. When it comes back with a random session number, there are two situations possible: 1/ If the random session number is larger than (or equal to) the previous one, the peer endpoint will be updated with this new session upon receipt of a RESET_MSG from this endpoint, and the link can be re- established as normal. Otherwise, all the RESET_MSGs from this endpoint will be rejected by the peer. In turn, when this link endpoint receives one ACTIVATE_MSG from the peer, it will move to ESTABLISHED and start to send STATE_MSGs, but again these messages will be dropped by the peer due to wrong session. The peer link endpoint can still become ESTABLISHED after receiving a traffic message from this endpoint (e.g. a BCAST_PROTOCOL or NAME_DISTRIBUTOR), but since all the STATE_MSGs are invalid, the link will be forced down sooner or later! Even in case the random session number is larger than the previous one, it can be that the ACTIVATE_MSG from the peer arrives first, and this link endpoint moves quickly to ESTABLISHED without sending out any RESET_MSG yet. Consequently, the peer link will not be updated with the new session number, and the same link failure scenario as above will happen. 2/ Another situation can be that, the peer link endpoint was reset due to any reasons in the meantime, its link state was set to RESET from ESTABLISHING but still in session, i.e. the 'in_session' flag is not reset... Now, if the random session number from this endpoint is less than the previous one, all the RESET_MSGs from this endpoint will be rejected by the peer. In the other direction, when this link endpoint receives a RESET_MSG from the peer, it moves to ESTABLISHING and starts to send ACTIVATE_MSGs, but all these messages will be rejected by the peer too. As a result, the link cannot be re-established but gets stuck with this link endpoint in state ESTABLISHING and the peer in RESET! Solution: =========== This link endpoint should not go directly to ESTABLISHED when getting ACTIVATE_MSG from the peer which may belong to the old session if the link was re-created. To ensure the session to be correct before the link is re-established, the peer endpoint in ESTABLISHING state will send back the last session number in ACTIVATE_MSG for a verification at this endpoint. Then, if needed, a new and more appropriate session number will be regenerated to force a re-synch first. In addition, when a link in ESTABLISHING state is reset, its state will move to RESET according to the link FSM, along with resetting the 'in_session' flag (and the other data) as a normal link reset, it will also be deleted if requested. The solution is backward compatible. Acked-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: Tuong Lien Signed-off-by: David S. Miller --- net/tipc/link.c | 15 +++++++++++++++ net/tipc/msg.h | 22 ++++++++++++++++++++++ net/tipc/node.c | 11 ++++++----- 3 files changed, 43 insertions(+), 5 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 7c70034b1073..85ad5c0678d0 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1425,6 +1425,10 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, l->rcv_unacked = 0; } else { /* RESET_MSG or ACTIVATE_MSG */ + if (mtyp == ACTIVATE_MSG) { + msg_set_dest_session_valid(hdr, 1); + msg_set_dest_session(hdr, l->peer_session); + } msg_set_max_pkt(hdr, l->advertised_mtu); strcpy(data, l->if_name); msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME); @@ -1642,6 +1646,17 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); break; } + + /* If this endpoint was re-created while peer was ESTABLISHING + * it doesn't know current session number. Force re-synch. + */ + if (mtyp == ACTIVATE_MSG && msg_dest_session_valid(hdr) && + l->session != msg_dest_session(hdr)) { + if (less(l->session, msg_dest_session(hdr))) + l->session = msg_dest_session(hdr) + 1; + break; + } + /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ if (mtyp == RESET_MSG || !link_is_up(l)) rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index a0924956bb61..d7e4b8b93f9d 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -360,6 +360,28 @@ static inline void msg_set_bcast_ack(struct tipc_msg *m, u16 n) msg_set_bits(m, 1, 0, 0xffff, n); } +/* Note: reusing bits in word 1 for ACTIVATE_MSG only, to re-synch + * link peer session number + */ +static inline bool msg_dest_session_valid(struct tipc_msg *m) +{ + return msg_bits(m, 1, 16, 0x1); +} + +static inline void msg_set_dest_session_valid(struct tipc_msg *m, bool valid) +{ + msg_set_bits(m, 1, 16, 0x1, valid); +} + +static inline u16 msg_dest_session(struct tipc_msg *m) +{ + return msg_bits(m, 1, 0, 0xffff); +} + +static inline void msg_set_dest_session(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 1, 0, 0xffff, n); +} /* * Word 2 diff --git a/net/tipc/node.c b/net/tipc/node.c index db2a6c3e0be9..2dc4919ab23c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -830,15 +830,16 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) tipc_node_write_lock(n); if (!tipc_link_is_establishing(l)) { __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); - if (delete) { - kfree(l); - le->link = NULL; - n->link_cnt--; - } } else { /* Defuse pending tipc_node_link_up() */ + tipc_link_reset(l); tipc_link_fsm_evt(l, LINK_RESET_EVT); } + if (delete) { + kfree(l); + le->link = NULL; + n->link_cnt--; + } trace_tipc_node_link_down(n, true, "node link down or deleted!"); tipc_node_write_unlock(n); if (delete) From bd37fdf5243ccf324e3cc6fe92bf88d9a2fc1b10 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 10 Feb 2019 22:32:42 -0800 Subject: [PATCH 0980/1436] Documentation: fix some freescale dpio-driver.rst warnings Fix markup warnings for one list by using correct list syntax. Fix markup warnings for another list by using blank lines before the list. Documentation/networking/device_drivers/freescale/dpaa2/dpio-driver.rst:30: WARNING: Unexpected indentation. Documentation/networking/device_drivers/freescale/dpaa2/dpio-driver.rst:143: WARNING: Unexpected indentation. Signed-off-by: Randy Dunlap Cc: Stuart Yoder Cc: Laurentiu Tudor Cc: Ioana Radulescu Cc: netdev@vger.kernel.org Cc: Madalin Bucur Signed-off-by: David S. Miller --- .../device_drivers/freescale/dpaa2/dpio-driver.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Documentation/networking/device_drivers/freescale/dpaa2/dpio-driver.rst b/Documentation/networking/device_drivers/freescale/dpaa2/dpio-driver.rst index a188466b6698..5045df990a4c 100644 --- a/Documentation/networking/device_drivers/freescale/dpaa2/dpio-driver.rst +++ b/Documentation/networking/device_drivers/freescale/dpaa2/dpio-driver.rst @@ -27,11 +27,12 @@ Driver Overview The DPIO driver is bound to DPIO objects discovered on the fsl-mc bus and provides services that: - A) allow other drivers, such as the Ethernet driver, to enqueue and dequeue + + A. allow other drivers, such as the Ethernet driver, to enqueue and dequeue frames for their respective objects - B) allow drivers to register callbacks for data availability notifications + B. allow drivers to register callbacks for data availability notifications when data becomes available on a queue or channel - C) allow drivers to manage hardware buffer pools + C. allow drivers to manage hardware buffer pools The Linux DPIO driver consists of 3 primary components-- DPIO object driver-- fsl-mc driver that manages the DPIO object @@ -140,11 +141,10 @@ QBman portal interface (qbman-portal.c) The qbman-portal component provides APIs to do the low level hardware bit twiddling for operations such as: - -initializing Qman software portals - -building and sending portal commands - - -portal interrupt configuration and processing + - initializing Qman software portals + - building and sending portal commands + - portal interrupt configuration and processing The qbman-portal APIs are not public to other drivers, and are only used by dpio-service. From 6663cf821c133b36dee65fb9eb2d976abc706512 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Mon, 11 Feb 2019 09:52:59 +0200 Subject: [PATCH 0981/1436] flow_offload: Fix flow action infrastructure Implementation of macro "flow_action_for_each" introduced in commit e3ab786b42535 ("flow_offload: add flow action infrastructure") and used in commit 738678817573c ("drivers: net: use flow action infrastructure") iterated the first item twice and did not reach the last one. Fix it. Fixes: e3ab786b42535 ("flow_offload: add flow action infrastructure") Fixes: 738678817573c ("drivers: net: use flow action infrastructure") Signed-off-by: Eli Britstein Reviewed-by: Roi Dayan Acked-by: Jiri Pirko Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/net/flow_offload.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 23166caa0da5..a307ccb18015 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -171,7 +171,7 @@ static inline bool flow_action_has_entries(const struct flow_action *action) } #define flow_action_for_each(__i, __act, __actions) \ - for (__i = 0, __act = &(__actions)->entries[0]; __i < (__actions)->num_entries; __act = &(__actions)->entries[__i++]) + for (__i = 0, __act = &(__actions)->entries[0]; __i < (__actions)->num_entries; __act = &(__actions)->entries[++__i]) struct flow_rule { struct flow_match match; From 4726bcf30fad37cc555cd9dcd6c73f2b2668c879 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 11 Feb 2019 09:23:50 -0700 Subject: [PATCH 0982/1436] nvme-pci: add missing unlock for reset error The reset work holds a mutex to prevent races with removal modifying the same resources, but was unlocking only on success. Unlock on failure too. Fixes: 5c959d73dba64 ("nvme-pci: fix rapid add remove sequence") Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 022ea1ee63f8..7fee665ec45e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2560,15 +2560,15 @@ static void nvme_reset_work(struct work_struct *work) mutex_lock(&dev->shutdown_lock); result = nvme_pci_enable(dev); if (result) - goto out; + goto out_unlock; result = nvme_pci_configure_admin_queue(dev); if (result) - goto out; + goto out_unlock; result = nvme_alloc_admin_tags(dev); if (result) - goto out; + goto out_unlock; /* * Limit the max command size to prevent iod->sg allocations going @@ -2651,6 +2651,8 @@ static void nvme_reset_work(struct work_struct *work) nvme_start_ctrl(&dev->ctrl); return; + out_unlock: + mutex_unlock(&dev->shutdown_lock); out: nvme_remove_dead_ctrl(dev, result); } From 13f5251fd17088170c18844534682d9cab5ff5aa Mon Sep 17 00:00:00 2001 From: Chieh-Min Wang Date: Tue, 12 Feb 2019 00:59:55 +0100 Subject: [PATCH 0983/1436] netfilter: conntrack: fix cloned unconfirmed skb->_nfct race in __nf_conntrack_confirm For bridge(br_flood) or broadcast/multicast packets, they could clone skb with unconfirmed conntrack which break the rule that unconfirmed skb->_nfct is never shared. With nfqueue running on my system, the race can be easily reproduced with following warning calltrace: [13257.707525] CPU: 0 PID: 12132 Comm: main Tainted: P W 4.4.60 #7744 [13257.707568] Hardware name: Qualcomm (Flattened Device Tree) [13257.714700] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [13257.720253] [] (show_stack) from [] (dump_stack+0x94/0xa8) [13257.728240] [] (dump_stack) from [] (warn_slowpath_common+0x94/0xb0) [13257.735268] [] (warn_slowpath_common) from [] (warn_slowpath_null+0x1c/0x24) [13257.743519] [] (warn_slowpath_null) from [] (__nf_conntrack_confirm+0xa8/0x618) [13257.752284] [] (__nf_conntrack_confirm) from [] (ipv4_confirm+0xb8/0xfc) [13257.761049] [] (ipv4_confirm) from [] (nf_iterate+0x48/0xa8) [13257.769725] [] (nf_iterate) from [] (nf_hook_slow+0x30/0xb0) [13257.777108] [] (nf_hook_slow) from [] (br_nf_post_routing+0x274/0x31c) [13257.784486] [] (br_nf_post_routing) from [] (nf_iterate+0x48/0xa8) [13257.792556] [] (nf_iterate) from [] (nf_hook_slow+0x30/0xb0) [13257.800458] [] (nf_hook_slow) from [] (br_forward_finish+0x94/0xa4) [13257.808010] [] (br_forward_finish) from [] (br_nf_forward_finish+0x150/0x1ac) [13257.815736] [] (br_nf_forward_finish) from [] (nf_reinject+0x108/0x170) [13257.824762] [] (nf_reinject) from [] (nfqnl_recv_verdict+0x3d8/0x420) [13257.832924] [] (nfqnl_recv_verdict) from [] (nfnetlink_rcv_msg+0x158/0x248) [13257.841256] [] (nfnetlink_rcv_msg) from [] (netlink_rcv_skb+0x54/0xb0) [13257.849762] [] (netlink_rcv_skb) from [] (netlink_unicast+0x148/0x23c) [13257.858093] [] (netlink_unicast) from [] (netlink_sendmsg+0x2ec/0x368) [13257.866348] [] (netlink_sendmsg) from [] (sock_sendmsg+0x34/0x44) [13257.874590] [] (sock_sendmsg) from [] (___sys_sendmsg+0x1ec/0x200) [13257.882489] [] (___sys_sendmsg) from [] (__sys_sendmsg+0x3c/0x64) [13257.890300] [] (__sys_sendmsg) from [] (ret_fast_syscall+0x0/0x34) The original code just triggered the warning but do nothing. It will caused the shared conntrack moves to the dying list and the packet be droppped (nf_ct_resolve_clash returns NF_DROP for dying conntrack). - Reproduce steps: +----------------------------+ | br0(bridge) | | | +-+---------+---------+------+ | eth0| | eth1| | eth2| | | | | | | +--+--+ +--+--+ +---+-+ | | | | | | +--+-+ +-+--+ +--+-+ | PC1| | PC2| | PC3| +----+ +----+ +----+ iptables -A FORWARD -m mark --mark 0x1000000/0x1000000 -j NFQUEUE --queue-num 100 --queue-bypass ps: Our nfq userspace program will set mark on packets whose connection has already been processed. PC1 sends broadcast packets simulated by hping3: hping3 --rand-source --udp 192.168.1.255 -i u100 - Broadcast racing flow chart is as follow: br_handle_frame BR_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, br_handle_frame_finish) // skb->_nfct (unconfirmed conntrack) is constructed at PRE_ROUTING stage br_handle_frame_finish // check if this packet is broadcast br_flood_forward br_flood list_for_each_entry_rcu(p, &br->port_list, list) // iterate through each port maybe_deliver deliver_clone skb = skb_clone(skb) __br_forward BR_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD,...) // queue in our nfq and received by our userspace program // goto __nf_conntrack_confirm with process context on CPU 1 br_pass_frame_up BR_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,...) // goto __nf_conntrack_confirm with softirq context on CPU 0 Because conntrack confirm can happen at both INPUT and POSTROUTING stage. So with NFQUEUE running, skb->_nfct with the same unconfirmed conntrack could race on different core. This patch fixes a repeating kernel splat, now it is only displayed once. Signed-off-by: Chieh-Min Wang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 815956ac5a76..85de2a7b0ede 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -936,10 +936,18 @@ __nf_conntrack_confirm(struct sk_buff *skb) * REJECT will give spurious warnings here. */ - /* No external references means no one else could have - * confirmed us. + /* Another skb with the same unconfirmed conntrack may + * win the race. This may happen for bridge(br_flood) + * or broadcast/multicast packets do skb_clone with + * unconfirmed conntrack. */ - WARN_ON(nf_ct_is_confirmed(ct)); + if (unlikely(nf_ct_is_confirmed(ct))) { + WARN_ON_ONCE(1); + nf_conntrack_double_unlock(hash, reply_hash); + local_bh_enable(); + return NF_DROP; + } + pr_debug("Confirming conntrack %p\n", ct); /* We have to check the DYING flag after unlink to prevent * a race against nf_ct_get_next_corpse() possibly called from From 098e13f5b21d3398065fce8780f07a3ef62f4812 Mon Sep 17 00:00:00 2001 From: Andrea Claudi Date: Mon, 11 Feb 2019 16:14:39 +0100 Subject: [PATCH 0984/1436] ipvs: fix dependency on nf_defrag_ipv6 ipvs relies on nf_defrag_ipv6 module to manage IPv6 fragmentation, but lacks proper Kconfig dependencies and does not explicitly request defrag features. As a result, if netfilter hooks are not loaded, when IPv6 fragmented packet are handled by ipvs only the first fragment makes through. Fix it properly declaring the dependency on Kconfig and registering netfilter hooks on ip_vs_add_service() and ip_vs_new_dest(). Reported-by: Li Shuang Signed-off-by: Andrea Claudi Acked-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/Kconfig | 1 + net/netfilter/ipvs/ip_vs_core.c | 10 ++++------ net/netfilter/ipvs/ip_vs_ctl.c | 10 ++++++++++ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index cad48d07c818..8401cefd9f65 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -29,6 +29,7 @@ config IP_VS_IPV6 bool "IPv6 support for IPVS" depends on IPV6 = y || IP_VS = IPV6 select IP6_NF_IPTABLES + select NF_DEFRAG_IPV6 ---help--- Add IPv6 support to IPVS. diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index fe9abf3cc10a..235205c93e14 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1536,14 +1536,12 @@ ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, /* sorry, all this trouble for a no-hit :) */ IP_VS_DBG_PKT(12, af, pp, skb, iph->off, "ip_vs_in: packet continues traversal as normal"); - if (iph->fragoffs) { - /* Fragment that couldn't be mapped to a conn entry - * is missing module nf_defrag_ipv6 - */ - IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); + + /* Fragment couldn't be mapped to a conn entry */ + if (iph->fragoffs) IP_VS_DBG_PKT(7, af, pp, skb, iph->off, "unhandled fragment"); - } + *verdict = NF_ACCEPT; return 0; } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 7d6318664eb2..86afacb07e5f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -43,6 +43,7 @@ #ifdef CONFIG_IP_VS_IPV6 #include #include +#include #endif #include #include @@ -895,6 +896,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, { struct ip_vs_dest *dest; unsigned int atype, i; + int ret = 0; EnterFunction(2); @@ -905,6 +907,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, atype & IPV6_ADDR_LINKLOCAL) && !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6)) return -EINVAL; + + ret = nf_defrag_ipv6_enable(svc->ipvs->net); + if (ret) + return ret; } else #endif { @@ -1228,6 +1234,10 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, ret = -EINVAL; goto out_err; } + + ret = nf_defrag_ipv6_enable(ipvs->net); + if (ret) + goto out_err; } #endif From d8b879bb2ed25e641ab99a5144843786922e2a64 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 5 Feb 2019 13:03:53 +0000 Subject: [PATCH 0985/1436] drm/i915/pmu: Fix enable count array size and bounds checking Enable count array is supposed to have one counter for each possible engine sampler. As such, array sizing and bounds checking is not correct and would blow up the asserts if more samplers were added. No ill-effect in the current code base but lets fix it for correctness. At the same time tidy the assert for readability and robustness. v2: * One check per assert. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Fixes: b46a33e271ed ("drm/i915/pmu: Expose a PMU interface for perf queries") Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190205130353.21105-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit 26a11deea685b41a43edb513194718aa1f461c9a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_pmu.c | 22 +++++++++++++++------- drivers/gpu/drm/i915/i915_pmu.h | 2 ++ drivers/gpu/drm/i915/intel_ringbuffer.h | 9 +++++---- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index d6c8f8fdfda5..017fc602a10e 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -594,7 +594,8 @@ static void i915_pmu_enable(struct perf_event *event) * Update the bitmask of enabled events and increment * the event reference counter. */ - GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); + BUILD_BUG_ON(ARRAY_SIZE(i915->pmu.enable_count) != I915_PMU_MASK_BITS); + GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count)); GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0); i915->pmu.enable |= BIT_ULL(bit); i915->pmu.enable_count[bit]++; @@ -615,11 +616,16 @@ static void i915_pmu_enable(struct perf_event *event) engine = intel_engine_lookup_user(i915, engine_event_class(event), engine_event_instance(event)); - GEM_BUG_ON(!engine); - engine->pmu.enable |= BIT(sample); - GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); + BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) != + I915_ENGINE_SAMPLE_COUNT); + BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) != + I915_ENGINE_SAMPLE_COUNT); + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count)); + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample)); GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0); + + engine->pmu.enable |= BIT(sample); engine->pmu.enable_count[sample]++; } @@ -649,9 +655,11 @@ static void i915_pmu_disable(struct perf_event *event) engine = intel_engine_lookup_user(i915, engine_event_class(event), engine_event_instance(event)); - GEM_BUG_ON(!engine); - GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); + + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count)); + GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample)); GEM_BUG_ON(engine->pmu.enable_count[sample] == 0); + /* * Decrement the reference count and clear the enabled * bitmask when the last listener on an event goes away. @@ -660,7 +668,7 @@ static void i915_pmu_disable(struct perf_event *event) engine->pmu.enable &= ~BIT(sample); } - GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); + GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count)); GEM_BUG_ON(i915->pmu.enable_count[bit] == 0); /* * Decrement the reference count and clear the enabled diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 7f164ca3db12..b3728c5f13e7 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -31,6 +31,8 @@ enum { ((1 << I915_PMU_SAMPLE_BITS) + \ (I915_PMU_LAST + 1 - __I915_PMU_OTHER(0))) +#define I915_ENGINE_SAMPLE_COUNT (I915_SAMPLE_SEMA + 1) + struct i915_pmu_sample { u64 cur; }; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 72edaa7ff411..a1a7cc29fdd1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -415,16 +415,17 @@ struct intel_engine_cs { /** * @enable_count: Reference count for the enabled samplers. * - * Index number corresponds to the bit number from @enable. + * Index number corresponds to @enum drm_i915_pmu_engine_sample. */ - unsigned int enable_count[I915_PMU_SAMPLE_BITS]; + unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT]; /** * @sample: Counter values for sampling events. * * Our internal timer stores the current counters in this field. + * + * Index number corresponds to @enum drm_i915_pmu_engine_sample. */ -#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1) - struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX]; + struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT]; } pmu; /* From e8a8fedd57fdcebf0e4f24ef0fc7e29323df8e66 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 29 Jan 2019 14:09:59 -0500 Subject: [PATCH 0986/1436] drm/i915: Block fbdev HPD processing during suspend When resuming, we check whether or not any previously connected MST topologies are still present and if so, attempt to resume them. If this fails, we disable said MST topologies and fire off a hotplug event so that userspace knows to reprobe. However, sending a hotplug event involves calling drm_fb_helper_hotplug_event(), which in turn results in fbcon doing a connector reprobe in the caller's thread - something we can't do at the point in which i915 calls drm_dp_mst_topology_mgr_resume() since hotplugging hasn't been fully initialized yet. This currently causes some rather subtle but fatal issues. For example, on my T480s the laptop dock connected to it usually disappears during a suspend cycle, and comes back up a short while after the system has been resumed. This guarantees pretty much every suspend and resume cycle, drm_dp_mst_topology_mgr_set_mst(mgr, false); will be caused and in turn, a connector hotplug will occur. Now it's Rute Goldberg time: when the connector hotplug occurs, i915 reprobes /all/ of the connectors, including eDP. However, eDP probing requires that we power on the panel VDD which in turn, grabs a wakeref to the appropriate power domain on the GPU (on my T480s, this is the PORT_DDI_A_IO domain). This is where things start breaking, since this all happens before intel_power_domains_enable() is called we end up leaking the wakeref that was acquired and never releasing it later. Come next suspend/resume cycle, this causes us to fail to shut down the GPU properly, which causes it not to resume properly and die a horrible complicated death. (as a note: this only happens when there's both an eDP panel and MST topology connected which is removed mid-suspend. One or the other seems to always be OK). We could try to fix the VDD wakeref leak, but this doesn't seem like it's worth it at all since we aren't able to handle hotplug detection while resuming anyway. So, let's go with a more robust solution inspired by nouveau: block fbdev from handling hotplug events until we resume fbdev. This allows us to still send sysfs hotplug events to be handled later by user space while we're resuming, while also preventing us from actually processing any hotplug events we receive until it's safe. This fixes the wakeref leak observed on the T480s and as such, also fixes suspend/resume with MST topologies connected on this machine. Changes since v2: * Don't call drm_fb_helper_hotplug_event() under lock, do it after lock (Chris Wilson) * Don't call drm_fb_helper_hotplug_event() in intel_fbdev_output_poll_changed() under lock (Chris Wilson) * Always set ifbdev->hpd_waiting (Chris Wilson) Signed-off-by: Lyude Paul Fixes: 0e32b39ceed6 ("drm/i915: add DP 1.2 MST support (v0.7)") Cc: Todd Previte Cc: Dave Airlie Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Imre Deak Cc: intel-gfx@lists.freedesktop.org Cc: # v3.17+ Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190129191001.442-2-lyude@redhat.com (cherry picked from commit fe5ec65668cdaa4348631d8ce1766eed43b33c10) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_drv.h | 10 +++++++++ drivers/gpu/drm/i915/intel_fbdev.c | 33 +++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index f94a04b4ad87..e9ddeaf05a14 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -209,6 +209,16 @@ struct intel_fbdev { unsigned long vma_flags; async_cookie_t cookie; int preferred_bpp; + + /* Whether or not fbdev hpd processing is temporarily suspended */ + bool hpd_suspended : 1; + /* Set when a hotplug was received while HPD processing was + * suspended + */ + bool hpd_waiting : 1; + + /* Protects hpd_suspended */ + struct mutex hpd_lock; }; struct intel_encoder { diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index fb5bb5b32a60..7f365ac0b549 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -679,6 +679,7 @@ int intel_fbdev_init(struct drm_device *dev) if (ifbdev == NULL) return -ENOMEM; + mutex_init(&ifbdev->hpd_lock); drm_fb_helper_prepare(dev, &ifbdev->helper, &intel_fb_helper_funcs); if (!intel_fbdev_init_bios(dev, ifbdev)) @@ -752,6 +753,26 @@ void intel_fbdev_fini(struct drm_i915_private *dev_priv) intel_fbdev_destroy(ifbdev); } +/* Suspends/resumes fbdev processing of incoming HPD events. When resuming HPD + * processing, fbdev will perform a full connector reprobe if a hotplug event + * was received while HPD was suspended. + */ +static void intel_fbdev_hpd_set_suspend(struct intel_fbdev *ifbdev, int state) +{ + bool send_hpd = false; + + mutex_lock(&ifbdev->hpd_lock); + ifbdev->hpd_suspended = state == FBINFO_STATE_SUSPENDED; + send_hpd = !ifbdev->hpd_suspended && ifbdev->hpd_waiting; + ifbdev->hpd_waiting = false; + mutex_unlock(&ifbdev->hpd_lock); + + if (send_hpd) { + DRM_DEBUG_KMS("Handling delayed fbcon HPD event\n"); + drm_fb_helper_hotplug_event(&ifbdev->helper); + } +} + void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -773,6 +794,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous */ if (state != FBINFO_STATE_RUNNING) flush_work(&dev_priv->fbdev_suspend_work); + console_lock(); } else { /* @@ -800,17 +822,26 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous drm_fb_helper_set_suspend(&ifbdev->helper, state); console_unlock(); + + intel_fbdev_hpd_set_suspend(ifbdev, state); } void intel_fbdev_output_poll_changed(struct drm_device *dev) { struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; + bool send_hpd; if (!ifbdev) return; intel_fbdev_sync(ifbdev); - if (ifbdev->vma || ifbdev->helper.deferred_setup) + + mutex_lock(&ifbdev->hpd_lock); + send_hpd = !ifbdev->hpd_suspended; + ifbdev->hpd_waiting = true; + mutex_unlock(&ifbdev->hpd_lock); + + if (send_hpd && (ifbdev->vma || ifbdev->helper.deferred_setup)) drm_fb_helper_hotplug_event(&ifbdev->helper); } From 2e7bd10e05afb866b5fb13eda25095c35d7a27cc Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Thu, 7 Feb 2019 10:54:53 +0200 Subject: [PATCH 0987/1436] drm/i915: Prevent a race during I915_GEM_MMAP ioctl with WC set Make sure the underlying VMA in the process address space is the same as it was during vm_mmap to avoid applying WC to wrong VMA. A more long-term solution would be to have vm_mmap_locked variant in linux/mmap.h for when caller wants to hold mmap_sem for an extended duration. v2: - Refactor the compare function Fixes: 1816f9236303 ("drm/i915: Support creation of unbound wc user mappings for objects") Reported-by: Adam Zabrocki Suggested-by: Linus Torvalds Signed-off-by: Joonas Lahtinen Cc: # v4.0+ Cc: Akash Goel Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Adam Zabrocki Reviewed-by: Chris Wilson Reviewed-by: Tvrtko Ursulin #v1 Link: https://patchwork.freedesktop.org/patch/msgid/20190207085454.10598-1-joonas.lahtinen@linux.intel.com (cherry picked from commit 5c4604e757ba9b193b09768d75a7d2105a5b883f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 216f52b744a6..c882ea94172c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1824,6 +1824,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, return 0; } +static inline bool +__vma_matches(struct vm_area_struct *vma, struct file *filp, + unsigned long addr, unsigned long size) +{ + if (vma->vm_file != filp) + return false; + + return vma->vm_start == addr && (vma->vm_end - vma->vm_start) == size; +} + /** * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address * it is mapped to. @@ -1882,7 +1892,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, return -EINTR; } vma = find_vma(mm, addr); - if (vma) + if (vma && __vma_matches(vma, obj->base.filp, addr, args->size)) vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); else From fc89a38d99d4b1b33ca5b0e2329f5ddea02ecfb5 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 8 Feb 2019 20:42:52 +0200 Subject: [PATCH 0988/1436] drm/i915/opregion: fix version check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The u32 version field encodes major, minor, revision and reserved. We've basically been checking for any non-zero version. Add opregion version logging while at it. v2: Fix the fix of the version check Fixes: 04ebaadb9f2d ("drm/i915/opregion: handle VBT sizes bigger than 6 KB") Cc: Ville Syrjälä Cc: Imre Deak Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190208184254.24123-1-jani.nikula@intel.com (cherry picked from commit 98fdaaca9537b997062f1abc0aa87c61b50ce40a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_opregion.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index b8f106d9ecf8..3035b402973f 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -55,7 +55,12 @@ struct opregion_header { u8 signature[16]; u32 size; - u32 opregion_ver; + struct { + u8 rsvd; + u8 revision; + u8 minor; + u8 major; + } __packed over; u8 bios_ver[32]; u8 vbios_ver[16]; u8 driver_ver[16]; @@ -925,6 +930,11 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) opregion->header = base; opregion->lid_state = base + ACPI_CLID; + DRM_DEBUG_DRIVER("ACPI OpRegion version %u.%u.%u\n", + opregion->header->over.major, + opregion->header->over.minor, + opregion->header->over.revision); + mboxes = opregion->header->mboxes; if (mboxes & MBOX_ACPI) { DRM_DEBUG_DRIVER("Public ACPI methods supported\n"); @@ -953,7 +963,7 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) if (dmi_check_system(intel_no_opregion_vbt)) goto out; - if (opregion->header->opregion_ver >= 2 && opregion->asle && + if (opregion->header->over.major >= 2 && opregion->asle && opregion->asle->rvda && opregion->asle->rvds) { opregion->rvda = memremap(opregion->asle->rvda, opregion->asle->rvds, From 16eb0f34cdf4cf04cd92762c7a79f98aa51e053f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 8 Feb 2019 20:42:53 +0200 Subject: [PATCH 0989/1436] drm/i915/opregion: rvda is relative from opregion base in opregion 2.1+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting from opregion version 2.1 (roughly corresponding to ICL+) the RVDA field is relative from the beginning of opregion, not absolute address. Fix the error path while at it. v2: Make relative vs. absolute conditional on the opregion version, bumped for the purpose. Turned out there are machines relying on absolute RVDA in the wild. v3: Fix the version checks Fixes: 04ebaadb9f2d ("drm/i915/opregion: handle VBT sizes bigger than 6 KB") Cc: Ville Syrjälä Cc: Imre Deak Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190208184254.24123-2-jani.nikula@intel.com (cherry picked from commit a0f52c3d357af218a9c1f7cd906ab70426176a1a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_opregion.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index 3035b402973f..3ac20153705a 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -124,7 +124,8 @@ struct opregion_asle { u64 fdss; u32 fdsp; u32 stat; - u64 rvda; /* Physical address of raw vbt data */ + u64 rvda; /* Physical (2.0) or relative from opregion (2.1+) + * address of raw VBT data. */ u32 rvds; /* Size of raw vbt data */ u8 rsvd[58]; } __packed; @@ -965,9 +966,24 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) if (opregion->header->over.major >= 2 && opregion->asle && opregion->asle->rvda && opregion->asle->rvds) { - opregion->rvda = memremap(opregion->asle->rvda, - opregion->asle->rvds, + resource_size_t rvda = opregion->asle->rvda; + + /* + * opregion 2.0: rvda is the physical VBT address. + * + * opregion 2.1+: rvda is unsigned, relative offset from + * opregion base, and should never point within opregion. + */ + if (opregion->header->over.major > 2 || + opregion->header->over.minor >= 1) { + WARN_ON(rvda < OPREGION_SIZE); + + rvda += asls; + } + + opregion->rvda = memremap(rvda, opregion->asle->rvds, MEMREMAP_WB); + vbt = opregion->rvda; vbt_size = opregion->asle->rvds; if (intel_bios_is_valid_vbt(vbt, vbt_size)) { @@ -977,6 +993,8 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) goto out; } else { DRM_DEBUG_KMS("Invalid VBT in ACPI OpRegion (RVDA)\n"); + memunmap(opregion->rvda); + opregion->rvda = NULL; } } From 6b1971c694975e49af302229202c0043568b1791 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 7 Feb 2019 11:42:14 +0100 Subject: [PATCH 0990/1436] x86/kvm/nVMX: read from MSR_IA32_VMX_PROCBASED_CTLS2 only when it is available SDM says MSR_IA32_VMX_PROCBASED_CTLS2 is only available "If (CPUID.01H:ECX.[5] && IA32_VMX_PROCBASED_CTLS[63])". It was found that some old cpus (namely "Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz (family: 0x6, model: 0xf, stepping: 0x6") don't have it. Add the missing check. Reported-by: Zdenek Kaspar Tested-by: Zdenek Kaspar Signed-off-by: Vitaly Kuznetsov Reviewed-by: Jim Mattson Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index d8ea4ebd79e7..8b45205b4e1d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5557,9 +5557,11 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, * secondary cpu-based controls. Do not include those that * depend on CPUID bits, they are added later by vmx_cpuid_update. */ - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, - msrs->secondary_ctls_low, - msrs->secondary_ctls_high); + if (msrs->procbased_ctls_high & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, + msrs->secondary_ctls_low, + msrs->secondary_ctls_high); + msrs->secondary_ctls_low = 0; msrs->secondary_ctls_high &= SECONDARY_EXEC_DESC | From 0ac569bf6a7983c0c5747d6df8db9dc05bc92b6c Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 5 Feb 2019 16:37:40 +0100 Subject: [PATCH 0991/1436] ARM: 8834/1: Fix: kprobes: optimized kprobes illegal instruction commit e46daee53bb5 ("ARM: 8806/1: kprobes: Fix false positive with FORTIFY_SOURCE") introduced a regression in optimized kprobes. It triggers "invalid instruction" oopses when using kprobes instrumentation through lttng and perf. This commit was introduced in kernel v4.20, and has been backported to stable kernels 4.19 and 4.14. This crash was also reported by Hongzhi Song on the redhat bugzilla where the patch was originally introduced. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1639397 Link: https://bugs.lttng.org/issues/1174 Link: https://lore.kernel.org/lkml/342740659.2887.1549307721609.JavaMail.zimbra@efficios.com Fixes: e46daee53bb5 ("ARM: 8806/1: kprobes: Fix false positive with FORTIFY_SOURCE") Signed-off-by: Mathieu Desnoyers Reported-by: Robert Berger Tested-by: Robert Berger Acked-by: Kees Cook Cc: Robert Berger Cc: Masami Hiramatsu Cc: William Cohen Cc: Laura Abbott Cc: Kees Cook Cc: # v4.14+ Cc: linux-arm-kernel@lists.infradead.org Cc: patches@armlinux.org.uk Signed-off-by: Russell King --- arch/arm/probes/kprobes/opt-arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index 2c118a6ab358..0dc23fc227ed 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -247,7 +247,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *or } /* Copy arch-dep-instance from template. */ - memcpy(code, (unsigned char *)optprobe_template_entry, + memcpy(code, (unsigned long *)&optprobe_template_entry, TMPL_END_IDX * sizeof(kprobe_opcode_t)); /* Adjust buffer according to instruction. */ From fc67e6f120a388b611d94cc40baf99a5cc56b283 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 6 Feb 2019 18:43:24 +0100 Subject: [PATCH 0992/1436] ARM: 8835/1: dma-mapping: Clear DMA ops on teardown Installing the appropriate non-IOMMU DMA ops in arm_iommu_detch_device() serves the case where IOMMU-aware drivers choose to control their own mapping but still make DMA API calls, however it also affects the case when the arch code itself tears down the mapping upon driver unbinding, where the ops now get left in place and can inhibit arch_setup_dma_ops() on subsequent re-probe attempts. Fix the latter case by making sure that arch_teardown_dma_ops() cleans up whenever the ops were automatically installed by its counterpart. Reported-by: Tobias Jakobi Reported-by: Marek Szyprowski Fixes: 1874619a7df4 "ARM: dma-mapping: Set proper DMA ops in arm_iommu_detach_device()" Tested-by: Tobias Jakobi Tested-by: Thierry Reding Signed-off-by: Robin Murphy Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index f1e2922e447c..1e3e08a1c456 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -2390,4 +2390,6 @@ void arch_teardown_dma_ops(struct device *dev) return; arm_teardown_iommu_dma_ops(dev); + /* Let arch_setup_dma_ops() start again from scratch upon re-probe */ + set_dma_ops(dev, NULL); } From 1e405c1a3f667bf152905127b94e9c8f454a343e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 12 Feb 2019 08:51:14 +0100 Subject: [PATCH 0993/1436] xsk: do not remove umem from netdevice on fall-back to copy-mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c9b47cc1fabc ("xsk: fix bug when trying to use both copy and zero-copy on one queue id") stores the umem into the netdev._rx struct. However, the patch incorrectly removed the umem from the netdev._rx struct when user-space passed "best-effort" mode (i.e. select the fastest possible option available), and zero-copy mode was not available. This commit fixes that. Fixes: c9b47cc1fabc ("xsk: fix bug when trying to use both copy and zero-copy on one queue id") Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xdp_umem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 597866e7c441..37e1fe180769 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -125,9 +125,10 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, return 0; err_unreg_umem: - xdp_clear_umem_at_qid(dev, queue_id); if (!force_zc) err = 0; /* fallback to copy mode */ + if (err) + xdp_clear_umem_at_qid(dev, queue_id); out_rtnl_unlock: rtnl_unlock(); return err; From ebbed0f46ed9d3ae23291d67cd52d18abb8501bc Mon Sep 17 00:00:00 2001 From: Prashant Bhole Date: Tue, 12 Feb 2019 10:25:12 +0900 Subject: [PATCH 0994/1436] tools: bpftool: doc, add text about feature-subcommand This patch adds missing information about feature-subcommand in bpftool.rst Signed-off-by: Prashant Bhole Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/bpftool.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 27153bb816ac..4f2188845dd8 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -16,7 +16,7 @@ SYNOPSIS **bpftool** **version** - *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** } + *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** } *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** } | { **-j** | **--json** } [{ **-p** | **--pretty** }] } @@ -34,6 +34,8 @@ SYNOPSIS *NET-COMMANDS* := { **show** | **list** | **help** } + *FEATURE-COMMANDS* := { **probe** | **help** } + DESCRIPTION =========== *bpftool* allows for inspection and simple modification of BPF objects From dd27c2e3d0a05c01ff14bb672d1a3f0fdd8f98fc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 12 Feb 2019 00:20:39 -0800 Subject: [PATCH 0995/1436] bpf: offload: add priv field for drivers Currently bpf_offload_dev does not have any priv pointer, forcing the drivers to work backwards from the netdev in program metadata. This is not great given programs are conceptually associated with the offload device, and it means one or two unnecessary deferences. Add a priv pointer to bpf_offload_dev. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 2 +- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 4 +--- drivers/net/netdevsim/bpf.c | 5 +++-- include/linux/bpf.h | 3 ++- kernel/bpf/offload.c | 10 +++++++++- 5 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index dccae0319204..275de9f4c61c 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -465,7 +465,7 @@ static int nfp_bpf_init(struct nfp_app *app) app->ctrl_mtu = nfp_bpf_ctrl_cmsg_mtu(bpf); } - bpf->bpf_dev = bpf_offload_dev_create(&nfp_bpf_dev_ops); + bpf->bpf_dev = bpf_offload_dev_create(&nfp_bpf_dev_ops, bpf); err = PTR_ERR_OR_ZERO(bpf->bpf_dev); if (err) goto err_free_neutral_maps; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 55c7dbf8b421..15dce97650a5 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -185,8 +185,6 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog) static int nfp_bpf_verifier_prep(struct bpf_prog *prog) { - struct nfp_net *nn = netdev_priv(prog->aux->offload->netdev); - struct nfp_app *app = nn->app; struct nfp_prog *nfp_prog; int ret; @@ -197,7 +195,7 @@ static int nfp_bpf_verifier_prep(struct bpf_prog *prog) INIT_LIST_HEAD(&nfp_prog->insns); nfp_prog->type = prog->type; - nfp_prog->bpf = app->priv; + nfp_prog->bpf = bpf_offload_dev_priv(prog->aux->offload->offdev); ret = nfp_prog_prepare(nfp_prog, prog->insnsi, prog->len); if (ret) diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 172b271c8bd2..f92c43453ec6 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -248,7 +248,7 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog) static int nsim_bpf_verifier_prep(struct bpf_prog *prog) { - struct netdevsim *ns = netdev_priv(prog->aux->offload->netdev); + struct netdevsim *ns = bpf_offload_dev_priv(prog->aux->offload->offdev); if (!ns->bpf_bind_accept) return -EOPNOTSUPP; @@ -589,7 +589,8 @@ int nsim_bpf_init(struct netdevsim *ns) if (IS_ERR_OR_NULL(ns->sdev->ddir_bpf_bound_progs)) return -ENOMEM; - ns->sdev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops); + ns->sdev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops, + ns); err = PTR_ERR_OR_ZERO(ns->sdev->bpf_dev); if (err) return err; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7f58828755fd..de18227b3d95 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -773,8 +773,9 @@ int bpf_map_offload_get_next_key(struct bpf_map *map, bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map); struct bpf_offload_dev * -bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops); +bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops, void *priv); void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev); +void *bpf_offload_dev_priv(struct bpf_offload_dev *offdev); int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev, struct net_device *netdev); void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 39dba8c90331..ba635209ae9a 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -35,6 +35,7 @@ static DECLARE_RWSEM(bpf_devs_lock); struct bpf_offload_dev { const struct bpf_prog_offload_ops *ops; struct list_head netdevs; + void *priv; }; struct bpf_offload_netdev { @@ -669,7 +670,7 @@ unlock: EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister); struct bpf_offload_dev * -bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops) +bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops, void *priv) { struct bpf_offload_dev *offdev; int err; @@ -688,6 +689,7 @@ bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops) return ERR_PTR(-ENOMEM); offdev->ops = ops; + offdev->priv = priv; INIT_LIST_HEAD(&offdev->netdevs); return offdev; @@ -700,3 +702,9 @@ void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev) kfree(offdev); } EXPORT_SYMBOL_GPL(bpf_offload_dev_destroy); + +void *bpf_offload_dev_priv(struct bpf_offload_dev *offdev) +{ + return offdev->priv; +} +EXPORT_SYMBOL_GPL(bpf_offload_dev_priv); From 96d7cb932e826219ec41ac02e5af037ffae6098c Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Tue, 29 Jan 2019 16:34:04 +0800 Subject: [PATCH 0996/1436] floppy: check_events callback should not return a negative number floppy_check_events() is supposed to return bit flags to say which events occured. We should return zero to say that no event flags are set. Only BIT(0) and BIT(1) are used in the caller. And .check_events interface also expect to return an unsigned int value. However, after commit a0c80efe5956, it may return -EINTR (-4u). Here, both BIT(0) and BIT(1) are cleared. So this patch shouldn't affect runtime, but it obviously is still worth fixing. Reviewed-by: Dan Carpenter Fixes: a0c80efe5956 ("floppy: fix lock_fdc() signal handling") Signed-off-by: Yufen Yu Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 6f2856c6d0f2..55481b40df9a 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4075,7 +4075,7 @@ static unsigned int floppy_check_events(struct gendisk *disk, if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) { if (lock_fdc(drive)) - return -EINTR; + return 0; poll_drive(false, 0); process_fd_request(); } From 0ff8409b521b7b315bd69c4f1fbff024dc6219f6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 12 Feb 2019 00:18:02 -0800 Subject: [PATCH 0997/1436] nfp: flower: remove double new line Recent cls_flower offload rewrite added a double new line. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/offload.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index fe1469d201af..450d7296fd57 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -187,7 +187,6 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) return -EOPNOTSUPP; - flow_rule_match_enc_ports(rule, &enc_ports); if (enc_ports.mask->dst != cpu_to_be16(~0)) return -EOPNOTSUPP; From c65285428b6e7797f1bb063f33b0ae7e93397b7b Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Tue, 12 Feb 2019 13:10:00 +0000 Subject: [PATCH 0998/1436] sfc: initialise found bitmap in efx_ef10_mtd_probe The bitmap of found partitions in efx_ef10_mtd_probe was not initialised, causing partitions to be suppressed based off whatever value was in the bitmap at the start. Fixes: 3366463513f5 ("sfc: suppress duplicate nvmem partition types in efx_ef10_mtd_probe") Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 2f2bda68d861..c08034154a9a 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -6115,7 +6115,7 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, static int efx_ef10_mtd_probe(struct efx_nic *efx) { MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_PARTITIONS_OUT_LENMAX); - DECLARE_BITMAP(found, EF10_NVRAM_PARTITION_COUNT); + DECLARE_BITMAP(found, EF10_NVRAM_PARTITION_COUNT) = { 0 }; struct efx_mcdi_mtd_partition *parts; size_t outlen, n_parts_total, i, n_parts; unsigned int type; From 4dff63c25ebf8963065323964cf1c24e290b6ced Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Tue, 12 Feb 2019 16:29:50 +0100 Subject: [PATCH 0999/1436] net/smc: reset cursor update required flag When an updated rx_cursor_confirmed field was sent to the peer then reset the cons_curs_upd_req flag. And remove the duplicate reset and cursor update in smc_tx_consumer_update(). Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 5 ++++- net/smc/smc_tx.c | 3 --- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index a712c9f8699b..99d9d6e85dfb 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -105,8 +105,10 @@ int smc_cdc_msg_send(struct smc_connection *conn, &conn->local_tx_ctrl, conn); smc_curs_copy(&cfed, &((struct smc_host_cdc_msg *)wr_buf)->cons, conn); rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); - if (!rc) + if (!rc) { smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn); + conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; + } return rc; } @@ -194,6 +196,7 @@ int smcd_cdc_msg_send(struct smc_connection *conn) if (rc) return rc; smc_curs_copy(&conn->rx_curs_confirmed, &curs, conn); + conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; /* Calculate transmitted data and increment free send buffer space */ diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin, &conn->tx_curs_sent); diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index f93f3580c100..ce9586bce364 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -610,9 +610,6 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force) SMC_TX_WORK_DELAY); return; } - smc_curs_copy(&conn->rx_curs_confirmed, - &conn->local_tx_ctrl.cons, conn); - conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; } if (conn->local_rx_ctrl.prod_flags.write_blocked && !atomic_read(&conn->bytes_to_rcv)) From 5bc056d8d0e3f5f896eb747d42d1e3aa8ceaf1b0 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Tue, 12 Feb 2019 16:29:51 +0100 Subject: [PATCH 1000/1436] net/smc: move wake up of close waiter Move the call to smc_close_wake_tx_prepared() (which wakes up a possibly waiting close processing that might wait for 'all data sent') to smc_tx_sndbuf_nonempty() (which is the main function to send data). Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 2 -- net/smc/smc_tx.c | 7 +++++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 99d9d6e85dfb..780b36c69292 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -290,8 +290,6 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */ if (diff_cons && smc_tx_prepared_sends(conn)) { smc_tx_sndbuf_nonempty(conn); - /* trigger socket release if connection closed */ - smc_close_wake_tx_prepared(smc); } if (diff_cons && conn->urg_tx_pend && atomic_read(&conn->peer_rmbe_space) == conn->peer_rmbe_size) { diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index ce9586bce364..dd10a913b38e 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -24,6 +24,7 @@ #include "smc.h" #include "smc_wr.h" #include "smc_cdc.h" +#include "smc_close.h" #include "smc_ism.h" #include "smc_tx.h" @@ -554,6 +555,12 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn) else rc = smcr_tx_sndbuf_nonempty(conn); + if (!rc) { + /* trigger socket release if connection is closing */ + struct smc_sock *smc = container_of(conn, struct smc_sock, + conn); + smc_close_wake_tx_prepared(smc); + } return rc; } From 16297d143989e3f5acd75c1ca0a771b78aa12b46 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Tue, 12 Feb 2019 16:29:52 +0100 Subject: [PATCH 1001/1436] net/smc: no delay for free tx buffer wait When no free transfer buffers are available then a work to call smc_tx_work() is scheduled. Set the schedule delay to zero, because for the out-of-buffers condition the work can start immediately and will block in the called function smc_wr_tx_get_free_slot(), waiting for free buffers. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index dd10a913b38e..a3bff08ff8c8 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -28,7 +28,7 @@ #include "smc_ism.h" #include "smc_tx.h" -#define SMC_TX_WORK_DELAY HZ +#define SMC_TX_WORK_DELAY 0 #define SMC_TX_CORK_DELAY (HZ >> 2) /* 250 ms */ /***************************** sndbuf producer *******************************/ From cf0cfe535845cf60830e1482f7e618d80140e8d9 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Tue, 12 Feb 2019 16:29:53 +0100 Subject: [PATCH 1002/1436] net/smc: reduce amount of status updates to peer In smc_cdc_msg_recv_action() the received cdc message is evaluated. To reduce the number of messaged triggered by this evaluation the logic is streamlined. For the write_blocked condition we do not need to send a response immediately. The remaining conditions can be put together into one if clause. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 780b36c69292..28bbdb04bc35 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -273,24 +273,18 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, smp_mb__after_atomic(); smc->sk.sk_data_ready(&smc->sk); } else { - if (conn->local_rx_ctrl.prod_flags.write_blocked || - conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || - conn->local_rx_ctrl.prod_flags.urg_data_pending) { - if (conn->local_rx_ctrl.prod_flags.urg_data_pending) - conn->urg_state = SMC_URG_NOTYET; - /* force immediate tx of current consumer cursor, but - * under send_lock to guarantee arrival in seqno-order - */ - if (smc->sk.sk_state != SMC_INIT) - smc_tx_sndbuf_nonempty(conn); - } + if (conn->local_rx_ctrl.prod_flags.write_blocked) + smc->sk.sk_data_ready(&smc->sk); + if (conn->local_rx_ctrl.prod_flags.urg_data_pending) + conn->urg_state = SMC_URG_NOTYET; } - /* piggy backed tx info */ /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */ - if (diff_cons && smc_tx_prepared_sends(conn)) { + if ((diff_cons && smc_tx_prepared_sends(conn)) || + conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || + conn->local_rx_ctrl.prod_flags.urg_data_pending) smc_tx_sndbuf_nonempty(conn); - } + if (diff_cons && conn->urg_tx_pend && atomic_read(&conn->peer_rmbe_space) == conn->peer_rmbe_size) { /* urg data confirmed by peer, indicate we're ready for more */ From e78b2622266ce962b18efad8f29acdc6052db010 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Tue, 12 Feb 2019 16:29:54 +0100 Subject: [PATCH 1003/1436] net/smc: check connections in smc_lgr_free_work Remove the shortcut that smc_lgr_free() would skip the check for existing connections when the link group is not in the link group list. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 349d789a9728..53a17cfa61af 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -160,8 +160,6 @@ static void smc_lgr_free_work(struct work_struct *work) bool conns; spin_lock_bh(&smc_lgr_list.lock); - if (list_empty(&lgr->list)) - goto free; read_lock_bh(&lgr->conns_lock); conns = RB_EMPTY_ROOT(&lgr->conns_all); read_unlock_bh(&lgr->conns_lock); @@ -169,8 +167,8 @@ static void smc_lgr_free_work(struct work_struct *work) spin_unlock_bh(&smc_lgr_list.lock); return; } - list_del_init(&lgr->list); /* remove from smc_lgr_list */ -free: + if (!list_empty(&lgr->list)) + list_del_init(&lgr->list); /* remove from smc_lgr_list */ spin_unlock_bh(&smc_lgr_list.lock); if (!lgr->is_smcd && !lgr->terminating) { From 81cf6430526531bc688d4c3d09506dba7b456fb1 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Tue, 12 Feb 2019 16:29:55 +0100 Subject: [PATCH 1004/1436] net/smc: check port_idx of ib event For robustness protect of higher port numbers than expected to avoid setting bits behind our port_event_mask. In case of an DEVICE_FATAL event all ports must be checked. The IB_EVENT_GID_CHANGE event is provided in the global event handler, so handle it there. And handle a QP_FATAL event instead of an DEVICE_FATAL event in the qp handler. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_ib.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 76487a16934e..0b244be24fe0 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -257,13 +257,21 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler, smcibdev = container_of(handler, struct smc_ib_device, event_handler); switch (ibevent->event) { - case IB_EVENT_PORT_ERR: case IB_EVENT_DEVICE_FATAL: - case IB_EVENT_PORT_ACTIVE: - port_idx = ibevent->element.port_num - 1; - set_bit(port_idx, &smcibdev->port_event_mask); + /* terminate all ports on device */ + for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) + set_bit(port_idx, &smcibdev->port_event_mask); schedule_work(&smcibdev->port_event_work); break; + case IB_EVENT_PORT_ERR: + case IB_EVENT_PORT_ACTIVE: + case IB_EVENT_GID_CHANGE: + port_idx = ibevent->element.port_num - 1; + if (port_idx < SMC_MAX_PORTS) { + set_bit(port_idx, &smcibdev->port_event_mask); + schedule_work(&smcibdev->port_event_work); + } + break; default: break; } @@ -294,13 +302,13 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) u8 port_idx; switch (ibevent->event) { - case IB_EVENT_DEVICE_FATAL: - case IB_EVENT_GID_CHANGE: - case IB_EVENT_PORT_ERR: + case IB_EVENT_QP_FATAL: case IB_EVENT_QP_ACCESS_ERR: port_idx = ibevent->element.qp->port - 1; - set_bit(port_idx, &smcibdev->port_event_mask); - schedule_work(&smcibdev->port_event_work); + if (port_idx < SMC_MAX_PORTS) { + set_bit(port_idx, &smcibdev->port_event_mask); + schedule_work(&smcibdev->port_event_work); + } break; default: break; From 994c6e29564b7faa852f4b746989012dc6694082 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 12 Feb 2019 16:29:56 +0100 Subject: [PATCH 1005/1436] MAINTAINERS: add Karsten as SMC maintainer Add Karsten as additional maintainer for Shared Memory Communications (SMC) Sockets. Acked-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 604bca2fc05d..f3af5cde6456 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13760,6 +13760,7 @@ F: drivers/misc/sgi-xp/ SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS M: Ursula Braun +M: Karsten Graul L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ S: Supported From 48ebab31d424fbdb8ede8914923bec671a933246 Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Tue, 12 Feb 2019 16:29:51 +0000 Subject: [PATCH 1006/1436] mlxsw: spectrum: Set LAG port collector only when active The LAG port collecting (receive) function was mistakenly set when the port was registered as a LAG member, while it should be set only when the port collection state is set to true. Set LAG port to collecting when it is set to distributing, as described in the IEEE link aggregation standard coupled control mux machine state diagram. Signed-off-by: Nir Dotan Acked-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 62 ++++++++++++++----- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 7c9745cecbbd..9686d3822b92 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4771,9 +4771,6 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, err = mlxsw_sp_lag_col_port_add(mlxsw_sp_port, lag_id, port_index); if (err) goto err_col_port_add; - err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, lag_id); - if (err) - goto err_col_port_enable; mlxsw_core_lag_mapping_set(mlxsw_sp->core, lag_id, port_index, mlxsw_sp_port->local_port); @@ -4787,8 +4784,6 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, return 0; -err_col_port_enable: - mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); err_col_port_add: if (!lag->ref_count) mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); @@ -4807,7 +4802,6 @@ static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id); WARN_ON(lag->ref_count == 0); - mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, lag_id); mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); /* Any VLANs configured on the port are no longer valid */ @@ -4852,21 +4846,56 @@ static int mlxsw_sp_lag_dist_port_remove(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); } -static int mlxsw_sp_port_lag_tx_en_set(struct mlxsw_sp_port *mlxsw_sp_port, - bool lag_tx_enabled) +static int +mlxsw_sp_port_lag_col_dist_enable(struct mlxsw_sp_port *mlxsw_sp_port) { - if (lag_tx_enabled) - return mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, - mlxsw_sp_port->lag_id); - else - return mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port, - mlxsw_sp_port->lag_id); + int err; + + err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + if (err) + return err; + + err = mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, mlxsw_sp_port->lag_id); + if (err) + goto err_dist_port_add; + + return 0; + +err_dist_port_add: + mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, mlxsw_sp_port->lag_id); + return err; +} + +static int +mlxsw_sp_port_lag_col_dist_disable(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + if (err) + return err; + + err = mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + if (err) + goto err_col_port_disable; + + return 0; + +err_col_port_disable: + mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, mlxsw_sp_port->lag_id); + return err; } static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port, struct netdev_lag_lower_state_info *info) { - return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled); + if (info->tx_enabled) + return mlxsw_sp_port_lag_col_dist_enable(mlxsw_sp_port); + else + return mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port); } static int mlxsw_sp_port_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, @@ -5089,8 +5118,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, err = mlxsw_sp_port_lag_join(mlxsw_sp_port, upper_dev); } else { - mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, - false); + mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port); mlxsw_sp_port_lag_leave(mlxsw_sp_port, upper_dev); } From 24f91ce0d27c535b3f91fc4e6627a3761db094c4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 12 Feb 2019 16:29:52 +0000 Subject: [PATCH 1007/1436] mlxsw: spectrum_router: Drop unnecessary WARN_ON_ONCE() In case the register access failed an error would be logged anyway, so we can drop the warning. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 818040ce4d68..52fed8c7bf1e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -6142,7 +6142,7 @@ static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) mlxsw_reg_ritr_rif_pack(ritr_pl, rif); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); - if (WARN_ON_ONCE(err)) + if (err) return err; mlxsw_reg_ritr_enable_set(ritr_pl, false); From 384c2f7473bc4f6b25f62060ab4d5985726bb12d Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 12 Feb 2019 16:29:53 +0000 Subject: [PATCH 1008/1436] mlxsw: spectrum_flower: Fix VLAN modify action support The driver does not support VLAN push and pop, but only VLAN modify. Fixes: 738678817573 ("drivers: net: use flow action infrastructure") Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Cc: Pablo Neira Ayuso Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 9af9f5c1b25c..15f804453cd6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -102,8 +102,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return err; } break; - case FLOW_ACTION_VLAN_PUSH: - case FLOW_ACTION_VLAN_POP: { + case FLOW_ACTION_VLAN_MANGLE: { u16 proto = be16_to_cpu(act->vlan.proto); u8 prio = act->vlan.prio; u16 vid = act->vlan.vid; From f5c7bd93c4f1ddf3a3029f56b277a0013fe96886 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 Feb 2019 16:29:54 +0000 Subject: [PATCH 1009/1436] selftests: mlxsw: avoid double sourcing of lib.sh Don't source lib.sh 2 times and make the script work with ifnames passed on the command line. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/spectrum/resource_scale.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index a0a80e1a69e8..e7ffc79561b7 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -2,7 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 NUM_NETIFS=6 -source ../../../../net/forwarding/lib.sh source ../../../../net/forwarding/tc_common.sh source devlink_lib_spectrum.sh From e222822f9be03c6d985ba1af1f22ec863368b8d0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Feb 2019 14:19:57 +0100 Subject: [PATCH 1010/1436] net: caif: pass struct device to DMA API functions The DMA API generally relies on a struct device to work properly, and only barely works without one for legacy reasons. Pass the easily available struct device from the platform_device to remedy this. Also use the proper Kconfig symbol to check for DMA API availability. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- drivers/net/caif/caif_spi.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c index d28a1398c091..7608bc3e00df 100644 --- a/drivers/net/caif/caif_spi.c +++ b/drivers/net/caif/caif_spi.c @@ -73,35 +73,37 @@ MODULE_PARM_DESC(spi_down_tail_align, "SPI downlink tail alignment."); #define LOW_WATER_MARK 100 #define HIGH_WATER_MARK (LOW_WATER_MARK*5) -#ifdef CONFIG_UML +#ifndef CONFIG_HAS_DMA /* * We sometimes use UML for debugging, but it cannot handle * dma_alloc_coherent so we have to wrap it. */ -static inline void *dma_alloc(dma_addr_t *daddr) +static inline void *dma_alloc(struct cfspi *cfspi, dma_addr_t *daddr) { return kmalloc(SPI_DMA_BUF_LEN, GFP_KERNEL); } -static inline void dma_free(void *cpu_addr, dma_addr_t handle) +static inline void dma_free(struct cfspi *cfspi, void *cpu_addr, + dma_addr_t handle) { kfree(cpu_addr); } #else -static inline void *dma_alloc(dma_addr_t *daddr) +static inline void *dma_alloc(struct cfspi *cfspi, dma_addr_t *daddr) { - return dma_alloc_coherent(NULL, SPI_DMA_BUF_LEN, daddr, + return dma_alloc_coherent(&cfspi->pdev->dev, SPI_DMA_BUF_LEN, daddr, GFP_KERNEL); } -static inline void dma_free(void *cpu_addr, dma_addr_t handle) +static inline void dma_free(struct cfspi *cfspi, void *cpu_addr, + dma_addr_t handle) { - dma_free_coherent(NULL, SPI_DMA_BUF_LEN, cpu_addr, handle); + dma_free_coherent(&cfspi->pdev->dev, SPI_DMA_BUF_LEN, cpu_addr, handle); } -#endif /* CONFIG_UML */ +#endif /* CONFIG_HAS_DMA */ #ifdef CONFIG_DEBUG_FS @@ -610,13 +612,13 @@ static int cfspi_init(struct net_device *dev) } /* Allocate DMA buffers. */ - cfspi->xfer.va_tx[0] = dma_alloc(&cfspi->xfer.pa_tx[0]); + cfspi->xfer.va_tx[0] = dma_alloc(cfspi, &cfspi->xfer.pa_tx[0]); if (!cfspi->xfer.va_tx[0]) { res = -ENODEV; goto err_dma_alloc_tx_0; } - cfspi->xfer.va_rx = dma_alloc(&cfspi->xfer.pa_rx); + cfspi->xfer.va_rx = dma_alloc(cfspi, &cfspi->xfer.pa_rx); if (!cfspi->xfer.va_rx) { res = -ENODEV; @@ -665,9 +667,9 @@ static int cfspi_init(struct net_device *dev) return 0; err_create_wq: - dma_free(cfspi->xfer.va_rx, cfspi->xfer.pa_rx); + dma_free(cfspi, cfspi->xfer.va_rx, cfspi->xfer.pa_rx); err_dma_alloc_rx: - dma_free(cfspi->xfer.va_tx[0], cfspi->xfer.pa_tx[0]); + dma_free(cfspi, cfspi->xfer.va_tx[0], cfspi->xfer.pa_tx[0]); err_dma_alloc_tx_0: return res; } @@ -683,8 +685,8 @@ static void cfspi_uninit(struct net_device *dev) cfspi->ndev = NULL; /* Free DMA buffers. */ - dma_free(cfspi->xfer.va_rx, cfspi->xfer.pa_rx); - dma_free(cfspi->xfer.va_tx[0], cfspi->xfer.pa_tx[0]); + dma_free(cfspi, cfspi->xfer.va_rx, cfspi->xfer.pa_rx); + dma_free(cfspi, cfspi->xfer.va_tx[0], cfspi->xfer.pa_tx[0]); set_bit(SPI_TERMINATE, &cfspi->state); wake_up_interruptible(&cfspi->wait); destroy_workqueue(cfspi->wq); From 3c69aec48f3c89bf1b29f11ea949c4f595577ab4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Feb 2019 14:19:58 +0100 Subject: [PATCH 1011/1436] au1000_eth: pass struct device to DMA API functions The DMA API generally relies on a struct device to work properly, and only barely works without one for legacy reasons. Pass the easily available struct device from the platform_device to remedy this. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/au1000_eth.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index e833d1b3fe18..e5073aeea06a 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -1167,7 +1167,7 @@ static int au1000_probe(struct platform_device *pdev) /* Allocate the data buffers * Snooping works fine with eth on all au1xxx */ - aup->vaddr = (u32)dma_alloc_attrs(NULL, MAX_BUF_SIZE * + aup->vaddr = (u32)dma_alloc_attrs(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), &aup->dma_addr, 0, DMA_ATTR_NON_CONSISTENT); @@ -1349,7 +1349,7 @@ err_remap3: err_remap2: iounmap(aup->mac); err_remap1: - dma_free_attrs(NULL, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), + dma_free_attrs(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), (void *)aup->vaddr, aup->dma_addr, DMA_ATTR_NON_CONSISTENT); err_vaddr: @@ -1383,7 +1383,7 @@ static int au1000_remove(struct platform_device *pdev) if (aup->tx_db_inuse[i]) au1000_ReleaseDB(aup, aup->tx_db_inuse[i]); - dma_free_attrs(NULL, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), + dma_free_attrs(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), (void *)aup->vaddr, aup->dma_addr, DMA_ATTR_NON_CONSISTENT); From 564923e4c0b6505436d756dcaf8d84ca33307ee3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Feb 2019 14:19:59 +0100 Subject: [PATCH 1012/1436] macb_main: pass struct device to DMA API functions The DMA API generally relies on a struct device to work properly, and only barely works without one for legacy reasons. Pass the easily available struct device from the platform_device to remedy this. Signed-off-by: Christoph Hellwig Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index eaabe8c278ec..4d1509f431d7 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3673,9 +3673,9 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb, /* Store packet information (to free when Tx completed) */ lp->skb = skb; lp->skb_length = skb->len; - lp->skb_physaddr = dma_map_single(NULL, skb->data, skb->len, - DMA_TO_DEVICE); - if (dma_mapping_error(NULL, lp->skb_physaddr)) { + lp->skb_physaddr = dma_map_single(&lp->pdev->dev, skb->data, + skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(&lp->pdev->dev, lp->skb_physaddr)) { dev_kfree_skb_any(skb); dev->stats.tx_dropped++; netdev_err(dev, "%s: DMA mapping error\n", __func__); @@ -3765,7 +3765,7 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id) if (lp->skb) { dev_kfree_skb_irq(lp->skb); lp->skb = NULL; - dma_unmap_single(NULL, lp->skb_physaddr, + dma_unmap_single(&lp->pdev->dev, lp->skb_physaddr, lp->skb_length, DMA_TO_DEVICE); dev->stats.tx_packets++; dev->stats.tx_bytes += lp->skb_length; From 74e0deb89a8ba27c132b1f0e08643e215b5c1f92 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Feb 2019 14:20:00 +0100 Subject: [PATCH 1013/1436] lantiq_etop: pass struct device to DMA API functions The DMA API generally relies on a struct device to work properly, and only barely works without one for legacy reasons. Pass the easily available struct device from the platform_device to remedy this. Note this driver seems to lack dma_unmap_* calls entirely, but fixing that is left for another time. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- drivers/net/ethernet/lantiq_etop.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 32ac9045cdae..f9bb890733b5 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -112,10 +112,12 @@ struct ltq_etop_priv { static int ltq_etop_alloc_skb(struct ltq_etop_chan *ch) { + struct ltq_etop_priv *priv = netdev_priv(ch->netdev); + ch->skb[ch->dma.desc] = netdev_alloc_skb(ch->netdev, MAX_DMA_DATA_LEN); if (!ch->skb[ch->dma.desc]) return -ENOMEM; - ch->dma.desc_base[ch->dma.desc].addr = dma_map_single(NULL, + ch->dma.desc_base[ch->dma.desc].addr = dma_map_single(&priv->pdev->dev, ch->skb[ch->dma.desc]->data, MAX_DMA_DATA_LEN, DMA_FROM_DEVICE); ch->dma.desc_base[ch->dma.desc].addr = @@ -487,7 +489,7 @@ ltq_etop_tx(struct sk_buff *skb, struct net_device *dev) netif_trans_update(dev); spin_lock_irqsave(&priv->lock, flags); - desc->addr = ((unsigned int) dma_map_single(NULL, skb->data, len, + desc->addr = ((unsigned int) dma_map_single(&priv->pdev->dev, skb->data, len, DMA_TO_DEVICE)) - byte_offset; wmb(); desc->ctl = LTQ_DMA_OWN | LTQ_DMA_SOP | LTQ_DMA_EOP | From e86b76f63351e8393c6a7d15bd46366da16ffd3f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Feb 2019 14:20:01 +0100 Subject: [PATCH 1014/1436] pxa168_eth: pass struct device to DMA API functions The DMA API generally relies on a struct device to work properly, and only barely works without one for legacy reasons. Pass the easily available struct device from the platform_device to remedy this. Note that this driver seems to entirely lack dma_map_single error handling, but that is left for another time. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/pxa168_eth.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index f8a6d6e3cb7a..35f2142aac5e 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -201,6 +201,7 @@ struct tx_desc { }; struct pxa168_eth_private { + struct platform_device *pdev; int port_num; /* User Ethernet port number */ int phy_addr; int phy_speed; @@ -331,7 +332,7 @@ static void rxq_refill(struct net_device *dev) used_rx_desc = pep->rx_used_desc_q; p_used_rx_desc = &pep->p_rx_desc_area[used_rx_desc]; size = skb_end_pointer(skb) - skb->data; - p_used_rx_desc->buf_ptr = dma_map_single(NULL, + p_used_rx_desc->buf_ptr = dma_map_single(&pep->pdev->dev, skb->data, size, DMA_FROM_DEVICE); @@ -743,7 +744,7 @@ static int txq_reclaim(struct net_device *dev, int force) netdev_err(dev, "Error in TX\n"); dev->stats.tx_errors++; } - dma_unmap_single(NULL, addr, count, DMA_TO_DEVICE); + dma_unmap_single(&pep->pdev->dev, addr, count, DMA_TO_DEVICE); if (skb) dev_kfree_skb_irq(skb); released++; @@ -805,7 +806,7 @@ static int rxq_process(struct net_device *dev, int budget) if (rx_next_curr_desc == rx_used_desc) pep->rx_resource_err = 1; pep->rx_desc_count--; - dma_unmap_single(NULL, rx_desc->buf_ptr, + dma_unmap_single(&pep->pdev->dev, rx_desc->buf_ptr, rx_desc->buf_size, DMA_FROM_DEVICE); received_packets++; @@ -1274,7 +1275,8 @@ pxa168_eth_start_xmit(struct sk_buff *skb, struct net_device *dev) length = skb->len; pep->tx_skb[tx_index] = skb; desc->byte_cnt = length; - desc->buf_ptr = dma_map_single(NULL, skb->data, length, DMA_TO_DEVICE); + desc->buf_ptr = dma_map_single(&pep->pdev->dev, skb->data, length, + DMA_TO_DEVICE); skb_tx_timestamp(skb); @@ -1528,6 +1530,7 @@ static int pxa168_eth_probe(struct platform_device *pdev) if (err) goto err_free_mdio; + pep->pdev = pdev; SET_NETDEV_DEV(dev, &pdev->dev); pxa168_init_hw(pep); err = register_netdev(dev); From 5dac33ad6f0d1dbc20736fa13d833250e7f12a26 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Feb 2019 14:20:02 +0100 Subject: [PATCH 1015/1436] moxart_ether: pass struct device to DMA API functions The DMA API generally relies on a struct device to work properly, and only barely works without one for legacy reasons. Pass the easily available struct device from the platform_device to remedy this. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- drivers/net/ethernet/moxa/moxart_ether.c | 11 +++++++---- drivers/net/ethernet/moxa/moxart_ether.h | 1 + 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index b34055ac476f..00dec0ffb11b 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -81,11 +81,13 @@ static void moxart_mac_free_memory(struct net_device *ndev) priv->rx_buf_size, DMA_FROM_DEVICE); if (priv->tx_desc_base) - dma_free_coherent(NULL, TX_REG_DESC_SIZE * TX_DESC_NUM, + dma_free_coherent(&priv->pdev->dev, + TX_REG_DESC_SIZE * TX_DESC_NUM, priv->tx_desc_base, priv->tx_base); if (priv->rx_desc_base) - dma_free_coherent(NULL, RX_REG_DESC_SIZE * RX_DESC_NUM, + dma_free_coherent(&priv->pdev->dev, + RX_REG_DESC_SIZE * RX_DESC_NUM, priv->rx_desc_base, priv->rx_base); kfree(priv->tx_buf_base); @@ -476,6 +478,7 @@ static int moxart_mac_probe(struct platform_device *pdev) priv = netdev_priv(ndev); priv->ndev = ndev; + priv->pdev = pdev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ndev->base_addr = res->start; @@ -491,7 +494,7 @@ static int moxart_mac_probe(struct platform_device *pdev) priv->tx_buf_size = TX_BUF_SIZE; priv->rx_buf_size = RX_BUF_SIZE; - priv->tx_desc_base = dma_alloc_coherent(NULL, TX_REG_DESC_SIZE * + priv->tx_desc_base = dma_alloc_coherent(&pdev->dev, TX_REG_DESC_SIZE * TX_DESC_NUM, &priv->tx_base, GFP_DMA | GFP_KERNEL); if (!priv->tx_desc_base) { @@ -499,7 +502,7 @@ static int moxart_mac_probe(struct platform_device *pdev) goto init_fail; } - priv->rx_desc_base = dma_alloc_coherent(NULL, RX_REG_DESC_SIZE * + priv->rx_desc_base = dma_alloc_coherent(&pdev->dev, RX_REG_DESC_SIZE * RX_DESC_NUM, &priv->rx_base, GFP_DMA | GFP_KERNEL); if (!priv->rx_desc_base) { diff --git a/drivers/net/ethernet/moxa/moxart_ether.h b/drivers/net/ethernet/moxa/moxart_ether.h index bee608b547d1..bf4c3029cd0c 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.h +++ b/drivers/net/ethernet/moxa/moxart_ether.h @@ -292,6 +292,7 @@ #define LINK_STATUS 0x4 struct moxart_mac_priv_t { + struct platform_device *pdev; void __iomem *base; unsigned int reg_maccr; unsigned int reg_imr; From 8d4c28fbc284bf2dee08afff146def47cafbc227 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Feb 2019 14:20:03 +0100 Subject: [PATCH 1016/1436] meth: pass struct device to DMA API functions The DMA API generally relies on a struct device to work properly, and only barely works without one for legacy reasons. Pass the easily available struct device from the platform_device to remedy this. Also use GFP_KERNEL instead of GFP_ATOMIC as the gfp_t for the memory allocation, as we aren't in interrupt context or under a lock. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- drivers/net/ethernet/sgi/meth.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c index 0e1b7e960b98..67954a9e3675 100644 --- a/drivers/net/ethernet/sgi/meth.c +++ b/drivers/net/ethernet/sgi/meth.c @@ -68,6 +68,8 @@ module_param(timeout, int, 0); * packets in and out, so there is place for a packet */ struct meth_private { + struct platform_device *pdev; + /* in-memory copy of MAC Control register */ u64 mac_ctrl; @@ -211,8 +213,8 @@ static void meth_check_link(struct net_device *dev) static int meth_init_tx_ring(struct meth_private *priv) { /* Init TX ring */ - priv->tx_ring = dma_alloc_coherent(NULL, TX_RING_BUFFER_SIZE, - &priv->tx_ring_dma, GFP_ATOMIC); + priv->tx_ring = dma_alloc_coherent(&priv->pdev->dev, + TX_RING_BUFFER_SIZE, &priv->tx_ring_dma, GFP_KERNEL); if (!priv->tx_ring) return -ENOMEM; @@ -236,7 +238,7 @@ static int meth_init_rx_ring(struct meth_private *priv) priv->rx_ring[i]=(rx_packet*)(priv->rx_skbs[i]->head); /* I'll need to re-sync it after each RX */ priv->rx_ring_dmas[i] = - dma_map_single(NULL, priv->rx_ring[i], + dma_map_single(&priv->pdev->dev, priv->rx_ring[i], METH_RX_BUFF_SIZE, DMA_FROM_DEVICE); mace->eth.rx_fifo = priv->rx_ring_dmas[i]; } @@ -253,7 +255,7 @@ static void meth_free_tx_ring(struct meth_private *priv) dev_kfree_skb(priv->tx_skbs[i]); priv->tx_skbs[i] = NULL; } - dma_free_coherent(NULL, TX_RING_BUFFER_SIZE, priv->tx_ring, + dma_free_coherent(&priv->pdev->dev, TX_RING_BUFFER_SIZE, priv->tx_ring, priv->tx_ring_dma); } @@ -263,7 +265,7 @@ static void meth_free_rx_ring(struct meth_private *priv) int i; for (i = 0; i < RX_RING_ENTRIES; i++) { - dma_unmap_single(NULL, priv->rx_ring_dmas[i], + dma_unmap_single(&priv->pdev->dev, priv->rx_ring_dmas[i], METH_RX_BUFF_SIZE, DMA_FROM_DEVICE); priv->rx_ring[i] = 0; priv->rx_ring_dmas[i] = 0; @@ -393,7 +395,8 @@ static void meth_rx(struct net_device* dev, unsigned long int_status) fifo_rptr = (fifo_rptr - 1) & 0x0f; } while (priv->rx_write != fifo_rptr) { - dma_unmap_single(NULL, priv->rx_ring_dmas[priv->rx_write], + dma_unmap_single(&priv->pdev->dev, + priv->rx_ring_dmas[priv->rx_write], METH_RX_BUFF_SIZE, DMA_FROM_DEVICE); status = priv->rx_ring[priv->rx_write]->status.raw; #if MFE_DEBUG @@ -454,7 +457,8 @@ static void meth_rx(struct net_device* dev, unsigned long int_status) priv->rx_ring[priv->rx_write] = (rx_packet*)skb->head; priv->rx_ring[priv->rx_write]->status.raw = 0; priv->rx_ring_dmas[priv->rx_write] = - dma_map_single(NULL, priv->rx_ring[priv->rx_write], + dma_map_single(&priv->pdev->dev, + priv->rx_ring[priv->rx_write], METH_RX_BUFF_SIZE, DMA_FROM_DEVICE); mace->eth.rx_fifo = priv->rx_ring_dmas[priv->rx_write]; ADVANCE_RX_PTR(priv->rx_write); @@ -637,7 +641,7 @@ static void meth_tx_1page_prepare(struct meth_private *priv, } /* first page */ - catbuf = dma_map_single(NULL, buffer_data, buffer_len, + catbuf = dma_map_single(&priv->pdev->dev, buffer_data, buffer_len, DMA_TO_DEVICE); desc->data.cat_buf[0].form.start_addr = catbuf >> 3; desc->data.cat_buf[0].form.len = buffer_len - 1; @@ -663,12 +667,12 @@ static void meth_tx_2page_prepare(struct meth_private *priv, } /* first page */ - catbuf1 = dma_map_single(NULL, buffer1_data, buffer1_len, + catbuf1 = dma_map_single(&priv->pdev->dev, buffer1_data, buffer1_len, DMA_TO_DEVICE); desc->data.cat_buf[0].form.start_addr = catbuf1 >> 3; desc->data.cat_buf[0].form.len = buffer1_len - 1; /* second page */ - catbuf2 = dma_map_single(NULL, buffer2_data, buffer2_len, + catbuf2 = dma_map_single(&priv->pdev->dev, buffer2_data, buffer2_len, DMA_TO_DEVICE); desc->data.cat_buf[1].form.start_addr = catbuf2 >> 3; desc->data.cat_buf[1].form.len = buffer2_len - 1; @@ -840,6 +844,7 @@ static int meth_probe(struct platform_device *pdev) memcpy(dev->dev_addr, o2meth_eaddr, ETH_ALEN); priv = netdev_priv(dev); + priv->pdev = pdev; spin_lock_init(&priv->meth_lock); SET_NETDEV_DEV(dev, &pdev->dev); From 0eb1645a8daaabab8583333046afac90b5b77fe0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Feb 2019 14:20:04 +0100 Subject: [PATCH 1017/1436] smc911x: pass struct device to DMA API functions The DMA API generally relies on a struct device to work properly, and only barely works without one for legacy reasons. Pass the easily available struct device from the platform_device to remedy this. Note that smc911x apparently is a PIO chip with an external DMA handshake, and we probably use the wrong device here. But at least it matches the mapping side, which apparently works or at least worked in the not too distant past. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smc911x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 8355dfbb8ec3..b550e624500d 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -1188,7 +1188,7 @@ smc911x_tx_dma_irq(void *data) DBG(SMC_DEBUG_TX | SMC_DEBUG_DMA, dev, "TX DMA irq handler\n"); BUG_ON(skb == NULL); - dma_unmap_single(NULL, tx_dmabuf, tx_dmalen, DMA_TO_DEVICE); + dma_unmap_single(lp->dev, tx_dmabuf, tx_dmalen, DMA_TO_DEVICE); netif_trans_update(dev); dev_kfree_skb_irq(skb); lp->current_tx_skb = NULL; @@ -1219,7 +1219,7 @@ smc911x_rx_dma_irq(void *data) DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); DBG(SMC_DEBUG_RX | SMC_DEBUG_DMA, dev, "RX DMA irq handler\n"); - dma_unmap_single(NULL, rx_dmabuf, rx_dmalen, DMA_FROM_DEVICE); + dma_unmap_single(lp->dev, rx_dmabuf, rx_dmalen, DMA_FROM_DEVICE); BUG_ON(skb == NULL); lp->current_rx_skb = NULL; PRINT_PKT(skb->data, skb->len); From da203dfa89ce83c55b6623f73560ef7ec742aca4 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 11 Feb 2019 14:46:17 +0530 Subject: [PATCH 1018/1436] Revert "devlink: Add a generic wake_on_lan port parameter" This reverts commit b639583f9e36d044ac1b13090ae812266992cbac. As per discussion with Jakub Kicinski and Michal Kubecek, this will be better addressed by soon-too-come ethtool netlink API with additional indication that given configuration request is supposed to be persisted. Also, remove the parameter support from bnxt_en driver. Cc: Jiri Pirko Cc: Michael Chan Cc: Michal Kubecek Suggested-by: Jakub Kicinski Signed-off-by: Vasundhara Volam Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnxt/bnxt_devlink.c | 19 +------------------ .../net/ethernet/broadcom/bnxt/bnxt_devlink.h | 1 - include/net/devlink.h | 8 -------- net/core/devlink.c | 5 ----- 4 files changed, 1 insertion(+), 32 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 2955e404fd18..e1feb97bcd81 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -37,8 +37,6 @@ static const struct bnxt_dl_nvm_param nvm_params[] = { NVM_OFF_MSIX_VEC_PER_PF_MIN, BNXT_NVM_SHARED_CFG, 7}, {BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK, NVM_OFF_DIS_GRE_VER_CHECK, BNXT_NVM_SHARED_CFG, 1}, - - {DEVLINK_PARAM_GENERIC_ID_WOL, NVM_OFF_WOL, BNXT_NVM_PORT_CFG, 1}, }; static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, @@ -72,8 +70,7 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, bytesize = roundup(nvm_param.num_bits, BITS_PER_BYTE) / BITS_PER_BYTE; switch (bytesize) { case 1: - if (nvm_param.num_bits == 1 && - nvm_param.id != DEVLINK_PARAM_GENERIC_ID_WOL) + if (nvm_param.num_bits == 1) buf = &val->vbool; else buf = &val->vu8; @@ -167,17 +164,6 @@ static int bnxt_dl_msix_validate(struct devlink *dl, u32 id, return 0; } -static int bnxt_dl_wol_validate(struct devlink *dl, u32 id, - union devlink_param_value val, - struct netlink_ext_ack *extack) -{ - if (val.vu8 && val.vu8 != DEVLINK_PARAM_WAKE_MAGIC) { - NL_SET_ERR_MSG_MOD(extack, "WOL type is not supported"); - return -EINVAL; - } - return 0; -} - static const struct devlink_param bnxt_dl_params[] = { DEVLINK_PARAM_GENERIC(ENABLE_SRIOV, BIT(DEVLINK_PARAM_CMODE_PERMANENT), @@ -203,9 +189,6 @@ static const struct devlink_param bnxt_dl_params[] = { }; static const struct devlink_param bnxt_dl_port_params[] = { - DEVLINK_PARAM_GENERIC(WOL, BIT(DEVLINK_PARAM_CMODE_PERMANENT), - bnxt_dl_nvm_param_get, bnxt_dl_nvm_param_set, - bnxt_dl_wol_validate), }; int bnxt_dl_register(struct bnxt *bp) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h index da065ca84cc7..5b6b2c7d97cf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h @@ -35,7 +35,6 @@ static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl) #define NVM_OFF_MSIX_VEC_PER_PF_MAX 108 #define NVM_OFF_MSIX_VEC_PER_PF_MIN 114 -#define NVM_OFF_WOL 152 #define NVM_OFF_IGNORE_ARI 164 #define NVM_OFF_DIS_GRE_VER_CHECK 171 #define NVM_OFF_ENABLE_SRIOV 401 diff --git a/include/net/devlink.h b/include/net/devlink.h index 07660fe4c0e3..c6d88759b7d5 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -370,17 +370,12 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX, DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, - DEVLINK_PARAM_GENERIC_ID_WOL, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, DEVLINK_PARAM_GENERIC_ID_MAX = __DEVLINK_PARAM_GENERIC_ID_MAX - 1, }; -enum devlink_param_wol_types { - DEVLINK_PARAM_WAKE_MAGIC = (1 << 0), -}; - #define DEVLINK_PARAM_GENERIC_INT_ERR_RESET_NAME "internal_error_reset" #define DEVLINK_PARAM_GENERIC_INT_ERR_RESET_TYPE DEVLINK_PARAM_TYPE_BOOL @@ -405,9 +400,6 @@ enum devlink_param_wol_types { #define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME "fw_load_policy" #define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE DEVLINK_PARAM_TYPE_U8 -#define DEVLINK_PARAM_GENERIC_WOL_NAME "wake_on_lan" -#define DEVLINK_PARAM_GENERIC_WOL_TYPE DEVLINK_PARAM_TYPE_U8 - #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ diff --git a/net/core/devlink.c b/net/core/devlink.c index ec02459eea94..d3dfdcacf7eb 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2701,11 +2701,6 @@ static const struct devlink_param devlink_param_generic[] = { .name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME, .type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE, }, - { - .id = DEVLINK_PARAM_GENERIC_ID_WOL, - .name = DEVLINK_PARAM_GENERIC_WOL_NAME, - .type = DEVLINK_PARAM_GENERIC_WOL_TYPE, - }, }; static int devlink_param_generic_verify(const struct devlink_param *param) From 0946cf1dc7b6da951b4eb3fbcfd8e926dd0e787b Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 11 Feb 2019 11:46:01 +0000 Subject: [PATCH 1019/1436] net: phylink: only call mac_config() during resolve when link is up There's little point calling mac_config() when the link is down. Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 2e21ce42e388..a148866cbb14 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -302,6 +302,13 @@ static void phylink_mac_config(struct phylink *pl, pl->ops->mac_config(pl->netdev, pl->link_an_mode, state); } +static void phylink_mac_config_up(struct phylink *pl, + const struct phylink_link_state *state) +{ + if (state->link) + phylink_mac_config(pl, state); +} + static void phylink_mac_an_restart(struct phylink *pl) { if (pl->link_config.an_enabled && @@ -401,12 +408,12 @@ static void phylink_resolve(struct work_struct *w) case MLO_AN_PHY: link_state = pl->phy_state; phylink_resolve_flow(pl, &link_state); - phylink_mac_config(pl, &link_state); + phylink_mac_config_up(pl, &link_state); break; case MLO_AN_FIXED: phylink_get_fixed_state(pl, &link_state); - phylink_mac_config(pl, &link_state); + phylink_mac_config_up(pl, &link_state); break; case MLO_AN_INBAND: From 86e58135bc4ac68b41be11d82f1b1f87cb6119ba Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 11 Feb 2019 11:46:06 +0000 Subject: [PATCH 1020/1436] net: phylink: add phylink_init_eee() helper Provide phylink_init_eee() to allow MAC drivers to initialise PHY EEE from within the ethtool set_eee() method. Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 18 ++++++++++++++++++ include/linux/phylink.h | 1 + 2 files changed, 19 insertions(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index a148866cbb14..33f66dcd369a 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1271,6 +1271,24 @@ int phylink_get_eee_err(struct phylink *pl) } EXPORT_SYMBOL_GPL(phylink_get_eee_err); +/** + * phylink_init_eee() - init and check the EEE features + * @pl: a pointer to a &struct phylink returned from phylink_create() + * @clk_stop_enable: allow PHY to stop receive clock + * + * Must be called either with RTNL held or within mac_link_up() + */ +int phylink_init_eee(struct phylink *pl, bool clk_stop_enable) +{ + int ret = -EOPNOTSUPP; + + if (pl->phydev) + ret = phy_init_eee(pl->phydev, clk_stop_enable); + + return ret; +} +EXPORT_SYMBOL_GPL(phylink_init_eee); + /** * phylink_ethtool_get_eee() - read the energy efficient ethernet parameters * @pl: a pointer to a &struct phylink returned from phylink_create() diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 021fc6595856..f57059e4353f 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -220,6 +220,7 @@ void phylink_ethtool_get_pauseparam(struct phylink *, int phylink_ethtool_set_pauseparam(struct phylink *, struct ethtool_pauseparam *); int phylink_get_eee_err(struct phylink *); +int phylink_init_eee(struct phylink *, bool); int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *); int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *); int phylink_mii_ioctl(struct phylink *, struct ifreq *, int); From a7603ac1fc8ce1409f8ff70e6ce505f308b2c002 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 11 Feb 2019 15:32:36 +0100 Subject: [PATCH 1021/1436] geneve: change NET_UDP_TUNNEL dependency to select Due to the depends on NET_UDP_TUNNEL, at the moment it is impossible to compile GENEVE if no other protocol depending on NET_UDP_TUNNEL is selected. Fix this changing the depends to a select, and drop NET_IP_TUNNEL from the select list, as it already depends on NET_UDP_TUNNEL. Signed-off-by: Matteo Croce Reviewed-and-tested-by: Andrea Claudi Tested-by: Davide Caratti Signed-off-by: David S. Miller --- drivers/net/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index edb1c023a753..21bf8ac78380 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -197,9 +197,9 @@ config VXLAN config GENEVE tristate "Generic Network Virtualization Encapsulation" - depends on INET && NET_UDP_TUNNEL + depends on INET depends on IPV6 || !IPV6 - select NET_IP_TUNNEL + select NET_UDP_TUNNEL select GRO_CELLS ---help--- This allows one to create geneve virtual interfaces that provide From 87454b6edc1b0143fdb3d9853285477e95af74a4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 11 Feb 2019 15:04:24 +0000 Subject: [PATCH 1022/1436] net: phylink: avoid resolving link state too early During testing on Armada 388 platforms, it was found with a certain module configuration that it was possible to trigger a kernel oops during the module load process, caused by the phylink resolver being triggered for a currently disabled interface. This problem was introduced by changing the way the SFP registration works, which now can result in the sfp link down notification being called during phylink_create(). Fixes: b5bfc21af5cb ("net: sfp: do not probe SFP module before we're attached") Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index e7becc7379d7..938803237d7f 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -474,6 +474,17 @@ static void phylink_run_resolve(struct phylink *pl) queue_work(system_power_efficient_wq, &pl->resolve); } +static void phylink_run_resolve_and_disable(struct phylink *pl, int bit) +{ + unsigned long state = pl->phylink_disable_state; + + set_bit(bit, &pl->phylink_disable_state); + if (state == 0) { + queue_work(system_power_efficient_wq, &pl->resolve); + flush_work(&pl->resolve); + } +} + static void phylink_fixed_poll(struct timer_list *t) { struct phylink *pl = container_of(t, struct phylink, link_poll); @@ -924,9 +935,7 @@ void phylink_stop(struct phylink *pl) if (pl->link_an_mode == MLO_AN_FIXED && !IS_ERR(pl->link_gpio)) del_timer_sync(&pl->link_poll); - set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state); - queue_work(system_power_efficient_wq, &pl->resolve); - flush_work(&pl->resolve); + phylink_run_resolve_and_disable(pl, PHYLINK_DISABLE_STOPPED); } EXPORT_SYMBOL_GPL(phylink_stop); @@ -1632,9 +1641,7 @@ static void phylink_sfp_link_down(void *upstream) ASSERT_RTNL(); - set_bit(PHYLINK_DISABLE_LINK, &pl->phylink_disable_state); - queue_work(system_power_efficient_wq, &pl->resolve); - flush_work(&pl->resolve); + phylink_run_resolve_and_disable(pl, PHYLINK_DISABLE_LINK); } static void phylink_sfp_link_up(void *upstream) From 29dded89e80e3fff61efb34f07a8a3fba3ea146d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 11 Feb 2019 18:04:17 +0200 Subject: [PATCH 1023/1436] net/mlx4_en: Force CHECKSUM_NONE for short ethernet frames When an ethernet frame is padded to meet the minimum ethernet frame size, the padding octets are not covered by the hardware checksum. Fortunately the padding octets are usually zero's, which don't affect checksum. However, it is not guaranteed. For example, switches might choose to make other use of these octets. This repeatedly causes kernel hardware checksum fault. Prior to the cited commit below, skb checksum was forced to be CHECKSUM_NONE when padding is detected. After it, we need to keep skb->csum updated. However, fixing up CHECKSUM_COMPLETE requires to verify and parse IP headers, it does not worth the effort as the packets are so small that CHECKSUM_COMPLETE has no significant advantage. Future work: when reporting checksum complete is not an option for IP non-TCP/UDP packets, we can actually fallback to report checksum unnecessary, by looking at cqe IPOK bit. Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends") Cc: Eric Dumazet Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 9a0881cb7f51..6c01314e87b0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -617,6 +617,8 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, } #endif +#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) + /* We reach this function only after checking that any of * the (IPv4 | IPv6) bits are set in cqe->status. */ @@ -624,9 +626,20 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, netdev_features_t dev_features) { __wsum hw_checksum = 0; + void *hdr; - void *hdr = (u8 *)va + sizeof(struct ethhdr); + /* CQE csum doesn't cover padding octets in short ethernet + * frames. And the pad field is appended prior to calculating + * and appending the FCS field. + * + * Detecting these padded frames requires to verify and parse + * IP headers, so we simply force all those small frames to skip + * checksum complete. + */ + if (short_frame(skb->len)) + return -EINVAL; + hdr = (u8 *)va + sizeof(struct ethhdr); hw_checksum = csum_unfold((__force __sum16)cqe->checksum); if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK) && @@ -819,6 +832,11 @@ xdp_drop_no_cnt: skb_record_rx_queue(skb, cq_ring); if (likely(dev->features & NETIF_F_RXCSUM)) { + /* TODO: For IP non TCP/UDP packets when csum complete is + * not an option (not supported or any other reason) we can + * actually check cqe IPOK status bit and report + * CHECKSUM_UNNECESSARY rather than CHECKSUM_NONE + */ if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | MLX4_CQE_STATUS_UDP)) && (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && From b7d286f01b9d9021695188092e36f405f2ecd276 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 11 Feb 2019 10:23:10 +0000 Subject: [PATCH 1024/1436] net: marvell: mvpp2: add mvpp2_is_xlg() helper Add a mvpp2_is_xlg() helper to identify whether the interface mode should be using the XLGMAC rather than the GMAC. Signed-off-by: Russell King Reviewed-by: Maxime Chevallier Signed-off-by: David S. Miller --- .../net/ethernet/marvell/mvpp2/mvpp2_main.c | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index eef8833e5aae..8b42b0367c35 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -965,6 +965,11 @@ mvpp2_shared_interrupt_mask_unmask(struct mvpp2_port *port, bool mask) } /* Port configuration routines */ +static bool mvpp2_is_xlg(phy_interface_t interface) +{ + return interface == PHY_INTERFACE_MODE_10GKR || + interface == PHY_INTERFACE_MODE_XAUI; +} static void mvpp22_gop_init_rgmii(struct mvpp2_port *port) { @@ -1181,9 +1186,7 @@ static void mvpp2_port_enable(struct mvpp2_port *port) u32 val; /* Only GOP port 0 has an XLG MAC */ - if (port->gop_id == 0 && - (port->phy_interface == PHY_INTERFACE_MODE_XAUI || - port->phy_interface == PHY_INTERFACE_MODE_10GKR)) { + if (port->gop_id == 0 && mvpp2_is_xlg(port->phy_interface)) { val = readl(port->base + MVPP22_XLG_CTRL0_REG); val |= MVPP22_XLG_CTRL0_PORT_EN | MVPP22_XLG_CTRL0_MAC_RESET_DIS; @@ -1202,9 +1205,7 @@ static void mvpp2_port_disable(struct mvpp2_port *port) u32 val; /* Only GOP port 0 has an XLG MAC */ - if (port->gop_id == 0 && - (port->phy_interface == PHY_INTERFACE_MODE_XAUI || - port->phy_interface == PHY_INTERFACE_MODE_10GKR)) { + if (port->gop_id == 0 && mvpp2_is_xlg(port->phy_interface)) { val = readl(port->base + MVPP22_XLG_CTRL0_REG); val &= ~MVPP22_XLG_CTRL0_PORT_EN; writel(val, port->base + MVPP22_XLG_CTRL0_REG); @@ -3146,8 +3147,7 @@ static void mvpp22_mode_reconfigure(struct mvpp2_port *port) ctrl3 = readl(port->base + MVPP22_XLG_CTRL3_REG); ctrl3 &= ~MVPP22_XLG_CTRL3_MACMODESELECT_MASK; - if (port->phy_interface == PHY_INTERFACE_MODE_XAUI || - port->phy_interface == PHY_INTERFACE_MODE_10GKR) + if (mvpp2_is_xlg(port->phy_interface)) ctrl3 |= MVPP22_XLG_CTRL3_MACMODESELECT_10G; else ctrl3 |= MVPP22_XLG_CTRL3_MACMODESELECT_GMAC; @@ -3155,9 +3155,7 @@ static void mvpp22_mode_reconfigure(struct mvpp2_port *port) writel(ctrl3, port->base + MVPP22_XLG_CTRL3_REG); } - if (port->gop_id == 0 && - (port->phy_interface == PHY_INTERFACE_MODE_XAUI || - port->phy_interface == PHY_INTERFACE_MODE_10GKR)) + if (port->gop_id == 0 && mvpp2_is_xlg(port->phy_interface)) mvpp2_xlg_max_rx_size_set(port); else mvpp2_gmac_max_rx_size_set(port); From 1d9b041e9c5cb6a06e0943f14daf9806e6fe55f1 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 11 Feb 2019 10:23:15 +0000 Subject: [PATCH 1025/1436] net: marvell: mvpp2: use mvpp2_is_xlg() helper elsewhere There are several places which make the decision whether to access the XLGMAC vs GMAC that only check for PHY_INTERFACE_MODE_10GKR and not its XAUI variant. Switch these to use the new helper so that we have consistency through the driver. Signed-off-by: Russell King Reviewed-by: Maxime Chevallier Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 8b42b0367c35..191d9ce85b7e 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -1106,7 +1106,7 @@ static void mvpp22_gop_unmask_irq(struct mvpp2_port *port) if (port->gop_id == 0) { /* Enable the XLG/GIG irqs for this port */ val = readl(port->base + MVPP22_XLG_EXT_INT_MASK); - if (port->phy_interface == PHY_INTERFACE_MODE_10GKR) + if (mvpp2_is_xlg(port->phy_interface)) val |= MVPP22_XLG_EXT_INT_MASK_XLG; else val |= MVPP22_XLG_EXT_INT_MASK_GIG; @@ -2456,8 +2456,7 @@ static irqreturn_t mvpp2_link_status_isr(int irq, void *dev_id) mvpp22_gop_mask_irq(port); - if (port->gop_id == 0 && - port->phy_interface == PHY_INTERFACE_MODE_10GKR) { + if (port->gop_id == 0 && mvpp2_is_xlg(port->phy_interface)) { val = readl(port->base + MVPP22_XLG_INT_STAT); if (val & MVPP22_XLG_INT_STAT_LINK) { event = true; @@ -4665,7 +4664,7 @@ static void mvpp2_mac_config(struct net_device *dev, unsigned int mode, bool change_interface = port->phy_interface != state->interface; /* Check for invalid configuration */ - if (state->interface == PHY_INTERFACE_MODE_10GKR && port->gop_id != 0) { + if (mvpp2_is_xlg(state->interface) && port->gop_id != 0) { netdev_err(dev, "Invalid mode on %s\n", dev->name); return; } @@ -4685,7 +4684,7 @@ static void mvpp2_mac_config(struct net_device *dev, unsigned int mode, } /* mac (re)configuration */ - if (state->interface == PHY_INTERFACE_MODE_10GKR) + if (mvpp2_is_xlg(state->interface)) mvpp2_xlg_config(port, mode, state); else if (phy_interface_mode_is_rgmii(state->interface) || phy_interface_mode_is_8023z(state->interface) || @@ -4707,8 +4706,7 @@ static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode, struct mvpp2_port *port = netdev_priv(dev); u32 val; - if (!phylink_autoneg_inband(mode) && - interface != PHY_INTERFACE_MODE_10GKR) { + if (!phylink_autoneg_inband(mode) && !mvpp2_is_xlg(interface)) { val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); val &= ~MVPP2_GMAC_FORCE_LINK_DOWN; val |= MVPP2_GMAC_FORCE_LINK_PASS; @@ -4728,8 +4726,7 @@ static void mvpp2_mac_link_down(struct net_device *dev, unsigned int mode, struct mvpp2_port *port = netdev_priv(dev); u32 val; - if (!phylink_autoneg_inband(mode) && - interface != PHY_INTERFACE_MODE_10GKR) { + if (!phylink_autoneg_inband(mode) && !mvpp2_is_xlg(interface)) { val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); val &= ~MVPP2_GMAC_FORCE_LINK_PASS; val |= MVPP2_GMAC_FORCE_LINK_DOWN; From fde55ea74ce69b1ba2fb108b197f7c3c6b8749e1 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 11 Feb 2019 19:09:07 +0800 Subject: [PATCH 1026/1436] devlink: use direct return of genlmsg_reply This can remove redundant check Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- net/core/devlink.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index d3dfdcacf7eb..283c3ed9f25e 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4356,11 +4356,8 @@ static int devlink_fmsg_snd(struct devlink_fmsg *fmsg, err = -EMSGSIZE; goto nla_put_failure; } - err = genlmsg_reply(skb, info); - if (err) - return err; - return 0; + return genlmsg_reply(skb, info); nla_put_failure: nlmsg_free(skb); From c0ab4732d4c658f74eb929d3f615d390f03d660c Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Mon, 11 Feb 2019 11:31:05 +0000 Subject: [PATCH 1027/1436] net/tls: Do not use async crypto for non-data records Addition of tls1.3 support broke tls1.2 handshake when async crypto accelerator is used. This is because the record type for non-data records is not propagated to user application. Also when async decryption happens, the decryption does not stop when two different types of records get dequeued and submitted for decryption. To address it, we decrypt tls1.2 non-data records in synchronous way. We check whether the record we just processed has same type as the previous one before checking for async condition and jumping to dequeue next record. Fixes: 130b392c6cd6b ("net: tls: Add tls 1.3 support") Signed-off-by: Vakul Garg Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index fe8c287cbaa1..ae4784734547 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1645,10 +1645,10 @@ int tls_sw_recvmsg(struct sock *sk, do { bool retain_skb = false; - bool async = false; bool zc = false; int to_decrypt; int chunk = 0; + bool async; skb = tls_wait_data(sk, psock, flags, timeo, &err); if (!skb) { @@ -1674,18 +1674,21 @@ int tls_sw_recvmsg(struct sock *sk, tls_ctx->crypto_recv.info.version != TLS_1_3_VERSION) zc = true; + /* Do not use async mode if record is non-data */ + if (ctx->control == TLS_RECORD_TYPE_DATA) + async = ctx->async_capable; + else + async = false; + err = decrypt_skb_update(sk, skb, &msg->msg_iter, - &chunk, &zc, ctx->async_capable); + &chunk, &zc, async); if (err < 0 && err != -EINPROGRESS) { tls_err_abort(sk, EBADMSG); goto recv_end; } - if (err == -EINPROGRESS) { - async = true; + if (err == -EINPROGRESS) num_async++; - goto pick_next_record; - } if (!cmsg) { int cerr; @@ -1704,6 +1707,9 @@ int tls_sw_recvmsg(struct sock *sk, goto recv_end; } + if (async) + goto pick_next_record; + if (!zc) { if (rxm->full_len > len) { retain_skb = true; From d1f20798a119be71746949ba9b2e2ff330fdc038 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 11 Feb 2019 19:32:20 +0800 Subject: [PATCH 1028/1436] ipv6: propagate genlmsg_reply return code genlmsg_reply can fail, so propagate its return code Fixes: 915d7e5e593 ("ipv6: sr: add code base for control plane support of SR-IPv6") Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- net/ipv6/seg6.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 8d0ba757a46c..9b2f272ca164 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -221,9 +221,7 @@ static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); genlmsg_end(msg, hdr); - genlmsg_reply(msg, info); - - return 0; + return genlmsg_reply(msg, info); nla_put_failure: rcu_read_unlock(); From 9f771f1f52f26acd61479eb1919801cbbfab3a57 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 11 Feb 2019 22:16:13 +0100 Subject: [PATCH 1029/1436] net: phy: simplify genphy_config_eee_advert Use new function phy_modify_mmd_changed(), the result speaks for itself. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 3d14e48aebc5..2c61282a2726 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1578,31 +1578,16 @@ static int genphy_config_advert(struct phy_device *phydev) */ static int genphy_config_eee_advert(struct phy_device *phydev) { - int broken = phydev->eee_broken_modes; - int old_adv, adv; + int err; /* Nothing to disable */ - if (!broken) + if (!phydev->eee_broken_modes) return 0; - /* If the following call fails, we assume that EEE is not - * supported by the phy. If we read 0, EEE is not advertised - * In both case, we don't need to continue - */ - adv = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV); - if (adv <= 0) - return 0; - - old_adv = adv; - adv &= ~broken; - - /* Advertising remains unchanged with the broken mask */ - if (old_adv == adv) - return 0; - - phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, adv); - - return 1; + err = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, + phydev->eee_broken_modes, 0); + /* If the call failed, we assume that EEE is not supported */ + return err < 0 ? 0 : err; } /** From 1ecb195753a18b44df29238994026d5e4cb64db3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 11 Feb 2019 13:17:47 -0800 Subject: [PATCH 1030/1436] mlxsw: spectrum_switchdev: Remove getting PORT_BRIDGE_FLAGS There is no code that will query the SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS attribute remove support for that. Signed-off-by: Florian Fainelli Acked-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_switchdev.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 95e37de3e48f..4c5780f8f4b2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -431,19 +431,6 @@ static void mlxsw_sp_bridge_vlan_put(struct mlxsw_sp_bridge_vlan *bridge_vlan) mlxsw_sp_bridge_vlan_destroy(bridge_vlan); } -static void mlxsw_sp_port_bridge_flags_get(struct mlxsw_sp_bridge *bridge, - struct net_device *dev, - unsigned long *brport_flags) -{ - struct mlxsw_sp_bridge_port *bridge_port; - - bridge_port = mlxsw_sp_bridge_port_find(bridge, dev); - if (WARN_ON(!bridge_port)) - return; - - memcpy(brport_flags, &bridge_port->flags, sizeof(*brport_flags)); -} - static int mlxsw_sp_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) { @@ -451,10 +438,6 @@ static int mlxsw_sp_port_attr_get(struct net_device *dev, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - mlxsw_sp_port_bridge_flags_get(mlxsw_sp->bridge, attr->orig_dev, - &attr->u.brport_flags); - break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD; From 610d2b601bbab632e91c6edfb15c44790ad85d32 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 11 Feb 2019 13:17:48 -0800 Subject: [PATCH 1031/1436] rocker: Remove getting PORT_BRIDGE_FLAGS There is no code that attempts to get the SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS attribute, remove support for that. Signed-off-by: Florian Fainelli Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.h | 2 -- drivers/net/ethernet/rocker/rocker_main.c | 15 --------------- drivers/net/ethernet/rocker/rocker_ofdpa.c | 10 ---------- 3 files changed, 27 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h index 748fb12260a6..2b2e1c4f0dc3 100644 --- a/drivers/net/ethernet/rocker/rocker.h +++ b/drivers/net/ethernet/rocker/rocker.h @@ -109,8 +109,6 @@ struct rocker_world_ops { int (*port_attr_bridge_flags_set)(struct rocker_port *rocker_port, unsigned long brport_flags, struct switchdev_trans *trans); - int (*port_attr_bridge_flags_get)(const struct rocker_port *rocker_port, - unsigned long *p_brport_flags); int (*port_attr_bridge_flags_support_get)(const struct rocker_port * rocker_port, unsigned long * diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 66f72f8c46e5..5ce8d5aba603 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -1582,17 +1582,6 @@ rocker_world_port_attr_bridge_flags_set(struct rocker_port *rocker_port, trans); } -static int -rocker_world_port_attr_bridge_flags_get(const struct rocker_port *rocker_port, - unsigned long *p_brport_flags) -{ - struct rocker_world_ops *wops = rocker_port->rocker->wops; - - if (!wops->port_attr_bridge_flags_get) - return -EOPNOTSUPP; - return wops->port_attr_bridge_flags_get(rocker_port, p_brport_flags); -} - static int rocker_world_port_attr_bridge_flags_support_get(const struct rocker_port * rocker_port, @@ -2061,10 +2050,6 @@ static int rocker_port_attr_get(struct net_device *dev, int err = 0; switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - err = rocker_world_port_attr_bridge_flags_get(rocker_port, - &attr->u.brport_flags); - break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: err = rocker_world_port_attr_bridge_flags_support_get(rocker_port, &attr->u.brport_flags_support); diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index bea7895930f6..9c07f6040720 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -2511,16 +2511,6 @@ static int ofdpa_port_attr_bridge_flags_set(struct rocker_port *rocker_port, return err; } -static int -ofdpa_port_attr_bridge_flags_get(const struct rocker_port *rocker_port, - unsigned long *p_brport_flags) -{ - const struct ofdpa_port *ofdpa_port = rocker_port->wpriv; - - *p_brport_flags = ofdpa_port->brport_flags; - return 0; -} - static int ofdpa_port_attr_bridge_flags_support_get(const struct rocker_port * rocker_port, From 1b8b589d91038e9641f1a3b80bbe145bf7d0706b Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 11 Feb 2019 13:17:49 -0800 Subject: [PATCH 1032/1436] staging: fsl-dpaa2: ethsw: Remove getting PORT_BRIDGE_FLAGS There is no code that tries to get the attribute SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, remove support for doing that. Signed-off-by: Florian Fainelli Acked-by: Greg Kroah-Hartman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c index e559f4c25cf7..e5a14c7c777f 100644 --- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c +++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c @@ -646,11 +646,6 @@ static int swdev_port_attr_get(struct net_device *netdev, struct ethsw_port_priv *port_priv = netdev_priv(netdev); switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - attr->u.brport_flags = - (port_priv->ethsw_data->learning ? BR_LEARNING : 0) | - (port_priv->flood ? BR_FLOOD : 0); - break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD; break; From 4ea7b0cf0da7494d5c7b8dc328493c50640d0cbc Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 11 Feb 2019 13:02:25 -0800 Subject: [PATCH 1033/1436] net/skbuff: fix up kernel-doc placement There are several skb_* functions where the locked and unlocked functions are confusingly documented. For several of them, the kernel-doc for the unlocked version is placed above the locked version, which to the casual reader makes it seems like the locked version "takes no locks and you must therefore hold required locks before calling it." One can see, for example, that this link claims to document skb_queue_head(), while instead describing __skb_queue_head(). https://www.kernel.org/doc/html/latest/networking/kapi.html#c.skb_queue_head The correct documentation for skb_queue_head() is also included further down the page. This diff tested via: $ scripts/kernel-doc -rst include/linux/skbuff.h net/core/skbuff.c No new warnings were seen, and the output makes a little more sense. Signed-off-by: Brian Norris Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 831846617d07..a41e84f7730c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1889,12 +1889,12 @@ static inline void __skb_queue_before(struct sk_buff_head *list, * * A buffer cannot be placed on two lists at the same time. */ -void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) { __skb_queue_after(list, (struct sk_buff *)list, newsk); } +void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); /** * __skb_queue_tail - queue a buffer at the list tail @@ -1906,12 +1906,12 @@ static inline void __skb_queue_head(struct sk_buff_head *list, * * A buffer cannot be placed on two lists at the same time. */ -void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) { __skb_queue_before(list, (struct sk_buff *)list, newsk); } +void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); /* * remove sk_buff from list. _Must_ be called atomically, and with @@ -1938,7 +1938,6 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) * so must be used with appropriate locks held only. The head item is * returned or %NULL if the list is empty. */ -struct sk_buff *skb_dequeue(struct sk_buff_head *list); static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) { struct sk_buff *skb = skb_peek(list); @@ -1946,6 +1945,7 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) __skb_unlink(skb, list); return skb; } +struct sk_buff *skb_dequeue(struct sk_buff_head *list); /** * __skb_dequeue_tail - remove from the tail of the queue @@ -1955,7 +1955,6 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) * so must be used with appropriate locks held only. The tail item is * returned or %NULL if the list is empty. */ -struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) { struct sk_buff *skb = skb_peek_tail(list); @@ -1963,6 +1962,7 @@ static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) __skb_unlink(skb, list); return skb; } +struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); static inline bool skb_is_nonlinear(const struct sk_buff *skb) @@ -2653,13 +2653,13 @@ static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask) * the list and one reference dropped. This function does not take the * list lock and the caller must hold the relevant locks to use it. */ -void skb_queue_purge(struct sk_buff_head *list); static inline void __skb_queue_purge(struct sk_buff_head *list) { struct sk_buff *skb; while ((skb = __skb_dequeue(list)) != NULL) kfree_skb(skb); } +void skb_queue_purge(struct sk_buff_head *list); unsigned int skb_rbtree_purge(struct rb_root *root); @@ -3028,7 +3028,7 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len) } /** - * skb_put_padto - increase size and pad an skbuff up to a minimal size + * __skb_put_padto - increase size and pad an skbuff up to a minimal size * @skb: buffer to pad * @len: minimal length * @free_on_error: free buffer on error From 1e562c815e67185e030bcaa06323e95d85d80987 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 12 Feb 2019 12:23:56 +0800 Subject: [PATCH 1034/1436] ptp_qoriq: make structure/function names more consistent Strings containing "ptp_qoriq" or "qoriq_ptp" which were used for structure/function names were complained by users. Let's just use the unique "ptp_qoriq" to make these names more consistent. This patch is just to unify the names using "ptp_qoriq". It hasn't changed any functions. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- .../ethernet/freescale/dpaa/dpaa_ethtool.c | 2 +- .../net/ethernet/freescale/gianfar_ethtool.c | 2 +- drivers/ptp/ptp_qoriq.c | 288 +++++++++--------- drivers/ptp/ptp_qoriq_debugfs.c | 36 +-- include/linux/fsl/ptp_qoriq.h | 14 +- 5 files changed, 171 insertions(+), 171 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c index 62497119c85f..bdee441bc3b7 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -501,7 +501,7 @@ static int dpaa_get_ts_info(struct net_device *net_dev, struct device_node *mac_node = dev->of_node; struct device_node *fman_node = NULL, *ptp_node = NULL; struct platform_device *ptp_dev = NULL; - struct qoriq_ptp *ptp = NULL; + struct ptp_qoriq *ptp = NULL; info->phc_index = -1; diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index 241325c35cb4..27ed995f439a 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -1492,7 +1492,7 @@ static int gfar_get_ts_info(struct net_device *dev, struct gfar_private *priv = netdev_priv(dev); struct platform_device *ptp_dev; struct device_node *ptp_node; - struct qoriq_ptp *ptp = NULL; + struct ptp_qoriq *ptp = NULL; info->phc_index = -1; diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index 43416b2e8a13..8c10d0f8864f 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -37,10 +37,10 @@ * Register access functions */ -/* Caller must hold qoriq_ptp->lock. */ -static u64 tmr_cnt_read(struct qoriq_ptp *qoriq_ptp) +/* Caller must hold ptp_qoriq->lock. */ +static u64 tmr_cnt_read(struct ptp_qoriq *ptp_qoriq) { - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u64 ns; u32 lo, hi; @@ -51,10 +51,10 @@ static u64 tmr_cnt_read(struct qoriq_ptp *qoriq_ptp) return ns; } -/* Caller must hold qoriq_ptp->lock. */ -static void tmr_cnt_write(struct qoriq_ptp *qoriq_ptp, u64 ns) +/* Caller must hold ptp_qoriq->lock. */ +static void tmr_cnt_write(struct ptp_qoriq *ptp_qoriq, u64 ns) { - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 hi = ns >> 32; u32 lo = ns & 0xffffffff; @@ -62,36 +62,36 @@ static void tmr_cnt_write(struct qoriq_ptp *qoriq_ptp, u64 ns) qoriq_write(®s->ctrl_regs->tmr_cnt_h, hi); } -/* Caller must hold qoriq_ptp->lock. */ -static void set_alarm(struct qoriq_ptp *qoriq_ptp) +/* Caller must hold ptp_qoriq->lock. */ +static void set_alarm(struct ptp_qoriq *ptp_qoriq) { - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u64 ns; u32 lo, hi; - ns = tmr_cnt_read(qoriq_ptp) + 1500000000ULL; + ns = tmr_cnt_read(ptp_qoriq) + 1500000000ULL; ns = div_u64(ns, 1000000000UL) * 1000000000ULL; - ns -= qoriq_ptp->tclk_period; + ns -= ptp_qoriq->tclk_period; hi = ns >> 32; lo = ns & 0xffffffff; qoriq_write(®s->alarm_regs->tmr_alarm1_l, lo); qoriq_write(®s->alarm_regs->tmr_alarm1_h, hi); } -/* Caller must hold qoriq_ptp->lock. */ -static void set_fipers(struct qoriq_ptp *qoriq_ptp) +/* Caller must hold ptp_qoriq->lock. */ +static void set_fipers(struct ptp_qoriq *ptp_qoriq) { - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; - set_alarm(qoriq_ptp); - qoriq_write(®s->fiper_regs->tmr_fiper1, qoriq_ptp->tmr_fiper1); - qoriq_write(®s->fiper_regs->tmr_fiper2, qoriq_ptp->tmr_fiper2); + set_alarm(ptp_qoriq); + qoriq_write(®s->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1); + qoriq_write(®s->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2); } -static int extts_clean_up(struct qoriq_ptp *qoriq_ptp, int index, +static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, bool update_event) { - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; struct ptp_clock_event event; void __iomem *reg_etts_l; void __iomem *reg_etts_h; @@ -122,11 +122,11 @@ static int extts_clean_up(struct qoriq_ptp *qoriq_ptp, int index, if (update_event) { event.timestamp = ((u64) hi) << 32; event.timestamp |= lo; - ptp_clock_event(qoriq_ptp->clock, &event); + ptp_clock_event(ptp_qoriq->clock, &event); } stat = qoriq_read(®s->ctrl_regs->tmr_stat); - } while (qoriq_ptp->extts_fifo_support && (stat & valid)); + } while (ptp_qoriq->extts_fifo_support && (stat & valid)); return 0; } @@ -137,61 +137,61 @@ static int extts_clean_up(struct qoriq_ptp *qoriq_ptp, int index, static irqreturn_t isr(int irq, void *priv) { - struct qoriq_ptp *qoriq_ptp = priv; - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = priv; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; struct ptp_clock_event event; u64 ns; u32 ack = 0, lo, hi, mask, val, irqs; - spin_lock(&qoriq_ptp->lock); + spin_lock(&ptp_qoriq->lock); val = qoriq_read(®s->ctrl_regs->tmr_tevent); mask = qoriq_read(®s->ctrl_regs->tmr_temask); - spin_unlock(&qoriq_ptp->lock); + spin_unlock(&ptp_qoriq->lock); irqs = val & mask; if (irqs & ETS1) { ack |= ETS1; - extts_clean_up(qoriq_ptp, 0, true); + extts_clean_up(ptp_qoriq, 0, true); } if (irqs & ETS2) { ack |= ETS2; - extts_clean_up(qoriq_ptp, 1, true); + extts_clean_up(ptp_qoriq, 1, true); } if (irqs & ALM2) { ack |= ALM2; - if (qoriq_ptp->alarm_value) { + if (ptp_qoriq->alarm_value) { event.type = PTP_CLOCK_ALARM; event.index = 0; - event.timestamp = qoriq_ptp->alarm_value; - ptp_clock_event(qoriq_ptp->clock, &event); + event.timestamp = ptp_qoriq->alarm_value; + ptp_clock_event(ptp_qoriq->clock, &event); } - if (qoriq_ptp->alarm_interval) { - ns = qoriq_ptp->alarm_value + qoriq_ptp->alarm_interval; + if (ptp_qoriq->alarm_interval) { + ns = ptp_qoriq->alarm_value + ptp_qoriq->alarm_interval; hi = ns >> 32; lo = ns & 0xffffffff; qoriq_write(®s->alarm_regs->tmr_alarm2_l, lo); qoriq_write(®s->alarm_regs->tmr_alarm2_h, hi); - qoriq_ptp->alarm_value = ns; + ptp_qoriq->alarm_value = ns; } else { - spin_lock(&qoriq_ptp->lock); + spin_lock(&ptp_qoriq->lock); mask = qoriq_read(®s->ctrl_regs->tmr_temask); mask &= ~ALM2EN; qoriq_write(®s->ctrl_regs->tmr_temask, mask); - spin_unlock(&qoriq_ptp->lock); - qoriq_ptp->alarm_value = 0; - qoriq_ptp->alarm_interval = 0; + spin_unlock(&ptp_qoriq->lock); + ptp_qoriq->alarm_value = 0; + ptp_qoriq->alarm_interval = 0; } } if (irqs & PP1) { ack |= PP1; event.type = PTP_CLOCK_PPS; - ptp_clock_event(qoriq_ptp->clock, &event); + ptp_clock_event(ptp_qoriq->clock, &event); } if (ack) { @@ -210,14 +210,14 @@ static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) u64 adj, diff; u32 tmr_add; int neg_adj = 0; - struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps); - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps); + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; if (scaled_ppm < 0) { neg_adj = 1; scaled_ppm = -scaled_ppm; } - tmr_add = qoriq_ptp->tmr_add; + tmr_add = ptp_qoriq->tmr_add; adj = tmr_add; /* calculate diff as adj*(scaled_ppm/65536)/1000000 @@ -238,16 +238,16 @@ static int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta) { s64 now; unsigned long flags; - struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps); + struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps); - spin_lock_irqsave(&qoriq_ptp->lock, flags); + spin_lock_irqsave(&ptp_qoriq->lock, flags); - now = tmr_cnt_read(qoriq_ptp); + now = tmr_cnt_read(ptp_qoriq); now += delta; - tmr_cnt_write(qoriq_ptp, now); - set_fipers(qoriq_ptp); + tmr_cnt_write(ptp_qoriq, now); + set_fipers(ptp_qoriq); - spin_unlock_irqrestore(&qoriq_ptp->lock, flags); + spin_unlock_irqrestore(&ptp_qoriq->lock, flags); return 0; } @@ -257,13 +257,13 @@ static int ptp_qoriq_gettime(struct ptp_clock_info *ptp, { u64 ns; unsigned long flags; - struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps); + struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps); - spin_lock_irqsave(&qoriq_ptp->lock, flags); + spin_lock_irqsave(&ptp_qoriq->lock, flags); - ns = tmr_cnt_read(qoriq_ptp); + ns = tmr_cnt_read(ptp_qoriq); - spin_unlock_irqrestore(&qoriq_ptp->lock, flags); + spin_unlock_irqrestore(&ptp_qoriq->lock, flags); *ts = ns_to_timespec64(ns); @@ -275,16 +275,16 @@ static int ptp_qoriq_settime(struct ptp_clock_info *ptp, { u64 ns; unsigned long flags; - struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps); + struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps); ns = timespec64_to_ns(ts); - spin_lock_irqsave(&qoriq_ptp->lock, flags); + spin_lock_irqsave(&ptp_qoriq->lock, flags); - tmr_cnt_write(qoriq_ptp, ns); - set_fipers(qoriq_ptp); + tmr_cnt_write(ptp_qoriq, ns); + set_fipers(ptp_qoriq); - spin_unlock_irqrestore(&qoriq_ptp->lock, flags); + spin_unlock_irqrestore(&ptp_qoriq->lock, flags); return 0; } @@ -292,8 +292,8 @@ static int ptp_qoriq_settime(struct ptp_clock_info *ptp, static int ptp_qoriq_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *rq, int on) { - struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps); - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps); + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; unsigned long flags; u32 bit, mask = 0; @@ -311,7 +311,7 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp, } if (on) - extts_clean_up(qoriq_ptp, rq->extts.index, false); + extts_clean_up(ptp_qoriq, rq->extts.index, false); break; case PTP_CLK_REQ_PPS: @@ -321,7 +321,7 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp, return -EOPNOTSUPP; } - spin_lock_irqsave(&qoriq_ptp->lock, flags); + spin_lock_irqsave(&ptp_qoriq->lock, flags); mask = qoriq_read(®s->ctrl_regs->tmr_temask); if (on) { @@ -333,7 +333,7 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp, qoriq_write(®s->ctrl_regs->tmr_temask, mask); - spin_unlock_irqrestore(&qoriq_ptp->lock, flags); + spin_unlock_irqrestore(&ptp_qoriq->lock, flags); return 0; } @@ -354,7 +354,7 @@ static const struct ptp_clock_info ptp_qoriq_caps = { }; /** - * qoriq_ptp_nominal_freq - calculate nominal frequency according to + * ptp_qoriq_nominal_freq - calculate nominal frequency according to * reference clock frequency * * @clk_src: reference clock frequency @@ -365,7 +365,7 @@ static const struct ptp_clock_info ptp_qoriq_caps = { * * Return the nominal frequency */ -static u32 qoriq_ptp_nominal_freq(u32 clk_src) +static u32 ptp_qoriq_nominal_freq(u32 clk_src) { u32 remainder = 0; @@ -385,9 +385,9 @@ static u32 qoriq_ptp_nominal_freq(u32 clk_src) } /** - * qoriq_ptp_auto_config - calculate a set of default configurations + * ptp_qoriq_auto_config - calculate a set of default configurations * - * @qoriq_ptp: pointer to qoriq_ptp + * @ptp_qoriq: pointer to ptp_qoriq * @node: pointer to device_node * * If below dts properties are not provided, this function will be @@ -401,7 +401,7 @@ static u32 qoriq_ptp_nominal_freq(u32 clk_src) * * Return 0 if success */ -static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp, +static int ptp_qoriq_auto_config(struct ptp_qoriq *ptp_qoriq, struct device_node *node) { struct clk *clk; @@ -411,7 +411,7 @@ static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp, u32 remainder = 0; u32 clk_src = 0; - qoriq_ptp->cksel = DEFAULT_CKSEL; + ptp_qoriq->cksel = DEFAULT_CKSEL; clk = of_clk_get(node, 0); if (!IS_ERR(clk)) { @@ -424,12 +424,12 @@ static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp, return -EINVAL; } - nominal_freq = qoriq_ptp_nominal_freq(clk_src); + nominal_freq = ptp_qoriq_nominal_freq(clk_src); if (!nominal_freq) return -EINVAL; - qoriq_ptp->tclk_period = 1000000000UL / nominal_freq; - qoriq_ptp->tmr_prsc = DEFAULT_TMR_PRSC; + ptp_qoriq->tclk_period = 1000000000UL / nominal_freq; + ptp_qoriq->tmr_prsc = DEFAULT_TMR_PRSC; /* Calculate initial frequency compensation value for TMR_ADD register. * freq_comp = ceil(2^32 / freq_ratio) @@ -440,171 +440,171 @@ static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp, if (remainder) freq_comp++; - qoriq_ptp->tmr_add = freq_comp; - qoriq_ptp->tmr_fiper1 = DEFAULT_FIPER1_PERIOD - qoriq_ptp->tclk_period; - qoriq_ptp->tmr_fiper2 = DEFAULT_FIPER2_PERIOD - qoriq_ptp->tclk_period; + ptp_qoriq->tmr_add = freq_comp; + ptp_qoriq->tmr_fiper1 = DEFAULT_FIPER1_PERIOD - ptp_qoriq->tclk_period; + ptp_qoriq->tmr_fiper2 = DEFAULT_FIPER2_PERIOD - ptp_qoriq->tclk_period; /* max_adj = 1000000000 * (freq_ratio - 1.0) - 1 * freq_ratio = reference_clock_freq / nominal_freq */ max_adj = 1000000000ULL * (clk_src - nominal_freq); max_adj = div_u64(max_adj, nominal_freq) - 1; - qoriq_ptp->caps.max_adj = max_adj; + ptp_qoriq->caps.max_adj = max_adj; return 0; } -static int qoriq_ptp_probe(struct platform_device *dev) +static int ptp_qoriq_probe(struct platform_device *dev) { struct device_node *node = dev->dev.of_node; - struct qoriq_ptp *qoriq_ptp; - struct qoriq_ptp_registers *regs; + struct ptp_qoriq *ptp_qoriq; + struct ptp_qoriq_registers *regs; struct timespec64 now; int err = -ENOMEM; u32 tmr_ctrl; unsigned long flags; void __iomem *base; - qoriq_ptp = kzalloc(sizeof(*qoriq_ptp), GFP_KERNEL); - if (!qoriq_ptp) + ptp_qoriq = kzalloc(sizeof(*ptp_qoriq), GFP_KERNEL); + if (!ptp_qoriq) goto no_memory; err = -EINVAL; - qoriq_ptp->dev = &dev->dev; - qoriq_ptp->caps = ptp_qoriq_caps; + ptp_qoriq->dev = &dev->dev; + ptp_qoriq->caps = ptp_qoriq_caps; - if (of_property_read_u32(node, "fsl,cksel", &qoriq_ptp->cksel)) - qoriq_ptp->cksel = DEFAULT_CKSEL; + if (of_property_read_u32(node, "fsl,cksel", &ptp_qoriq->cksel)) + ptp_qoriq->cksel = DEFAULT_CKSEL; if (of_property_read_bool(node, "fsl,extts-fifo")) - qoriq_ptp->extts_fifo_support = true; + ptp_qoriq->extts_fifo_support = true; else - qoriq_ptp->extts_fifo_support = false; + ptp_qoriq->extts_fifo_support = false; if (of_property_read_u32(node, - "fsl,tclk-period", &qoriq_ptp->tclk_period) || + "fsl,tclk-period", &ptp_qoriq->tclk_period) || of_property_read_u32(node, - "fsl,tmr-prsc", &qoriq_ptp->tmr_prsc) || + "fsl,tmr-prsc", &ptp_qoriq->tmr_prsc) || of_property_read_u32(node, - "fsl,tmr-add", &qoriq_ptp->tmr_add) || + "fsl,tmr-add", &ptp_qoriq->tmr_add) || of_property_read_u32(node, - "fsl,tmr-fiper1", &qoriq_ptp->tmr_fiper1) || + "fsl,tmr-fiper1", &ptp_qoriq->tmr_fiper1) || of_property_read_u32(node, - "fsl,tmr-fiper2", &qoriq_ptp->tmr_fiper2) || + "fsl,tmr-fiper2", &ptp_qoriq->tmr_fiper2) || of_property_read_u32(node, - "fsl,max-adj", &qoriq_ptp->caps.max_adj)) { + "fsl,max-adj", &ptp_qoriq->caps.max_adj)) { pr_warn("device tree node missing required elements, try automatic configuration\n"); - if (qoriq_ptp_auto_config(qoriq_ptp, node)) + if (ptp_qoriq_auto_config(ptp_qoriq, node)) goto no_config; } err = -ENODEV; - qoriq_ptp->irq = platform_get_irq(dev, 0); + ptp_qoriq->irq = platform_get_irq(dev, 0); - if (qoriq_ptp->irq < 0) { + if (ptp_qoriq->irq < 0) { pr_err("irq not in device tree\n"); goto no_node; } - if (request_irq(qoriq_ptp->irq, isr, IRQF_SHARED, DRIVER, qoriq_ptp)) { + if (request_irq(ptp_qoriq->irq, isr, IRQF_SHARED, DRIVER, ptp_qoriq)) { pr_err("request_irq failed\n"); goto no_node; } - qoriq_ptp->rsrc = platform_get_resource(dev, IORESOURCE_MEM, 0); - if (!qoriq_ptp->rsrc) { + ptp_qoriq->rsrc = platform_get_resource(dev, IORESOURCE_MEM, 0); + if (!ptp_qoriq->rsrc) { pr_err("no resource\n"); goto no_resource; } - if (request_resource(&iomem_resource, qoriq_ptp->rsrc)) { + if (request_resource(&iomem_resource, ptp_qoriq->rsrc)) { pr_err("resource busy\n"); goto no_resource; } - spin_lock_init(&qoriq_ptp->lock); + spin_lock_init(&ptp_qoriq->lock); - base = ioremap(qoriq_ptp->rsrc->start, - resource_size(qoriq_ptp->rsrc)); + base = ioremap(ptp_qoriq->rsrc->start, + resource_size(ptp_qoriq->rsrc)); if (!base) { pr_err("ioremap ptp registers failed\n"); goto no_ioremap; } - qoriq_ptp->base = base; + ptp_qoriq->base = base; if (of_device_is_compatible(node, "fsl,fman-ptp-timer")) { - qoriq_ptp->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET; - qoriq_ptp->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET; - qoriq_ptp->regs.fiper_regs = base + FMAN_FIPER_REGS_OFFSET; - qoriq_ptp->regs.etts_regs = base + FMAN_ETTS_REGS_OFFSET; + ptp_qoriq->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET; + ptp_qoriq->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET; + ptp_qoriq->regs.fiper_regs = base + FMAN_FIPER_REGS_OFFSET; + ptp_qoriq->regs.etts_regs = base + FMAN_ETTS_REGS_OFFSET; } else { - qoriq_ptp->regs.ctrl_regs = base + CTRL_REGS_OFFSET; - qoriq_ptp->regs.alarm_regs = base + ALARM_REGS_OFFSET; - qoriq_ptp->regs.fiper_regs = base + FIPER_REGS_OFFSET; - qoriq_ptp->regs.etts_regs = base + ETTS_REGS_OFFSET; + ptp_qoriq->regs.ctrl_regs = base + CTRL_REGS_OFFSET; + ptp_qoriq->regs.alarm_regs = base + ALARM_REGS_OFFSET; + ptp_qoriq->regs.fiper_regs = base + FIPER_REGS_OFFSET; + ptp_qoriq->regs.etts_regs = base + ETTS_REGS_OFFSET; } ktime_get_real_ts64(&now); - ptp_qoriq_settime(&qoriq_ptp->caps, &now); + ptp_qoriq_settime(&ptp_qoriq->caps, &now); tmr_ctrl = - (qoriq_ptp->tclk_period & TCLK_PERIOD_MASK) << TCLK_PERIOD_SHIFT | - (qoriq_ptp->cksel & CKSEL_MASK) << CKSEL_SHIFT; + (ptp_qoriq->tclk_period & TCLK_PERIOD_MASK) << TCLK_PERIOD_SHIFT | + (ptp_qoriq->cksel & CKSEL_MASK) << CKSEL_SHIFT; - spin_lock_irqsave(&qoriq_ptp->lock, flags); + spin_lock_irqsave(&ptp_qoriq->lock, flags); - regs = &qoriq_ptp->regs; + regs = &ptp_qoriq->regs; qoriq_write(®s->ctrl_regs->tmr_ctrl, tmr_ctrl); - qoriq_write(®s->ctrl_regs->tmr_add, qoriq_ptp->tmr_add); - qoriq_write(®s->ctrl_regs->tmr_prsc, qoriq_ptp->tmr_prsc); - qoriq_write(®s->fiper_regs->tmr_fiper1, qoriq_ptp->tmr_fiper1); - qoriq_write(®s->fiper_regs->tmr_fiper2, qoriq_ptp->tmr_fiper2); - set_alarm(qoriq_ptp); + qoriq_write(®s->ctrl_regs->tmr_add, ptp_qoriq->tmr_add); + qoriq_write(®s->ctrl_regs->tmr_prsc, ptp_qoriq->tmr_prsc); + qoriq_write(®s->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1); + qoriq_write(®s->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2); + set_alarm(ptp_qoriq); qoriq_write(®s->ctrl_regs->tmr_ctrl, tmr_ctrl|FIPERST|RTPE|TE|FRD); - spin_unlock_irqrestore(&qoriq_ptp->lock, flags); + spin_unlock_irqrestore(&ptp_qoriq->lock, flags); - qoriq_ptp->clock = ptp_clock_register(&qoriq_ptp->caps, &dev->dev); - if (IS_ERR(qoriq_ptp->clock)) { - err = PTR_ERR(qoriq_ptp->clock); + ptp_qoriq->clock = ptp_clock_register(&ptp_qoriq->caps, &dev->dev); + if (IS_ERR(ptp_qoriq->clock)) { + err = PTR_ERR(ptp_qoriq->clock); goto no_clock; } - qoriq_ptp->phc_index = ptp_clock_index(qoriq_ptp->clock); + ptp_qoriq->phc_index = ptp_clock_index(ptp_qoriq->clock); - ptp_qoriq_create_debugfs(qoriq_ptp); - platform_set_drvdata(dev, qoriq_ptp); + ptp_qoriq_create_debugfs(ptp_qoriq); + platform_set_drvdata(dev, ptp_qoriq); return 0; no_clock: - iounmap(qoriq_ptp->base); + iounmap(ptp_qoriq->base); no_ioremap: - release_resource(qoriq_ptp->rsrc); + release_resource(ptp_qoriq->rsrc); no_resource: - free_irq(qoriq_ptp->irq, qoriq_ptp); + free_irq(ptp_qoriq->irq, ptp_qoriq); no_config: no_node: - kfree(qoriq_ptp); + kfree(ptp_qoriq); no_memory: return err; } -static int qoriq_ptp_remove(struct platform_device *dev) +static int ptp_qoriq_remove(struct platform_device *dev) { - struct qoriq_ptp *qoriq_ptp = platform_get_drvdata(dev); - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = platform_get_drvdata(dev); + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; qoriq_write(®s->ctrl_regs->tmr_temask, 0); qoriq_write(®s->ctrl_regs->tmr_ctrl, 0); - ptp_qoriq_remove_debugfs(qoriq_ptp); - ptp_clock_unregister(qoriq_ptp->clock); - iounmap(qoriq_ptp->base); - release_resource(qoriq_ptp->rsrc); - free_irq(qoriq_ptp->irq, qoriq_ptp); - kfree(qoriq_ptp); + ptp_qoriq_remove_debugfs(ptp_qoriq); + ptp_clock_unregister(ptp_qoriq->clock); + iounmap(ptp_qoriq->base); + release_resource(ptp_qoriq->rsrc); + free_irq(ptp_qoriq->irq, ptp_qoriq); + kfree(ptp_qoriq); return 0; } @@ -616,16 +616,16 @@ static const struct of_device_id match_table[] = { }; MODULE_DEVICE_TABLE(of, match_table); -static struct platform_driver qoriq_ptp_driver = { +static struct platform_driver ptp_qoriq_driver = { .driver = { .name = "ptp_qoriq", .of_match_table = match_table, }, - .probe = qoriq_ptp_probe, - .remove = qoriq_ptp_remove, + .probe = ptp_qoriq_probe, + .remove = ptp_qoriq_remove, }; -module_platform_driver(qoriq_ptp_driver); +module_platform_driver(ptp_qoriq_driver); MODULE_AUTHOR("Richard Cochran "); MODULE_DESCRIPTION("PTP clock for Freescale QorIQ 1588 timer"); diff --git a/drivers/ptp/ptp_qoriq_debugfs.c b/drivers/ptp/ptp_qoriq_debugfs.c index 970595021088..3a70daf03727 100644 --- a/drivers/ptp/ptp_qoriq_debugfs.c +++ b/drivers/ptp/ptp_qoriq_debugfs.c @@ -7,8 +7,8 @@ static int ptp_qoriq_fiper1_lpbk_get(void *data, u64 *val) { - struct qoriq_ptp *qoriq_ptp = data; - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = data; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); @@ -19,8 +19,8 @@ static int ptp_qoriq_fiper1_lpbk_get(void *data, u64 *val) static int ptp_qoriq_fiper1_lpbk_set(void *data, u64 val) { - struct qoriq_ptp *qoriq_ptp = data; - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = data; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); @@ -38,8 +38,8 @@ DEFINE_DEBUGFS_ATTRIBUTE(ptp_qoriq_fiper1_fops, ptp_qoriq_fiper1_lpbk_get, static int ptp_qoriq_fiper2_lpbk_get(void *data, u64 *val) { - struct qoriq_ptp *qoriq_ptp = data; - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = data; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); @@ -50,8 +50,8 @@ static int ptp_qoriq_fiper2_lpbk_get(void *data, u64 *val) static int ptp_qoriq_fiper2_lpbk_set(void *data, u64 val) { - struct qoriq_ptp *qoriq_ptp = data; - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = data; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); @@ -67,35 +67,35 @@ static int ptp_qoriq_fiper2_lpbk_set(void *data, u64 val) DEFINE_DEBUGFS_ATTRIBUTE(ptp_qoriq_fiper2_fops, ptp_qoriq_fiper2_lpbk_get, ptp_qoriq_fiper2_lpbk_set, "%llu\n"); -void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp) +void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq) { struct dentry *root; - root = debugfs_create_dir(dev_name(qoriq_ptp->dev), NULL); + root = debugfs_create_dir(dev_name(ptp_qoriq->dev), NULL); if (IS_ERR(root)) return; if (!root) goto err_root; - qoriq_ptp->debugfs_root = root; + ptp_qoriq->debugfs_root = root; if (!debugfs_create_file_unsafe("fiper1-loopback", 0600, root, - qoriq_ptp, &ptp_qoriq_fiper1_fops)) + ptp_qoriq, &ptp_qoriq_fiper1_fops)) goto err_node; if (!debugfs_create_file_unsafe("fiper2-loopback", 0600, root, - qoriq_ptp, &ptp_qoriq_fiper2_fops)) + ptp_qoriq, &ptp_qoriq_fiper2_fops)) goto err_node; return; err_node: debugfs_remove_recursive(root); - qoriq_ptp->debugfs_root = NULL; + ptp_qoriq->debugfs_root = NULL; err_root: - dev_err(qoriq_ptp->dev, "failed to initialize debugfs\n"); + dev_err(ptp_qoriq->dev, "failed to initialize debugfs\n"); } -void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp) +void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq) { - debugfs_remove_recursive(qoriq_ptp->debugfs_root); - qoriq_ptp->debugfs_root = NULL; + debugfs_remove_recursive(ptp_qoriq->debugfs_root); + ptp_qoriq->debugfs_root = NULL; } diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index 94e9797e434c..c2a32d9ec6ba 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -49,7 +49,7 @@ struct etts_regs { u32 tmr_etts2_l; /* Timestamp of general purpose external trigger */ }; -struct qoriq_ptp_registers { +struct ptp_qoriq_registers { struct ctrl_regs __iomem *ctrl_regs; struct alarm_regs __iomem *alarm_regs; struct fiper_regs __iomem *fiper_regs; @@ -136,9 +136,9 @@ struct qoriq_ptp_registers { #define DEFAULT_FIPER1_PERIOD 1000000000 #define DEFAULT_FIPER2_PERIOD 100000 -struct qoriq_ptp { +struct ptp_qoriq { void __iomem *base; - struct qoriq_ptp_registers regs; + struct ptp_qoriq_registers regs; spinlock_t lock; /* protects regs */ struct ptp_clock *clock; struct ptp_clock_info caps; @@ -172,12 +172,12 @@ static inline void qoriq_write(unsigned __iomem *addr, u32 val) } #ifdef CONFIG_DEBUG_FS -void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp); -void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp); +void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq); +void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq); #else -static inline void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp) +static inline void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq) { } -static inline void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp) +static inline void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq) { } #endif From 73356e4ea895d5d4fb2bed30c32d3293b090f3ce Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 12 Feb 2019 12:23:57 +0800 Subject: [PATCH 1035/1436] ptp_qoriq: make ptp operations global This patch is to make functions of ptp operations global, so that ENETC PTP driver which is a PCI driver for same 1588 timer IP block could reuse them. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/ptp_qoriq.c | 27 ++++++++++++++++----------- include/linux/fsl/ptp_qoriq.h | 9 +++++++++ 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index 8c10d0f8864f..1f3e73e62de9 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -22,7 +22,6 @@ #include #include -#include #include #include #include @@ -135,7 +134,7 @@ static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, * Interrupt service routine */ -static irqreturn_t isr(int irq, void *priv) +irqreturn_t ptp_qoriq_isr(int irq, void *priv) { struct ptp_qoriq *ptp_qoriq = priv; struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; @@ -200,12 +199,13 @@ static irqreturn_t isr(int irq, void *priv) } else return IRQ_NONE; } +EXPORT_SYMBOL_GPL(ptp_qoriq_isr); /* * PTP clock operations */ -static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) { u64 adj, diff; u32 tmr_add; @@ -233,8 +233,9 @@ static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) return 0; } +EXPORT_SYMBOL_GPL(ptp_qoriq_adjfine); -static int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta) +int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta) { s64 now; unsigned long flags; @@ -251,9 +252,9 @@ static int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta) return 0; } +EXPORT_SYMBOL_GPL(ptp_qoriq_adjtime); -static int ptp_qoriq_gettime(struct ptp_clock_info *ptp, - struct timespec64 *ts) +int ptp_qoriq_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) { u64 ns; unsigned long flags; @@ -269,9 +270,10 @@ static int ptp_qoriq_gettime(struct ptp_clock_info *ptp, return 0; } +EXPORT_SYMBOL_GPL(ptp_qoriq_gettime); -static int ptp_qoriq_settime(struct ptp_clock_info *ptp, - const struct timespec64 *ts) +int ptp_qoriq_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) { u64 ns; unsigned long flags; @@ -288,9 +290,10 @@ static int ptp_qoriq_settime(struct ptp_clock_info *ptp, return 0; } +EXPORT_SYMBOL_GPL(ptp_qoriq_settime); -static int ptp_qoriq_enable(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, int on) +int ptp_qoriq_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) { struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps); struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; @@ -336,6 +339,7 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp, spin_unlock_irqrestore(&ptp_qoriq->lock, flags); return 0; } +EXPORT_SYMBOL_GPL(ptp_qoriq_enable); static const struct ptp_clock_info ptp_qoriq_caps = { .owner = THIS_MODULE, @@ -508,7 +512,8 @@ static int ptp_qoriq_probe(struct platform_device *dev) pr_err("irq not in device tree\n"); goto no_node; } - if (request_irq(ptp_qoriq->irq, isr, IRQF_SHARED, DRIVER, ptp_qoriq)) { + if (request_irq(ptp_qoriq->irq, ptp_qoriq_isr, IRQF_SHARED, + DRIVER, ptp_qoriq)) { pr_err("request_irq failed\n"); goto no_node; } diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index c2a32d9ec6ba..75e6f0523cb1 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -7,6 +7,7 @@ #define __PTP_QORIQ_H__ #include +#include #include /* @@ -171,6 +172,14 @@ static inline void qoriq_write(unsigned __iomem *addr, u32 val) iowrite32be(val, addr); } +irqreturn_t ptp_qoriq_isr(int irq, void *priv); +int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm); +int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta); +int ptp_qoriq_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts); +int ptp_qoriq_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts); +int ptp_qoriq_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on); #ifdef CONFIG_DEBUG_FS void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq); void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq); From ff54571a747bc1fc8e132ef9c512451ec6de3336 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 12 Feb 2019 12:23:58 +0800 Subject: [PATCH 1036/1436] ptp_qoriq: convert to use ptp_qoriq_init/free Moved QorIQ PTP clock initialization/free into new functions ptp_qoriq_init()/ptp_qoriq_free(). These functions could also be reused by ENETC PTP drvier which is a PCI driver for same 1588 timer IP block. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/ptp_qoriq.c | 144 ++++++++++++++++++---------------- include/linux/fsl/ptp_qoriq.h | 3 + 2 files changed, 80 insertions(+), 67 deletions(-) diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index 1f3e73e62de9..db4f929ea4e9 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -458,25 +458,17 @@ static int ptp_qoriq_auto_config(struct ptp_qoriq *ptp_qoriq, return 0; } -static int ptp_qoriq_probe(struct platform_device *dev) +int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, + const struct ptp_clock_info caps) { - struct device_node *node = dev->dev.of_node; - struct ptp_qoriq *ptp_qoriq; + struct device_node *node = ptp_qoriq->dev->of_node; struct ptp_qoriq_registers *regs; struct timespec64 now; - int err = -ENOMEM; - u32 tmr_ctrl; unsigned long flags; - void __iomem *base; + u32 tmr_ctrl; - ptp_qoriq = kzalloc(sizeof(*ptp_qoriq), GFP_KERNEL); - if (!ptp_qoriq) - goto no_memory; - - err = -EINVAL; - - ptp_qoriq->dev = &dev->dev; - ptp_qoriq->caps = ptp_qoriq_caps; + ptp_qoriq->base = base; + ptp_qoriq->caps = caps; if (of_property_read_u32(node, "fsl,cksel", &ptp_qoriq->cksel)) ptp_qoriq->cksel = DEFAULT_CKSEL; @@ -501,44 +493,9 @@ static int ptp_qoriq_probe(struct platform_device *dev) pr_warn("device tree node missing required elements, try automatic configuration\n"); if (ptp_qoriq_auto_config(ptp_qoriq, node)) - goto no_config; + return -ENODEV; } - err = -ENODEV; - - ptp_qoriq->irq = platform_get_irq(dev, 0); - - if (ptp_qoriq->irq < 0) { - pr_err("irq not in device tree\n"); - goto no_node; - } - if (request_irq(ptp_qoriq->irq, ptp_qoriq_isr, IRQF_SHARED, - DRIVER, ptp_qoriq)) { - pr_err("request_irq failed\n"); - goto no_node; - } - - ptp_qoriq->rsrc = platform_get_resource(dev, IORESOURCE_MEM, 0); - if (!ptp_qoriq->rsrc) { - pr_err("no resource\n"); - goto no_resource; - } - if (request_resource(&iomem_resource, ptp_qoriq->rsrc)) { - pr_err("resource busy\n"); - goto no_resource; - } - - spin_lock_init(&ptp_qoriq->lock); - - base = ioremap(ptp_qoriq->rsrc->start, - resource_size(ptp_qoriq->rsrc)); - if (!base) { - pr_err("ioremap ptp registers failed\n"); - goto no_ioremap; - } - - ptp_qoriq->base = base; - if (of_device_is_compatible(node, "fsl,fman-ptp-timer")) { ptp_qoriq->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET; ptp_qoriq->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET; @@ -558,6 +515,7 @@ static int ptp_qoriq_probe(struct platform_device *dev) (ptp_qoriq->tclk_period & TCLK_PERIOD_MASK) << TCLK_PERIOD_SHIFT | (ptp_qoriq->cksel & CKSEL_MASK) << CKSEL_SHIFT; + spin_lock_init(&ptp_qoriq->lock); spin_lock_irqsave(&ptp_qoriq->lock, flags); regs = &ptp_qoriq->regs; @@ -571,16 +529,77 @@ static int ptp_qoriq_probe(struct platform_device *dev) spin_unlock_irqrestore(&ptp_qoriq->lock, flags); - ptp_qoriq->clock = ptp_clock_register(&ptp_qoriq->caps, &dev->dev); - if (IS_ERR(ptp_qoriq->clock)) { - err = PTR_ERR(ptp_qoriq->clock); - goto no_clock; - } + ptp_qoriq->clock = ptp_clock_register(&ptp_qoriq->caps, ptp_qoriq->dev); + if (IS_ERR(ptp_qoriq->clock)) + return PTR_ERR(ptp_qoriq->clock); + ptp_qoriq->phc_index = ptp_clock_index(ptp_qoriq->clock); - ptp_qoriq_create_debugfs(ptp_qoriq); - platform_set_drvdata(dev, ptp_qoriq); + return 0; +} +EXPORT_SYMBOL_GPL(ptp_qoriq_init); +void ptp_qoriq_free(struct ptp_qoriq *ptp_qoriq) +{ + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; + + qoriq_write(®s->ctrl_regs->tmr_temask, 0); + qoriq_write(®s->ctrl_regs->tmr_ctrl, 0); + + ptp_qoriq_remove_debugfs(ptp_qoriq); + ptp_clock_unregister(ptp_qoriq->clock); + iounmap(ptp_qoriq->base); + free_irq(ptp_qoriq->irq, ptp_qoriq); +} +EXPORT_SYMBOL_GPL(ptp_qoriq_free); + +static int ptp_qoriq_probe(struct platform_device *dev) +{ + struct ptp_qoriq *ptp_qoriq; + int err = -ENOMEM; + void __iomem *base; + + ptp_qoriq = kzalloc(sizeof(*ptp_qoriq), GFP_KERNEL); + if (!ptp_qoriq) + goto no_memory; + + ptp_qoriq->dev = &dev->dev; + + err = -ENODEV; + + ptp_qoriq->irq = platform_get_irq(dev, 0); + if (ptp_qoriq->irq < 0) { + pr_err("irq not in device tree\n"); + goto no_node; + } + if (request_irq(ptp_qoriq->irq, ptp_qoriq_isr, IRQF_SHARED, + DRIVER, ptp_qoriq)) { + pr_err("request_irq failed\n"); + goto no_node; + } + + ptp_qoriq->rsrc = platform_get_resource(dev, IORESOURCE_MEM, 0); + if (!ptp_qoriq->rsrc) { + pr_err("no resource\n"); + goto no_resource; + } + if (request_resource(&iomem_resource, ptp_qoriq->rsrc)) { + pr_err("resource busy\n"); + goto no_resource; + } + + base = ioremap(ptp_qoriq->rsrc->start, + resource_size(ptp_qoriq->rsrc)); + if (!base) { + pr_err("ioremap ptp registers failed\n"); + goto no_ioremap; + } + + err = ptp_qoriq_init(ptp_qoriq, base, ptp_qoriq_caps); + if (err) + goto no_clock; + + platform_set_drvdata(dev, ptp_qoriq); return 0; no_clock: @@ -589,7 +608,6 @@ no_ioremap: release_resource(ptp_qoriq->rsrc); no_resource: free_irq(ptp_qoriq->irq, ptp_qoriq); -no_config: no_node: kfree(ptp_qoriq); no_memory: @@ -599,18 +617,10 @@ no_memory: static int ptp_qoriq_remove(struct platform_device *dev) { struct ptp_qoriq *ptp_qoriq = platform_get_drvdata(dev); - struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; - qoriq_write(®s->ctrl_regs->tmr_temask, 0); - qoriq_write(®s->ctrl_regs->tmr_ctrl, 0); - - ptp_qoriq_remove_debugfs(ptp_qoriq); - ptp_clock_unregister(ptp_qoriq->clock); - iounmap(ptp_qoriq->base); + ptp_qoriq_free(ptp_qoriq); release_resource(ptp_qoriq->rsrc); - free_irq(ptp_qoriq->irq, ptp_qoriq); kfree(ptp_qoriq); - return 0; } diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index 75e6f0523cb1..757aec385493 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -173,6 +173,9 @@ static inline void qoriq_write(unsigned __iomem *addr, u32 val) } irqreturn_t ptp_qoriq_isr(int irq, void *priv); +int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, + const struct ptp_clock_info caps); +void ptp_qoriq_free(struct ptp_qoriq *ptp_qoriq); int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm); int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta); int ptp_qoriq_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts); From f038ddf25b80be90e1af9439935bdb66fdbf5e28 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 12 Feb 2019 12:23:59 +0800 Subject: [PATCH 1037/1436] ptp_qoriq: add little enadian support There is QorIQ 1588 timer IP block on the new ENETC Ethernet controller. However it uses little endian mode which is different with before. This patch is to add little endian support for the driver by using "little-endian" dts node property. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/ptp_qoriq.c | 69 +++++++++++++++++++-------------- drivers/ptp/ptp_qoriq_debugfs.c | 12 +++--- include/linux/fsl/ptp_qoriq.h | 21 +++++++--- 3 files changed, 60 insertions(+), 42 deletions(-) diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index db4f929ea4e9..ed4dc398c57b 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -43,8 +43,8 @@ static u64 tmr_cnt_read(struct ptp_qoriq *ptp_qoriq) u64 ns; u32 lo, hi; - lo = qoriq_read(®s->ctrl_regs->tmr_cnt_l); - hi = qoriq_read(®s->ctrl_regs->tmr_cnt_h); + lo = ptp_qoriq->read(®s->ctrl_regs->tmr_cnt_l); + hi = ptp_qoriq->read(®s->ctrl_regs->tmr_cnt_h); ns = ((u64) hi) << 32; ns |= lo; return ns; @@ -57,8 +57,8 @@ static void tmr_cnt_write(struct ptp_qoriq *ptp_qoriq, u64 ns) u32 hi = ns >> 32; u32 lo = ns & 0xffffffff; - qoriq_write(®s->ctrl_regs->tmr_cnt_l, lo); - qoriq_write(®s->ctrl_regs->tmr_cnt_h, hi); + ptp_qoriq->write(®s->ctrl_regs->tmr_cnt_l, lo); + ptp_qoriq->write(®s->ctrl_regs->tmr_cnt_h, hi); } /* Caller must hold ptp_qoriq->lock. */ @@ -73,8 +73,8 @@ static void set_alarm(struct ptp_qoriq *ptp_qoriq) ns -= ptp_qoriq->tclk_period; hi = ns >> 32; lo = ns & 0xffffffff; - qoriq_write(®s->alarm_regs->tmr_alarm1_l, lo); - qoriq_write(®s->alarm_regs->tmr_alarm1_h, hi); + ptp_qoriq->write(®s->alarm_regs->tmr_alarm1_l, lo); + ptp_qoriq->write(®s->alarm_regs->tmr_alarm1_h, hi); } /* Caller must hold ptp_qoriq->lock. */ @@ -83,8 +83,8 @@ static void set_fipers(struct ptp_qoriq *ptp_qoriq) struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; set_alarm(ptp_qoriq); - qoriq_write(®s->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1); - qoriq_write(®s->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2); + ptp_qoriq->write(®s->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1); + ptp_qoriq->write(®s->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2); } static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, @@ -115,8 +115,8 @@ static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, event.index = index; do { - lo = qoriq_read(reg_etts_l); - hi = qoriq_read(reg_etts_h); + lo = ptp_qoriq->read(reg_etts_l); + hi = ptp_qoriq->read(reg_etts_h); if (update_event) { event.timestamp = ((u64) hi) << 32; @@ -124,7 +124,7 @@ static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, ptp_clock_event(ptp_qoriq->clock, &event); } - stat = qoriq_read(®s->ctrl_regs->tmr_stat); + stat = ptp_qoriq->read(®s->ctrl_regs->tmr_stat); } while (ptp_qoriq->extts_fifo_support && (stat & valid)); return 0; @@ -144,8 +144,8 @@ irqreturn_t ptp_qoriq_isr(int irq, void *priv) spin_lock(&ptp_qoriq->lock); - val = qoriq_read(®s->ctrl_regs->tmr_tevent); - mask = qoriq_read(®s->ctrl_regs->tmr_temask); + val = ptp_qoriq->read(®s->ctrl_regs->tmr_tevent); + mask = ptp_qoriq->read(®s->ctrl_regs->tmr_temask); spin_unlock(&ptp_qoriq->lock); @@ -173,14 +173,14 @@ irqreturn_t ptp_qoriq_isr(int irq, void *priv) ns = ptp_qoriq->alarm_value + ptp_qoriq->alarm_interval; hi = ns >> 32; lo = ns & 0xffffffff; - qoriq_write(®s->alarm_regs->tmr_alarm2_l, lo); - qoriq_write(®s->alarm_regs->tmr_alarm2_h, hi); + ptp_qoriq->write(®s->alarm_regs->tmr_alarm2_l, lo); + ptp_qoriq->write(®s->alarm_regs->tmr_alarm2_h, hi); ptp_qoriq->alarm_value = ns; } else { spin_lock(&ptp_qoriq->lock); - mask = qoriq_read(®s->ctrl_regs->tmr_temask); + mask = ptp_qoriq->read(®s->ctrl_regs->tmr_temask); mask &= ~ALM2EN; - qoriq_write(®s->ctrl_regs->tmr_temask, mask); + ptp_qoriq->write(®s->ctrl_regs->tmr_temask, mask); spin_unlock(&ptp_qoriq->lock); ptp_qoriq->alarm_value = 0; ptp_qoriq->alarm_interval = 0; @@ -194,7 +194,7 @@ irqreturn_t ptp_qoriq_isr(int irq, void *priv) } if (ack) { - qoriq_write(®s->ctrl_regs->tmr_tevent, ack); + ptp_qoriq->write(®s->ctrl_regs->tmr_tevent, ack); return IRQ_HANDLED; } else return IRQ_NONE; @@ -229,7 +229,7 @@ int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) tmr_add = neg_adj ? tmr_add - diff : tmr_add + diff; - qoriq_write(®s->ctrl_regs->tmr_add, tmr_add); + ptp_qoriq->write(®s->ctrl_regs->tmr_add, tmr_add); return 0; } @@ -326,15 +326,15 @@ int ptp_qoriq_enable(struct ptp_clock_info *ptp, spin_lock_irqsave(&ptp_qoriq->lock, flags); - mask = qoriq_read(®s->ctrl_regs->tmr_temask); + mask = ptp_qoriq->read(®s->ctrl_regs->tmr_temask); if (on) { mask |= bit; - qoriq_write(®s->ctrl_regs->tmr_tevent, bit); + ptp_qoriq->write(®s->ctrl_regs->tmr_tevent, bit); } else { mask &= ~bit; } - qoriq_write(®s->ctrl_regs->tmr_temask, mask); + ptp_qoriq->write(®s->ctrl_regs->tmr_temask, mask); spin_unlock_irqrestore(&ptp_qoriq->lock, flags); return 0; @@ -496,6 +496,14 @@ int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, return -ENODEV; } + if (of_property_read_bool(node, "little-endian")) { + ptp_qoriq->read = qoriq_read_le; + ptp_qoriq->write = qoriq_write_le; + } else { + ptp_qoriq->read = qoriq_read_be; + ptp_qoriq->write = qoriq_write_be; + } + if (of_device_is_compatible(node, "fsl,fman-ptp-timer")) { ptp_qoriq->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET; ptp_qoriq->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET; @@ -519,13 +527,14 @@ int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, spin_lock_irqsave(&ptp_qoriq->lock, flags); regs = &ptp_qoriq->regs; - qoriq_write(®s->ctrl_regs->tmr_ctrl, tmr_ctrl); - qoriq_write(®s->ctrl_regs->tmr_add, ptp_qoriq->tmr_add); - qoriq_write(®s->ctrl_regs->tmr_prsc, ptp_qoriq->tmr_prsc); - qoriq_write(®s->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1); - qoriq_write(®s->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2); + ptp_qoriq->write(®s->ctrl_regs->tmr_ctrl, tmr_ctrl); + ptp_qoriq->write(®s->ctrl_regs->tmr_add, ptp_qoriq->tmr_add); + ptp_qoriq->write(®s->ctrl_regs->tmr_prsc, ptp_qoriq->tmr_prsc); + ptp_qoriq->write(®s->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1); + ptp_qoriq->write(®s->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2); set_alarm(ptp_qoriq); - qoriq_write(®s->ctrl_regs->tmr_ctrl, tmr_ctrl|FIPERST|RTPE|TE|FRD); + ptp_qoriq->write(®s->ctrl_regs->tmr_ctrl, + tmr_ctrl|FIPERST|RTPE|TE|FRD); spin_unlock_irqrestore(&ptp_qoriq->lock, flags); @@ -543,8 +552,8 @@ void ptp_qoriq_free(struct ptp_qoriq *ptp_qoriq) { struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; - qoriq_write(®s->ctrl_regs->tmr_temask, 0); - qoriq_write(®s->ctrl_regs->tmr_ctrl, 0); + ptp_qoriq->write(®s->ctrl_regs->tmr_temask, 0); + ptp_qoriq->write(®s->ctrl_regs->tmr_ctrl, 0); ptp_qoriq_remove_debugfs(ptp_qoriq); ptp_clock_unregister(ptp_qoriq->clock); diff --git a/drivers/ptp/ptp_qoriq_debugfs.c b/drivers/ptp/ptp_qoriq_debugfs.c index 3a70daf03727..e8dddcedf288 100644 --- a/drivers/ptp/ptp_qoriq_debugfs.c +++ b/drivers/ptp/ptp_qoriq_debugfs.c @@ -11,7 +11,7 @@ static int ptp_qoriq_fiper1_lpbk_get(void *data, u64 *val) struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; - ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + ctrl = ptp_qoriq->read(®s->ctrl_regs->tmr_ctrl); *val = ctrl & PP1L ? 1 : 0; return 0; @@ -23,13 +23,13 @@ static int ptp_qoriq_fiper1_lpbk_set(void *data, u64 val) struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; - ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + ctrl = ptp_qoriq->read(®s->ctrl_regs->tmr_ctrl); if (val == 0) ctrl &= ~PP1L; else ctrl |= PP1L; - qoriq_write(®s->ctrl_regs->tmr_ctrl, ctrl); + ptp_qoriq->write(®s->ctrl_regs->tmr_ctrl, ctrl); return 0; } @@ -42,7 +42,7 @@ static int ptp_qoriq_fiper2_lpbk_get(void *data, u64 *val) struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; - ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + ctrl = ptp_qoriq->read(®s->ctrl_regs->tmr_ctrl); *val = ctrl & PP2L ? 1 : 0; return 0; @@ -54,13 +54,13 @@ static int ptp_qoriq_fiper2_lpbk_set(void *data, u64 val) struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; - ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + ctrl = ptp_qoriq->read(®s->ctrl_regs->tmr_ctrl); if (val == 0) ctrl &= ~PP2L; else ctrl |= PP2L; - qoriq_write(®s->ctrl_regs->tmr_ctrl, ctrl); + ptp_qoriq->write(®s->ctrl_regs->tmr_ctrl, ctrl); return 0; } diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index 757aec385493..1f8bb6a6a121 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -157,21 +157,30 @@ struct ptp_qoriq { u32 cksel; u32 tmr_fiper1; u32 tmr_fiper2; + u32 (*read)(unsigned __iomem *addr); + void (*write)(unsigned __iomem *addr, u32 val); }; -static inline u32 qoriq_read(unsigned __iomem *addr) +static inline u32 qoriq_read_be(unsigned __iomem *addr) { - u32 val; - - val = ioread32be(addr); - return val; + return ioread32be(addr); } -static inline void qoriq_write(unsigned __iomem *addr, u32 val) +static inline void qoriq_write_be(unsigned __iomem *addr, u32 val) { iowrite32be(val, addr); } +static inline u32 qoriq_read_le(unsigned __iomem *addr) +{ + return ioread32(addr); +} + +static inline void qoriq_write_le(unsigned __iomem *addr, u32 val) +{ + iowrite32(val, addr); +} + irqreturn_t ptp_qoriq_isr(int irq, void *priv); int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, const struct ptp_clock_info caps); From 2843bf518579e9fa357ba58708c7cff96946d084 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 12 Feb 2019 12:24:00 +0800 Subject: [PATCH 1038/1436] dt-binding: ptp_qoriq: add little-endian support Specify "little-endian" property if the 1588 timer IP block is little-endian mode. The default endian mode is big-endian. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/ptp/ptp-qoriq.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt b/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt index 8e7f8551d190..454c937076a2 100644 --- a/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt +++ b/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt @@ -19,6 +19,9 @@ Clock Properties: - fsl,max-adj Maximum frequency adjustment in parts per billion. - fsl,extts-fifo The presence of this property indicates hardware support for the external trigger stamp FIFO. + - little-endian The presence of this property indicates the 1588 timer + IP block is little-endian mode. The default endian mode + is big-endian. These properties set the operational parameters for the PTP clock. You must choose these carefully for the clock to work right. From d4e176870bffde373d9688c54aad8f92b3394ba6 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 12 Feb 2019 12:24:01 +0800 Subject: [PATCH 1039/1436] ptp_qoriq: fix register memory map The 1588 timer on eTSEC Ethernet controller uses different register memory map with DPAA Ethernet controller. Now the new ENETC Ethernet controller uses same reigster memory map with DPAA. To support ENETC, let's use register memory map of DPAA/ENETC in default. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/ptp_qoriq.c | 11 ++++++----- include/linux/fsl/ptp_qoriq.h | 16 ++++++++-------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index ed4dc398c57b..42d3654f77f0 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -504,11 +504,12 @@ int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, ptp_qoriq->write = qoriq_write_be; } - if (of_device_is_compatible(node, "fsl,fman-ptp-timer")) { - ptp_qoriq->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET; - ptp_qoriq->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET; - ptp_qoriq->regs.fiper_regs = base + FMAN_FIPER_REGS_OFFSET; - ptp_qoriq->regs.etts_regs = base + FMAN_ETTS_REGS_OFFSET; + /* The eTSEC uses differnt memory map with DPAA/ENETC */ + if (of_device_is_compatible(node, "fsl,etsec-ptp")) { + ptp_qoriq->regs.ctrl_regs = base + ETSEC_CTRL_REGS_OFFSET; + ptp_qoriq->regs.alarm_regs = base + ETSEC_ALARM_REGS_OFFSET; + ptp_qoriq->regs.fiper_regs = base + ETSEC_FIPER_REGS_OFFSET; + ptp_qoriq->regs.etts_regs = base + ETSEC_ETTS_REGS_OFFSET; } else { ptp_qoriq->regs.ctrl_regs = base + CTRL_REGS_OFFSET; ptp_qoriq->regs.alarm_regs = base + ALARM_REGS_OFFSET; diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index 1f8bb6a6a121..f127adb71041 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -58,15 +58,15 @@ struct ptp_qoriq_registers { }; /* Offset definitions for the four register groups */ -#define CTRL_REGS_OFFSET 0x0 -#define ALARM_REGS_OFFSET 0x40 -#define FIPER_REGS_OFFSET 0x80 -#define ETTS_REGS_OFFSET 0xa0 +#define ETSEC_CTRL_REGS_OFFSET 0x0 +#define ETSEC_ALARM_REGS_OFFSET 0x40 +#define ETSEC_FIPER_REGS_OFFSET 0x80 +#define ETSEC_ETTS_REGS_OFFSET 0xa0 -#define FMAN_CTRL_REGS_OFFSET 0x80 -#define FMAN_ALARM_REGS_OFFSET 0xb8 -#define FMAN_FIPER_REGS_OFFSET 0xd0 -#define FMAN_ETTS_REGS_OFFSET 0xe0 +#define CTRL_REGS_OFFSET 0x80 +#define ALARM_REGS_OFFSET 0xb8 +#define FIPER_REGS_OFFSET 0xd0 +#define ETTS_REGS_OFFSET 0xe0 /* Bit definitions for the TMR_CTRL register */ From ad6e1be64ae135abae8f7e51c4f2f74f4f4e5420 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 12 Feb 2019 12:24:02 +0800 Subject: [PATCH 1040/1436] ptp: add QorIQ PTP support for ENETC This patch is to add QorIQ PTP support for ENETC. ENETC PTP driver which is a PCI driver for same 1588 timer IP block will reuse QorIQ PTP driver. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index aeb4a8b2e0af..7fe18636915a 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -43,7 +43,7 @@ config PTP_1588_CLOCK_DTE config PTP_1588_CLOCK_QORIQ tristate "Freescale QorIQ 1588 timer as PTP clock" - depends on GIANFAR || FSL_DPAA_ETH + depends on GIANFAR || FSL_DPAA_ETH || FSL_ENETC || FSL_ENETC_VF depends on PTP_1588_CLOCK default y help From 19971f5ea0ab449818db5016ea1229a654a13a5c Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 12 Feb 2019 12:24:03 +0800 Subject: [PATCH 1041/1436] enetc: add PTP clock driver This patch is to add PTP clock driver for ENETC. The driver reused QorIQ PTP clock driver. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/Kconfig | 12 ++ drivers/net/ethernet/freescale/enetc/Makefile | 3 + .../net/ethernet/freescale/enetc/enetc_hw.h | 5 +- .../net/ethernet/freescale/enetc/enetc_ptp.c | 144 ++++++++++++++++++ 4 files changed, 162 insertions(+), 2 deletions(-) create mode 100644 drivers/net/ethernet/freescale/enetc/enetc_ptp.c diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig index f9dd26fbfc83..8429f5c1d810 100644 --- a/drivers/net/ethernet/freescale/enetc/Kconfig +++ b/drivers/net/ethernet/freescale/enetc/Kconfig @@ -17,3 +17,15 @@ config FSL_ENETC_VF virtual function (VF) devices enabled by the ENETC PF driver. If compiled as module (M), the module name is fsl-enetc-vf. + +config FSL_ENETC_PTP_CLOCK + tristate "ENETC PTP clock driver" + depends on PTP_1588_CLOCK_QORIQ && (FSL_ENETC || FSL_ENETC_VF) + default y + help + This driver adds support for using the ENETC 1588 timer + as a PTP clock. This clock is only useful if your PTP + programs are getting hardware time stamps on the PTP Ethernet + packets using the SO_TIMESTAMPING API. + + If compiled as module (M), the module name is fsl-enetc-ptp. diff --git a/drivers/net/ethernet/freescale/enetc/Makefile b/drivers/net/ethernet/freescale/enetc/Makefile index 9529b01872ca..697660294dbc 100644 --- a/drivers/net/ethernet/freescale/enetc/Makefile +++ b/drivers/net/ethernet/freescale/enetc/Makefile @@ -13,3 +13,6 @@ fsl-enetc-vf-$(CONFIG_FSL_ENETC_VF) += enetc.o enetc_cbdr.o \ enetc_ethtool.o fsl-enetc-vf-objs := enetc_vf.o $(fsl-enetc-vf-y) endif + +obj-$(CONFIG_FSL_ENETC_PTP_CLOCK) += fsl-enetc-ptp.o +fsl-enetc-ptp-$(CONFIG_FSL_ENETC_PTP_CLOCK) += enetc_ptp.o diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index efa0b1a5ef4f..df8eb8882d92 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -4,8 +4,9 @@ #include /* ENETC device IDs */ -#define ENETC_DEV_ID_PF 0xe100 -#define ENETC_DEV_ID_VF 0xef00 +#define ENETC_DEV_ID_PF 0xe100 +#define ENETC_DEV_ID_VF 0xef00 +#define ENETC_DEV_ID_PTP 0xee02 /* ENETC register block BAR */ #define ENETC_BAR_REGS 0 diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c new file mode 100644 index 000000000000..dc2f58a7c9e5 --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* Copyright 2019 NXP */ + +#include +#include +#include + +#include "enetc.h" + +static struct ptp_clock_info enetc_ptp_caps = { + .owner = THIS_MODULE, + .name = "ENETC PTP clock", + .max_adj = 512000, + .n_alarm = 0, + .n_ext_ts = 2, + .n_per_out = 0, + .n_pins = 0, + .pps = 1, + .adjfine = ptp_qoriq_adjfine, + .adjtime = ptp_qoriq_adjtime, + .gettime64 = ptp_qoriq_gettime, + .settime64 = ptp_qoriq_settime, + .enable = ptp_qoriq_enable, +}; + +static int enetc_ptp_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct ptp_qoriq *ptp_qoriq; + void __iomem *base; + int err, len, n; + + if (pdev->dev.of_node && !of_device_is_available(pdev->dev.of_node)) { + dev_info(&pdev->dev, "device is disabled, skipping\n"); + return -ENODEV; + } + + err = pci_enable_device_mem(pdev); + if (err) { + dev_err(&pdev->dev, "device enable failed\n"); + return err; + } + + /* set up for high or low dma */ + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, + "DMA configuration failed: 0x%x\n", err); + goto err_dma; + } + } + + err = pci_request_mem_regions(pdev, KBUILD_MODNAME); + if (err) { + dev_err(&pdev->dev, "pci_request_regions failed err=%d\n", err); + goto err_pci_mem_reg; + } + + pci_set_master(pdev); + + ptp_qoriq = kzalloc(sizeof(*ptp_qoriq), GFP_KERNEL); + if (!ptp_qoriq) { + err = -ENOMEM; + goto err_alloc_ptp; + } + + len = pci_resource_len(pdev, ENETC_BAR_REGS); + + base = ioremap(pci_resource_start(pdev, ENETC_BAR_REGS), len); + if (!base) { + err = -ENXIO; + dev_err(&pdev->dev, "ioremap() failed\n"); + goto err_ioremap; + } + + /* Allocate 1 interrupt */ + n = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX); + if (n != 1) { + err = -EPERM; + goto err_irq; + } + + ptp_qoriq->irq = pci_irq_vector(pdev, 0); + + err = request_irq(ptp_qoriq->irq, ptp_qoriq_isr, 0, DRIVER, ptp_qoriq); + if (err) { + dev_err(&pdev->dev, "request_irq() failed!\n"); + goto err_irq; + } + + ptp_qoriq->dev = &pdev->dev; + + err = ptp_qoriq_init(ptp_qoriq, base, enetc_ptp_caps); + if (err) + goto err_no_clock; + + pci_set_drvdata(pdev, ptp_qoriq); + + return 0; + +err_no_clock: + free_irq(ptp_qoriq->irq, ptp_qoriq); +err_irq: + iounmap(base); +err_ioremap: + kfree(ptp_qoriq); +err_alloc_ptp: + pci_release_mem_regions(pdev); +err_pci_mem_reg: +err_dma: + pci_disable_device(pdev); + + return err; +} + +static void enetc_ptp_remove(struct pci_dev *pdev) +{ + struct ptp_qoriq *ptp_qoriq = pci_get_drvdata(pdev); + + ptp_qoriq_free(ptp_qoriq); + kfree(ptp_qoriq); + + pci_release_mem_regions(pdev); + pci_disable_device(pdev); +} + +static const struct pci_device_id enetc_ptp_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PTP) }, + { 0, } /* End of table. */ +}; +MODULE_DEVICE_TABLE(pci, enetc_ptp_id_table); + +static struct pci_driver enetc_ptp_driver = { + .name = KBUILD_MODNAME, + .id_table = enetc_ptp_id_table, + .probe = enetc_ptp_probe, + .remove = enetc_ptp_remove, +}; +module_pci_driver(enetc_ptp_driver); + +MODULE_DESCRIPTION("ENETC PTP clock driver"); +MODULE_LICENSE("Dual BSD/GPL"); From bb024c3b123b4081a4e2ed182c57054b53b05e83 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 12 Feb 2019 12:24:04 +0800 Subject: [PATCH 1042/1436] MAINTAINERS: add enetc_ptp driver into QorIQ PTP list This patch to add enetc_ptp driver into QorIQ PTP list for maintaining. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f3af5cde6456..d7da0618d6b3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6104,6 +6104,7 @@ FREESCALE QORIQ PTP CLOCK DRIVER M: Yangbo Lu L: netdev@vger.kernel.org S: Maintained +F: drivers/net/ethernet/freescale/enetc/enetc_ptp.c F: drivers/ptp/ptp_qoriq.c F: drivers/ptp/ptp_qoriq_debugfs.c F: include/linux/fsl/ptp_qoriq.h From 74abc07dee613086f9c0ded9e263ddc959a6de04 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 11 Feb 2019 18:04:17 +0200 Subject: [PATCH 1043/1436] net/mlx4_en: Force CHECKSUM_NONE for short ethernet frames When an ethernet frame is padded to meet the minimum ethernet frame size, the padding octets are not covered by the hardware checksum. Fortunately the padding octets are usually zero's, which don't affect checksum. However, it is not guaranteed. For example, switches might choose to make other use of these octets. This repeatedly causes kernel hardware checksum fault. Prior to the cited commit below, skb checksum was forced to be CHECKSUM_NONE when padding is detected. After it, we need to keep skb->csum updated. However, fixing up CHECKSUM_COMPLETE requires to verify and parse IP headers, it does not worth the effort as the packets are so small that CHECKSUM_COMPLETE has no significant advantage. Future work: when reporting checksum complete is not an option for IP non-TCP/UDP packets, we can actually fallback to report checksum unnecessary, by looking at cqe IPOK bit. Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends") Cc: Eric Dumazet Signed-off-by: Saeed Mahameed Signed-off-by: Tariq Toukan Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 9a0881cb7f51..6c01314e87b0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -617,6 +617,8 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, } #endif +#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) + /* We reach this function only after checking that any of * the (IPv4 | IPv6) bits are set in cqe->status. */ @@ -624,9 +626,20 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, netdev_features_t dev_features) { __wsum hw_checksum = 0; + void *hdr; - void *hdr = (u8 *)va + sizeof(struct ethhdr); + /* CQE csum doesn't cover padding octets in short ethernet + * frames. And the pad field is appended prior to calculating + * and appending the FCS field. + * + * Detecting these padded frames requires to verify and parse + * IP headers, so we simply force all those small frames to skip + * checksum complete. + */ + if (short_frame(skb->len)) + return -EINVAL; + hdr = (u8 *)va + sizeof(struct ethhdr); hw_checksum = csum_unfold((__force __sum16)cqe->checksum); if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK) && @@ -819,6 +832,11 @@ xdp_drop_no_cnt: skb_record_rx_queue(skb, cq_ring); if (likely(dev->features & NETIF_F_RXCSUM)) { + /* TODO: For IP non TCP/UDP packets when csum complete is + * not an option (not supported or any other reason) we can + * actually check cqe IPOK status bit and report + * CHECKSUM_UNNECESSARY rather than CHECKSUM_NONE + */ if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | MLX4_CQE_STATUS_UDP)) && (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && From 84dbea461e490d7bba3706637efd7835eb3c8205 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 12 Feb 2019 18:33:16 +0100 Subject: [PATCH 1044/1436] s390/qeth: reduce data length for ARP cache query qeth_l3_query_arp_cache_info() indicates a data length that's much larger than the actual length of its request (ie. the value passed to qeth_get_setassparms_cmd()). The confusion presumably comes from the fact that the cmd _response_ can be quite large - but that's no concern for the initial request IO. Fixing this up allows us to use the generic qeth_send_ipa_cmd() infrastructure. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 3 --- drivers/s390/net/qeth_core_main.c | 13 ++++++------- drivers/s390/net/qeth_core_mpc.h | 5 ----- drivers/s390/net/qeth_l3_main.c | 4 +--- 4 files changed, 7 insertions(+), 18 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 71d27a804920..f35e0b58ba4e 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -1004,9 +1004,6 @@ void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob); struct qeth_cmd_buffer *qeth_wait_for_buffer(struct qeth_channel *); int qeth_query_switch_attributes(struct qeth_card *card, struct qeth_switch_info *sw_info); -int qeth_send_control_data(struct qeth_card *, int, struct qeth_cmd_buffer *, - int (*reply_cb)(struct qeth_card *, struct qeth_reply*, unsigned long), - void *reply_param); unsigned int qeth_count_elements(struct sk_buff *skb, unsigned int data_offset); int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, struct sk_buff *skb, struct qeth_hdr *hdr, diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 6ef0c89370b5..47927613d43c 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -2020,12 +2020,12 @@ EXPORT_SYMBOL_GPL(qeth_prepare_control_data); * field 'param' of the structure qeth_reply. */ -int qeth_send_control_data(struct qeth_card *card, int len, - struct qeth_cmd_buffer *iob, - int (*reply_cb)(struct qeth_card *cb_card, - struct qeth_reply *cb_reply, - unsigned long cb_cmd), - void *reply_param) +static int qeth_send_control_data(struct qeth_card *card, int len, + struct qeth_cmd_buffer *iob, + int (*reply_cb)(struct qeth_card *cb_card, + struct qeth_reply *cb_reply, + unsigned long cb_cmd), + void *reply_param) { struct qeth_channel *channel = iob->channel; int rc; @@ -2116,7 +2116,6 @@ time_err: qeth_put_reply(reply); return rc; } -EXPORT_SYMBOL_GPL(qeth_send_control_data); static int qeth_cm_enable_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index c7fb14a61eed..07a48ee95dba 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -834,15 +834,10 @@ enum qeth_ipa_arp_return_codes { extern const char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc); extern const char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd); -#define QETH_SETASS_BASE_LEN (IPA_PDU_HEADER_SIZE + \ - sizeof(struct qeth_ipacmd_hdr) + \ - sizeof(struct qeth_ipacmd_setassparms_hdr)) #define QETH_SETADP_BASE_LEN (sizeof(struct qeth_ipacmd_hdr) + \ sizeof(struct qeth_ipacmd_setadpparms_hdr)) #define QETH_SNMP_SETADP_CMDLENGTH 16 -#define QETH_ARP_DATA_SIZE 3968 -#define QETH_ARP_CMD_LEN (QETH_ARP_DATA_SIZE + 8) /* Helper functions */ #define IS_IPA_REPLY(cmd) ((cmd->hdr.initiator == IPA_CMD_INITIATOR_HOST) || \ (cmd->hdr.initiator == IPA_CMD_INITIATOR_OSA_REPLY)) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index f7d0623999ba..f0b790810ebc 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1696,9 +1696,7 @@ static int qeth_l3_query_arp_cache_info(struct qeth_card *card, return -ENOMEM; cmd = __ipa_cmd(iob); cmd->data.setassparms.data.query_arp.request_bits = 0x000F; - rc = qeth_send_control_data(card, - QETH_SETASS_BASE_LEN + QETH_ARP_CMD_LEN, - iob, qeth_l3_arp_query_cb, qinfo); + rc = qeth_send_ipa_cmd(card, iob, qeth_l3_arp_query_cb, qinfo); if (rc) QETH_DBF_MESSAGE(2, "Error while querying ARP cache on device %x: %#x\n", CARD_DEVID(card), rc); From c21532771e9f965a609d4280bade7139b1452273 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 12 Feb 2019 18:33:17 +0100 Subject: [PATCH 1045/1436] s390/qeth: consolidate filling of low-level cmd length fields The code to fill the IPA length fields is duplicated three times across the driver: 1. qeth_send_ipa_cmd() sets IPA_CMD_LENGTH, which matches the defaults in the IPA_PDU_HEADER template. 2. for OSN, qeth_osn_send_ipa_cmd() bypasses this logic and inserts the length passed by the caller. 3. SNMP commands (that can outgrow IPA_CMD_LENGTH) have their own way of setting the length fields, via qeth_send_ipa_snmp_cmd(). Consolidate this into qeth_prepare_ipa_cmd(), which all originators of IPA cmds already call during setup of their cmd. Let qeth_send_ipa_cmd() pull the length from the cmd instead of hard-coding IPA_CMD_LENGTH. For now, the SNMP code still needs to fix-up its length fields manually. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 3 ++- drivers/s390/net/qeth_core_main.c | 43 ++++++++++++------------------- drivers/s390/net/qeth_core_mpc.c | 19 +++----------- drivers/s390/net/qeth_core_mpc.h | 2 -- drivers/s390/net/qeth_l2_main.c | 17 +++++------- 5 files changed, 28 insertions(+), 56 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index f35e0b58ba4e..07840f9c0bb3 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -1000,7 +1000,8 @@ void qeth_tx_timeout(struct net_device *); void qeth_prepare_control_data(struct qeth_card *, int, struct qeth_cmd_buffer *); void qeth_release_buffer(struct qeth_channel *, struct qeth_cmd_buffer *); -void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob); +void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, + u16 cmd_length); struct qeth_cmd_buffer *qeth_wait_for_buffer(struct qeth_channel *); int qeth_query_switch_attributes(struct qeth_card *card, struct qeth_switch_info *sw_info); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 47927613d43c..5f450df886c6 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -2815,14 +2815,20 @@ static void qeth_fill_ipacmd_header(struct qeth_card *card, cmd->hdr.prot_version = prot; } -void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob) +void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, + u16 cmd_length) { + u16 total_length = IPA_PDU_HEADER_SIZE + cmd_length; u8 prot_type = qeth_mpc_select_prot_type(card); memcpy(iob->data, IPA_PDU_HEADER, IPA_PDU_HEADER_SIZE); + memcpy(QETH_IPA_PDU_LEN_TOTAL(iob->data), &total_length, 2); memcpy(QETH_IPA_CMD_PROT_TYPE(iob->data), &prot_type, 1); + memcpy(QETH_IPA_PDU_LEN_PDU1(iob->data), &cmd_length, 2); + memcpy(QETH_IPA_PDU_LEN_PDU2(iob->data), &cmd_length, 2); memcpy(QETH_IPA_CMD_DEST_ADDR(iob->data), &card->token.ulp_connection_r, QETH_MPC_TOKEN_LENGTH); + memcpy(QETH_IPA_PDU_LEN_PDU3(iob->data), &cmd_length, 2); } EXPORT_SYMBOL_GPL(qeth_prepare_ipa_cmd); @@ -2833,7 +2839,7 @@ struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *card, iob = qeth_get_buffer(&card->write); if (iob) { - qeth_prepare_ipa_cmd(card, iob); + qeth_prepare_ipa_cmd(card, iob, sizeof(struct qeth_ipa_cmd)); qeth_fill_ipacmd_header(card, __ipa_cmd(iob), ipacmd, prot); } else { dev_warn(&card->gdev->dev, @@ -2857,11 +2863,12 @@ int qeth_send_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, unsigned long), void *reply_param) { + u16 length; int rc; QETH_CARD_TEXT(card, 4, "sendipa"); - rc = qeth_send_control_data(card, IPA_CMD_LENGTH, - iob, reply_cb, reply_param); + memcpy(&length, QETH_IPA_PDU_LEN_TOTAL(iob->data), 2); + rc = qeth_send_control_data(card, length, iob, reply_cb, reply_param); if (rc == -ETIME) { qeth_clear_ipacmd_list(card); qeth_schedule_recovery(card); @@ -4460,27 +4467,6 @@ static int qeth_mdio_read(struct net_device *dev, int phy_id, int regnum) return rc; } -static int qeth_send_ipa_snmp_cmd(struct qeth_card *card, - struct qeth_cmd_buffer *iob, int len, - int (*reply_cb)(struct qeth_card *, struct qeth_reply *, - unsigned long), - void *reply_param) -{ - u16 s1, s2; - - QETH_CARD_TEXT(card, 4, "sendsnmp"); - - /* adjust PDU length fields in IPA_PDU_HEADER */ - s1 = (u32) IPA_PDU_HEADER_SIZE + len; - s2 = (u32) len; - memcpy(QETH_IPA_PDU_LEN_TOTAL(iob->data), &s1, 2); - memcpy(QETH_IPA_PDU_LEN_PDU1(iob->data), &s2, 2); - memcpy(QETH_IPA_PDU_LEN_PDU2(iob->data), &s2, 2); - memcpy(QETH_IPA_PDU_LEN_PDU3(iob->data), &s2, 2); - return qeth_send_control_data(card, IPA_PDU_HEADER_SIZE + len, iob, - reply_cb, reply_param); -} - static int qeth_snmp_command_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long sdata) { @@ -4586,10 +4572,13 @@ static int qeth_snmp_command(struct qeth_card *card, char __user *udata) rc = -ENOMEM; goto out; } + + /* for large requests, fix-up the length fields: */ + qeth_prepare_ipa_cmd(card, iob, QETH_SETADP_BASE_LEN + req_len); + cmd = __ipa_cmd(iob); memcpy(&cmd->data.setadapterparms.data.snmp, &ureq->cmd, req_len); - rc = qeth_send_ipa_snmp_cmd(card, iob, QETH_SETADP_BASE_LEN + req_len, - qeth_snmp_command_cb, (void *)&qinfo); + rc = qeth_send_ipa_cmd(card, iob, qeth_snmp_command_cb, &qinfo); if (rc) QETH_DBF_MESSAGE(2, "SNMP command failed on device %x: (%#x)\n", CARD_DEVID(card), rc); diff --git a/drivers/s390/net/qeth_core_mpc.c b/drivers/s390/net/qeth_core_mpc.c index 16fc51ad0514..7ff221ae0a2e 100644 --- a/drivers/s390/net/qeth_core_mpc.c +++ b/drivers/s390/net/qeth_core_mpc.c @@ -125,24 +125,13 @@ unsigned char DM_ACT[] = { unsigned char IPA_PDU_HEADER[] = { 0x00, 0xe0, 0x00, 0x00, 0x77, 0x77, 0x77, 0x77, - 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, - (IPA_PDU_HEADER_SIZE+sizeof(struct qeth_ipa_cmd)) / 256, - (IPA_PDU_HEADER_SIZE+sizeof(struct qeth_ipa_cmd)) % 256, + 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xc1, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, - sizeof(struct qeth_ipa_cmd) / 256, - sizeof(struct qeth_ipa_cmd) % 256, - 0x00, - sizeof(struct qeth_ipa_cmd) / 256, - sizeof(struct qeth_ipa_cmd) % 256, - 0x05, - 0x77, 0x77, 0x77, 0x77, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x05, 0x77, 0x77, 0x77, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, - sizeof(struct qeth_ipa_cmd) / 256, - sizeof(struct qeth_ipa_cmd) % 256, - 0x00, 0x00, 0x00, 0x40, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, }; struct ipa_rc_msg { diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index 07a48ee95dba..1ac88da91a40 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -21,8 +21,6 @@ extern unsigned char IPA_PDU_HEADER[]; #define QETH_IPA_CMD_DEST_ADDR(buffer) (buffer + 0x2c) -#define IPA_CMD_LENGTH (IPA_PDU_HEADER_SIZE + sizeof(struct qeth_ipa_cmd)) - #define QETH_SEQ_NO_LENGTH 4 #define QETH_MPC_TOKEN_LENGTH 4 #define QETH_MCL_LENGTH 4 diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index ef0b5eaf2532..453b3f7f272c 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1124,20 +1124,14 @@ static int qeth_osn_send_control_data(struct qeth_card *card, int len, } static int qeth_osn_send_ipa_cmd(struct qeth_card *card, - struct qeth_cmd_buffer *iob, int data_len) + struct qeth_cmd_buffer *iob) { - u16 s1, s2; + u16 length; QETH_CARD_TEXT(card, 4, "osndipa"); - qeth_prepare_ipa_cmd(card, iob); - s1 = (u16)(IPA_PDU_HEADER_SIZE + data_len); - s2 = (u16)data_len; - memcpy(QETH_IPA_PDU_LEN_TOTAL(iob->data), &s1, 2); - memcpy(QETH_IPA_PDU_LEN_PDU1(iob->data), &s2, 2); - memcpy(QETH_IPA_PDU_LEN_PDU2(iob->data), &s2, 2); - memcpy(QETH_IPA_PDU_LEN_PDU3(iob->data), &s2, 2); - return qeth_osn_send_control_data(card, s1, iob); + memcpy(&length, QETH_IPA_PDU_LEN_TOTAL(iob->data), 2); + return qeth_osn_send_control_data(card, length, iob); } int qeth_osn_assist(struct net_device *dev, void *data, int data_len) @@ -1154,8 +1148,9 @@ int qeth_osn_assist(struct net_device *dev, void *data, int data_len) if (!qeth_card_hw_is_reachable(card)) return -ENODEV; iob = qeth_wait_for_buffer(&card->write); + qeth_prepare_ipa_cmd(card, iob, (u16) data_len); memcpy(__ipa_cmd(iob), data, data_len); - return qeth_osn_send_ipa_cmd(card, iob, data_len); + return qeth_osn_send_ipa_cmd(card, iob); } EXPORT_SYMBOL(qeth_osn_assist); From 7e83747dc95438c83d27e2ed8dbdc3e21fc88e83 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 12 Feb 2019 18:33:18 +0100 Subject: [PATCH 1046/1436] s390/qeth: enable only required csum offload features Current code attempts to enable all advertised HW csum offload features. Future-proof this by enabling only those features that we actually use. Also, the IPv4 header csum feature is only needed for TX on L3 devices. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 5f450df886c6..31720044656a 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -6329,8 +6329,11 @@ static int qeth_send_checksum_on(struct qeth_card *card, int cstype, struct qeth_checksum_cmd chksum_cb; int rc; - if (prot == QETH_PROT_IPV4) + /* some L3 HW requires combined L3+L4 csum offload: */ + if (IS_LAYER3(card) && prot == QETH_PROT_IPV4 && + cstype == IPA_OUTBOUND_CHECKSUM) required_features |= QETH_IPA_CHECKSUM_IP_HDR; + rc = qeth_ipa_checksum_run_cmd(card, cstype, IPA_CMD_ASS_START, 0, &chksum_cb, prot); if (!rc) { @@ -6351,8 +6354,12 @@ static int qeth_send_checksum_on(struct qeth_card *card, int cstype, prot, QETH_CARD_IFNAME(card)); return rc; } + + if (chksum_cb.supported & QETH_IPA_CHECKSUM_LP2LP) + required_features |= QETH_IPA_CHECKSUM_LP2LP; + rc = qeth_ipa_checksum_run_cmd(card, cstype, IPA_CMD_ASS_ENABLE, - chksum_cb.supported, &chksum_cb, + required_features, &chksum_cb, prot); if (!rc) { if ((required_features & chksum_cb.enabled) != From 4386e34fab6c89b7b8dae4a44e3cbed5c16a1c4f Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 12 Feb 2019 18:33:19 +0100 Subject: [PATCH 1047/1436] s390/qeth: align csum offload with TSO control logic csum offload and TSO have similar programming requirements. The TSO code was reworked with commit "s390/qeth: enhance TSO control sequence", adjust the csum control flow accordingly. Primarily this means replacing custom helpers with more generic infrastructure. Also, change the LP2LP check so that it warns on TX offload (not RX). This is where reduced csum capability actually matters. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 139 +++++++++++------------------- drivers/s390/net/qeth_core_mpc.h | 7 -- 2 files changed, 52 insertions(+), 94 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 31720044656a..428175e1cb5d 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -6267,66 +6267,33 @@ int qeth_core_ethtool_get_link_ksettings(struct net_device *netdev, } EXPORT_SYMBOL_GPL(qeth_core_ethtool_get_link_ksettings); -/* Callback to handle checksum offload command reply from OSA card. - * Verify that required features have been enabled on the card. - * Return error in hdr->return_code as this value is checked by caller. - * - * Always returns zero to indicate no further messages from the OSA card. - */ -static int qeth_ipa_checksum_run_cmd_cb(struct qeth_card *card, - struct qeth_reply *reply, - unsigned long data) +static int qeth_start_csum_cb(struct qeth_card *card, struct qeth_reply *reply, + unsigned long data) { struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; - struct qeth_checksum_cmd *chksum_cb = - (struct qeth_checksum_cmd *)reply->param; + u32 *features = reply->param; - QETH_CARD_TEXT(card, 4, "chkdoccb"); if (qeth_setassparms_inspect_rc(cmd)) return 0; - memset(chksum_cb, 0, sizeof(*chksum_cb)); - if (cmd->data.setassparms.hdr.command_code == IPA_CMD_ASS_START) { - chksum_cb->supported = - cmd->data.setassparms.data.chksum.supported; - QETH_CARD_TEXT_(card, 3, "strt:%x", chksum_cb->supported); - } - if (cmd->data.setassparms.hdr.command_code == IPA_CMD_ASS_ENABLE) { - chksum_cb->supported = - cmd->data.setassparms.data.chksum.supported; - chksum_cb->enabled = - cmd->data.setassparms.data.chksum.enabled; - QETH_CARD_TEXT_(card, 3, "supp:%x", chksum_cb->supported); - QETH_CARD_TEXT_(card, 3, "enab:%x", chksum_cb->enabled); - } + *features = cmd->data.setassparms.data.flags_32bit; return 0; } -/* Send command to OSA card and check results. */ -static int qeth_ipa_checksum_run_cmd(struct qeth_card *card, - enum qeth_ipa_funcs ipa_func, - __u16 cmd_code, long data, - struct qeth_checksum_cmd *chksum_cb, - enum qeth_prot_versions prot) +static int qeth_set_csum_off(struct qeth_card *card, enum qeth_ipa_funcs cstype, + enum qeth_prot_versions prot) { - struct qeth_cmd_buffer *iob; - - QETH_CARD_TEXT(card, 4, "chkdocmd"); - iob = qeth_get_setassparms_cmd(card, ipa_func, cmd_code, - sizeof(__u32), prot); - if (!iob) - return -ENOMEM; - - __ipa_cmd(iob)->data.setassparms.data.flags_32bit = (__u32) data; - return qeth_send_ipa_cmd(card, iob, qeth_ipa_checksum_run_cmd_cb, - chksum_cb); + return qeth_send_simple_setassparms_prot(card, cstype, + IPA_CMD_ASS_STOP, 0, prot); } -static int qeth_send_checksum_on(struct qeth_card *card, int cstype, - enum qeth_prot_versions prot) +static int qeth_set_csum_on(struct qeth_card *card, enum qeth_ipa_funcs cstype, + enum qeth_prot_versions prot) { u32 required_features = QETH_IPA_CHECKSUM_UDP | QETH_IPA_CHECKSUM_TCP; - struct qeth_checksum_cmd chksum_cb; + struct qeth_cmd_buffer *iob; + struct qeth_ipa_caps caps; + u32 features; int rc; /* some L3 HW requires combined L3+L4 csum offload: */ @@ -6334,59 +6301,57 @@ static int qeth_send_checksum_on(struct qeth_card *card, int cstype, cstype == IPA_OUTBOUND_CHECKSUM) required_features |= QETH_IPA_CHECKSUM_IP_HDR; - rc = qeth_ipa_checksum_run_cmd(card, cstype, IPA_CMD_ASS_START, 0, - &chksum_cb, prot); - if (!rc) { - if ((required_features & chksum_cb.supported) != - required_features) - rc = -EIO; - else if (!(QETH_IPA_CHECKSUM_LP2LP & chksum_cb.supported) && - cstype == IPA_INBOUND_CHECKSUM) - dev_warn(&card->gdev->dev, - "Hardware checksumming is performed only if %s and its peer use different OSA Express 3 ports\n", - QETH_CARD_IFNAME(card)); - } - if (rc) { - qeth_send_simple_setassparms_prot(card, cstype, - IPA_CMD_ASS_STOP, 0, prot); - dev_warn(&card->gdev->dev, - "Starting HW IPv%d checksumming for %s failed, using SW checksumming\n", - prot, QETH_CARD_IFNAME(card)); - return rc; - } - - if (chksum_cb.supported & QETH_IPA_CHECKSUM_LP2LP) - required_features |= QETH_IPA_CHECKSUM_LP2LP; - - rc = qeth_ipa_checksum_run_cmd(card, cstype, IPA_CMD_ASS_ENABLE, - required_features, &chksum_cb, + iob = qeth_get_setassparms_cmd(card, cstype, IPA_CMD_ASS_START, 0, prot); - if (!rc) { - if ((required_features & chksum_cb.enabled) != - required_features) - rc = -EIO; - } - if (rc) { - qeth_send_simple_setassparms_prot(card, cstype, - IPA_CMD_ASS_STOP, 0, prot); - dev_warn(&card->gdev->dev, - "Enabling HW IPv%d checksumming for %s failed, using SW checksumming\n", - prot, QETH_CARD_IFNAME(card)); + if (!iob) + return -ENOMEM; + + rc = qeth_send_ipa_cmd(card, iob, qeth_start_csum_cb, &features); + if (rc) return rc; + + if ((required_features & features) != required_features) { + qeth_set_csum_off(card, cstype, prot); + return -EOPNOTSUPP; + } + + iob = qeth_get_setassparms_cmd(card, cstype, IPA_CMD_ASS_ENABLE, 4, + prot); + if (!iob) { + qeth_set_csum_off(card, cstype, prot); + return -ENOMEM; + } + + if (features & QETH_IPA_CHECKSUM_LP2LP) + required_features |= QETH_IPA_CHECKSUM_LP2LP; + __ipa_cmd(iob)->data.setassparms.data.flags_32bit = required_features; + rc = qeth_send_ipa_cmd(card, iob, qeth_setassparms_get_caps_cb, &caps); + if (rc) { + qeth_set_csum_off(card, cstype, prot); + return rc; + } + + if (!qeth_ipa_caps_supported(&caps, required_features) || + !qeth_ipa_caps_enabled(&caps, required_features)) { + qeth_set_csum_off(card, cstype, prot); + return -EOPNOTSUPP; } dev_info(&card->gdev->dev, "HW Checksumming (%sbound IPv%d) enabled\n", cstype == IPA_INBOUND_CHECKSUM ? "in" : "out", prot); + if (!qeth_ipa_caps_enabled(&caps, QETH_IPA_CHECKSUM_LP2LP) && + cstype == IPA_OUTBOUND_CHECKSUM) + dev_warn(&card->gdev->dev, + "Hardware checksumming is performed only if %s and its peer use different OSA Express 3 ports\n", + QETH_CARD_IFNAME(card)); return 0; } static int qeth_set_ipa_csum(struct qeth_card *card, bool on, int cstype, enum qeth_prot_versions prot) { - int rc = (on) ? qeth_send_checksum_on(card, cstype, prot) - : qeth_send_simple_setassparms_prot(card, cstype, - IPA_CMD_ASS_STOP, 0, - prot); + int rc = (on) ? qeth_set_csum_on(card, cstype, prot) : + qeth_set_csum_off(card, cstype, prot); return rc ? -EIO : 0; } diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index 1ac88da91a40..7919a0c8109a 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -417,12 +417,6 @@ enum qeth_ipa_checksum_bits { QETH_IPA_CHECKSUM_LP2LP = 0x0020 }; -/* IPA Assist checksum offload reply layout. */ -struct qeth_checksum_cmd { - __u32 supported; - __u32 enabled; -} __packed; - enum qeth_ipa_large_send_caps { QETH_IPA_LARGE_SEND_TCP = 0x00000001, }; @@ -438,7 +432,6 @@ struct qeth_ipacmd_setassparms { union { __u32 flags_32bit; struct qeth_ipa_caps caps; - struct qeth_checksum_cmd chksum; struct qeth_arp_cache_entry arp_entry; struct qeth_arp_query_data query_arp; struct qeth_tso_start_data tso; From 51581fd07d8b8f5c3ff4444d6a8d2dc130d6cf57 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 12 Feb 2019 18:33:20 +0100 Subject: [PATCH 1048/1436] s390/qeth: limit trace to valid data of command request 'len' specifies how much data we send to the HW, don't dump beyond this boundary. As of today this is no big concern - commands are built in full, zeroed pages. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 428175e1cb5d..ae2ea0a0edce 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1991,7 +1991,7 @@ void qeth_prepare_control_data(struct qeth_card *card, int len, card->seqno.pdu_hdr++; memcpy(QETH_PDU_HEADER_ACK_SEQ_NO(iob->data), &card->seqno.pdu_hdr_ack, QETH_SEQ_NO_LENGTH); - QETH_DBF_HEX(CTRL, 2, iob->data, QETH_DBF_CTRL_LEN); + QETH_DBF_HEX(CTRL, 2, iob->data, min(len, QETH_DBF_CTRL_LEN)); } EXPORT_SYMBOL_GPL(qeth_prepare_control_data); From 0951c6babf49d2f2429cbfbea5cf792d427ecc6a Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 12 Feb 2019 18:33:21 +0100 Subject: [PATCH 1049/1436] s390/qeth: simplify reply object handling Current code enqueues & dequeues a reply object from the waiter list in various places. In particular, the dequeue & enqueue in qeth_send_control_data_cb() looks fragile - this can cause qeth_clear_ipacmd_list() to skip the active object. Add some helpers, and boil the logic down by giving qeth_send_control_data() the sole responsibility to add and remove objects. qeth_send_control_data_cb() and qeth_clear_ipacmd_list() will now only notify the reply object to interrupt its wait cycle. This can cause a slight delay in the removal, but that's no concern. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 118 +++++++++++++++--------------- 1 file changed, 61 insertions(+), 57 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index ae2ea0a0edce..04beb0922b31 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -566,6 +566,7 @@ static struct qeth_reply *qeth_alloc_reply(struct qeth_card *card) if (reply) { refcount_set(&reply->refcnt, 1); atomic_set(&reply->received, 0); + init_waitqueue_head(&reply->wait_q); } return reply; } @@ -581,6 +582,26 @@ static void qeth_put_reply(struct qeth_reply *reply) kfree(reply); } +static void qeth_enqueue_reply(struct qeth_card *card, struct qeth_reply *reply) +{ + spin_lock_irq(&card->lock); + list_add_tail(&reply->list, &card->cmd_waiter_list); + spin_unlock_irq(&card->lock); +} + +static void qeth_dequeue_reply(struct qeth_card *card, struct qeth_reply *reply) +{ + spin_lock_irq(&card->lock); + list_del(&reply->list); + spin_unlock_irq(&card->lock); +} + +static void qeth_notify_reply(struct qeth_reply *reply) +{ + atomic_inc(&reply->received); + wake_up(&reply->wait_q); +} + static void qeth_issue_ipa_msg(struct qeth_ipa_cmd *cmd, int rc, struct qeth_card *card) { @@ -657,19 +678,15 @@ static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card, void qeth_clear_ipacmd_list(struct qeth_card *card) { - struct qeth_reply *reply, *r; + struct qeth_reply *reply; unsigned long flags; QETH_CARD_TEXT(card, 4, "clipalst"); spin_lock_irqsave(&card->lock, flags); - list_for_each_entry_safe(reply, r, &card->cmd_waiter_list, list) { - qeth_get_reply(reply); + list_for_each_entry(reply, &card->cmd_waiter_list, list) { reply->rc = -EIO; - atomic_inc(&reply->received); - list_del_init(&reply->list); - wake_up(&reply->wait_q); - qeth_put_reply(reply); + qeth_notify_reply(reply); } spin_unlock_irqrestore(&card->lock, flags); } @@ -774,9 +791,10 @@ static void qeth_send_control_data_cb(struct qeth_card *card, struct qeth_cmd_buffer *iob) { struct qeth_ipa_cmd *cmd = NULL; - struct qeth_reply *reply, *r; + struct qeth_reply *reply = NULL; + struct qeth_reply *r; unsigned long flags; - int keep_reply; + int keep_reply = 0; int rc = 0; QETH_CARD_TEXT(card, 4, "sndctlcb"); @@ -808,44 +826,40 @@ static void qeth_send_control_data_cb(struct qeth_card *card, goto out; } + /* match against pending cmd requests */ spin_lock_irqsave(&card->lock, flags); - list_for_each_entry_safe(reply, r, &card->cmd_waiter_list, list) { - if ((reply->seqno == QETH_IDX_COMMAND_SEQNO) || - ((cmd) && (reply->seqno == cmd->hdr.seqno))) { + list_for_each_entry(r, &card->cmd_waiter_list, list) { + if ((r->seqno == QETH_IDX_COMMAND_SEQNO) || + (cmd && (r->seqno == cmd->hdr.seqno))) { + reply = r; + /* take the object outside the lock */ qeth_get_reply(reply); - list_del_init(&reply->list); - spin_unlock_irqrestore(&card->lock, flags); - keep_reply = 0; - if (reply->callback != NULL) { - if (cmd) { - reply->offset = (__u16)((char *)cmd - - (char *)iob->data); - keep_reply = reply->callback(card, - reply, - (unsigned long)cmd); - } else - keep_reply = reply->callback(card, - reply, - (unsigned long)iob); - } - if (cmd) - reply->rc = (u16) cmd->hdr.return_code; - else if (iob->rc) - reply->rc = iob->rc; - if (keep_reply) { - spin_lock_irqsave(&card->lock, flags); - list_add_tail(&reply->list, - &card->cmd_waiter_list); - spin_unlock_irqrestore(&card->lock, flags); - } else { - atomic_inc(&reply->received); - wake_up(&reply->wait_q); - } - qeth_put_reply(reply); - goto out; + break; } } spin_unlock_irqrestore(&card->lock, flags); + + if (!reply) + goto out; + + if (reply->callback) { + if (cmd) { + reply->offset = (u16)((char *)cmd - (char *)iob->data); + keep_reply = reply->callback(card, reply, + (unsigned long)cmd); + } else + keep_reply = reply->callback(card, reply, + (unsigned long)iob); + } + if (cmd) + reply->rc = (u16) cmd->hdr.return_code; + else if (iob->rc) + reply->rc = iob->rc; + + if (!keep_reply) + qeth_notify_reply(reply); + qeth_put_reply(reply); + out: memcpy(&card->seqno.pdu_hdr_ack, QETH_PDU_HEADER_SEQ_NO(iob->data), @@ -2047,8 +2061,6 @@ static int qeth_send_control_data(struct qeth_card *card, int len, reply->callback = reply_cb; reply->param = reply_param; - init_waitqueue_head(&reply->wait_q); - while (atomic_cmpxchg(&channel->irq_pending, 0, 1)) ; if (IS_IPA(iob->data)) { @@ -2062,9 +2074,7 @@ static int qeth_send_control_data(struct qeth_card *card, int len, } qeth_prepare_control_data(card, len, iob); - spin_lock_irq(&card->lock); - list_add_tail(&reply->list, &card->cmd_waiter_list); - spin_unlock_irq(&card->lock); + qeth_enqueue_reply(card, reply); timeout = jiffies + event_timeout; @@ -2077,10 +2087,8 @@ static int qeth_send_control_data(struct qeth_card *card, int len, QETH_DBF_MESSAGE(2, "qeth_send_control_data on device %x: ccw_device_start rc = %i\n", CARD_DEVID(card), rc); QETH_CARD_TEXT_(card, 2, " err%d", rc); - spin_lock_irq(&card->lock); - list_del_init(&reply->list); + qeth_dequeue_reply(card, reply); qeth_put_reply(reply); - spin_unlock_irq(&card->lock); qeth_release_buffer(channel, iob); atomic_set(&channel->irq_pending, 0); wake_up(&card->wait_q); @@ -2102,19 +2110,15 @@ static int qeth_send_control_data(struct qeth_card *card, int len, } } + qeth_dequeue_reply(card, reply); rc = reply->rc; qeth_put_reply(reply); return rc; time_err: - reply->rc = -ETIME; - spin_lock_irq(&card->lock); - list_del_init(&reply->list); - spin_unlock_irq(&card->lock); - atomic_inc(&reply->received); - rc = reply->rc; + qeth_dequeue_reply(card, reply); qeth_put_reply(reply); - return rc; + return -ETIME; } static int qeth_cm_enable_cb(struct qeth_card *card, struct qeth_reply *reply, From 54daaca7024d5419dad64db8a3e65f6b38f24b7f Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 12 Feb 2019 18:33:22 +0100 Subject: [PATCH 1050/1436] s390/qeth: cancel cmd on early error When sending cmds via qeth_send_control_data(), qeth puts the request on the IO channel and then blocks on the reply object until the response has been received. If the IO completes with error, there will never be a response and we block until the reply-wait hits its timeout. For this case, connect the request buffer to its reply object, so that we can immediately cancel the wait. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 1 + drivers/s390/net/qeth_core_main.c | 37 +++++++++++++++++++++++-------- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 07840f9c0bb3..83f710336685 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -595,6 +595,7 @@ struct qeth_channel; struct qeth_cmd_buffer { enum qeth_cmd_buffer_state state; struct qeth_channel *channel; + struct qeth_reply *reply; unsigned char *data; int rc; void (*callback)(struct qeth_card *card, struct qeth_channel *channel, diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 04beb0922b31..b1a7a35a086e 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -743,6 +743,10 @@ void qeth_release_buffer(struct qeth_channel *channel, spin_lock_irqsave(&channel->iob_lock, flags); iob->state = BUF_STATE_FREE; iob->callback = qeth_send_control_data_cb; + if (iob->reply) { + qeth_put_reply(iob->reply); + iob->reply = NULL; + } iob->rc = 0; spin_unlock_irqrestore(&channel->iob_lock, flags); wake_up(&channel->wait_q); @@ -756,6 +760,17 @@ static void qeth_release_buffer_cb(struct qeth_card *card, qeth_release_buffer(channel, iob); } +static void qeth_cancel_cmd(struct qeth_cmd_buffer *iob, int rc) +{ + struct qeth_reply *reply = iob->reply; + + if (reply) { + reply->rc = rc; + qeth_notify_reply(reply); + } + qeth_release_buffer(iob->channel, iob); +} + static struct qeth_cmd_buffer *qeth_get_buffer(struct qeth_channel *channel) { struct qeth_cmd_buffer *buffer = NULL; @@ -990,9 +1005,8 @@ static int qeth_get_problem(struct qeth_card *card, struct ccw_device *cdev, return 0; } -static long qeth_check_irb_error(struct qeth_card *card, - struct ccw_device *cdev, unsigned long intparm, - struct irb *irb) +static int qeth_check_irb_error(struct qeth_card *card, struct ccw_device *cdev, + unsigned long intparm, struct irb *irb) { if (!IS_ERR(irb)) return 0; @@ -1003,7 +1017,7 @@ static long qeth_check_irb_error(struct qeth_card *card, CCW_DEVID(cdev)); QETH_CARD_TEXT(card, 2, "ckirberr"); QETH_CARD_TEXT_(card, 2, " rc%d", -EIO); - break; + return -EIO; case -ETIMEDOUT: dev_warn(&cdev->dev, "A hardware operation timed out" " on the device\n"); @@ -1015,14 +1029,14 @@ static long qeth_check_irb_error(struct qeth_card *card, wake_up(&card->wait_q); } } - break; + return -ETIMEDOUT; default: QETH_DBF_MESSAGE(2, "unknown error %ld on channel %x\n", PTR_ERR(irb), CCW_DEVID(cdev)); QETH_CARD_TEXT(card, 2, "ckirberr"); QETH_CARD_TEXT(card, 2, " rc???"); + return PTR_ERR(irb); } - return PTR_ERR(irb); } static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, @@ -1057,10 +1071,11 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, if (qeth_intparm_is_iob(intparm)) iob = (struct qeth_cmd_buffer *) __va((addr_t)intparm); - if (qeth_check_irb_error(card, cdev, intparm, irb)) { + rc = qeth_check_irb_error(card, cdev, intparm, irb); + if (rc) { /* IO was terminated, free its resources. */ if (iob) - qeth_release_buffer(iob->channel, iob); + qeth_cancel_cmd(iob, rc); atomic_set(&channel->irq_pending, 0); wake_up(&card->wait_q); return; @@ -1116,7 +1131,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, if (rc) { card->read_or_write_problem = 1; if (iob) - qeth_release_buffer(iob->channel, iob); + qeth_cancel_cmd(iob, rc); qeth_clear_ipacmd_list(card); qeth_schedule_recovery(card); goto out; @@ -2061,6 +2076,10 @@ static int qeth_send_control_data(struct qeth_card *card, int len, reply->callback = reply_cb; reply->param = reply_param; + /* pairs with qeth_release_buffer(): */ + qeth_get_reply(reply); + iob->reply = reply; + while (atomic_cmpxchg(&channel->irq_pending, 0, 1)) ; if (IS_IPA(iob->data)) { From 4b7ae12216948229b065d6bf4776a5681d46330c Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 12 Feb 2019 18:33:23 +0100 Subject: [PATCH 1051/1436] s390/qeth: allow cmd callbacks to return errnos Error propagation from cmd callbacks currently works in a way where qeth_send_control_data_cb() picks the raw HW code from the response, and the cmd's originator later translates this into an errno. The callback itself only returns 0 ("done") or 1 ("expect more data"). This is 1. limiting, as the only means for the callback to report an internal error is to invent pseudo HW codes (such as IPA_RC_ENOMEM), that the originator then needs to understand. For non-IPA callbacks, we even provide a separate field in the IO buffer metadata (iob->rc) so the callback can pass back a return value. 2. fragile, as the originator must take care to not translate any errno that is returned by qeth's own IO code paths (eg -ENOMEM). Also, any originator that forgets to translate the HW codes potentially passes garbage back to its caller. For instance, see commit 2aa4867198c2 ("s390/qeth: translate SETVLAN/DELVLAN errors"). Introduce a new model where all HW error translation is done within the callback, and the callback returns > 0, if it expects more data (as before) == 0, on success < 0, with an errno Start off with converting all callbacks to the new model that either a) pass back pseudo HW codes, or b) have a dependency on a specific HW error code. Also convert c) the one callback that uses iob->rc, and d) qeth_setadpparms_change_macaddr_cb() so that it can pass back an error back to qeth_l2_request_initial_mac() even when the cmd itself was successful. The old model remains supported: if the callback returns 0, we still propagate the response's HW error code back to the originator. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 1 - drivers/s390/net/qeth_core_main.c | 82 +++++++++++++++++-------------- drivers/s390/net/qeth_core_mpc.c | 4 +- drivers/s390/net/qeth_core_mpc.h | 1 - drivers/s390/net/qeth_l2_main.c | 2 +- drivers/s390/net/qeth_l3_main.c | 50 +++++++++++-------- 6 files changed, 76 insertions(+), 64 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 83f710336685..18696ffb662d 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -597,7 +597,6 @@ struct qeth_cmd_buffer { struct qeth_channel *channel; struct qeth_reply *reply; unsigned char *data; - int rc; void (*callback)(struct qeth_card *card, struct qeth_channel *channel, struct qeth_cmd_buffer *iob); }; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index b1a7a35a086e..cdb65ba99888 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -747,7 +747,6 @@ void qeth_release_buffer(struct qeth_channel *channel, qeth_put_reply(iob->reply); iob->reply = NULL; } - iob->rc = 0; spin_unlock_irqrestore(&channel->iob_lock, flags); wake_up(&channel->wait_q); } @@ -809,7 +808,6 @@ static void qeth_send_control_data_cb(struct qeth_card *card, struct qeth_reply *reply = NULL; struct qeth_reply *r; unsigned long flags; - int keep_reply = 0; int rc = 0; QETH_CARD_TEXT(card, 4, "sndctlcb"); @@ -857,22 +855,27 @@ static void qeth_send_control_data_cb(struct qeth_card *card, if (!reply) goto out; - if (reply->callback) { + if (!reply->callback) { + rc = 0; + } else { if (cmd) { reply->offset = (u16)((char *)cmd - (char *)iob->data); - keep_reply = reply->callback(card, reply, - (unsigned long)cmd); - } else - keep_reply = reply->callback(card, reply, - (unsigned long)iob); + rc = reply->callback(card, reply, (unsigned long)cmd); + } else { + rc = reply->callback(card, reply, (unsigned long)iob); + } } - if (cmd) - reply->rc = (u16) cmd->hdr.return_code; - else if (iob->rc) - reply->rc = iob->rc; - if (!keep_reply) + if (rc <= 0) { + if (cmd) + reply->rc = (u16) cmd->hdr.return_code; + + /* for callbacks with proper errnos: */ + if (rc < 0) + reply->rc = rc; qeth_notify_reply(reply); + } + qeth_put_reply(reply); out: @@ -1293,7 +1296,6 @@ static int qeth_setup_channel(struct qeth_channel *channel, bool alloc_buffers) channel->iob[cnt].state = BUF_STATE_FREE; channel->iob[cnt].channel = channel; channel->iob[cnt].callback = qeth_send_control_data_cb; - channel->iob[cnt].rc = 0; } if (cnt < QETH_CMD_BUFFER_NO) { qeth_clean_channel(channel); @@ -2343,9 +2345,8 @@ static int qeth_ulp_setup_cb(struct qeth_card *card, struct qeth_reply *reply, QETH_DBF_TEXT(SETUP, 2, "olmlimit"); dev_err(&card->gdev->dev, "A connection could not be " "established because of an OLM limit\n"); - iob->rc = -EMLINK; + return -EMLINK; } - QETH_DBF_TEXT_(SETUP, 2, " rc%d", iob->rc); return 0; } @@ -2900,9 +2901,19 @@ int qeth_send_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, } EXPORT_SYMBOL_GPL(qeth_send_ipa_cmd); +static int qeth_send_startlan_cb(struct qeth_card *card, + struct qeth_reply *reply, unsigned long data) +{ + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; + + if (cmd->hdr.return_code == IPA_RC_LAN_OFFLINE) + return -ENETDOWN; + + return (cmd->hdr.return_code) ? -EIO : 0; +} + static int qeth_send_startlan(struct qeth_card *card) { - int rc; struct qeth_cmd_buffer *iob; QETH_DBF_TEXT(SETUP, 2, "strtlan"); @@ -2910,8 +2921,7 @@ static int qeth_send_startlan(struct qeth_card *card) iob = qeth_get_ipacmd_buffer(card, IPA_CMD_STARTLAN, 0); if (!iob) return -ENOMEM; - rc = qeth_send_ipa_cmd(card, iob, NULL, NULL); - return rc; + return qeth_send_ipa_cmd(card, iob, qeth_send_startlan_cb, NULL); } static int qeth_setadpparms_inspect_rc(struct qeth_ipa_cmd *cmd) @@ -4238,12 +4248,15 @@ static int qeth_setadpparms_change_macaddr_cb(struct qeth_card *card, QETH_CARD_TEXT(card, 4, "chgmaccb"); if (qeth_setadpparms_inspect_rc(cmd)) - return 0; + return -EIO; adp_cmd = &cmd->data.setadapterparms; + if (!is_valid_ether_addr(adp_cmd->data.change_addr.addr)) + return -EADDRNOTAVAIL; + if (IS_LAYER2(card) && IS_OSD(card) && !IS_VM_NIC(card) && !(adp_cmd->hdr.flags & QETH_SETADP_FLAGS_VIRTUAL_MAC)) - return 0; + return -EADDRNOTAVAIL; ether_addr_copy(card->dev->dev_addr, adp_cmd->data.change_addr.addr); return 0; @@ -4507,13 +4520,13 @@ static int qeth_snmp_command_cb(struct qeth_card *card, if (cmd->hdr.return_code) { QETH_CARD_TEXT_(card, 4, "scer1%x", cmd->hdr.return_code); - return 0; + return -EIO; } if (cmd->data.setadapterparms.hdr.return_code) { cmd->hdr.return_code = cmd->data.setadapterparms.hdr.return_code; QETH_CARD_TEXT_(card, 4, "scer2%x", cmd->hdr.return_code); - return 0; + return -EIO; } data_len = *((__u16 *)QETH_IPA_PDU_LEN_PDU1(data)); if (cmd->data.setadapterparms.hdr.seq_no == 1) { @@ -4528,9 +4541,8 @@ static int qeth_snmp_command_cb(struct qeth_card *card, /* check if there is enough room in userspace */ if ((qinfo->udata_len - qinfo->udata_offset) < data_len) { - QETH_CARD_TEXT_(card, 4, "scer3%i", -ENOMEM); - cmd->hdr.return_code = IPA_RC_ENOMEM; - return 0; + QETH_CARD_TEXT_(card, 4, "scer3%i", -ENOSPC); + return -ENOSPC; } QETH_CARD_TEXT_(card, 4, "snore%i", cmd->data.setadapterparms.hdr.used_total); @@ -4625,16 +4637,14 @@ static int qeth_setadpparms_query_oat_cb(struct qeth_card *card, QETH_CARD_TEXT(card, 3, "qoatcb"); if (qeth_setadpparms_inspect_rc(cmd)) - return 0; + return -EIO; priv = (struct qeth_qoat_priv *)reply->param; resdatalen = cmd->data.setadapterparms.hdr.cmdlength; resdata = (char *)data + 28; - if (resdatalen > (priv->buffer_len - priv->response_len)) { - cmd->hdr.return_code = IPA_RC_FFFF; - return 0; - } + if (resdatalen > (priv->buffer_len - priv->response_len)) + return -ENOSPC; memcpy((priv->buffer + priv->response_len), resdata, resdatalen); @@ -4707,9 +4717,7 @@ static int qeth_query_oat_command(struct qeth_card *card, char __user *udata) if (copy_to_user(udata, &oat_data, sizeof(struct qeth_query_oat_data))) rc = -EFAULT; - } else - if (rc == IPA_RC_FFFF) - rc = -EFAULT; + } out_free: vfree(priv.buffer); @@ -5128,12 +5136,10 @@ retriable: rc = qeth_send_startlan(card); if (rc) { QETH_DBF_TEXT_(SETUP, 2, "6err%d", rc); - if (rc == IPA_RC_LAN_OFFLINE) { - dev_warn(&card->gdev->dev, - "The LAN is offline\n"); + if (rc == -ENETDOWN) { + dev_warn(&card->gdev->dev, "The LAN is offline\n"); *carrier_ok = false; } else { - rc = -ENODEV; goto out; } } else { diff --git a/drivers/s390/net/qeth_core_mpc.c b/drivers/s390/net/qeth_core_mpc.c index 7ff221ae0a2e..e3f4866c158e 100644 --- a/drivers/s390/net/qeth_core_mpc.c +++ b/drivers/s390/net/qeth_core_mpc.c @@ -201,12 +201,10 @@ static const struct ipa_rc_msg qeth_ipa_rc_msg[] = { {IPA_RC_LAN_OFFLINE, "STRTLAN_LAN_DISABLED - LAN offline"}, {IPA_RC_VEPA_TO_VEB_TRANSITION, "Adj. switch disabled port mode RR"}, {IPA_RC_INVALID_IP_VERSION2, "Invalid IP version"}, - {IPA_RC_ENOMEM, "Memory problem"}, + /* default for qeth_get_ipa_msg(): */ {IPA_RC_FFFF, "Unknown Error"} }; - - const char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc) { int x; diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index 7919a0c8109a..4b1690380ebe 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -229,7 +229,6 @@ enum qeth_ipa_return_codes { IPA_RC_LAN_OFFLINE = 0xe080, IPA_RC_VEPA_TO_VEB_TRANSITION = 0xe090, IPA_RC_INVALID_IP_VERSION2 = 0xf001, - IPA_RC_ENOMEM = 0xfffe, IPA_RC_FFFF = 0xffff }; /* for VNIC Characteristics */ diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 453b3f7f272c..6d9eb54ea3b0 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -393,7 +393,7 @@ static int qeth_l2_request_initial_mac(struct qeth_card *card) if (!IS_OSN(card)) { rc = qeth_setadpparms_change_macaddr(card); - if (!rc && is_valid_ether_addr(card->dev->dev_addr)) + if (!rc) goto out; QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %x: %#x\n", CARD_DEVID(card), rc); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index f0b790810ebc..852bfd5cc3e9 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -253,8 +253,7 @@ static int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) } else rc = qeth_l3_register_addr_entry(card, addr); - if (!rc || (rc == IPA_RC_DUPLICATE_IP_ADDRESS) || - (rc == IPA_RC_LAN_OFFLINE)) { + if (!rc || rc == -EADDRINUSE || rc == -ENETDOWN) { addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING; if (addr->ref_counter < 1) { qeth_l3_deregister_addr_entry(card, addr); @@ -338,10 +337,28 @@ static void qeth_l3_recover_ip(struct qeth_card *card) } +static int qeth_l3_setdelip_cb(struct qeth_card *card, struct qeth_reply *reply, + unsigned long data) +{ + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; + + switch (cmd->hdr.return_code) { + case IPA_RC_SUCCESS: + return 0; + case IPA_RC_DUPLICATE_IP_ADDRESS: + return -EADDRINUSE; + case IPA_RC_MC_ADDR_NOT_FOUND: + return -ENOENT; + case IPA_RC_LAN_OFFLINE: + return -ENETDOWN; + default: + return -EIO; + } +} + static int qeth_l3_send_setdelmc(struct qeth_card *card, struct qeth_ipaddr *addr, int ipacmd) { - int rc; struct qeth_cmd_buffer *iob; struct qeth_ipa_cmd *cmd; @@ -358,9 +375,7 @@ static int qeth_l3_send_setdelmc(struct qeth_card *card, else memcpy(&cmd->data.setdelipm.ip4, &addr->u.a4.addr, 4); - rc = qeth_send_ipa_cmd(card, iob, NULL, NULL); - - return rc; + return qeth_send_ipa_cmd(card, iob, qeth_l3_setdelip_cb, NULL); } static void qeth_l3_fill_netmask(u8 *netmask, unsigned int len) @@ -422,7 +437,7 @@ static int qeth_l3_send_setdelip(struct qeth_card *card, cmd->data.setdelip4.flags = flags; } - return qeth_send_ipa_cmd(card, iob, NULL, NULL); + return qeth_send_ipa_cmd(card, iob, qeth_l3_setdelip_cb, NULL); } static int qeth_l3_send_setrouting(struct qeth_card *card, @@ -1481,14 +1496,14 @@ static void qeth_l3_set_rx_mode(struct net_device *dev) switch (addr->disp_flag) { case QETH_DISP_ADDR_DELETE: rc = qeth_l3_deregister_addr_entry(card, addr); - if (!rc || rc == IPA_RC_MC_ADDR_NOT_FOUND) { + if (!rc || rc == -ENOENT) { hash_del(&addr->hnode); kfree(addr); } break; case QETH_DISP_ADDR_ADD: rc = qeth_l3_register_addr_entry(card, addr); - if (rc && rc != IPA_RC_LAN_OFFLINE) { + if (rc && rc != -ENETDOWN) { hash_del(&addr->hnode); kfree(addr); break; @@ -1599,7 +1614,6 @@ static int qeth_l3_arp_query_cb(struct qeth_card *card, struct qeth_ipa_cmd *cmd; struct qeth_arp_query_data *qdata; struct qeth_arp_query_info *qinfo; - int i; int e; int entrybytes_done; int stripped_bytes; @@ -1613,13 +1627,13 @@ static int qeth_l3_arp_query_cb(struct qeth_card *card, if (cmd->hdr.return_code) { QETH_CARD_TEXT(card, 4, "arpcberr"); QETH_CARD_TEXT_(card, 4, "%i", cmd->hdr.return_code); - return 0; + return qeth_l3_arp_makerc(cmd->hdr.return_code); } if (cmd->data.setassparms.hdr.return_code) { cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code; QETH_CARD_TEXT(card, 4, "setaperr"); QETH_CARD_TEXT_(card, 4, "%i", cmd->hdr.return_code); - return 0; + return qeth_l3_arp_makerc(cmd->hdr.return_code); } qdata = &cmd->data.setassparms.data.query_arp; QETH_CARD_TEXT_(card, 4, "anoen%i", qdata->no_entries); @@ -1646,9 +1660,9 @@ static int qeth_l3_arp_query_cb(struct qeth_card *card, break; if ((qinfo->udata_len - qinfo->udata_offset) < esize) { - QETH_CARD_TEXT_(card, 4, "qaer3%i", -ENOMEM); - cmd->hdr.return_code = IPA_RC_ENOMEM; - goto out_error; + QETH_CARD_TEXT_(card, 4, "qaer3%i", -ENOSPC); + memset(qinfo->udata, 0, 4); + return -ENOSPC; } memcpy(qinfo->udata + qinfo->udata_offset, @@ -1671,10 +1685,6 @@ static int qeth_l3_arp_query_cb(struct qeth_card *card, memcpy(qinfo->udata + QETH_QARP_MASK_OFFSET, &qdata->reply_bits, 2); QETH_CARD_TEXT_(card, 4, "rc%i", 0); return 0; -out_error: - i = 0; - memcpy(qinfo->udata, &i, 4); - return 0; } static int qeth_l3_query_arp_cache_info(struct qeth_card *card, @@ -1700,7 +1710,7 @@ static int qeth_l3_query_arp_cache_info(struct qeth_card *card, if (rc) QETH_DBF_MESSAGE(2, "Error while querying ARP cache on device %x: %#x\n", CARD_DEVID(card), rc); - return qeth_l3_arp_makerc(rc); + return rc; } static int qeth_l3_arp_query(struct qeth_card *card, char __user *udata) From 1709ff8d8603dcfe977ed58557f552b46695d2a3 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 12 Feb 2019 18:33:24 +0100 Subject: [PATCH 1052/1436] s390/qeth: convert bridgeport callbacks By letting the callbacks deal with error translation, we no longer need to pass the raw error codes back to the originator. This allows us to slim down the callback's private data, and nicely simplifies the code. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 94 ++++++++++++++------------------- 1 file changed, 40 insertions(+), 54 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 6d9eb54ea3b0..955fb24af418 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1407,8 +1407,6 @@ static void qeth_bridge_host_event(struct qeth_card *card, /* SETBRIDGEPORT support; sending commands */ struct _qeth_sbp_cbctl { - u16 ipa_rc; - u16 cmd_rc; union { u32 supported; struct { @@ -1418,23 +1416,21 @@ struct _qeth_sbp_cbctl { } data; }; -/** - * qeth_bridgeport_makerc() - derive "traditional" error from hardware codes. - * @card: qeth_card structure pointer, for debug messages. - * @cbctl: state structure with hardware return codes. - * @setcmd: IPA command code - * - * Returns negative errno-compatible error indication or 0 on success. - */ static int qeth_bridgeport_makerc(struct qeth_card *card, - struct _qeth_sbp_cbctl *cbctl, enum qeth_ipa_sbp_cmd setcmd) + struct qeth_ipa_cmd *cmd) { + struct qeth_ipacmd_setbridgeport *sbp = &cmd->data.sbp; + enum qeth_ipa_sbp_cmd setcmd = sbp->hdr.command_code; + u16 ipa_rc = cmd->hdr.return_code; + u16 sbp_rc = sbp->hdr.return_code; int rc; - int is_iqd = (card->info.type == QETH_CARD_TYPE_IQD); - if ((is_iqd && (cbctl->ipa_rc == IPA_RC_SUCCESS)) || - (!is_iqd && (cbctl->ipa_rc == cbctl->cmd_rc))) - switch (cbctl->cmd_rc) { + if (ipa_rc == IPA_RC_SUCCESS && sbp_rc == IPA_RC_SUCCESS) + return 0; + + if ((IS_IQD(card) && ipa_rc == IPA_RC_SUCCESS) || + (!IS_IQD(card) && ipa_rc == sbp_rc)) { + switch (sbp_rc) { case IPA_RC_SUCCESS: rc = 0; break; @@ -1498,8 +1494,8 @@ static int qeth_bridgeport_makerc(struct qeth_card *card, default: rc = -EIO; } - else - switch (cbctl->ipa_rc) { + } else { + switch (ipa_rc) { case IPA_RC_NOTSUPP: rc = -EOPNOTSUPP; break; @@ -1509,10 +1505,11 @@ static int qeth_bridgeport_makerc(struct qeth_card *card, default: rc = -EIO; } + } if (rc) { - QETH_CARD_TEXT_(card, 2, "SBPi%04x", cbctl->ipa_rc); - QETH_CARD_TEXT_(card, 2, "SBPc%04x", cbctl->cmd_rc); + QETH_CARD_TEXT_(card, 2, "SBPi%04x", ipa_rc); + QETH_CARD_TEXT_(card, 2, "SBPc%04x", sbp_rc); } return rc; } @@ -1544,15 +1541,15 @@ static int qeth_bridgeport_query_support_cb(struct qeth_card *card, { struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; struct _qeth_sbp_cbctl *cbctl = (struct _qeth_sbp_cbctl *)reply->param; + int rc; + QETH_CARD_TEXT(card, 2, "brqsupcb"); - cbctl->ipa_rc = cmd->hdr.return_code; - cbctl->cmd_rc = cmd->data.sbp.hdr.return_code; - if ((cbctl->ipa_rc == 0) && (cbctl->cmd_rc == 0)) { - cbctl->data.supported = - cmd->data.sbp.data.query_cmds_supp.supported_cmds; - } else { - cbctl->data.supported = 0; - } + rc = qeth_bridgeport_makerc(card, cmd); + if (rc) + return rc; + + cbctl->data.supported = + cmd->data.sbp.data.query_cmds_supp.supported_cmds; return 0; } @@ -1573,12 +1570,11 @@ static void qeth_bridgeport_query_support(struct qeth_card *card) sizeof(struct qeth_sbp_query_cmds_supp)); if (!iob) return; + if (qeth_send_ipa_cmd(card, iob, qeth_bridgeport_query_support_cb, - (void *)&cbctl) || - qeth_bridgeport_makerc(card, &cbctl, - IPA_SBP_QUERY_COMMANDS_SUPPORTED)) { - /* non-zero makerc signifies failure, and produce messages */ + &cbctl)) { card->options.sbp.role = QETH_SBP_ROLE_NONE; + card->options.sbp.supported_funcs = 0; return; } card->options.sbp.supported_funcs = cbctl.data.supported; @@ -1590,16 +1586,16 @@ static int qeth_bridgeport_query_ports_cb(struct qeth_card *card, struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; struct qeth_sbp_query_ports *qports = &cmd->data.sbp.data.query_ports; struct _qeth_sbp_cbctl *cbctl = (struct _qeth_sbp_cbctl *)reply->param; + int rc; QETH_CARD_TEXT(card, 2, "brqprtcb"); - cbctl->ipa_rc = cmd->hdr.return_code; - cbctl->cmd_rc = cmd->data.sbp.hdr.return_code; - if ((cbctl->ipa_rc != 0) || (cbctl->cmd_rc != 0)) - return 0; + rc = qeth_bridgeport_makerc(card, cmd); + if (rc) + return rc; + if (qports->entry_length != sizeof(struct qeth_sbp_port_entry)) { - cbctl->cmd_rc = 0xffff; QETH_CARD_TEXT_(card, 2, "SBPs%04x", qports->entry_length); - return 0; + return -EINVAL; } /* first entry contains the state of the local port */ if (qports->num_entries > 0) { @@ -1624,7 +1620,6 @@ static int qeth_bridgeport_query_ports_cb(struct qeth_card *card, int qeth_bridgeport_query_ports(struct qeth_card *card, enum qeth_sbp_roles *role, enum qeth_sbp_states *state) { - int rc = 0; struct qeth_cmd_buffer *iob; struct _qeth_sbp_cbctl cbctl = { .data = { @@ -1641,22 +1636,18 @@ int qeth_bridgeport_query_ports(struct qeth_card *card, iob = qeth_sbp_build_cmd(card, IPA_SBP_QUERY_BRIDGE_PORTS, 0); if (!iob) return -ENOMEM; - rc = qeth_send_ipa_cmd(card, iob, qeth_bridgeport_query_ports_cb, - (void *)&cbctl); - if (rc < 0) - return rc; - return qeth_bridgeport_makerc(card, &cbctl, IPA_SBP_QUERY_BRIDGE_PORTS); + + return qeth_send_ipa_cmd(card, iob, qeth_bridgeport_query_ports_cb, + &cbctl); } static int qeth_bridgeport_set_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *)data; - struct _qeth_sbp_cbctl *cbctl = (struct _qeth_sbp_cbctl *)reply->param; + QETH_CARD_TEXT(card, 2, "brsetrcb"); - cbctl->ipa_rc = cmd->hdr.return_code; - cbctl->cmd_rc = cmd->data.sbp.hdr.return_code; - return 0; + return qeth_bridgeport_makerc(card, cmd); } /** @@ -1668,10 +1659,8 @@ static int qeth_bridgeport_set_cb(struct qeth_card *card, */ int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role) { - int rc = 0; int cmdlength; struct qeth_cmd_buffer *iob; - struct _qeth_sbp_cbctl cbctl; enum qeth_ipa_sbp_cmd setcmd; QETH_CARD_TEXT(card, 2, "brsetrol"); @@ -1696,11 +1685,8 @@ int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role) iob = qeth_sbp_build_cmd(card, setcmd, cmdlength); if (!iob) return -ENOMEM; - rc = qeth_send_ipa_cmd(card, iob, qeth_bridgeport_set_cb, - (void *)&cbctl); - if (rc < 0) - return rc; - return qeth_bridgeport_makerc(card, &cbctl, setcmd); + + return qeth_send_ipa_cmd(card, iob, qeth_bridgeport_set_cb, NULL); } /** From 742d4d40831d3d2bd2dff0bd2c9488b944231cb2 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 12 Feb 2019 18:33:25 +0100 Subject: [PATCH 1053/1436] s390/qeth: convert remaining legacy cmd callbacks This calls the existing errno translation helpers from the callbacks, adding trivial wrappers where necessary. For cmds that have no sophisticated errno translation, default to -EIO. For IPA cmds with no callback, fall back to a minimal default. This is currently being used by qeth_l3_send_setrouting(). Thus having all converted all callbacks, remove the legacy path in qeth_send_control_data_cb(). Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 113 +++++++++++++++--------------- drivers/s390/net/qeth_l2_main.c | 37 +++++----- drivers/s390/net/qeth_l3_main.c | 74 ++++++++++++------- 3 files changed, 121 insertions(+), 103 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index cdb65ba99888..5ca934775c42 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -867,12 +867,7 @@ static void qeth_send_control_data_cb(struct qeth_card *card, } if (rc <= 0) { - if (cmd) - reply->rc = (u16) cmd->hdr.return_code; - - /* for callbacks with proper errnos: */ - if (rc < 0) - reply->rc = rc; + reply->rc = rc; qeth_notify_reply(reply); } @@ -2039,15 +2034,13 @@ EXPORT_SYMBOL_GPL(qeth_prepare_control_data); * for the IPA commands. * @reply_param: private pointer passed to the callback * - * Returns the value of the `return_code' field of the response - * block returned from the hardware, or other error indication. - * Value of zero indicates successful execution of the command. - * * Callback function gets called one or more times, with cb_cmd * pointing to the response returned by the hardware. Callback - * function must return non-zero if more reply blocks are expected, - * and zero if the last or only reply block is received. Callback - * function can get the value of the reply_param pointer from the + * function must return + * > 0 if more reply blocks are expected, + * 0 if the last or only reply block is received, and + * < 0 on error. + * Callback function can get the value of the reply_param pointer from the * field 'param' of the structure qeth_reply. */ @@ -2876,6 +2869,14 @@ struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *card, } EXPORT_SYMBOL_GPL(qeth_get_ipacmd_buffer); +static int qeth_send_ipa_cmd_cb(struct qeth_card *card, + struct qeth_reply *reply, unsigned long data) +{ + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; + + return (cmd->hdr.return_code) ? -EIO : 0; +} + /** * qeth_send_ipa_cmd() - send an IPA command * @@ -2891,6 +2892,9 @@ int qeth_send_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, int rc; QETH_CARD_TEXT(card, 4, "sendipa"); + + if (reply_cb == NULL) + reply_cb = qeth_send_ipa_cmd_cb; memcpy(&length, QETH_IPA_PDU_LEN_TOTAL(iob->data), 2); rc = qeth_send_control_data(card, length, iob, reply_cb, reply_param); if (rc == -ETIME) { @@ -2939,7 +2943,7 @@ static int qeth_query_setadapterparms_cb(struct qeth_card *card, QETH_CARD_TEXT(card, 3, "quyadpcb"); if (qeth_setadpparms_inspect_rc(cmd)) - return 0; + return -EIO; if (cmd->data.setadapterparms.data.query_cmds_supp.lan_type & 0x7f) { card->info.link_type = @@ -2994,19 +2998,18 @@ static int qeth_query_ipassists_cb(struct qeth_card *card, cmd = (struct qeth_ipa_cmd *) data; switch (cmd->hdr.return_code) { + case IPA_RC_SUCCESS: + break; case IPA_RC_NOTSUPP: case IPA_RC_L2_UNSUPPORTED_CMD: QETH_DBF_TEXT(SETUP, 2, "ipaunsup"); card->options.ipa4.supported_funcs |= IPA_SETADAPTERPARMS; card->options.ipa6.supported_funcs |= IPA_SETADAPTERPARMS; - return 0; + return -EOPNOTSUPP; default: - if (cmd->hdr.return_code) { - QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Unhandled rc=%#x\n", - CARD_DEVID(card), - cmd->hdr.return_code); - return 0; - } + QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Unhandled rc=%#x\n", + CARD_DEVID(card), cmd->hdr.return_code); + return -EIO; } if (cmd->hdr.prot_version == QETH_PROT_IPV4) { @@ -3044,7 +3047,7 @@ static int qeth_query_switch_attributes_cb(struct qeth_card *card, QETH_CARD_TEXT(card, 2, "qswiatcb"); if (qeth_setadpparms_inspect_rc(cmd)) - return 0; + return -EIO; sw_info = (struct qeth_switch_info *)reply->param; attrs = &cmd->data.setadapterparms.data.query_switch_attributes; @@ -3076,15 +3079,15 @@ int qeth_query_switch_attributes(struct qeth_card *card, static int qeth_query_setdiagass_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; - __u16 rc; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; + u16 rc = cmd->hdr.return_code; - cmd = (struct qeth_ipa_cmd *)data; - rc = cmd->hdr.return_code; - if (rc) + if (rc) { QETH_CARD_TEXT_(card, 2, "diagq:%x", rc); - else - card->info.diagass_support = cmd->data.diagass.ext; + return -EIO; + } + + card->info.diagass_support = cmd->data.diagass.ext; return 0; } @@ -3131,13 +3134,13 @@ static void qeth_get_trap_id(struct qeth_card *card, struct qeth_trap_id *tid) static int qeth_hw_trap_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; - __u16 rc; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; + u16 rc = cmd->hdr.return_code; - cmd = (struct qeth_ipa_cmd *)data; - rc = cmd->hdr.return_code; - if (rc) + if (rc) { QETH_CARD_TEXT_(card, 2, "trapc:%x", rc); + return -EIO; + } return 0; } @@ -4196,7 +4199,7 @@ static int qeth_setadp_promisc_mode_cb(struct qeth_card *card, setparms->data.mode = SET_PROMISC_MODE_OFF; } card->info.promisc_mode = setparms->data.mode; - return 0; + return (cmd->hdr.return_code) ? -EIO : 0; } void qeth_setadp_promisc_mode(struct qeth_card *card) @@ -4295,7 +4298,7 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card, QETH_CARD_TEXT(card, 4, "setaccb"); if (cmd->hdr.return_code) - return 0; + return -EIO; qeth_setadpparms_inspect_rc(cmd); access_ctrl_req = &cmd->data.setadapterparms.data.set_access_ctrl; @@ -4369,7 +4372,7 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card, card->options.isolation = card->options.prev_isolation; break; } - return 0; + return (cmd->hdr.return_code) ? -EIO : 0; } static int qeth_setadpparms_set_access_ctrl(struct qeth_card *card, @@ -4734,7 +4737,7 @@ static int qeth_query_card_info_cb(struct qeth_card *card, QETH_CARD_TEXT(card, 2, "qcrdincb"); if (qeth_setadpparms_inspect_rc(cmd)) - return 0; + return -EIO; card_info = &cmd->data.setadapterparms.data.card_info; carrier_info->card_type = card_info->card_type; @@ -5409,7 +5412,7 @@ static int qeth_setassparms_get_caps_cb(struct qeth_card *card, struct qeth_ipa_caps *caps = reply->param; if (qeth_setassparms_inspect_rc(cmd)) - return 0; + return -EIO; caps->supported = cmd->data.setassparms.data.caps.supported; caps->enabled = cmd->data.setassparms.data.caps.enabled; @@ -5419,18 +5422,18 @@ static int qeth_setassparms_get_caps_cb(struct qeth_card *card, int qeth_setassparms_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; QETH_CARD_TEXT(card, 4, "defadpcb"); - cmd = (struct qeth_ipa_cmd *) data; - if (cmd->hdr.return_code == 0) { - cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code; - if (cmd->hdr.prot_version == QETH_PROT_IPV4) - card->options.ipa4.enabled_funcs = cmd->hdr.ipa_enabled; - if (cmd->hdr.prot_version == QETH_PROT_IPV6) - card->options.ipa6.enabled_funcs = cmd->hdr.ipa_enabled; - } + if (cmd->hdr.return_code) + return -EIO; + + cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code; + if (cmd->hdr.prot_version == QETH_PROT_IPV4) + card->options.ipa4.enabled_funcs = cmd->hdr.ipa_enabled; + if (cmd->hdr.prot_version == QETH_PROT_IPV6) + card->options.ipa6.enabled_funcs = cmd->hdr.ipa_enabled; return 0; } EXPORT_SYMBOL_GPL(qeth_setassparms_cb); @@ -6303,7 +6306,7 @@ static int qeth_start_csum_cb(struct qeth_card *card, struct qeth_reply *reply, u32 *features = reply->param; if (qeth_setassparms_inspect_rc(cmd)) - return 0; + return -EIO; *features = cmd->data.setassparms.data.flags_32bit; return 0; @@ -6379,9 +6382,8 @@ static int qeth_set_csum_on(struct qeth_card *card, enum qeth_ipa_funcs cstype, static int qeth_set_ipa_csum(struct qeth_card *card, bool on, int cstype, enum qeth_prot_versions prot) { - int rc = (on) ? qeth_set_csum_on(card, cstype, prot) : - qeth_set_csum_off(card, cstype, prot); - return rc ? -EIO : 0; + return on ? qeth_set_csum_on(card, cstype, prot) : + qeth_set_csum_off(card, cstype, prot); } static int qeth_start_tso_cb(struct qeth_card *card, struct qeth_reply *reply, @@ -6391,7 +6393,7 @@ static int qeth_start_tso_cb(struct qeth_card *card, struct qeth_reply *reply, struct qeth_tso_start_data *tso_data = reply->param; if (qeth_setassparms_inspect_rc(cmd)) - return 0; + return -EIO; tso_data->mss = cmd->data.setassparms.data.tso.mss; tso_data->supported = cmd->data.setassparms.data.tso.supported; @@ -6457,10 +6459,7 @@ static int qeth_set_tso_on(struct qeth_card *card, static int qeth_set_ipa_tso(struct qeth_card *card, bool on, enum qeth_prot_versions prot) { - int rc = on ? qeth_set_tso_on(card, prot) : - qeth_set_tso_off(card, prot); - - return rc ? -EIO : 0; + return on ? qeth_set_tso_on(card, prot) : qeth_set_tso_off(card, prot); } static int qeth_set_ipa_rx_csum(struct qeth_card *card, bool on) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 955fb24af418..c566139da43d 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -35,7 +35,7 @@ static void qeth_l2_vnicc_init(struct qeth_card *card); static bool qeth_l2_vnicc_recover_timeout(struct qeth_card *card, u32 vnicc, u32 *timeout); -static int qeth_setdelmac_makerc(struct qeth_card *card, int retcode) +static int qeth_l2_setdelmac_makerc(struct qeth_card *card, u16 retcode) { int rc; @@ -62,9 +62,6 @@ static int qeth_setdelmac_makerc(struct qeth_card *card, int retcode) case IPA_RC_L2_MAC_NOT_FOUND: rc = -ENOENT; break; - case -ENOMEM: - rc = -ENOMEM; - break; default: rc = -EIO; break; @@ -72,6 +69,15 @@ static int qeth_setdelmac_makerc(struct qeth_card *card, int retcode) return rc; } +static int qeth_l2_send_setdelmac_cb(struct qeth_card *card, + struct qeth_reply *reply, + unsigned long data) +{ + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; + + return qeth_l2_setdelmac_makerc(card, cmd->hdr.return_code); +} + static int qeth_l2_send_setdelmac(struct qeth_card *card, __u8 *mac, enum qeth_ipa_cmds ipacmd) { @@ -85,8 +91,7 @@ static int qeth_l2_send_setdelmac(struct qeth_card *card, __u8 *mac, cmd = __ipa_cmd(iob); cmd->data.setdelmac.mac_length = ETH_ALEN; ether_addr_copy(cmd->data.setdelmac.mac, mac); - return qeth_setdelmac_makerc(card, qeth_send_ipa_cmd(card, iob, - NULL, NULL)); + return qeth_send_ipa_cmd(card, iob, qeth_l2_send_setdelmac_cb, NULL); } static int qeth_l2_send_setmac(struct qeth_card *card, __u8 *mac) @@ -205,7 +210,7 @@ static void qeth_l2_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, } } -static int qeth_setdelvlan_makerc(struct qeth_card *card, int retcode) +static int qeth_l2_setdelvlan_makerc(struct qeth_card *card, u16 retcode) { if (retcode) QETH_CARD_TEXT_(card, 2, "err%04x", retcode); @@ -221,8 +226,6 @@ static int qeth_setdelvlan_makerc(struct qeth_card *card, int retcode) return -ENOENT; case IPA_RC_L2_VLAN_ID_NOT_ALLOWED: return -EPERM; - case -ENOMEM: - return -ENOMEM; default: return -EIO; } @@ -240,9 +243,8 @@ static int qeth_l2_send_setdelvlan_cb(struct qeth_card *card, cmd->data.setdelvlan.vlan_id, CARD_DEVID(card), cmd->hdr.return_code); QETH_CARD_TEXT_(card, 2, "L2VL%4x", cmd->hdr.command); - QETH_CARD_TEXT_(card, 2, "err%d", cmd->hdr.return_code); } - return 0; + return qeth_l2_setdelvlan_makerc(card, cmd->hdr.return_code); } static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i, @@ -257,8 +259,7 @@ static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i, return -ENOMEM; cmd = __ipa_cmd(iob); cmd->data.setdelvlan.vlan_id = i; - return qeth_setdelvlan_makerc(card, qeth_send_ipa_cmd(card, iob, - qeth_l2_send_setdelvlan_cb, NULL)); + return qeth_send_ipa_cmd(card, iob, qeth_l2_send_setdelvlan_cb, NULL); } static int qeth_l2_vlan_rx_add_vid(struct net_device *dev, @@ -1790,7 +1791,7 @@ static bool qeth_bridgeport_is_in_use(struct qeth_card *card) /* VNIC Characteristics support */ /* handle VNICC IPA command return codes; convert to error codes */ -static int qeth_l2_vnicc_makerc(struct qeth_card *card, int ipa_rc) +static int qeth_l2_vnicc_makerc(struct qeth_card *card, u16 ipa_rc) { int rc; @@ -1848,7 +1849,7 @@ static int qeth_l2_vnicc_request_cb(struct qeth_card *card, QETH_CARD_TEXT(card, 2, "vniccrcb"); if (cmd->hdr.return_code) - return 0; + return qeth_l2_vnicc_makerc(card, cmd->hdr.return_code); /* return results to caller */ card->options.vnicc.sup_chars = rep->hdr.sup; card->options.vnicc.cur_chars = rep->hdr.cur; @@ -1869,7 +1870,6 @@ static int qeth_l2_vnicc_request(struct qeth_card *card, struct qeth_ipacmd_vnicc *req; struct qeth_cmd_buffer *iob; struct qeth_ipa_cmd *cmd; - int rc; QETH_CARD_TEXT(card, 2, "vniccreq"); @@ -1912,10 +1912,7 @@ static int qeth_l2_vnicc_request(struct qeth_card *card, } /* send request */ - rc = qeth_send_ipa_cmd(card, iob, qeth_l2_vnicc_request_cb, - (void *) cbctl); - - return qeth_l2_vnicc_makerc(card, rc); + return qeth_send_ipa_cmd(card, iob, qeth_l2_vnicc_request_cb, cbctl); } /* VNICC query VNIC characteristics request */ diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 852bfd5cc3e9..8eb24c7f2750 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -957,12 +957,13 @@ static int qeth_l3_start_ipassists(struct qeth_card *card) static int qeth_l3_iqd_read_initial_mac_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; - cmd = (struct qeth_ipa_cmd *) data; - if (cmd->hdr.return_code == 0) - ether_addr_copy(card->dev->dev_addr, - cmd->data.create_destroy_addr.unique_id); + if (cmd->hdr.return_code) + return -EIO; + + ether_addr_copy(card->dev->dev_addr, + cmd->data.create_destroy_addr.unique_id); return 0; } @@ -990,19 +991,18 @@ static int qeth_l3_iqd_read_initial_mac(struct qeth_card *card) static int qeth_l3_get_unique_id_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; - cmd = (struct qeth_ipa_cmd *) data; - if (cmd->hdr.return_code == 0) + if (cmd->hdr.return_code == 0) { card->info.unique_id = *((__u16 *) &cmd->data.create_destroy_addr.unique_id[6]); - else { - card->info.unique_id = UNIQUE_ID_IF_CREATE_ADDR_FAILED | - UNIQUE_ID_NOT_BY_CARD; - dev_warn(&card->gdev->dev, "The network adapter failed to " - "generate a unique ID\n"); + return 0; } - return 0; + + card->info.unique_id = UNIQUE_ID_IF_CREATE_ADDR_FAILED | + UNIQUE_ID_NOT_BY_CARD; + dev_warn(&card->gdev->dev, "The network adapter failed to generate a unique ID\n"); + return -EIO; } static int qeth_l3_get_unique_id(struct qeth_card *card) @@ -1085,7 +1085,7 @@ qeth_diags_trace_cb(struct qeth_card *card, struct qeth_reply *reply, cmd->data.diagass.action, CARD_DEVID(card)); } - return 0; + return rc ? -EIO : 0; } static int @@ -1524,7 +1524,7 @@ static void qeth_l3_set_rx_mode(struct net_device *dev) qeth_l3_handle_promisc_mode(card); } -static int qeth_l3_arp_makerc(int rc) +static int qeth_l3_arp_makerc(u16 rc) { switch (rc) { case IPA_RC_SUCCESS: @@ -1541,8 +1541,18 @@ static int qeth_l3_arp_makerc(int rc) } } +static int qeth_l3_arp_cmd_cb(struct qeth_card *card, struct qeth_reply *reply, + unsigned long data) +{ + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; + + qeth_setassparms_cb(card, reply, data); + return qeth_l3_arp_makerc(cmd->hdr.return_code); +} + static int qeth_l3_arp_set_no_entries(struct qeth_card *card, int no_entries) { + struct qeth_cmd_buffer *iob; int rc; QETH_CARD_TEXT(card, 3, "arpstnoe"); @@ -1557,13 +1567,19 @@ static int qeth_l3_arp_set_no_entries(struct qeth_card *card, int no_entries) if (!qeth_is_supported(card, IPA_ARP_PROCESSING)) { return -EOPNOTSUPP; } - rc = qeth_send_simple_setassparms(card, IPA_ARP_PROCESSING, - IPA_CMD_ASS_ARP_SET_NO_ENTRIES, - no_entries); + + iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING, + IPA_CMD_ASS_ARP_SET_NO_ENTRIES, 4, + QETH_PROT_IPV4); + if (!iob) + return -ENOMEM; + + __ipa_cmd(iob)->data.setassparms.data.flags_32bit = (u32) no_entries; + rc = qeth_send_ipa_cmd(card, iob, qeth_l3_arp_cmd_cb, NULL); if (rc) QETH_DBF_MESSAGE(2, "Could not set number of ARP entries on device %x: %#x\n", CARD_DEVID(card), rc); - return qeth_l3_arp_makerc(rc); + return rc; } static __u32 get_arp_entry_size(struct qeth_card *card, @@ -1792,16 +1808,16 @@ static int qeth_l3_arp_modify_entry(struct qeth_card *card, cmd_entry = &__ipa_cmd(iob)->data.setassparms.data.arp_entry; ether_addr_copy(cmd_entry->macaddr, entry->macaddr); memcpy(cmd_entry->ipaddr, entry->ipaddr, 4); - rc = qeth_send_ipa_cmd(card, iob, qeth_setassparms_cb, NULL); + rc = qeth_send_ipa_cmd(card, iob, qeth_l3_arp_cmd_cb, NULL); if (rc) QETH_DBF_MESSAGE(2, "Could not modify (cmd: %#x) ARP entry on device %x: %#x\n", arp_cmd, CARD_DEVID(card), rc); - - return qeth_l3_arp_makerc(rc); + return rc; } static int qeth_l3_arp_flush_cache(struct qeth_card *card) { + struct qeth_cmd_buffer *iob; int rc; QETH_CARD_TEXT(card, 3, "arpflush"); @@ -1816,12 +1832,18 @@ static int qeth_l3_arp_flush_cache(struct qeth_card *card) if (!qeth_is_supported(card, IPA_ARP_PROCESSING)) { return -EOPNOTSUPP; } - rc = qeth_send_simple_setassparms(card, IPA_ARP_PROCESSING, - IPA_CMD_ASS_ARP_FLUSH_CACHE, 0); + + iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING, + IPA_CMD_ASS_ARP_FLUSH_CACHE, 0, + QETH_PROT_IPV4); + if (!iob) + return -ENOMEM; + + rc = qeth_send_ipa_cmd(card, iob, qeth_l3_arp_cmd_cb, NULL); if (rc) QETH_DBF_MESSAGE(2, "Could not flush ARP cache on device %x: %#x\n", CARD_DEVID(card), rc); - return qeth_l3_arp_makerc(rc); + return rc; } static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) From b6cd7dd99a89ee2f0595bf3e62682a3cde3d4a62 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 11 Feb 2019 16:34:44 -0600 Subject: [PATCH 1054/1436] ser_gigaset: mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. This patch fixes the following warning: drivers/isdn/gigaset/ser-gigaset.c: In function ‘gigaset_tty_ioctl’: drivers/isdn/gigaset/ser-gigaset.c:627:3: warning: this statement may fall through [-Wimplicit-fallthrough=] switch (arg) { ^~~~~~ drivers/isdn/gigaset/ser-gigaset.c:638:2: note: here default: ^~~~~~~ Warning level 3 was used: -Wimplicit-fallthrough=3 Notice that, in this particular case, the code comment is modified in accordance with what GCC is expecting to find. This patch is part of the ongoing efforts to enable -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Acked-by: Paul Bolle Signed-off-by: David S. Miller --- drivers/isdn/gigaset/ser-gigaset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c index ab0b63a4d045..e1de8b1a1001 100644 --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -633,7 +633,7 @@ gigaset_tty_ioctl(struct tty_struct *tty, struct file *file, flush_send_queue(cs); break; } - /* Pass through */ + /* fall through */ default: /* pass through to underlying serial device */ From 56e9b6b9601c113b1c19c61e70ac972444cb40aa Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 11 Feb 2019 16:38:21 -0600 Subject: [PATCH 1055/1436] isdn: i4l: isdn_tty: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. This patch fixes the following warnings: drivers/isdn/i4l/isdn_tty.c: In function ‘isdn_tty_edit_at’: drivers/isdn/i4l/isdn_tty.c:3644:18: warning: this statement may fall through [-Wimplicit-fallthrough=] m->mdmcmdl = 0; ~~~~~~~~~~~^~~ drivers/isdn/i4l/isdn_tty.c:3646:5: note: here case 0: ^~~~ Warning level 3 was used: -Wimplicit-fallthrough=3 Notice that, in this particular case, the code comment is modified in accordance with what GCC is expecting to find. This patch is part of the ongoing efforts to enable -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/isdn/i4l/isdn_tty.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c index dc1cded716c1..43700fc19a31 100644 --- a/drivers/isdn/i4l/isdn_tty.c +++ b/drivers/isdn/i4l/isdn_tty.c @@ -3642,7 +3642,7 @@ isdn_tty_edit_at(const char *p, int count, modem_info *info) break; } else m->mdmcmdl = 0; - /* Fall through, check for 'A' */ + /* Fall through - check for 'A' */ case 0: if (c == 'A') { m->mdmcmd[m->mdmcmdl] = c; From b67de691f60b471401798498c1bc68e63b00708d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 11 Feb 2019 16:42:37 -0600 Subject: [PATCH 1056/1436] isdn_v110: mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. This patch fixes the following warnings: drivers/isdn/i4l/isdn_v110.c: In function ‘EncodeMatrix’: drivers/isdn/i4l/isdn_v110.c:353:7: warning: this statement may fall through [-Wimplicit-fallthrough=] if (line >= mlen) { ^ drivers/isdn/i4l/isdn_v110.c:358:3: note: here case 128: ^~~~ Warning level 3 was used: -Wimplicit-fallthrough=3 Notice that, in this particular case, the code comment is modified in accordance with what GCC is expecting to find. This patch is part of the ongoing efforts to enable -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/isdn/i4l/isdn_v110.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/i4l/isdn_v110.c b/drivers/isdn/i4l/isdn_v110.c index 2a5f6668756c..d11fe76f138f 100644 --- a/drivers/isdn/i4l/isdn_v110.c +++ b/drivers/isdn/i4l/isdn_v110.c @@ -354,7 +354,7 @@ EncodeMatrix(unsigned char *buf, int len, unsigned char *m, int mlen) printk(KERN_WARNING "isdn_v110 (EncodeMatrix): buffer full!\n"); return line; } - /* else: fall through */ + /* fall through */ case 128: m[line] = 128; /* leftmost -> set byte to 1000000 */ mbit = 64; /* current bit in the matrix line */ From 4ffcbfac60642f63ae3d80891f573ba7e94a265c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Feb 2019 14:41:22 -0800 Subject: [PATCH 1057/1436] batman-adv: fix uninit-value in batadv_interface_tx() KMSAN reported batadv_interface_tx() was possibly using a garbage value [1] batadv_get_vid() does have a pskb_may_pull() call but batadv_interface_tx() does not actually make sure this did not fail. [1] BUG: KMSAN: uninit-value in batadv_interface_tx+0x908/0x1e40 net/batman-adv/soft-interface.c:231 CPU: 0 PID: 10006 Comm: syz-executor469 Not tainted 4.20.0-rc7+ #5 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:613 __msan_warning+0x82/0xf0 mm/kmsan/kmsan_instr.c:313 batadv_interface_tx+0x908/0x1e40 net/batman-adv/soft-interface.c:231 __netdev_start_xmit include/linux/netdevice.h:4356 [inline] netdev_start_xmit include/linux/netdevice.h:4365 [inline] xmit_one net/core/dev.c:3257 [inline] dev_hard_start_xmit+0x607/0xc40 net/core/dev.c:3273 __dev_queue_xmit+0x2e42/0x3bc0 net/core/dev.c:3843 dev_queue_xmit+0x4b/0x60 net/core/dev.c:3876 packet_snd net/packet/af_packet.c:2928 [inline] packet_sendmsg+0x8306/0x8f30 net/packet/af_packet.c:2953 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] __sys_sendto+0x8c4/0xac0 net/socket.c:1788 __do_sys_sendto net/socket.c:1800 [inline] __se_sys_sendto+0x107/0x130 net/socket.c:1796 __x64_sys_sendto+0x6e/0x90 net/socket.c:1796 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x441889 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 bb 10 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffdda6fd468 EFLAGS: 00000216 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 0000000000441889 RDX: 000000000000000e RSI: 00000000200000c0 RDI: 0000000000000003 RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000216 R12: 00007ffdda6fd4c0 R13: 00007ffdda6fd4b0 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:204 [inline] kmsan_internal_poison_shadow+0x92/0x150 mm/kmsan/kmsan.c:158 kmsan_kmalloc+0xa6/0x130 mm/kmsan/kmsan_hooks.c:176 kmsan_slab_alloc+0xe/0x10 mm/kmsan/kmsan_hooks.c:185 slab_post_alloc_hook mm/slab.h:446 [inline] slab_alloc_node mm/slub.c:2759 [inline] __kmalloc_node_track_caller+0xe18/0x1030 mm/slub.c:4383 __kmalloc_reserve net/core/skbuff.c:137 [inline] __alloc_skb+0x309/0xa20 net/core/skbuff.c:205 alloc_skb include/linux/skbuff.h:998 [inline] alloc_skb_with_frags+0x1c7/0xac0 net/core/skbuff.c:5220 sock_alloc_send_pskb+0xafd/0x10e0 net/core/sock.c:2083 packet_alloc_skb net/packet/af_packet.c:2781 [inline] packet_snd net/packet/af_packet.c:2872 [inline] packet_sendmsg+0x661a/0x8f30 net/packet/af_packet.c:2953 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] __sys_sendto+0x8c4/0xac0 net/socket.c:1788 __do_sys_sendto net/socket.c:1800 [inline] __se_sys_sendto+0x107/0x130 net/socket.c:1796 __x64_sys_sendto+0x6e/0x90 net/socket.c:1796 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Marek Lindner Cc: Simon Wunderlich Cc: Antonio Quartulli Signed-off-by: David S. Miller --- net/batman-adv/soft-interface.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index b85ca809e509..ffc83bebfe40 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -227,6 +227,8 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: + if (!pskb_may_pull(skb, sizeof(*vhdr))) + goto dropped; vhdr = vlan_eth_hdr(skb); /* drop batman-in-batman packets to prevent loops */ From 1ec17dbd90f8b638f41ee650558609c1af63dfa0 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sat, 9 Feb 2019 13:35:52 +0300 Subject: [PATCH 1058/1436] inet_diag: fix reporting cgroup classid and fallback to priority Field idiag_ext in struct inet_diag_req_v2 used as bitmap of requested extensions has only 8 bits. Thus extensions starting from DCTCPINFO cannot be requested directly. Some of them included into response unconditionally or hook into some of lower 8 bits. Extension INET_DIAG_CLASS_ID has not way to request from the beginning. This patch bundle it with INET_DIAG_TCLASS (ipv6 tos), fixes space reservation, and documents behavior for other extensions. Also this patch adds fallback to reporting socket priority. This filed is more widely used for traffic classification because ipv4 sockets automatically maps TOS to priority and default qdisc pfifo_fast knows about that. But priority could be changed via setsockopt SO_PRIORITY so INET_DIAG_TOS isn't enough for predicting class. Also cgroup2 obsoletes net_cls classid (it always zero), but we cannot reuse this field for reporting cgroup2 id because it is 64-bit (ino+gen). So, after this patch INET_DIAG_CLASS_ID will report socket priority for most common setup when net_cls isn't set and/or cgroup2 in use. Fixes: 0888e372c37f ("net: inet: diag: expose sockets cgroup classid") Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 16 +++++++++++----- net/ipv4/inet_diag.c | 10 +++++++++- net/sctp/diag.c | 1 + 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 14565d703291..e8baca85bac6 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -137,15 +137,21 @@ enum { INET_DIAG_TCLASS, INET_DIAG_SKMEMINFO, INET_DIAG_SHUTDOWN, - INET_DIAG_DCTCPINFO, - INET_DIAG_PROTOCOL, /* response attribute only */ + + /* + * Next extenstions cannot be requested in struct inet_diag_req_v2: + * its field idiag_ext has only 8 bits. + */ + + INET_DIAG_DCTCPINFO, /* request as INET_DIAG_VEGASINFO */ + INET_DIAG_PROTOCOL, /* response attribute only */ INET_DIAG_SKV6ONLY, INET_DIAG_LOCALS, INET_DIAG_PEERS, INET_DIAG_PAD, - INET_DIAG_MARK, - INET_DIAG_BBRINFO, - INET_DIAG_CLASS_ID, + INET_DIAG_MARK, /* only with CAP_NET_ADMIN */ + INET_DIAG_BBRINFO, /* request as INET_DIAG_VEGASINFO */ + INET_DIAG_CLASS_ID, /* request as INET_DIAG_TCLASS */ INET_DIAG_MD5SIG, __INET_DIAG_MAX, }; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 1a4e9ff02762..5731670c560b 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -108,6 +108,7 @@ static size_t inet_sk_attr_size(struct sock *sk, + nla_total_size(1) /* INET_DIAG_TOS */ + nla_total_size(1) /* INET_DIAG_TCLASS */ + nla_total_size(4) /* INET_DIAG_MARK */ + + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) @@ -287,12 +288,19 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto errout; } - if (ext & (1 << (INET_DIAG_CLASS_ID - 1))) { + if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || + ext & (1 << (INET_DIAG_TCLASS - 1))) { u32 classid = 0; #ifdef CONFIG_SOCK_CGROUP_DATA classid = sock_cgroup_classid(&sk->sk_cgrp_data); #endif + /* Fallback to socket priority if class id isn't set. + * Classful qdiscs use it as direct reference to class. + * For cgroup2 classid is always zero. + */ + if (!classid) + classid = sk->sk_priority; if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) goto errout; diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 078f01a8d582..435847d98b51 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -256,6 +256,7 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc) + nla_total_size(1) /* INET_DIAG_TOS */ + nla_total_size(1) /* INET_DIAG_TCLASS */ + nla_total_size(4) /* INET_DIAG_MARK */ + + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + nla_total_size(addrlen * asoc->peer.transport_count) + nla_total_size(addrlen * addrcnt) + nla_total_size(sizeof(struct inet_diag_meminfo)) From fc62814d690cf62189854464f4bd07457d5e9e50 Mon Sep 17 00:00:00 2001 From: Kal Conley Date: Sun, 10 Feb 2019 09:57:11 +0100 Subject: [PATCH 1059/1436] net/packet: fix 4gb buffer limit due to overflow check When calculating rb->frames_per_block * req->tp_block_nr the result can overflow. Check it for overflow without limiting the total buffer size to UINT_MAX. This change fixes support for packet ring buffers >= UINT_MAX. Fixes: 8f8d28e4d6d8 ("net/packet: fix overflow in check for tp_frame_nr") Signed-off-by: Kal Conley Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3b1a78906bc0..1cd1d83a4be0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4292,7 +4292,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, rb->frames_per_block = req->tp_block_size / req->tp_frame_size; if (unlikely(rb->frames_per_block == 0)) goto out; - if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr)) + if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr)) goto out; if (unlikely((rb->frames_per_block * req->tp_block_nr) != req->tp_frame_nr)) From c266f64dbfa2a970a13b0574246c0ddfec492365 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 11 Feb 2019 10:55:32 +0200 Subject: [PATCH 1060/1436] net: sched: protect block state with mutex Currently, tcf_block doesn't use any synchronization mechanisms to protect critical sections that manage lifetime of its chains. block->chain_list and multiple variables in tcf_chain that control its lifetime assume external synchronization provided by global rtnl lock. Converting chain reference counting to atomic reference counters is not possible because cls API uses multiple counters and flags to control chain lifetime, so all of them must be synchronized in chain get/put code. Use single per-block lock to protect block data and manage lifetime of all chains on the block. Always take block->lock when accessing chain_list. Chain get and put modify chain lifetime-management data and parent block's chain_list, so take the lock in these functions. Verify block->lock state with assertions in functions that expect to be called with the lock taken and are called from multiple places. Take block->lock when accessing filter_chain_list. In order to allow parallel update of rules on single block, move all calls to classifiers outside of critical sections protected by new block->lock. Rearrange chain get and put functions code to only access protected chain data while holding block lock: - Rearrange code to only access chain reference counter and chain action reference counter while holding block lock. - Extract code that requires block->lock from tcf_chain_destroy() into standalone tcf_chain_destroy() function that is called by __tcf_chain_put() in same critical section that changes chain reference counters. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/sch_generic.h | 5 +++ net/sched/cls_api.c | 84 +++++++++++++++++++++++++++++++++------ 2 files changed, 76 insertions(+), 13 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 7a4957599874..31b8ea66a47d 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -352,6 +353,10 @@ struct tcf_chain { }; struct tcf_block { + /* Lock protects tcf_block and lifetime-management data of chains + * attached to the block (refcnt, action_refcnt, explicitly_created). + */ + struct mutex lock; struct list_head chain_list; u32 index; /* block index for shared blocks */ refcount_t refcnt; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 02cf6d2fa0e1..806e7158a7e8 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -201,6 +201,9 @@ static void tcf_proto_destroy(struct tcf_proto *tp, kfree_rcu(tp, rcu); } +#define ASSERT_BLOCK_LOCKED(block) \ + lockdep_assert_held(&(block)->lock) + struct tcf_filter_chain_list_item { struct list_head list; tcf_chain_head_change_t *chain_head_change; @@ -212,6 +215,8 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block, { struct tcf_chain *chain; + ASSERT_BLOCK_LOCKED(block); + chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (!chain) return NULL; @@ -243,25 +248,51 @@ static void tcf_chain0_head_change(struct tcf_chain *chain, tcf_chain_head_change_item(item, tp_head); } -static void tcf_chain_destroy(struct tcf_chain *chain) +/* Returns true if block can be safely freed. */ + +static bool tcf_chain_detach(struct tcf_chain *chain) { struct tcf_block *block = chain->block; + ASSERT_BLOCK_LOCKED(block); + list_del(&chain->list); if (!chain->index) block->chain0.chain = NULL; + + if (list_empty(&block->chain_list) && + refcount_read(&block->refcnt) == 0) + return true; + + return false; +} + +static void tcf_block_destroy(struct tcf_block *block) +{ + mutex_destroy(&block->lock); + kfree_rcu(block, rcu); +} + +static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block) +{ + struct tcf_block *block = chain->block; + kfree(chain); - if (list_empty(&block->chain_list) && !refcount_read(&block->refcnt)) - kfree_rcu(block, rcu); + if (free_block) + tcf_block_destroy(block); } static void tcf_chain_hold(struct tcf_chain *chain) { + ASSERT_BLOCK_LOCKED(chain->block); + ++chain->refcnt; } static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain) { + ASSERT_BLOCK_LOCKED(chain->block); + /* In case all the references are action references, this * chain should not be shown to the user. */ @@ -273,6 +304,8 @@ static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block, { struct tcf_chain *chain; + ASSERT_BLOCK_LOCKED(block); + list_for_each_entry(chain, &block->chain_list, list) { if (chain->index == chain_index) return chain; @@ -287,31 +320,40 @@ static struct tcf_chain *__tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create, bool by_act) { - struct tcf_chain *chain = tcf_chain_lookup(block, chain_index); + struct tcf_chain *chain = NULL; + bool is_first_reference; + mutex_lock(&block->lock); + chain = tcf_chain_lookup(block, chain_index); if (chain) { tcf_chain_hold(chain); } else { if (!create) - return NULL; + goto errout; chain = tcf_chain_create(block, chain_index); if (!chain) - return NULL; + goto errout; } if (by_act) ++chain->action_refcnt; + is_first_reference = chain->refcnt - chain->action_refcnt == 1; + mutex_unlock(&block->lock); /* Send notification only in case we got the first * non-action reference. Until then, the chain acts only as * a placeholder for actions pointing to it and user ought * not know about them. */ - if (chain->refcnt - chain->action_refcnt == 1 && !by_act) + if (is_first_reference && !by_act) tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL, RTM_NEWCHAIN, false); return chain; + +errout: + mutex_unlock(&block->lock); + return chain; } static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, @@ -330,17 +372,31 @@ static void tc_chain_tmplt_del(struct tcf_chain *chain); static void __tcf_chain_put(struct tcf_chain *chain, bool by_act) { + struct tcf_block *block = chain->block; + bool is_last, free_block = false; + unsigned int refcnt; + + mutex_lock(&block->lock); if (by_act) chain->action_refcnt--; - chain->refcnt--; + + /* tc_chain_notify_delete can't be called while holding block lock. + * However, when block is unlocked chain can be changed concurrently, so + * save these to temporary variables. + */ + refcnt = --chain->refcnt; + is_last = refcnt - chain->action_refcnt == 0; + if (refcnt == 0) + free_block = tcf_chain_detach(chain); + mutex_unlock(&block->lock); /* The last dropped non-action reference will trigger notification. */ - if (chain->refcnt - chain->action_refcnt == 0 && !by_act) + if (is_last && !by_act) tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false); - if (chain->refcnt == 0) { + if (refcnt == 0) { tc_chain_tmplt_del(chain); - tcf_chain_destroy(chain); + tcf_chain_destroy(chain, free_block); } } @@ -772,6 +828,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, NL_SET_ERR_MSG(extack, "Memory allocation for block failed"); return ERR_PTR(-ENOMEM); } + mutex_init(&block->lock); INIT_LIST_HEAD(&block->chain_list); INIT_LIST_HEAD(&block->cb_list); INIT_LIST_HEAD(&block->owner_list); @@ -835,7 +892,7 @@ static void tcf_block_put_all_chains(struct tcf_block *block) static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei) { - if (refcount_dec_and_test(&block->refcnt)) { + if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) { /* Flushing/putting all chains will cause the block to be * deallocated when last chain is freed. However, if chain_list * is empty, block has to be manually deallocated. After block @@ -844,6 +901,7 @@ static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, */ bool free_block = list_empty(&block->chain_list); + mutex_unlock(&block->lock); if (tcf_block_shared(block)) tcf_block_remove(block, block->net); if (!free_block) @@ -853,7 +911,7 @@ static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, tcf_block_offload_unbind(block, q, ei); if (free_block) - kfree_rcu(block, rcu); + tcf_block_destroy(block); else tcf_block_put_all_chains(block); } else if (q) { From 91052fa1c657d83d570a3ab0ca0ddf87713b5cc8 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 11 Feb 2019 10:55:33 +0200 Subject: [PATCH 1061/1436] net: sched: protect chain->explicitly_created with block->lock In order to remove dependency on rtnl lock, protect tcf_chain->explicitly_created flag with block->lock. Consolidate code that checks and resets 'explicitly_created' flag into __tcf_chain_put() to execute it atomically with rest of code that puts chain reference. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 806e7158a7e8..2ebf8e53038a 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -370,13 +370,22 @@ EXPORT_SYMBOL(tcf_chain_get_by_act); static void tc_chain_tmplt_del(struct tcf_chain *chain); -static void __tcf_chain_put(struct tcf_chain *chain, bool by_act) +static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, + bool explicitly_created) { struct tcf_block *block = chain->block; bool is_last, free_block = false; unsigned int refcnt; mutex_lock(&block->lock); + if (explicitly_created) { + if (!chain->explicitly_created) { + mutex_unlock(&block->lock); + return; + } + chain->explicitly_created = false; + } + if (by_act) chain->action_refcnt--; @@ -402,19 +411,18 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act) static void tcf_chain_put(struct tcf_chain *chain) { - __tcf_chain_put(chain, false); + __tcf_chain_put(chain, false, false); } void tcf_chain_put_by_act(struct tcf_chain *chain) { - __tcf_chain_put(chain, true); + __tcf_chain_put(chain, true, false); } EXPORT_SYMBOL(tcf_chain_put_by_act); static void tcf_chain_put_explicitly_created(struct tcf_chain *chain) { - if (chain->explicitly_created) - tcf_chain_put(chain); + __tcf_chain_put(chain, false, true); } static void tcf_chain_flush(struct tcf_chain *chain) @@ -2305,7 +2313,6 @@ replay: * to the chain previously taken during addition. */ tcf_chain_put_explicitly_created(chain); - chain->explicitly_created = false; break; case RTM_GETCHAIN: err = tc_chain_notify(chain, skb, n->nlmsg_seq, From 2cbfab07c69657a5a60323f86143f4ce8a829fa9 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 11 Feb 2019 10:55:34 +0200 Subject: [PATCH 1062/1436] net: sched: refactor tc_ctl_chain() to use block->lock In order to remove dependency on rtnl lock, modify chain API to use block->lock to protect chain from concurrent modification. Rearrange tc_ctl_chain() code to call tcf_chain_hold() while holding block->lock to prevent concurrent chain removal. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2ebf8e53038a..b5db0f79db14 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2255,6 +2255,8 @@ replay: err = -EINVAL; goto errout_block; } + + mutex_lock(&block->lock); chain = tcf_chain_lookup(block, chain_index); if (n->nlmsg_type == RTM_NEWCHAIN) { if (chain) { @@ -2266,41 +2268,49 @@ replay: } else { NL_SET_ERR_MSG(extack, "Filter chain already exists"); err = -EEXIST; - goto errout_block; + goto errout_block_locked; } } else { if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain"); err = -ENOENT; - goto errout_block; + goto errout_block_locked; } chain = tcf_chain_create(block, chain_index); if (!chain) { NL_SET_ERR_MSG(extack, "Failed to create filter chain"); err = -ENOMEM; - goto errout_block; + goto errout_block_locked; } } } else { if (!chain || tcf_chain_held_by_acts_only(chain)) { NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); err = -EINVAL; - goto errout_block; + goto errout_block_locked; } tcf_chain_hold(chain); } - switch (n->nlmsg_type) { - case RTM_NEWCHAIN: - err = tc_chain_tmplt_add(chain, net, tca, extack); - if (err) - goto errout; - /* In case the chain was successfully added, take a reference - * to the chain. This ensures that an empty chain - * does not disappear at the end of this function. + if (n->nlmsg_type == RTM_NEWCHAIN) { + /* Modifying chain requires holding parent block lock. In case + * the chain was successfully added, take a reference to the + * chain. This ensures that an empty chain does not disappear at + * the end of this function. */ tcf_chain_hold(chain); chain->explicitly_created = true; + } + mutex_unlock(&block->lock); + + switch (n->nlmsg_type) { + case RTM_NEWCHAIN: + err = tc_chain_tmplt_add(chain, net, tca, extack); + if (err) { + tcf_chain_put_explicitly_created(chain); + goto errout; + } + tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL, RTM_NEWCHAIN, false); break; @@ -2334,6 +2344,10 @@ errout_block: /* Replay the request. */ goto replay; return err; + +errout_block_locked: + mutex_unlock(&block->lock); + goto errout_block; } /* called with RTNL */ From 165f01354c52b6ce175b2c67f771c153f0563f41 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 11 Feb 2019 10:55:35 +0200 Subject: [PATCH 1063/1436] net: sched: protect block->chain0 with block->lock In order to remove dependency on rtnl lock, use block->lock to protect chain0 struct from concurrent modification. Rearrange code in chain0 callback add and del functions to only access chain0 when block->lock is held. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index b5db0f79db14..869ae44d7631 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -244,8 +244,11 @@ static void tcf_chain0_head_change(struct tcf_chain *chain, if (chain->index) return; + + mutex_lock(&block->lock); list_for_each_entry(item, &block->chain0.filter_chain_list, list) tcf_chain_head_change_item(item, tp_head); + mutex_unlock(&block->lock); } /* Returns true if block can be safely freed. */ @@ -756,8 +759,8 @@ tcf_chain0_head_change_cb_add(struct tcf_block *block, struct tcf_block_ext_info *ei, struct netlink_ext_ack *extack) { - struct tcf_chain *chain0 = block->chain0.chain; struct tcf_filter_chain_list_item *item; + struct tcf_chain *chain0; item = kmalloc(sizeof(*item), GFP_KERNEL); if (!item) { @@ -766,9 +769,14 @@ tcf_chain0_head_change_cb_add(struct tcf_block *block, } item->chain_head_change = ei->chain_head_change; item->chain_head_change_priv = ei->chain_head_change_priv; + + mutex_lock(&block->lock); + chain0 = block->chain0.chain; if (chain0 && chain0->filter_chain) tcf_chain_head_change_item(item, chain0->filter_chain); list_add(&item->list, &block->chain0.filter_chain_list); + mutex_unlock(&block->lock); + return 0; } @@ -776,20 +784,23 @@ static void tcf_chain0_head_change_cb_del(struct tcf_block *block, struct tcf_block_ext_info *ei) { - struct tcf_chain *chain0 = block->chain0.chain; struct tcf_filter_chain_list_item *item; + mutex_lock(&block->lock); list_for_each_entry(item, &block->chain0.filter_chain_list, list) { if ((!ei->chain_head_change && !ei->chain_head_change_priv) || (item->chain_head_change == ei->chain_head_change && item->chain_head_change_priv == ei->chain_head_change_priv)) { - if (chain0) + if (block->chain0.chain) tcf_chain_head_change_item(item, NULL); list_del(&item->list); + mutex_unlock(&block->lock); + kfree(item); return; } } + mutex_unlock(&block->lock); WARN_ON(1); } From bbf73830cd48cff1599811d4f69c7cfd49c7b869 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 11 Feb 2019 10:55:36 +0200 Subject: [PATCH 1064/1436] net: sched: traverse chains in block with tcf_get_next_chain() All users of block->chain_list rely on rtnl lock and assume that no new chains are added when traversing the list. Use tcf_get_next_chain() to traverse chain list without relying on rtnl mutex. This function iterates over chains by taking reference to current iterator chain only and doesn't assume external synchronization of chain list. Don't take reference to all chains in block when flushing and use tcf_get_next_chain() to safely iterate over chain list instead. Remove tcf_block_put_all_chains() that is no longer used. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 + net/sched/cls_api.c | 102 ++++++++++++++++++++++++++++++------------ net/sched/sch_api.c | 4 +- 3 files changed, 79 insertions(+), 29 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index cb8be396a11f..38bee7dd21d1 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -44,6 +44,8 @@ bool tcf_queue_work(struct rcu_work *rwork, work_func_t func); struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index); void tcf_chain_put_by_act(struct tcf_chain *chain); +struct tcf_chain *tcf_get_next_chain(struct tcf_block *block, + struct tcf_chain *chain); void tcf_block_netif_keep_dst(struct tcf_block *block); int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 869ae44d7631..8e2ac785f6fd 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -883,28 +883,62 @@ static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index) return block; } +static struct tcf_chain * +__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain) +{ + mutex_lock(&block->lock); + if (chain) + chain = list_is_last(&chain->list, &block->chain_list) ? + NULL : list_next_entry(chain, list); + else + chain = list_first_entry_or_null(&block->chain_list, + struct tcf_chain, list); + + /* skip all action-only chains */ + while (chain && tcf_chain_held_by_acts_only(chain)) + chain = list_is_last(&chain->list, &block->chain_list) ? + NULL : list_next_entry(chain, list); + + if (chain) + tcf_chain_hold(chain); + mutex_unlock(&block->lock); + + return chain; +} + +/* Function to be used by all clients that want to iterate over all chains on + * block. It properly obtains block->lock and takes reference to chain before + * returning it. Users of this function must be tolerant to concurrent chain + * insertion/deletion or ensure that no concurrent chain modification is + * possible. Note that all netlink dump callbacks cannot guarantee to provide + * consistent dump because rtnl lock is released each time skb is filled with + * data and sent to user-space. + */ + +struct tcf_chain * +tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain) +{ + struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain); + + if (chain) + tcf_chain_put(chain); + + return chain_next; +} +EXPORT_SYMBOL(tcf_get_next_chain); + static void tcf_block_flush_all_chains(struct tcf_block *block) { struct tcf_chain *chain; - /* Hold a refcnt for all chains, so that they don't disappear - * while we are iterating. + /* Last reference to block. At this point chains cannot be added or + * removed concurrently. */ - list_for_each_entry(chain, &block->chain_list, list) - tcf_chain_hold(chain); - - list_for_each_entry(chain, &block->chain_list, list) - tcf_chain_flush(chain); -} - -static void tcf_block_put_all_chains(struct tcf_block *block) -{ - struct tcf_chain *chain, *tmp; - - /* At this point, all the chains should have refcnt >= 1. */ - list_for_each_entry_safe(chain, tmp, &block->chain_list, list) { + for (chain = tcf_get_next_chain(block, NULL); + chain; + chain = tcf_get_next_chain(block, chain)) { tcf_chain_put_explicitly_created(chain); - tcf_chain_put(chain); + tcf_chain_flush(chain); } } @@ -923,8 +957,6 @@ static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, mutex_unlock(&block->lock); if (tcf_block_shared(block)) tcf_block_remove(block, block->net); - if (!free_block) - tcf_block_flush_all_chains(block); if (q) tcf_block_offload_unbind(block, q, ei); @@ -932,7 +964,7 @@ static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, if (free_block) tcf_block_destroy(block); else - tcf_block_put_all_chains(block); + tcf_block_flush_all_chains(block); } else if (q) { tcf_block_offload_unbind(block, q, ei); } @@ -1266,11 +1298,15 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb, void *cb_priv, bool add, bool offload_in_use, struct netlink_ext_ack *extack) { - struct tcf_chain *chain; + struct tcf_chain *chain, *chain_prev; struct tcf_proto *tp; int err; - list_for_each_entry(chain, &block->chain_list, list) { + for (chain = __tcf_get_next_chain(block, NULL); + chain; + chain_prev = chain, + chain = __tcf_get_next_chain(block, chain), + tcf_chain_put(chain_prev)) { for (tp = rtnl_dereference(chain->filter_chain); tp; tp = rtnl_dereference(tp->next)) { if (tp->ops->reoffload) { @@ -1289,6 +1325,7 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb, return 0; err_playback_remove: + tcf_chain_put(chain); tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use, extack); return err; @@ -2023,11 +2060,11 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, /* called with RTNL */ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { + struct tcf_chain *chain, *chain_prev; struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; struct Qdisc *q = NULL; struct tcf_block *block; - struct tcf_chain *chain; struct tcmsg *tcm = nlmsg_data(cb->nlh); long index_start; long index; @@ -2091,12 +2128,17 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) index_start = cb->args[0]; index = 0; - list_for_each_entry(chain, &block->chain_list, list) { + for (chain = __tcf_get_next_chain(block, NULL); + chain; + chain_prev = chain, + chain = __tcf_get_next_chain(block, chain), + tcf_chain_put(chain_prev)) { if (tca[TCA_CHAIN] && nla_get_u32(tca[TCA_CHAIN]) != chain->index) continue; if (!tcf_chain_dump(chain, q, parent, skb, cb, index_start, &index)) { + tcf_chain_put(chain); err = -EMSGSIZE; break; } @@ -2364,11 +2406,11 @@ errout_block_locked: /* called with RTNL */ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) { + struct tcf_chain *chain, *chain_prev; struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; struct Qdisc *q = NULL; struct tcf_block *block; - struct tcf_chain *chain; struct tcmsg *tcm = nlmsg_data(cb->nlh); long index_start; long index; @@ -2432,7 +2474,11 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) index_start = cb->args[0]; index = 0; - list_for_each_entry(chain, &block->chain_list, list) { + for (chain = __tcf_get_next_chain(block, NULL); + chain; + chain_prev = chain, + chain = __tcf_get_next_chain(block, chain), + tcf_chain_put(chain_prev)) { if ((tca[TCA_CHAIN] && nla_get_u32(tca[TCA_CHAIN]) != chain->index)) continue; @@ -2440,14 +2486,14 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) index++; continue; } - if (tcf_chain_held_by_acts_only(chain)) - continue; err = tc_chain_fill_node(chain, net, skb, block, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWCHAIN); - if (err <= 0) + if (err <= 0) { + tcf_chain_put(chain); break; + } index++; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 03e26e8d0ec9..80058abc729f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1909,7 +1909,9 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, block = cops->tcf_block(q, cl, NULL); if (!block) return; - list_for_each_entry(chain, &block->chain_list, list) { + for (chain = tcf_get_next_chain(block, NULL); + chain; + chain = tcf_get_next_chain(block, chain)) { struct tcf_proto *tp; for (tp = rtnl_dereference(chain->filter_chain); From a5654820bb4b1f5f21eb83b8fb9de79ce1382d2b Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 11 Feb 2019 10:55:37 +0200 Subject: [PATCH 1065/1436] net: sched: protect chain template accesses with block lock When cls API is called without protection of rtnl lock, parallel modification of chain is possible, which means that chain template can be changed concurrently in certain circumstances. For example, when chain is 'deleted' by new user-space chain API, the chain might continue to be used if it is referenced by actions, and can be 're-created' again by user. In such case same chain structure is reused and its template is changed. To protect from described scenario, cache chain template while holding block lock. Introduce standalone tc_chain_notify_delete() function that works with cached template values, instead of chains themselves. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 73 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 16 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8e2ac785f6fd..0dcce8b0c7b4 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -371,14 +371,22 @@ struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index) } EXPORT_SYMBOL(tcf_chain_get_by_act); -static void tc_chain_tmplt_del(struct tcf_chain *chain); +static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv); +static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv, u32 chain_index, + struct tcf_block *block, struct sk_buff *oskb, + u32 seq, u16 flags, bool unicast); static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, bool explicitly_created) { struct tcf_block *block = chain->block; + const struct tcf_proto_ops *tmplt_ops; bool is_last, free_block = false; unsigned int refcnt; + void *tmplt_priv; + u32 chain_index; mutex_lock(&block->lock); if (explicitly_created) { @@ -398,16 +406,21 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, */ refcnt = --chain->refcnt; is_last = refcnt - chain->action_refcnt == 0; + tmplt_ops = chain->tmplt_ops; + tmplt_priv = chain->tmplt_priv; + chain_index = chain->index; + if (refcnt == 0) free_block = tcf_chain_detach(chain); mutex_unlock(&block->lock); /* The last dropped non-action reference will trigger notification. */ if (is_last && !by_act) - tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false); + tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain_index, + block, NULL, 0, 0, false); if (refcnt == 0) { - tc_chain_tmplt_del(chain); + tc_chain_tmplt_del(tmplt_ops, tmplt_priv); tcf_chain_destroy(chain, free_block); } } @@ -2155,8 +2168,10 @@ out: return skb->len; } -static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net, - struct sk_buff *skb, struct tcf_block *block, +static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv, u32 chain_index, + struct net *net, struct sk_buff *skb, + struct tcf_block *block, u32 portid, u32 seq, u16 flags, int event) { unsigned char *b = skb_tail_pointer(skb); @@ -2165,8 +2180,8 @@ static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net, struct tcmsg *tcm; void *priv; - ops = chain->tmplt_ops; - priv = chain->tmplt_priv; + ops = tmplt_ops; + priv = tmplt_priv; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) @@ -2184,7 +2199,7 @@ static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net, tcm->tcm_block_index = block->index; } - if (nla_put_u32(skb, TCA_CHAIN, chain->index)) + if (nla_put_u32(skb, TCA_CHAIN, chain_index)) goto nla_put_failure; if (ops) { @@ -2215,7 +2230,8 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, if (!skb) return -ENOBUFS; - if (tc_chain_fill_node(chain, net, skb, block, portid, + if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv, + chain->index, net, skb, block, portid, seq, flags, event) <= 0) { kfree_skb(skb); return -EINVAL; @@ -2227,6 +2243,31 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); } +static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv, u32 chain_index, + struct tcf_block *block, struct sk_buff *oskb, + u32 seq, u16 flags, bool unicast) +{ + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + struct net *net = block->net; + struct sk_buff *skb; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb, + block, portid, seq, flags, RTM_DELCHAIN) <= 0) { + kfree_skb(skb); + return -EINVAL; + } + + if (unicast) + return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); +} + static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, struct nlattr **tca, struct netlink_ext_ack *extack) @@ -2256,16 +2297,15 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, return 0; } -static void tc_chain_tmplt_del(struct tcf_chain *chain) +static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv) { - const struct tcf_proto_ops *ops = chain->tmplt_ops; - /* If template ops are set, no work to do for us. */ - if (!ops) + if (!tmplt_ops) return; - ops->tmplt_destroy(chain->tmplt_priv); - module_put(ops->owner); + tmplt_ops->tmplt_destroy(tmplt_priv); + module_put(tmplt_ops->owner); } /* Add/delete/get a chain */ @@ -2486,7 +2526,8 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) index++; continue; } - err = tc_chain_fill_node(chain, net, skb, block, + err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv, + chain->index, net, skb, block, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWCHAIN); From ed76f5edccc98fa66f2337f0b3b255d6e1a568b7 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 11 Feb 2019 10:55:38 +0200 Subject: [PATCH 1066/1436] net: sched: protect filter_chain list with filter_chain_lock mutex Extend tcf_chain with new filter_chain_lock mutex. Always lock the chain when accessing filter_chain list, instead of relying on rtnl lock. Dereference filter_chain with tcf_chain_dereference() lockdep macro to verify that all users of chain_list have the lock taken. Rearrange tp insert/remove code in tc_new_tfilter/tc_del_tfilter to execute all necessary code while holding chain lock in order to prevent invalidation of chain_info structure by potential concurrent change. This also serializes calls to tcf_chain0_head_change(), which allows head change callbacks to rely on filter_chain_lock for synchronization instead of rtnl mutex. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/sch_generic.h | 17 ++++++ net/sched/cls_api.c | 111 +++++++++++++++++++++++++++----------- net/sched/sch_generic.c | 6 ++- 3 files changed, 101 insertions(+), 33 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 31b8ea66a47d..85993d7efee6 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -341,6 +341,8 @@ struct qdisc_skb_cb { typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); struct tcf_chain { + /* Protects filter_chain. */ + struct mutex filter_chain_lock; struct tcf_proto __rcu *filter_chain; struct list_head list; struct tcf_block *block; @@ -374,6 +376,21 @@ struct tcf_block { struct rcu_head rcu; }; +#ifdef CONFIG_PROVE_LOCKING +static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain) +{ + return lockdep_is_held(&chain->filter_chain_lock); +} +#else +static inline bool lockdep_tcf_chain_is_locked(struct tcf_block *chain) +{ + return true; +} +#endif /* #ifdef CONFIG_PROVE_LOCKING */ + +#define tcf_chain_dereference(p, chain) \ + rcu_dereference_protected(p, lockdep_tcf_chain_is_locked(chain)) + static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) { if (*flags & TCA_CLS_FLAGS_IN_HW) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 0dcce8b0c7b4..3fce30ae9a9b 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -221,6 +221,7 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block, if (!chain) return NULL; list_add_tail(&chain->list, &block->chain_list); + mutex_init(&chain->filter_chain_lock); chain->block = block; chain->index = chain_index; chain->refcnt = 1; @@ -280,6 +281,7 @@ static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block) { struct tcf_block *block = chain->block; + mutex_destroy(&chain->filter_chain_lock); kfree(chain); if (free_block) tcf_block_destroy(block); @@ -443,9 +445,13 @@ static void tcf_chain_put_explicitly_created(struct tcf_chain *chain) static void tcf_chain_flush(struct tcf_chain *chain) { - struct tcf_proto *tp = rtnl_dereference(chain->filter_chain); + struct tcf_proto *tp; + mutex_lock(&chain->filter_chain_lock); + tp = tcf_chain_dereference(chain->filter_chain, chain); tcf_chain0_head_change(chain, NULL); + mutex_unlock(&chain->filter_chain_lock); + while (tp) { RCU_INIT_POINTER(chain->filter_chain, tp->next); tcf_proto_destroy(tp, NULL); @@ -785,11 +791,29 @@ tcf_chain0_head_change_cb_add(struct tcf_block *block, mutex_lock(&block->lock); chain0 = block->chain0.chain; - if (chain0 && chain0->filter_chain) - tcf_chain_head_change_item(item, chain0->filter_chain); - list_add(&item->list, &block->chain0.filter_chain_list); + if (chain0) + tcf_chain_hold(chain0); + else + list_add(&item->list, &block->chain0.filter_chain_list); mutex_unlock(&block->lock); + if (chain0) { + struct tcf_proto *tp_head; + + mutex_lock(&chain0->filter_chain_lock); + + tp_head = tcf_chain_dereference(chain0->filter_chain, chain0); + if (tp_head) + tcf_chain_head_change_item(item, tp_head); + + mutex_lock(&block->lock); + list_add(&item->list, &block->chain0.filter_chain_list); + mutex_unlock(&block->lock); + + mutex_unlock(&chain0->filter_chain_lock); + tcf_chain_put(chain0); + } + return 0; } @@ -1464,9 +1488,10 @@ struct tcf_chain_info { struct tcf_proto __rcu *next; }; -static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info) +static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain, + struct tcf_chain_info *chain_info) { - return rtnl_dereference(*chain_info->pprev); + return tcf_chain_dereference(*chain_info->pprev, chain); } static void tcf_chain_tp_insert(struct tcf_chain *chain, @@ -1475,7 +1500,7 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain, { if (*chain_info->pprev == chain->filter_chain) tcf_chain0_head_change(chain, tp); - RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info)); + RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); tcf_chain_hold(chain); } @@ -1484,7 +1509,7 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain, struct tcf_chain_info *chain_info, struct tcf_proto *tp) { - struct tcf_proto *next = rtnl_dereference(chain_info->next); + struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain); if (tp == chain->filter_chain) tcf_chain0_head_change(chain, next); @@ -1502,7 +1527,8 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, /* Check the chain for existence of proto-tcf with this priority */ for (pprev = &chain->filter_chain; - (tp = rtnl_dereference(*pprev)); pprev = &tp->next) { + (tp = tcf_chain_dereference(*pprev, chain)); + pprev = &tp->next) { if (tp->prio >= prio) { if (tp->prio == prio) { if (prio_allocate || @@ -1710,12 +1736,13 @@ replay: goto errout; } + mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, prio_allocate); if (IS_ERR(tp)) { NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); err = PTR_ERR(tp); - goto errout; + goto errout_locked; } if (tp == NULL) { @@ -1724,29 +1751,37 @@ replay: if (tca[TCA_KIND] == NULL || !protocol) { NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified"); err = -EINVAL; - goto errout; + goto errout_locked; } if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter"); err = -ENOENT; - goto errout; + goto errout_locked; } if (prio_allocate) - prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info)); + prio = tcf_auto_prio(tcf_chain_tp_prev(chain, + &chain_info)); + mutex_unlock(&chain->filter_chain_lock); tp = tcf_proto_create(nla_data(tca[TCA_KIND]), protocol, prio, chain, extack); if (IS_ERR(tp)) { err = PTR_ERR(tp); goto errout; } + + mutex_lock(&chain->filter_chain_lock); + tcf_chain_tp_insert(chain, &chain_info, tp); + mutex_unlock(&chain->filter_chain_lock); tp_created = 1; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); err = -EINVAL; - goto errout; + goto errout_locked; + } else { + mutex_unlock(&chain->filter_chain_lock); } fh = tp->ops->get(tp, t->tcm_handle); @@ -1772,15 +1807,11 @@ replay: err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE, extack); - if (err == 0) { - if (tp_created) - tcf_chain_tp_insert(chain, &chain_info, tp); + if (err == 0) tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_NEWTFILTER, false); - } else { - if (tp_created) - tcf_proto_destroy(tp, NULL); - } + else if (tp_created) + tcf_proto_destroy(tp, NULL); errout: if (chain) @@ -1790,6 +1821,10 @@ errout: /* Replay the request. */ goto replay; return err; + +errout_locked: + mutex_unlock(&chain->filter_chain_lock); + goto errout; } static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, @@ -1865,31 +1900,34 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, goto errout; } + mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false); if (!tp || IS_ERR(tp)) { NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); err = tp ? PTR_ERR(tp) : -ENOENT; - goto errout; + goto errout_locked; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); err = -EINVAL; + goto errout_locked; + } else if (t->tcm_handle == 0) { + tcf_chain_tp_remove(chain, &chain_info, tp); + mutex_unlock(&chain->filter_chain_lock); + + tfilter_notify(net, skb, n, tp, block, q, parent, fh, + RTM_DELTFILTER, false); + tcf_proto_destroy(tp, extack); + err = 0; goto errout; } + mutex_unlock(&chain->filter_chain_lock); fh = tp->ops->get(tp, t->tcm_handle); if (!fh) { - if (t->tcm_handle == 0) { - tcf_chain_tp_remove(chain, &chain_info, tp); - tfilter_notify(net, skb, n, tp, block, q, parent, fh, - RTM_DELTFILTER, false); - tcf_proto_destroy(tp, extack); - err = 0; - } else { - NL_SET_ERR_MSG(extack, "Specified filter handle not found"); - err = -ENOENT; - } + NL_SET_ERR_MSG(extack, "Specified filter handle not found"); + err = -ENOENT; } else { bool last; @@ -1899,7 +1937,10 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, if (err) goto errout; if (last) { + mutex_lock(&chain->filter_chain_lock); tcf_chain_tp_remove(chain, &chain_info, tp); + mutex_unlock(&chain->filter_chain_lock); + tcf_proto_destroy(tp, extack); } } @@ -1909,6 +1950,10 @@ errout: tcf_chain_put(chain); tcf_block_release(q, block); return err; + +errout_locked: + mutex_unlock(&chain->filter_chain_lock); + goto errout; } static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, @@ -1966,8 +2011,10 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, goto errout; } + mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false); + mutex_unlock(&chain->filter_chain_lock); if (!tp || IS_ERR(tp)) { NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); err = tp ? PTR_ERR(tp) : -ENOENT; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 66ba2ce2320f..e24568f9246c 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1366,7 +1366,11 @@ static void mini_qdisc_rcu_func(struct rcu_head *head) void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, struct tcf_proto *tp_head) { - struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq); + /* Protected with chain0->filter_chain_lock. + * Can't access chain directly because tp_head can be NULL. + */ + struct mini_Qdisc *miniq_old = + rcu_dereference_protected(*miniqp->p_miniq, 1); struct mini_Qdisc *miniq; if (!tp_head) { From 4dbfa766440c6dfe3d10f077cde966a7d11b58f1 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 11 Feb 2019 10:55:39 +0200 Subject: [PATCH 1067/1436] net: sched: introduce reference counting for tcf_proto In order to remove dependency on rtnl lock and allow concurrent tcf_proto modification, extend tcf_proto with reference counter. Implement helper get/put functions for tcf proto and use them to modify cls API to always take reference to tcf_proto while using it. Only release reference to parent chain after releasing last reference to tp. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 + net/sched/cls_api.c | 53 +++++++++++++++++++++++++++++++-------- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 85993d7efee6..4372c08fc4d9 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -322,6 +322,7 @@ struct tcf_proto { void *data; const struct tcf_proto_ops *ops; struct tcf_chain *chain; + refcount_t refcnt; struct rcu_head rcu; }; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3fce30ae9a9b..37c05b96898f 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -180,6 +180,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, tp->protocol = protocol; tp->prio = prio; tp->chain = chain; + refcount_set(&tp->refcnt, 1); err = tp->ops->init(tp); if (err) { @@ -193,14 +194,29 @@ errout: return ERR_PTR(err); } +static void tcf_proto_get(struct tcf_proto *tp) +{ + refcount_inc(&tp->refcnt); +} + +static void tcf_chain_put(struct tcf_chain *chain); + static void tcf_proto_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { tp->ops->destroy(tp, extack); + tcf_chain_put(tp->chain); module_put(tp->ops->owner); kfree_rcu(tp, rcu); } +static void tcf_proto_put(struct tcf_proto *tp, + struct netlink_ext_ack *extack) +{ + if (refcount_dec_and_test(&tp->refcnt)) + tcf_proto_destroy(tp, extack); +} + #define ASSERT_BLOCK_LOCKED(block) \ lockdep_assert_held(&(block)->lock) @@ -445,18 +461,18 @@ static void tcf_chain_put_explicitly_created(struct tcf_chain *chain) static void tcf_chain_flush(struct tcf_chain *chain) { - struct tcf_proto *tp; + struct tcf_proto *tp, *tp_next; mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_dereference(chain->filter_chain, chain); + RCU_INIT_POINTER(chain->filter_chain, NULL); tcf_chain0_head_change(chain, NULL); mutex_unlock(&chain->filter_chain_lock); while (tp) { - RCU_INIT_POINTER(chain->filter_chain, tp->next); - tcf_proto_destroy(tp, NULL); - tp = rtnl_dereference(chain->filter_chain); - tcf_chain_put(chain); + tp_next = rcu_dereference_protected(tp->next, 1); + tcf_proto_put(tp, NULL); + tp = tp_next; } } @@ -1500,9 +1516,9 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain, { if (*chain_info->pprev == chain->filter_chain) tcf_chain0_head_change(chain, tp); + tcf_proto_get(tp); RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); - tcf_chain_hold(chain); } static void tcf_chain_tp_remove(struct tcf_chain *chain, @@ -1514,7 +1530,6 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain, if (tp == chain->filter_chain) tcf_chain0_head_change(chain, next); RCU_INIT_POINTER(*chain_info->pprev, next); - tcf_chain_put(chain); } static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, @@ -1541,7 +1556,12 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, } } chain_info->pprev = pprev; - chain_info->next = tp ? tp->next : NULL; + if (tp) { + chain_info->next = tp->next; + tcf_proto_get(tp); + } else { + chain_info->next = NULL; + } return tp; } @@ -1699,6 +1719,7 @@ replay: prio = TC_H_MAJ(t->tcm_info); prio_allocate = false; parent = t->tcm_parent; + tp = NULL; cl = 0; if (prio == 0) { @@ -1816,6 +1837,12 @@ replay: errout: if (chain) tcf_chain_put(chain); + if (chain) { + if (tp && !IS_ERR(tp)) + tcf_proto_put(tp, NULL); + if (!tp_created) + tcf_chain_put(chain); + } tcf_block_release(q, block); if (err == -EAGAIN) /* Replay the request. */ @@ -1946,8 +1973,11 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, } errout: - if (chain) + if (chain) { + if (tp && !IS_ERR(tp)) + tcf_proto_put(tp, NULL); tcf_chain_put(chain); + } tcf_block_release(q, block); return err; @@ -2038,8 +2068,11 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, } errout: - if (chain) + if (chain) { + if (tp && !IS_ERR(tp)) + tcf_proto_put(tp, NULL); tcf_chain_put(chain); + } tcf_block_release(q, block); return err; } From fe2923afc12490e92237d23fc0b29f31da72a4f9 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 11 Feb 2019 10:55:40 +0200 Subject: [PATCH 1068/1436] net: sched: traverse classifiers in chain with tcf_get_next_proto() All users of chain->filters_chain rely on rtnl lock and assume that no new classifier instances are added when traversing the list. Use tcf_get_next_proto() to traverse filters list without relying on rtnl mutex. This function iterates over classifiers by taking reference to current iterator classifier only and doesn't assume external synchronization of filters list. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 ++ net/sched/cls_api.c | 70 ++++++++++++++++++++++++++++++++++++------- net/sched/sch_api.c | 4 +-- 3 files changed, 64 insertions(+), 12 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 38bee7dd21d1..e5dafa5ee1b2 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -46,6 +46,8 @@ struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, void tcf_chain_put_by_act(struct tcf_chain *chain); struct tcf_chain *tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain); +struct tcf_proto *tcf_get_next_proto(struct tcf_chain *chain, + struct tcf_proto *tp); void tcf_block_netif_keep_dst(struct tcf_block *block); int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 37c05b96898f..dca8a3bee9c2 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -980,6 +980,45 @@ tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain) } EXPORT_SYMBOL(tcf_get_next_chain); +static struct tcf_proto * +__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp) +{ + ASSERT_RTNL(); + mutex_lock(&chain->filter_chain_lock); + + if (!tp) + tp = tcf_chain_dereference(chain->filter_chain, chain); + else + tp = tcf_chain_dereference(tp->next, chain); + + if (tp) + tcf_proto_get(tp); + + mutex_unlock(&chain->filter_chain_lock); + + return tp; +} + +/* Function to be used by all clients that want to iterate over all tp's on + * chain. Users of this function must be tolerant to concurrent tp + * insertion/deletion or ensure that no concurrent chain modification is + * possible. Note that all netlink dump callbacks cannot guarantee to provide + * consistent dump because rtnl lock is released each time skb is filled with + * data and sent to user-space. + */ + +struct tcf_proto * +tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp) +{ + struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp); + + if (tp) + tcf_proto_put(tp, NULL); + + return tp_next; +} +EXPORT_SYMBOL(tcf_get_next_proto); + static void tcf_block_flush_all_chains(struct tcf_block *block) { struct tcf_chain *chain; @@ -1352,7 +1391,7 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb, struct netlink_ext_ack *extack) { struct tcf_chain *chain, *chain_prev; - struct tcf_proto *tp; + struct tcf_proto *tp, *tp_prev; int err; for (chain = __tcf_get_next_chain(block, NULL); @@ -1360,8 +1399,10 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb, chain_prev = chain, chain = __tcf_get_next_chain(block, chain), tcf_chain_put(chain_prev)) { - for (tp = rtnl_dereference(chain->filter_chain); tp; - tp = rtnl_dereference(tp->next)) { + for (tp = __tcf_get_next_proto(chain, NULL); tp; + tp_prev = tp, + tp = __tcf_get_next_proto(chain, tp), + tcf_proto_put(tp_prev, NULL)) { if (tp->ops->reoffload) { err = tp->ops->reoffload(tp, add, cb, cb_priv, extack); @@ -1378,6 +1419,7 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb, return 0; err_playback_remove: + tcf_proto_put(tp, NULL); tcf_chain_put(chain); tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use, extack); @@ -1677,8 +1719,8 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, { struct tcf_proto *tp; - for (tp = rtnl_dereference(chain->filter_chain); - tp; tp = rtnl_dereference(tp->next)) + for (tp = tcf_get_next_proto(chain, NULL); + tp; tp = tcf_get_next_proto(chain, tp)) tfilter_notify(net, oskb, n, tp, block, q, parent, NULL, event, false); } @@ -2104,11 +2146,15 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, struct net *net = sock_net(skb->sk); struct tcf_block *block = chain->block; struct tcmsg *tcm = nlmsg_data(cb->nlh); + struct tcf_proto *tp, *tp_prev; struct tcf_dump_args arg; - struct tcf_proto *tp; - for (tp = rtnl_dereference(chain->filter_chain); - tp; tp = rtnl_dereference(tp->next), (*p_index)++) { + for (tp = __tcf_get_next_proto(chain, NULL); + tp; + tp_prev = tp, + tp = __tcf_get_next_proto(chain, tp), + tcf_proto_put(tp_prev, NULL), + (*p_index)++) { if (*p_index < index_start) continue; if (TC_H_MAJ(tcm->tcm_info) && @@ -2125,7 +2171,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) - return false; + goto errout; cb->args[1] = 1; } @@ -2145,9 +2191,13 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, cb->args[2] = arg.w.cookie; cb->args[1] = arg.w.count + 1; if (arg.w.stop) - return false; + goto errout; } return true; + +errout: + tcf_proto_put(tp, NULL); + return false; } /* called with RTNL */ diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 80058abc729f..9a530cad2759 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1914,8 +1914,8 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, chain = tcf_get_next_chain(block, chain)) { struct tcf_proto *tp; - for (tp = rtnl_dereference(chain->filter_chain); - tp; tp = rtnl_dereference(tp->next)) { + for (tp = tcf_get_next_proto(chain, NULL); + tp; tp = tcf_get_next_proto(chain, tp)) { struct tcf_bind_args arg = {}; arg.w.fn = tcf_node_bind; From 8b64678e0af8f4d62a40149baedebe78503a5255 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 11 Feb 2019 10:55:41 +0200 Subject: [PATCH 1069/1436] net: sched: refactor tp insert/delete for concurrent execution Implement unique insertion function to atomically attach tcf_proto to chain after verifying that no other tcf proto with specified priority exists. Implement delete function that verifies that tp is actually empty before deleting it. Use these functions to refactor cls API to account for concurrent tp and rule update instead of relying on rtnl lock. Add new 'deleting' flag to tcf proto. Use it to restart search when iterating over tp's on chain to prevent accessing potentially inval tp->next pointer. Extend tcf proto with spinlock that is intended to be used to protect its data from concurrent modification instead of relying on rtnl mutex. Use it to protect 'deleting' flag. Add lockdep macros to validate that lock is held when accessing protected fields. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/sch_generic.h | 18 ++++ net/sched/cls_api.c | 177 ++++++++++++++++++++++++++++++++------ 2 files changed, 170 insertions(+), 25 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 4372c08fc4d9..083e566fc380 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -322,6 +322,11 @@ struct tcf_proto { void *data; const struct tcf_proto_ops *ops; struct tcf_chain *chain; + /* Lock protects tcf_proto shared state and can be used by unlocked + * classifiers to protect their private data. + */ + spinlock_t lock; + bool deleting; refcount_t refcnt; struct rcu_head rcu; }; @@ -382,16 +387,29 @@ static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain) { return lockdep_is_held(&chain->filter_chain_lock); } + +static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp) +{ + return lockdep_is_held(&tp->lock); +} #else static inline bool lockdep_tcf_chain_is_locked(struct tcf_block *chain) { return true; } + +static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp) +{ + return true; +} #endif /* #ifdef CONFIG_PROVE_LOCKING */ #define tcf_chain_dereference(p, chain) \ rcu_dereference_protected(p, lockdep_tcf_chain_is_locked(chain)) +#define tcf_proto_dereference(p, tp) \ + rcu_dereference_protected(p, lockdep_tcf_proto_is_locked(tp)) + static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) { if (*flags & TCA_CLS_FLAGS_IN_HW) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index dca8a3bee9c2..c6452e3bfc6a 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -180,6 +180,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, tp->protocol = protocol; tp->prio = prio; tp->chain = chain; + spin_lock_init(&tp->lock); refcount_set(&tp->refcnt, 1); err = tp->ops->init(tp); @@ -217,6 +218,49 @@ static void tcf_proto_put(struct tcf_proto *tp, tcf_proto_destroy(tp, extack); } +static int walker_noop(struct tcf_proto *tp, void *d, struct tcf_walker *arg) +{ + return -1; +} + +static bool tcf_proto_is_empty(struct tcf_proto *tp) +{ + struct tcf_walker walker = { .fn = walker_noop, }; + + if (tp->ops->walk) { + tp->ops->walk(tp, &walker); + return !walker.stop; + } + return true; +} + +static bool tcf_proto_check_delete(struct tcf_proto *tp) +{ + spin_lock(&tp->lock); + if (tcf_proto_is_empty(tp)) + tp->deleting = true; + spin_unlock(&tp->lock); + return tp->deleting; +} + +static void tcf_proto_mark_delete(struct tcf_proto *tp) +{ + spin_lock(&tp->lock); + tp->deleting = true; + spin_unlock(&tp->lock); +} + +static bool tcf_proto_is_deleting(struct tcf_proto *tp) +{ + bool deleting; + + spin_lock(&tp->lock); + deleting = tp->deleting; + spin_unlock(&tp->lock); + + return deleting; +} + #define ASSERT_BLOCK_LOCKED(block) \ lockdep_assert_held(&(block)->lock) @@ -983,13 +1027,27 @@ EXPORT_SYMBOL(tcf_get_next_chain); static struct tcf_proto * __tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp) { + u32 prio = 0; + ASSERT_RTNL(); mutex_lock(&chain->filter_chain_lock); - if (!tp) + if (!tp) { tp = tcf_chain_dereference(chain->filter_chain, chain); - else + } else if (tcf_proto_is_deleting(tp)) { + /* 'deleting' flag is set and chain->filter_chain_lock was + * unlocked, which means next pointer could be invalid. Restart + * search. + */ + prio = tp->prio + 1; + tp = tcf_chain_dereference(chain->filter_chain, chain); + + for (; tp; tp = tcf_chain_dereference(tp->next, chain)) + if (!tp->deleting && tp->prio >= prio) + break; + } else { tp = tcf_chain_dereference(tp->next, chain); + } if (tp) tcf_proto_get(tp); @@ -1569,11 +1627,85 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain, { struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain); + tcf_proto_mark_delete(tp); if (tp == chain->filter_chain) tcf_chain0_head_change(chain, next); RCU_INIT_POINTER(*chain_info->pprev, next); } +static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, + struct tcf_chain_info *chain_info, + u32 protocol, u32 prio, + bool prio_allocate); + +/* Try to insert new proto. + * If proto with specified priority already exists, free new proto + * and return existing one. + */ + +static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain, + struct tcf_proto *tp_new, + u32 protocol, u32 prio) +{ + struct tcf_chain_info chain_info; + struct tcf_proto *tp; + + mutex_lock(&chain->filter_chain_lock); + + tp = tcf_chain_tp_find(chain, &chain_info, + protocol, prio, false); + if (!tp) + tcf_chain_tp_insert(chain, &chain_info, tp_new); + mutex_unlock(&chain->filter_chain_lock); + + if (tp) { + tcf_proto_destroy(tp_new, NULL); + tp_new = tp; + } + + return tp_new; +} + +static void tcf_chain_tp_delete_empty(struct tcf_chain *chain, + struct tcf_proto *tp, + struct netlink_ext_ack *extack) +{ + struct tcf_chain_info chain_info; + struct tcf_proto *tp_iter; + struct tcf_proto **pprev; + struct tcf_proto *next; + + mutex_lock(&chain->filter_chain_lock); + + /* Atomically find and remove tp from chain. */ + for (pprev = &chain->filter_chain; + (tp_iter = tcf_chain_dereference(*pprev, chain)); + pprev = &tp_iter->next) { + if (tp_iter == tp) { + chain_info.pprev = pprev; + chain_info.next = tp_iter->next; + WARN_ON(tp_iter->deleting); + break; + } + } + /* Verify that tp still exists and no new filters were inserted + * concurrently. + * Mark tp for deletion if it is empty. + */ + if (!tp_iter || !tcf_proto_check_delete(tp)) { + mutex_unlock(&chain->filter_chain_lock); + return; + } + + next = tcf_chain_dereference(chain_info.next, chain); + if (tp == chain->filter_chain) + tcf_chain0_head_change(chain, next); + RCU_INIT_POINTER(*chain_info.pprev, next); + mutex_unlock(&chain->filter_chain_lock); + + tcf_proto_put(tp, extack); +} + static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, struct tcf_chain_info *chain_info, u32 protocol, u32 prio, @@ -1809,6 +1941,8 @@ replay: } if (tp == NULL) { + struct tcf_proto *tp_new = NULL; + /* Proto-tcf does not exist, create new one */ if (tca[TCA_KIND] == NULL || !protocol) { @@ -1828,25 +1962,25 @@ replay: &chain_info)); mutex_unlock(&chain->filter_chain_lock); - tp = tcf_proto_create(nla_data(tca[TCA_KIND]), - protocol, prio, chain, extack); - if (IS_ERR(tp)) { - err = PTR_ERR(tp); + tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]), + protocol, prio, chain, extack); + if (IS_ERR(tp_new)) { + err = PTR_ERR(tp_new); goto errout; } - mutex_lock(&chain->filter_chain_lock); - tcf_chain_tp_insert(chain, &chain_info, tp); - mutex_unlock(&chain->filter_chain_lock); tp_created = 1; - } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { - NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); - err = -EINVAL; - goto errout_locked; + tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio); } else { mutex_unlock(&chain->filter_chain_lock); } + if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { + NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); + err = -EINVAL; + goto errout; + } + fh = tp->ops->get(tp, t->tcm_handle); if (!fh) { @@ -1873,12 +2007,10 @@ replay: if (err == 0) tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_NEWTFILTER, false); - else if (tp_created) - tcf_proto_destroy(tp, NULL); errout: - if (chain) - tcf_chain_put(chain); + if (err && tp_created) + tcf_chain_tp_delete_empty(chain, tp, NULL); if (chain) { if (tp && !IS_ERR(tp)) tcf_proto_put(tp, NULL); @@ -1984,9 +2116,9 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, tcf_chain_tp_remove(chain, &chain_info, tp); mutex_unlock(&chain->filter_chain_lock); + tcf_proto_put(tp, NULL); tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_DELTFILTER, false); - tcf_proto_destroy(tp, extack); err = 0; goto errout; } @@ -2005,13 +2137,8 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, extack); if (err) goto errout; - if (last) { - mutex_lock(&chain->filter_chain_lock); - tcf_chain_tp_remove(chain, &chain_info, tp); - mutex_unlock(&chain->filter_chain_lock); - - tcf_proto_destroy(tp, extack); - } + if (last) + tcf_chain_tp_delete_empty(chain, tp, extack); } errout: From 726d061286ceeeabda54ba6f080d0cb8f187a9d7 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 11 Feb 2019 10:55:42 +0200 Subject: [PATCH 1070/1436] net: sched: prevent insertion of new classifiers during chain flush Extend tcf_chain with 'flushing' flag. Use the flag to prevent insertion of new classifier instances when chain flushing is in progress in order to prevent resource leak when tcf_proto is created by unlocked users concurrently. Return EAGAIN error from tcf_chain_tp_insert_unique() to restart tc_new_tfilter() and lookup the chain/proto again. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 + net/sched/cls_api.c | 35 +++++++++++++++++++++++++++++------ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 083e566fc380..e8cf36ed3e87 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -356,6 +356,7 @@ struct tcf_chain { unsigned int refcnt; unsigned int action_refcnt; bool explicitly_created; + bool flushing; const struct tcf_proto_ops *tmplt_ops; void *tmplt_priv; }; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index c6452e3bfc6a..3038a82f6591 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -477,9 +477,12 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, mutex_unlock(&block->lock); /* The last dropped non-action reference will trigger notification. */ - if (is_last && !by_act) + if (is_last && !by_act) { tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain_index, block, NULL, 0, 0, false); + /* Last reference to chain, no need to lock. */ + chain->flushing = false; + } if (refcnt == 0) { tc_chain_tmplt_del(tmplt_ops, tmplt_priv); @@ -511,6 +514,7 @@ static void tcf_chain_flush(struct tcf_chain *chain) tp = tcf_chain_dereference(chain->filter_chain, chain); RCU_INIT_POINTER(chain->filter_chain, NULL); tcf_chain0_head_change(chain, NULL); + chain->flushing = true; mutex_unlock(&chain->filter_chain_lock); while (tp) { @@ -1610,15 +1614,20 @@ static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain, return tcf_chain_dereference(*chain_info->pprev, chain); } -static void tcf_chain_tp_insert(struct tcf_chain *chain, - struct tcf_chain_info *chain_info, - struct tcf_proto *tp) +static int tcf_chain_tp_insert(struct tcf_chain *chain, + struct tcf_chain_info *chain_info, + struct tcf_proto *tp) { + if (chain->flushing) + return -EAGAIN; + if (*chain_info->pprev == chain->filter_chain) tcf_chain0_head_change(chain, tp); tcf_proto_get(tp); RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); + + return 0; } static void tcf_chain_tp_remove(struct tcf_chain *chain, @@ -1649,18 +1658,22 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain, { struct tcf_chain_info chain_info; struct tcf_proto *tp; + int err = 0; mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false); if (!tp) - tcf_chain_tp_insert(chain, &chain_info, tp_new); + err = tcf_chain_tp_insert(chain, &chain_info, tp_new); mutex_unlock(&chain->filter_chain_lock); if (tp) { tcf_proto_destroy(tp_new, NULL); tp_new = tp; + } else if (err) { + tcf_proto_destroy(tp_new, NULL); + tp_new = ERR_PTR(err); } return tp_new; @@ -1943,6 +1956,11 @@ replay: if (tp == NULL) { struct tcf_proto *tp_new = NULL; + if (chain->flushing) { + err = -EAGAIN; + goto errout_locked; + } + /* Proto-tcf does not exist, create new one */ if (tca[TCA_KIND] == NULL || !protocol) { @@ -1966,11 +1984,15 @@ replay: protocol, prio, chain, extack); if (IS_ERR(tp_new)) { err = PTR_ERR(tp_new); - goto errout; + goto errout_tp; } tp_created = 1; tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio); + if (IS_ERR(tp)) { + err = PTR_ERR(tp); + goto errout_tp; + } } else { mutex_unlock(&chain->filter_chain_lock); } @@ -2011,6 +2033,7 @@ replay: errout: if (err && tp_created) tcf_chain_tp_delete_empty(chain, tp, NULL); +errout_tp: if (chain) { if (tp && !IS_ERR(tp)) tcf_proto_put(tp, NULL); From ec6743a10996d38e0438e5f45f2347ff2f42df0a Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 11 Feb 2019 10:55:43 +0200 Subject: [PATCH 1071/1436] net: sched: track rtnl lock status when validating extensions Actions API is already updated to not rely on rtnl lock for synchronization. However, it need to be provided with rtnl status when called from classifiers API in order to be able to correctly release the lock when loading kernel module. Extend extension validation function with 'rtnl_held' flag which is passed to actions API. Add new 'rtnl_held' parameter to tcf_exts_validate() in cls API. No classifier is currently updated to support unlocked execution, so pass hardcoded 'true' flag parameter value. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 +- net/sched/cls_api.c | 9 +++++---- net/sched/cls_basic.c | 2 +- net/sched/cls_bpf.c | 3 ++- net/sched/cls_cgroup.c | 2 +- net/sched/cls_flow.c | 2 +- net/sched/cls_flower.c | 3 ++- net/sched/cls_fw.c | 2 +- net/sched/cls_matchall.c | 3 ++- net/sched/cls_route.c | 2 +- net/sched/cls_rsvp.h | 3 ++- net/sched/cls_tcindex.c | 2 +- net/sched/cls_u32.c | 2 +- 13 files changed, 21 insertions(+), 16 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index e5dafa5ee1b2..0e3b61016931 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -416,7 +416,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, - struct tcf_exts *exts, bool ovr, + struct tcf_exts *exts, bool ovr, bool rtnl_held, struct netlink_ext_ack *extack); void tcf_exts_destroy(struct tcf_exts *exts); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3038a82f6591..a3e715d34efb 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2841,7 +2841,7 @@ EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT { @@ -2851,7 +2851,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, if (exts->police && tb[exts->police]) { act = tcf_action_init_1(net, tp, tb[exts->police], rate_tlv, "police", ovr, - TCA_ACT_BIND, true, extack); + TCA_ACT_BIND, rtnl_held, + extack); if (IS_ERR(act)) return PTR_ERR(act); @@ -2863,8 +2864,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, err = tcf_action_init(net, tp, tb[exts->action], rate_tlv, NULL, ovr, TCA_ACT_BIND, - exts->actions, &attr_size, true, - extack); + exts->actions, &attr_size, + rtnl_held, extack); if (err < 0) return err; exts->nr_actions = err; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 4a57fec6f306..eaf9c02fe792 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -153,7 +153,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack); if (err < 0) return err; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index a95cb240a606..656b3423ad35 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -417,7 +417,8 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) return -EINVAL; - ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, extack); + ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true, + extack); if (ret < 0) return ret; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 3bc01bdde165..663ee1c6d606 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -110,7 +110,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, goto errout; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr, - extack); + true, extack); if (err < 0) goto errout; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 2bb043cd436b..39a6407d4832 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -445,7 +445,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, goto err2; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr, - extack); + true, extack); if (err < 0) goto err2; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 6a341287a527..5e3f74ab68ca 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1272,7 +1272,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, + extack); if (err < 0) return err; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 29eeeaf3ea44..c8173ebb69f2 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -217,7 +217,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, int err; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr, - extack); + true, extack); if (err < 0) return err; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index a1b803fd372e..8848a147c4bf 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -145,7 +145,8 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp, { int err; - err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true, + extack); if (err < 0) return err; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 0404aa5fa7cb..44b26038c4c4 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -393,7 +393,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct route4_bucket *b; int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack); if (err < 0) return err; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index e9ccf7daea7d..9dd9530e6a52 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -502,7 +502,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true, + extack); if (err < 0) goto errout2; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 9ccc93f257db..b7dc667b6ec0 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -314,7 +314,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack); if (err < 0) goto errout; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index dcea21004604..e891f30d42e9 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -726,7 +726,7 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, { int err; - err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack); if (err < 0) return err; From 7d5509fa0d3ddfe252b4418513e493ac98de3317 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 11 Feb 2019 10:55:44 +0200 Subject: [PATCH 1072/1436] net: sched: extend proto ops with 'put' callback Add optional tp->ops->put() API to be implemented for filter reference counting. This new function is called by cls API to release filter reference for filters returned by tp->ops->change() or tp->ops->get() functions. Implement tfilter_put() helper to call tp->ops->put() only for classifiers that implement it. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 + net/sched/cls_api.c | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e8cf36ed3e87..410dda80ca62 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -277,6 +277,7 @@ struct tcf_proto_ops { struct netlink_ext_ack *extack); void* (*get)(struct tcf_proto*, u32 handle); + void (*put)(struct tcf_proto *tp, void *f); int (*change)(struct net *net, struct sk_buff *, struct tcf_proto*, unsigned long, u32 handle, struct nlattr **, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a3e715d34efb..8fe38aa180cf 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1870,6 +1870,12 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, q, parent, NULL, event, false); } +static void tfilter_put(struct tcf_proto *tp, void *fh) +{ + if (tp->ops->put && fh) + tp->ops->put(tp, fh); +} + static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { @@ -2012,6 +2018,7 @@ replay: goto errout; } } else if (n->nlmsg_flags & NLM_F_EXCL) { + tfilter_put(tp, fh); NL_SET_ERR_MSG(extack, "Filter already exists"); err = -EEXIST; goto errout; @@ -2026,9 +2033,11 @@ replay: err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE, extack); - if (err == 0) + if (err == 0) { tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_NEWTFILTER, false); + tfilter_put(tp, fh); + } errout: if (err && tp_created) @@ -2259,6 +2268,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Failed to send filter notify message"); } + tfilter_put(tp, fh); errout: if (chain) { if (tp && !IS_ERR(tp)) From 12db03b65c2b90752e4c37666977fd4a1b5f5824 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 11 Feb 2019 10:55:45 +0200 Subject: [PATCH 1073/1436] net: sched: extend proto ops to support unlocked classifiers Add 'rtnl_held' flag to tcf proto change, delete, destroy, dump, walk functions to track rtnl lock status. Extend users of these function in cls API to propagate rtnl lock status to them. This allows classifiers to obtain rtnl lock when necessary and to pass rtnl lock status to extensions and driver offload callbacks. Add flags field to tcf proto ops. Add flag value to indicate that classifier doesn't require rtnl lock. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 +- include/net/sch_generic.h | 17 ++-- net/sched/cls_api.c | 168 +++++++++++++++++++++----------------- net/sched/cls_basic.c | 12 +-- net/sched/cls_bpf.c | 12 +-- net/sched/cls_cgroup.c | 11 +-- net/sched/cls_flow.c | 13 +-- net/sched/cls_flower.c | 13 +-- net/sched/cls_fw.c | 13 +-- net/sched/cls_matchall.c | 13 +-- net/sched/cls_route.c | 12 +-- net/sched/cls_rsvp.h | 13 +-- net/sched/cls_tcindex.c | 15 ++-- net/sched/cls_u32.c | 12 +-- net/sched/sch_api.c | 6 +- 15 files changed, 191 insertions(+), 141 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 0e3b61016931..6a530bef9253 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -47,7 +47,7 @@ void tcf_chain_put_by_act(struct tcf_chain *chain); struct tcf_chain *tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain); struct tcf_proto *tcf_get_next_proto(struct tcf_chain *chain, - struct tcf_proto *tp); + struct tcf_proto *tp, bool rtnl_held); void tcf_block_netif_keep_dst(struct tcf_block *block); int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 410dda80ca62..365801c2a4f5 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -273,7 +273,7 @@ struct tcf_proto_ops { const struct tcf_proto *, struct tcf_result *); int (*init)(struct tcf_proto*); - void (*destroy)(struct tcf_proto *tp, + void (*destroy)(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack); void* (*get)(struct tcf_proto*, u32 handle); @@ -281,12 +281,13 @@ struct tcf_proto_ops { int (*change)(struct net *net, struct sk_buff *, struct tcf_proto*, unsigned long, u32 handle, struct nlattr **, - void **, bool, + void **, bool, bool, struct netlink_ext_ack *); int (*delete)(struct tcf_proto *tp, void *arg, - bool *last, + bool *last, bool rtnl_held, struct netlink_ext_ack *); - void (*walk)(struct tcf_proto*, struct tcf_walker *arg); + void (*walk)(struct tcf_proto *tp, + struct tcf_walker *arg, bool rtnl_held); int (*reoffload)(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack); @@ -299,12 +300,18 @@ struct tcf_proto_ops { /* rtnetlink specific */ int (*dump)(struct net*, struct tcf_proto*, void *, - struct sk_buff *skb, struct tcmsg*); + struct sk_buff *skb, struct tcmsg*, + bool); int (*tmplt_dump)(struct sk_buff *skb, struct net *net, void *tmplt_priv); struct module *owner; + int flags; +}; + +enum tcf_proto_ops_flags { + TCF_PROTO_OPS_DOIT_UNLOCKED = 1, }; struct tcf_proto { diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8fe38aa180cf..e8ed461e94af 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -69,7 +69,8 @@ static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind) } static const struct tcf_proto_ops * -tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack) +tcf_proto_lookup_ops(const char *kind, bool rtnl_held, + struct netlink_ext_ack *extack) { const struct tcf_proto_ops *ops; @@ -77,9 +78,11 @@ tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack) if (ops) return ops; #ifdef CONFIG_MODULES - rtnl_unlock(); + if (rtnl_held) + rtnl_unlock(); request_module("cls_%s", kind); - rtnl_lock(); + if (rtnl_held) + rtnl_lock(); ops = __tcf_proto_lookup_ops(kind); /* We dropped the RTNL semaphore in order to perform * the module load. So, even if we succeeded in loading @@ -162,6 +165,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, u32 prio, struct tcf_chain *chain, + bool rtnl_held, struct netlink_ext_ack *extack) { struct tcf_proto *tp; @@ -171,7 +175,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, if (!tp) return ERR_PTR(-ENOBUFS); - tp->ops = tcf_proto_lookup_ops(kind, extack); + tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack); if (IS_ERR(tp->ops)) { err = PTR_ERR(tp->ops); goto errout; @@ -202,20 +206,20 @@ static void tcf_proto_get(struct tcf_proto *tp) static void tcf_chain_put(struct tcf_chain *chain); -static void tcf_proto_destroy(struct tcf_proto *tp, +static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { - tp->ops->destroy(tp, extack); + tp->ops->destroy(tp, rtnl_held, extack); tcf_chain_put(tp->chain); module_put(tp->ops->owner); kfree_rcu(tp, rcu); } -static void tcf_proto_put(struct tcf_proto *tp, +static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { if (refcount_dec_and_test(&tp->refcnt)) - tcf_proto_destroy(tp, extack); + tcf_proto_destroy(tp, rtnl_held, extack); } static int walker_noop(struct tcf_proto *tp, void *d, struct tcf_walker *arg) @@ -223,21 +227,21 @@ static int walker_noop(struct tcf_proto *tp, void *d, struct tcf_walker *arg) return -1; } -static bool tcf_proto_is_empty(struct tcf_proto *tp) +static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held) { struct tcf_walker walker = { .fn = walker_noop, }; if (tp->ops->walk) { - tp->ops->walk(tp, &walker); + tp->ops->walk(tp, &walker, rtnl_held); return !walker.stop; } return true; } -static bool tcf_proto_check_delete(struct tcf_proto *tp) +static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held) { spin_lock(&tp->lock); - if (tcf_proto_is_empty(tp)) + if (tcf_proto_is_empty(tp, rtnl_held)) tp->deleting = true; spin_unlock(&tp->lock); return tp->deleting; @@ -506,7 +510,7 @@ static void tcf_chain_put_explicitly_created(struct tcf_chain *chain) __tcf_chain_put(chain, false, true); } -static void tcf_chain_flush(struct tcf_chain *chain) +static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held) { struct tcf_proto *tp, *tp_next; @@ -519,7 +523,7 @@ static void tcf_chain_flush(struct tcf_chain *chain) while (tp) { tp_next = rcu_dereference_protected(tp->next, 1); - tcf_proto_put(tp, NULL); + tcf_proto_put(tp, rtnl_held, NULL); tp = tp_next; } } @@ -1070,18 +1074,19 @@ __tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp) */ struct tcf_proto * -tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp) +tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp, + bool rtnl_held) { struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp); if (tp) - tcf_proto_put(tp, NULL); + tcf_proto_put(tp, rtnl_held, NULL); return tp_next; } EXPORT_SYMBOL(tcf_get_next_proto); -static void tcf_block_flush_all_chains(struct tcf_block *block) +static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held) { struct tcf_chain *chain; @@ -1092,12 +1097,12 @@ static void tcf_block_flush_all_chains(struct tcf_block *block) chain; chain = tcf_get_next_chain(block, chain)) { tcf_chain_put_explicitly_created(chain); - tcf_chain_flush(chain); + tcf_chain_flush(chain, rtnl_held); } } static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, - struct tcf_block_ext_info *ei) + struct tcf_block_ext_info *ei, bool rtnl_held) { if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) { /* Flushing/putting all chains will cause the block to be @@ -1118,15 +1123,15 @@ static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, if (free_block) tcf_block_destroy(block); else - tcf_block_flush_all_chains(block); + tcf_block_flush_all_chains(block, rtnl_held); } else if (q) { tcf_block_offload_unbind(block, q, ei); } } -static void tcf_block_refcnt_put(struct tcf_block *block) +static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held) { - __tcf_block_put(block, NULL, NULL); + __tcf_block_put(block, NULL, NULL, rtnl_held); } /* Find tcf block. @@ -1244,10 +1249,11 @@ errout_qdisc: return ERR_PTR(err); } -static void tcf_block_release(struct Qdisc *q, struct tcf_block *block) +static void tcf_block_release(struct Qdisc *q, struct tcf_block *block, + bool rtnl_held) { if (!IS_ERR_OR_NULL(block)) - tcf_block_refcnt_put(block); + tcf_block_refcnt_put(block, rtnl_held); if (q) qdisc_put(q); @@ -1358,7 +1364,7 @@ err_chain0_head_change_cb_add: tcf_block_owner_del(block, q, ei->binder_type); err_block_owner_add: err_block_insert: - tcf_block_refcnt_put(block); + tcf_block_refcnt_put(block, true); return err; } EXPORT_SYMBOL(tcf_block_get_ext); @@ -1395,7 +1401,7 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, tcf_chain0_head_change_cb_del(block, ei); tcf_block_owner_del(block, q, ei->binder_type); - __tcf_block_put(block, q, ei); + __tcf_block_put(block, q, ei, true); } EXPORT_SYMBOL(tcf_block_put_ext); @@ -1464,7 +1470,7 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb, for (tp = __tcf_get_next_proto(chain, NULL); tp; tp_prev = tp, tp = __tcf_get_next_proto(chain, tp), - tcf_proto_put(tp_prev, NULL)) { + tcf_proto_put(tp_prev, true, NULL)) { if (tp->ops->reoffload) { err = tp->ops->reoffload(tp, add, cb, cb_priv, extack); @@ -1481,7 +1487,7 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb, return 0; err_playback_remove: - tcf_proto_put(tp, NULL); + tcf_proto_put(tp, true, NULL); tcf_chain_put(chain); tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use, extack); @@ -1654,7 +1660,8 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain, struct tcf_proto *tp_new, - u32 protocol, u32 prio) + u32 protocol, u32 prio, + bool rtnl_held) { struct tcf_chain_info chain_info; struct tcf_proto *tp; @@ -1669,10 +1676,10 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain, mutex_unlock(&chain->filter_chain_lock); if (tp) { - tcf_proto_destroy(tp_new, NULL); + tcf_proto_destroy(tp_new, rtnl_held, NULL); tp_new = tp; } else if (err) { - tcf_proto_destroy(tp_new, NULL); + tcf_proto_destroy(tp_new, rtnl_held, NULL); tp_new = ERR_PTR(err); } @@ -1680,7 +1687,7 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain, } static void tcf_chain_tp_delete_empty(struct tcf_chain *chain, - struct tcf_proto *tp, + struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct tcf_chain_info chain_info; @@ -1705,7 +1712,7 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain, * concurrently. * Mark tp for deletion if it is empty. */ - if (!tp_iter || !tcf_proto_check_delete(tp)) { + if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) { mutex_unlock(&chain->filter_chain_lock); return; } @@ -1716,7 +1723,7 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain, RCU_INIT_POINTER(*chain_info.pprev, next); mutex_unlock(&chain->filter_chain_lock); - tcf_proto_put(tp, extack); + tcf_proto_put(tp, rtnl_held, extack); } static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, @@ -1755,7 +1762,8 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, static int tcf_fill_node(struct net *net, struct sk_buff *skb, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, u32 parent, void *fh, - u32 portid, u32 seq, u16 flags, int event) + u32 portid, u32 seq, u16 flags, int event, + bool rtnl_held) { struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -1783,7 +1791,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb, if (!fh) { tcm->tcm_handle = 0; } else { - if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0) + if (tp->ops->dump && + tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0) goto nla_put_failure; } nlh->nlmsg_len = skb_tail_pointer(skb) - b; @@ -1798,7 +1807,8 @@ nla_put_failure: static int tfilter_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, - u32 parent, void *fh, int event, bool unicast) + u32 parent, void *fh, int event, bool unicast, + bool rtnl_held) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; @@ -1808,7 +1818,8 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, return -ENOBUFS; if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, - n->nlmsg_seq, n->nlmsg_flags, event) <= 0) { + n->nlmsg_seq, n->nlmsg_flags, event, + rtnl_held) <= 0) { kfree_skb(skb); return -EINVAL; } @@ -1824,7 +1835,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, u32 parent, void *fh, bool unicast, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; @@ -1835,13 +1846,14 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, return -ENOBUFS; if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, - n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) { + n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER, + rtnl_held) <= 0) { NL_SET_ERR_MSG(extack, "Failed to build del event notification"); kfree_skb(skb); return -EINVAL; } - err = tp->ops->delete(tp, fh, last, extack); + err = tp->ops->delete(tp, fh, last, rtnl_held, extack); if (err) { kfree_skb(skb); return err; @@ -1860,14 +1872,15 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, struct tcf_block *block, struct Qdisc *q, u32 parent, struct nlmsghdr *n, - struct tcf_chain *chain, int event) + struct tcf_chain *chain, int event, + bool rtnl_held) { struct tcf_proto *tp; - for (tp = tcf_get_next_proto(chain, NULL); - tp; tp = tcf_get_next_proto(chain, tp)) + for (tp = tcf_get_next_proto(chain, NULL, rtnl_held); + tp; tp = tcf_get_next_proto(chain, tp, rtnl_held)) tfilter_notify(net, oskb, n, tp, block, - q, parent, NULL, event, false); + q, parent, NULL, event, false, rtnl_held); } static void tfilter_put(struct tcf_proto *tp, void *fh) @@ -1896,6 +1909,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *fh; int err; int tp_created; + bool rtnl_held = true; if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -1987,14 +2001,16 @@ replay: mutex_unlock(&chain->filter_chain_lock); tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]), - protocol, prio, chain, extack); + protocol, prio, chain, rtnl_held, + extack); if (IS_ERR(tp_new)) { err = PTR_ERR(tp_new); goto errout_tp; } tp_created = 1; - tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio); + tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio, + rtnl_held); if (IS_ERR(tp)) { err = PTR_ERR(tp); goto errout_tp; @@ -2032,24 +2048,24 @@ replay: err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE, - extack); + rtnl_held, extack); if (err == 0) { tfilter_notify(net, skb, n, tp, block, q, parent, fh, - RTM_NEWTFILTER, false); + RTM_NEWTFILTER, false, rtnl_held); tfilter_put(tp, fh); } errout: if (err && tp_created) - tcf_chain_tp_delete_empty(chain, tp, NULL); + tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL); errout_tp: if (chain) { if (tp && !IS_ERR(tp)) - tcf_proto_put(tp, NULL); + tcf_proto_put(tp, rtnl_held, NULL); if (!tp_created) tcf_chain_put(chain); } - tcf_block_release(q, block); + tcf_block_release(q, block, rtnl_held); if (err == -EAGAIN) /* Replay the request. */ goto replay; @@ -2078,6 +2094,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, unsigned long cl = 0; void *fh = NULL; int err; + bool rtnl_held = true; if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -2127,8 +2144,8 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, if (prio == 0) { tfilter_notify_chain(net, skb, block, q, parent, n, - chain, RTM_DELTFILTER); - tcf_chain_flush(chain); + chain, RTM_DELTFILTER, rtnl_held); + tcf_chain_flush(chain, rtnl_held); err = 0; goto errout; } @@ -2148,9 +2165,9 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, tcf_chain_tp_remove(chain, &chain_info, tp); mutex_unlock(&chain->filter_chain_lock); - tcf_proto_put(tp, NULL); + tcf_proto_put(tp, rtnl_held, NULL); tfilter_notify(net, skb, n, tp, block, q, parent, fh, - RTM_DELTFILTER, false); + RTM_DELTFILTER, false, rtnl_held); err = 0; goto errout; } @@ -2166,20 +2183,21 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, err = tfilter_del_notify(net, skb, n, tp, block, q, parent, fh, false, &last, - extack); + rtnl_held, extack); + if (err) goto errout; if (last) - tcf_chain_tp_delete_empty(chain, tp, extack); + tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack); } errout: if (chain) { if (tp && !IS_ERR(tp)) - tcf_proto_put(tp, NULL); + tcf_proto_put(tp, rtnl_held, NULL); tcf_chain_put(chain); } - tcf_block_release(q, block); + tcf_block_release(q, block, rtnl_held); return err; errout_locked: @@ -2205,6 +2223,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, unsigned long cl = 0; void *fh = NULL; int err; + bool rtnl_held = true; err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) @@ -2263,7 +2282,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, err = -ENOENT; } else { err = tfilter_notify(net, skb, n, tp, block, q, parent, - fh, RTM_NEWTFILTER, true); + fh, RTM_NEWTFILTER, true, rtnl_held); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send filter notify message"); } @@ -2272,10 +2291,10 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, errout: if (chain) { if (tp && !IS_ERR(tp)) - tcf_proto_put(tp, NULL); + tcf_proto_put(tp, rtnl_held, NULL); tcf_chain_put(chain); } - tcf_block_release(q, block); + tcf_block_release(q, block, rtnl_held); return err; } @@ -2296,7 +2315,7 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg) return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent, n, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, - RTM_NEWTFILTER); + RTM_NEWTFILTER, true); } static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, @@ -2313,7 +2332,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, tp; tp_prev = tp, tp = __tcf_get_next_proto(chain, tp), - tcf_proto_put(tp_prev, NULL), + tcf_proto_put(tp_prev, true, NULL), (*p_index)++) { if (*p_index < index_start) continue; @@ -2330,9 +2349,8 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, if (tcf_fill_node(net, skb, tp, block, q, parent, NULL, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - RTM_NEWTFILTER) <= 0) + RTM_NEWTFILTER, true) <= 0) goto errout; - cb->args[1] = 1; } if (!tp->ops->walk) @@ -2347,7 +2365,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, arg.w.skip = cb->args[1] - 1; arg.w.count = 0; arg.w.cookie = cb->args[2]; - tp->ops->walk(tp, &arg.w); + tp->ops->walk(tp, &arg.w, true); cb->args[2] = arg.w.cookie; cb->args[1] = arg.w.count + 1; if (arg.w.stop) @@ -2356,7 +2374,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, return true; errout: - tcf_proto_put(tp, NULL); + tcf_proto_put(tp, true, NULL); return false; } @@ -2448,7 +2466,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) } if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) - tcf_block_refcnt_put(block); + tcf_block_refcnt_put(block, true); cb->args[0] = index; out: @@ -2569,7 +2587,7 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, if (!tca[TCA_KIND]) return 0; - ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack); + ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), true, extack); if (IS_ERR(ops)) return PTR_ERR(ops); if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) { @@ -2699,9 +2717,9 @@ replay: break; case RTM_DELCHAIN: tfilter_notify_chain(net, skb, block, q, parent, n, - chain, RTM_DELTFILTER); + chain, RTM_DELTFILTER, true); /* Flush the chain first as the user requested chain removal. */ - tcf_chain_flush(chain); + tcf_chain_flush(chain, true); /* In case the chain was successfully deleted, put a reference * to the chain previously taken during addition. */ @@ -2722,7 +2740,7 @@ replay: errout: tcf_chain_put(chain); errout_block: - tcf_block_release(q, block); + tcf_block_release(q, block, true); if (err == -EAGAIN) /* Replay the request. */ goto replay; @@ -2829,7 +2847,7 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) } if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) - tcf_block_refcnt_put(block); + tcf_block_refcnt_put(block, true); cb->args[0] = index; out: diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index eaf9c02fe792..2383f449d2bc 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -107,7 +107,8 @@ static void basic_delete_filter_work(struct work_struct *work) rtnl_unlock(); } -static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void basic_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f, *n; @@ -126,7 +127,7 @@ static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int basic_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f = arg; @@ -173,7 +174,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, bool ovr, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { int err; struct basic_head *head = rtnl_dereference(tp->root); @@ -247,7 +248,8 @@ errout: return err; } -static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f; @@ -274,7 +276,7 @@ static void basic_bind_class(void *fh, u32 classid, unsigned long cl) } static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct tc_basic_pcnt gpf = {}; struct basic_filter *f = fh; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 656b3423ad35..062350c6621c 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -298,7 +298,7 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog, } static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); @@ -307,7 +307,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last, return 0; } -static void cls_bpf_destroy(struct tcf_proto *tp, +static void cls_bpf_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); @@ -456,7 +456,8 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *oldprog = *arg; @@ -576,7 +577,7 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog, } static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *tm) + struct sk_buff *skb, struct tcmsg *tm, bool rtnl_held) { struct cls_bpf_prog *prog = fh; struct nlattr *nest; @@ -636,7 +637,8 @@ static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl) prog->res.class = cl; } -static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *prog; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 663ee1c6d606..1cef3b416094 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -78,7 +78,7 @@ static void cls_cgroup_destroy_work(struct work_struct *work) static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, + void **arg, bool ovr, bool rtnl_held, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_CGROUP_MAX + 1]; @@ -130,7 +130,7 @@ errout: return err; } -static void cls_cgroup_destroy(struct tcf_proto *tp, +static void cls_cgroup_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); @@ -145,12 +145,13 @@ static void cls_cgroup_destroy(struct tcf_proto *tp, } static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } -static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); @@ -166,7 +167,7 @@ skip: } static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); struct nlattr *nest; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 39a6407d4832..204e2edae8d5 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -391,7 +391,8 @@ static void flow_destroy_filter_work(struct work_struct *work) static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *fold, *fnew; @@ -566,7 +567,7 @@ err1: } static int flow_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f = arg; @@ -590,7 +591,8 @@ static int flow_init(struct tcf_proto *tp) return 0; } -static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void flow_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f, *next; @@ -617,7 +619,7 @@ static void *flow_get(struct tcf_proto *tp, u32 handle) } static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct flow_filter *f = fh; struct nlattr *nest; @@ -677,7 +679,8 @@ nla_put_failure: return -1; } -static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 5e3f74ab68ca..32fa3e20adc5 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -465,7 +465,8 @@ static void fl_destroy_sleepable(struct work_struct *work) module_put(THIS_MODULE); } -static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void fl_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct fl_flow_mask *mask, *next_mask; @@ -1300,7 +1301,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, static int fl_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *fold = *arg; @@ -1437,7 +1439,7 @@ errout_mask_alloc: } static int fl_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f = arg; @@ -1449,7 +1451,8 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last, return 0; } -static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f; @@ -2044,7 +2047,7 @@ nla_put_failure: } static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct cls_fl_filter *f = fh; struct nlattr *nest; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index c8173ebb69f2..317151bae73b 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -139,7 +139,8 @@ static void fw_delete_filter_work(struct work_struct *work) rtnl_unlock(); } -static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void fw_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f; @@ -163,7 +164,7 @@ static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int fw_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = arg; @@ -250,7 +251,8 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, - bool ovr, struct netlink_ext_ack *extack) + bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = *arg; @@ -354,7 +356,8 @@ errout: return err; } -static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct fw_head *head = rtnl_dereference(tp->root); int h; @@ -384,7 +387,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) } static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = fh; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 8848a147c4bf..a37137430e61 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -109,7 +109,8 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, return 0; } -static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void mall_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_mall_head *head = rtnl_dereference(tp->root); @@ -160,7 +161,8 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp, static int mall_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_mall_head *head = rtnl_dereference(tp->root); struct nlattr *tb[TCA_MATCHALL_MAX + 1]; @@ -233,12 +235,13 @@ err_exts_init: } static int mall_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } -static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct cls_mall_head *head = rtnl_dereference(tp->root); @@ -280,7 +283,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, } static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct tc_matchall_pcnt gpf = {}; struct cls_mall_head *head = fh; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 44b26038c4c4..e590c3a2999d 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -276,7 +276,8 @@ static void route4_queue_work(struct route4_filter *f) tcf_queue_work(&f->rwork, route4_delete_filter_work); } -static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void route4_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); int h1, h2; @@ -312,7 +313,7 @@ static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int route4_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter *f = arg; @@ -468,7 +469,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, static int route4_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, bool ovr, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter __rcu **fp; @@ -560,7 +561,8 @@ errout: return err; } -static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct route4_head *head = rtnl_dereference(tp->root); unsigned int h, h1; @@ -597,7 +599,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) } static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct route4_filter *f = fh; struct nlattr *nest; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 9dd9530e6a52..4d3836178fa5 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -312,7 +312,8 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) __rsvp_delete_filter(f); } -static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void rsvp_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct rsvp_head *data = rtnl_dereference(tp->root); int h1, h2; @@ -341,7 +342,7 @@ static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct rsvp_head *head = rtnl_dereference(tp->root); struct rsvp_filter *nfp, *f = arg; @@ -477,7 +478,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct rsvp_head *data = rtnl_dereference(tp->root); struct rsvp_filter *f, *nfp; @@ -655,7 +657,8 @@ errout2: return err; } -static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct rsvp_head *head = rtnl_dereference(tp->root); unsigned int h, h1; @@ -689,7 +692,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) } static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct rsvp_filter *f = fh; struct rsvp_session *s; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index b7dc667b6ec0..14d6b4058045 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -173,7 +173,7 @@ static void tcindex_destroy_fexts_work(struct work_struct *work) } static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = arg; @@ -226,7 +226,7 @@ static int tcindex_destroy_element(struct tcf_proto *tp, { bool last; - return tcindex_delete(tp, arg, &last, NULL); + return tcindex_delete(tp, arg, &last, false, NULL); } static void __tcindex_destroy(struct rcu_head *head) @@ -499,7 +499,7 @@ static int tcindex_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, bool ovr, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; @@ -522,7 +522,8 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, tca[TCA_RATE], ovr, extack); } -static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) +static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker, + bool rtnl_held) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter *f, *next; @@ -558,7 +559,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) } } -static void tcindex_destroy(struct tcf_proto *tp, +static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct tcindex_data *p = rtnl_dereference(tp->root); @@ -568,14 +569,14 @@ static void tcindex_destroy(struct tcf_proto *tp, walker.count = 0; walker.skip = 0; walker.fn = tcindex_destroy_element; - tcindex_walk(tp, &walker); + tcindex_walk(tp, &walker, true); call_rcu(&p->rcu, __tcindex_destroy); } static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = fh; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index e891f30d42e9..27d29c04dcc9 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -629,7 +629,8 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, return -ENOENT; } -static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void u32_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); @@ -663,7 +664,7 @@ static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct tc_u_hnode *ht = arg; struct tc_u_common *tp_c = tp->data; @@ -858,7 +859,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr, + struct nlattr **tca, void **arg, bool ovr, bool rtnl_held, struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; @@ -1123,7 +1124,8 @@ erridr: return err; } -static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; @@ -1281,7 +1283,7 @@ static void u32_bind_class(void *fh, u32 classid, unsigned long cl) } static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct tc_u_knode *n = fh; struct tc_u_hnode *ht_up, *ht_down; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 9a530cad2759..2283924fb56d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1914,14 +1914,14 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, chain = tcf_get_next_chain(block, chain)) { struct tcf_proto *tp; - for (tp = tcf_get_next_proto(chain, NULL); - tp; tp = tcf_get_next_proto(chain, tp)) { + for (tp = tcf_get_next_proto(chain, NULL, true); + tp; tp = tcf_get_next_proto(chain, tp, true)) { struct tcf_bind_args arg = {}; arg.w.fn = tcf_node_bind; arg.classid = clid; arg.cl = new_cl; - tp->ops->walk(tp, &arg.w); + tp->ops->walk(tp, &arg.w, true); } } } From dfcd2a2b2274ec029699be70ea0fe41a7a03f728 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 11 Feb 2019 10:55:46 +0200 Subject: [PATCH 1074/1436] net: sched: add flags to Qdisc class ops struct Extend Qdisc_class_ops with flags. Create enum to hold possible class ops flag values. Add first class ops flags value QDISC_CLASS_OPS_DOIT_UNLOCKED to indicate that class ops functions can be called without taking rtnl lock. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/sch_generic.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 365801c2a4f5..e50b729f8691 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -179,6 +179,7 @@ static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq) } struct Qdisc_class_ops { + unsigned int flags; /* Child qdisc manipulation */ struct netdev_queue * (*select_queue)(struct Qdisc *, struct tcmsg *); int (*graft)(struct Qdisc *, unsigned long cl, @@ -210,6 +211,13 @@ struct Qdisc_class_ops { struct gnet_dump *); }; +/* Qdisc_class_ops flag values */ + +/* Implements API that doesn't require rtnl lock */ +enum qdisc_class_ops_flags { + QDISC_CLASS_OPS_DOIT_UNLOCKED = 1, +}; + struct Qdisc_ops { struct Qdisc_ops *next; const struct Qdisc_class_ops *cl_ops; From 18d3eefb17cf1959bedb4f8cbb76b6aa05657776 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 11 Feb 2019 10:55:47 +0200 Subject: [PATCH 1075/1436] net: sched: refactor tcf_block_find() into standalone functions Refactor tcf_block_find() code into three standalone functions: - __tcf_qdisc_find() to lookup Qdisc and increment its reference counter. - __tcf_qdisc_cl_find() to lookup class. - __tcf_block_find() to lookup block and increment its reference counter. This change is necessary to allow netlink tc rule update handlers to call these functions directly in order to conditionally take rtnl lock according to Qdisc class ops flags before calling any of class ops functions. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 241 +++++++++++++++++++++++++++----------------- 1 file changed, 149 insertions(+), 92 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e8ed461e94af..5f9373ee47ce 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1101,6 +1101,142 @@ static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held) } } +/* Lookup Qdisc and increments its reference counter. + * Set parent, if necessary. + */ + +static int __tcf_qdisc_find(struct net *net, struct Qdisc **q, + u32 *parent, int ifindex, bool rtnl_held, + struct netlink_ext_ack *extack) +{ + const struct Qdisc_class_ops *cops; + struct net_device *dev; + int err = 0; + + if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) + return 0; + + rcu_read_lock(); + + /* Find link */ + dev = dev_get_by_index_rcu(net, ifindex); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } + + /* Find qdisc */ + if (!*parent) { + *q = dev->qdisc; + *parent = (*q)->handle; + } else { + *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); + if (!*q) { + NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); + err = -EINVAL; + goto errout_rcu; + } + } + + *q = qdisc_refcount_inc_nz(*q); + if (!*q) { + NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); + err = -EINVAL; + goto errout_rcu; + } + + /* Is it classful? */ + cops = (*q)->ops->cl_ops; + if (!cops) { + NL_SET_ERR_MSG(extack, "Qdisc not classful"); + err = -EINVAL; + goto errout_qdisc; + } + + if (!cops->tcf_block) { + NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); + err = -EOPNOTSUPP; + goto errout_qdisc; + } + +errout_rcu: + /* At this point we know that qdisc is not noop_qdisc, + * which means that qdisc holds a reference to net_device + * and we hold a reference to qdisc, so it is safe to release + * rcu read lock. + */ + rcu_read_unlock(); + return err; + +errout_qdisc: + rcu_read_unlock(); + + if (rtnl_held) + qdisc_put(*q); + else + qdisc_put_unlocked(*q); + *q = NULL; + + return err; +} + +static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl, + int ifindex, struct netlink_ext_ack *extack) +{ + if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) + return 0; + + /* Do we search for filter, attached to class? */ + if (TC_H_MIN(parent)) { + const struct Qdisc_class_ops *cops = q->ops->cl_ops; + + *cl = cops->find(q, parent); + if (*cl == 0) { + NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); + return -ENOENT; + } + } + + return 0; +} + +static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q, + unsigned long cl, int ifindex, + u32 block_index, + struct netlink_ext_ack *extack) +{ + struct tcf_block *block; + + if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) { + block = tcf_block_refcnt_get(net, block_index); + if (!block) { + NL_SET_ERR_MSG(extack, "Block of given index was not found"); + return ERR_PTR(-EINVAL); + } + } else { + const struct Qdisc_class_ops *cops = q->ops->cl_ops; + + block = cops->tcf_block(q, cl, extack); + if (!block) + return ERR_PTR(-EINVAL); + + if (tcf_block_shared(block)) { + NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters"); + return ERR_PTR(-EOPNOTSUPP); + } + + /* Always take reference to block in order to support execution + * of rules update path of cls API without rtnl lock. Caller + * must release block when it is finished using it. 'if' block + * of this conditional obtain reference to block by calling + * tcf_block_refcnt_get(). + */ + refcount_inc(&block->refcnt); + } + + return block; +} + static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei, bool rtnl_held) { @@ -1146,106 +1282,27 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, struct tcf_block *block; int err = 0; - if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) { - block = tcf_block_refcnt_get(net, block_index); - if (!block) { - NL_SET_ERR_MSG(extack, "Block of given index was not found"); - return ERR_PTR(-EINVAL); - } - } else { - const struct Qdisc_class_ops *cops; - struct net_device *dev; + ASSERT_RTNL(); - rcu_read_lock(); + err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack); + if (err) + goto errout; - /* Find link */ - dev = dev_get_by_index_rcu(net, ifindex); - if (!dev) { - rcu_read_unlock(); - return ERR_PTR(-ENODEV); - } + err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack); + if (err) + goto errout_qdisc; - /* Find qdisc */ - if (!*parent) { - *q = dev->qdisc; - *parent = (*q)->handle; - } else { - *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); - if (!*q) { - NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); - err = -EINVAL; - goto errout_rcu; - } - } - - *q = qdisc_refcount_inc_nz(*q); - if (!*q) { - NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); - err = -EINVAL; - goto errout_rcu; - } - - /* Is it classful? */ - cops = (*q)->ops->cl_ops; - if (!cops) { - NL_SET_ERR_MSG(extack, "Qdisc not classful"); - err = -EINVAL; - goto errout_rcu; - } - - if (!cops->tcf_block) { - NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); - err = -EOPNOTSUPP; - goto errout_rcu; - } - - /* At this point we know that qdisc is not noop_qdisc, - * which means that qdisc holds a reference to net_device - * and we hold a reference to qdisc, so it is safe to release - * rcu read lock. - */ - rcu_read_unlock(); - - /* Do we search for filter, attached to class? */ - if (TC_H_MIN(*parent)) { - *cl = cops->find(*q, *parent); - if (*cl == 0) { - NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); - err = -ENOENT; - goto errout_qdisc; - } - } - - /* And the last stroke */ - block = cops->tcf_block(*q, *cl, extack); - if (!block) { - err = -EINVAL; - goto errout_qdisc; - } - if (tcf_block_shared(block)) { - NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters"); - err = -EOPNOTSUPP; - goto errout_qdisc; - } - - /* Always take reference to block in order to support execution - * of rules update path of cls API without rtnl lock. Caller - * must release block when it is finished using it. 'if' block - * of this conditional obtain reference to block by calling - * tcf_block_refcnt_get(). - */ - refcount_inc(&block->refcnt); - } + block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack); + if (IS_ERR(block)) + goto errout_qdisc; return block; -errout_rcu: - rcu_read_unlock(); errout_qdisc: - if (*q) { + if (*q) qdisc_put(*q); - *q = NULL; - } +errout: + *q = NULL; return ERR_PTR(err); } From 470502de5bdb1ed0def643a4458593a40b8f6b66 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 11 Feb 2019 10:55:48 +0200 Subject: [PATCH 1076/1436] net: sched: unlock rules update API Register netlink protocol handlers for message types RTM_NEWTFILTER, RTM_DELTFILTER, RTM_GETTFILTER as unlocked. Set rtnl_held variable that tracks rtnl mutex state to be false by default. Introduce tcf_proto_is_unlocked() helper that is used to check tcf_proto_ops->flag to determine if ops can be called without taking rtnl lock. Manually lookup Qdisc, class and block in rule update handlers. Verify that both Qdisc ops and proto ops are unlocked before using any of their callbacks, and obtain rtnl lock otherwise. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 131 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 114 insertions(+), 17 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5f9373ee47ce..266fcb34fefe 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -163,6 +163,23 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) return TC_H_MAJ(first); } +static bool tcf_proto_is_unlocked(const char *kind) +{ + const struct tcf_proto_ops *ops; + bool ret; + + ops = tcf_proto_lookup_ops(kind, false, NULL); + /* On error return false to take rtnl lock. Proto lookup/create + * functions will perform lookup again and properly handle errors. + */ + if (IS_ERR(ops)) + return false; + + ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED); + module_put(ops->owner); + return ret; +} + static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, u32 prio, struct tcf_chain *chain, bool rtnl_held, @@ -1312,8 +1329,12 @@ static void tcf_block_release(struct Qdisc *q, struct tcf_block *block, if (!IS_ERR_OR_NULL(block)) tcf_block_refcnt_put(block, rtnl_held); - if (q) - qdisc_put(q); + if (q) { + if (rtnl_held) + qdisc_put(q); + else + qdisc_put_unlocked(q); + } } struct tcf_block_owner_item { @@ -1966,7 +1987,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *fh; int err; int tp_created; - bool rtnl_held = true; + bool rtnl_held = false; if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -1985,6 +2006,7 @@ replay: parent = t->tcm_parent; tp = NULL; cl = 0; + block = NULL; if (prio == 0) { /* If no priority is provided by the user, @@ -2001,8 +2023,27 @@ replay: /* Find head of filter chain. */ - block = tcf_block_find(net, &q, &parent, &cl, - t->tcm_ifindex, t->tcm_block_index, extack); + err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); + if (err) + return err; + + /* Take rtnl mutex if rtnl_held was set to true on previous iteration, + * block is shared (no qdisc found), qdisc is not unlocked, classifier + * type is not specified, classifier is not unlocked. + */ + if (rtnl_held || + (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || + !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) { + rtnl_held = true; + rtnl_lock(); + } + + err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); + if (err) + goto errout; + + block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, + extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout; @@ -2123,9 +2164,18 @@ errout_tp: tcf_chain_put(chain); } tcf_block_release(q, block, rtnl_held); - if (err == -EAGAIN) + + if (rtnl_held) + rtnl_unlock(); + + if (err == -EAGAIN) { + /* Take rtnl lock in case EAGAIN is caused by concurrent flush + * of target chain. + */ + rtnl_held = true; /* Replay the request. */ goto replay; + } return err; errout_locked: @@ -2146,12 +2196,12 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct Qdisc *q = NULL; struct tcf_chain_info chain_info; struct tcf_chain *chain = NULL; - struct tcf_block *block; + struct tcf_block *block = NULL; struct tcf_proto *tp = NULL; unsigned long cl = 0; void *fh = NULL; int err; - bool rtnl_held = true; + bool rtnl_held = false; if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -2172,8 +2222,27 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, /* Find head of filter chain. */ - block = tcf_block_find(net, &q, &parent, &cl, - t->tcm_ifindex, t->tcm_block_index, extack); + err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); + if (err) + return err; + + /* Take rtnl mutex if flushing whole chain, block is shared (no qdisc + * found), qdisc is not unlocked, classifier type is not specified, + * classifier is not unlocked. + */ + if (!prio || + (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || + !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) { + rtnl_held = true; + rtnl_lock(); + } + + err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); + if (err) + goto errout; + + block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, + extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout; @@ -2255,6 +2324,10 @@ errout: tcf_chain_put(chain); } tcf_block_release(q, block, rtnl_held); + + if (rtnl_held) + rtnl_unlock(); + return err; errout_locked: @@ -2275,12 +2348,12 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct Qdisc *q = NULL; struct tcf_chain_info chain_info; struct tcf_chain *chain = NULL; - struct tcf_block *block; + struct tcf_block *block = NULL; struct tcf_proto *tp = NULL; unsigned long cl = 0; void *fh = NULL; int err; - bool rtnl_held = true; + bool rtnl_held = false; err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) @@ -2298,8 +2371,26 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, /* Find head of filter chain. */ - block = tcf_block_find(net, &q, &parent, &cl, - t->tcm_ifindex, t->tcm_block_index, extack); + err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); + if (err) + return err; + + /* Take rtnl mutex if block is shared (no qdisc found), qdisc is not + * unlocked, classifier type is not specified, classifier is not + * unlocked. + */ + if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || + !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) { + rtnl_held = true; + rtnl_lock(); + } + + err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); + if (err) + goto errout; + + block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, + extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout; @@ -2352,6 +2443,10 @@ errout: tcf_chain_put(chain); } tcf_block_release(q, block, rtnl_held); + + if (rtnl_held) + rtnl_unlock(); + return err; } @@ -3214,10 +3309,12 @@ static int __init tc_filter_init(void) if (err) goto err_rhash_setup_block_ht; - rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, + RTNL_FLAG_DOIT_UNLOCKED); + rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, + RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter, - tc_dump_tfilter, 0); + tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain, From e1f1bd9bfbedcfce428ee7e1b82a6ec12d4c3863 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Mon, 11 Feb 2019 19:17:43 +0200 Subject: [PATCH 1077/1436] net: ena: fix race between link up and device initalization Fix race condition between ena_update_on_link_change() and ena_restore_device(). This race can occur if link notification arrives while the driver is performing a reset sequence. In this case link can be set up, enabling the device, before it is fully restored. If packets are sent at this time, the driver might access uninitialized data structures, causing kernel crash. Move the clearing of ENA_FLAG_ONGOING_RESET and netif_carrier_on() after ena_up() to ensure the device is ready when link is set up. Fixes: d18e4f683445 ("net: ena: fix race condition between device reset and link up setup") Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index a70bb1bb90e7..a6eacf2099c3 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2663,11 +2663,6 @@ static int ena_restore_device(struct ena_adapter *adapter) goto err_device_destroy; } - clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags); - /* Make sure we don't have a race with AENQ Links state handler */ - if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags)) - netif_carrier_on(adapter->netdev); - rc = ena_enable_msix_and_set_admin_interrupts(adapter, adapter->num_queues); if (rc) { @@ -2684,6 +2679,11 @@ static int ena_restore_device(struct ena_adapter *adapter) } set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); + + clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags); + if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags)) + netif_carrier_on(adapter->netdev); + mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); dev_err(&pdev->dev, "Device reset completed successfully, Driver info: %s\n", From d9b8656da92223eb004b4f4db74fe48e7433f7b2 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Mon, 11 Feb 2019 19:17:44 +0200 Subject: [PATCH 1078/1436] net: ena: update driver version from 2.0.2 to 2.0.3 Update driver version due to bug fix. Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index dc8b6173d8d8..63870072cbbd 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -45,7 +45,7 @@ #define DRV_MODULE_VER_MAJOR 2 #define DRV_MODULE_VER_MINOR 0 -#define DRV_MODULE_VER_SUBMINOR 2 +#define DRV_MODULE_VER_SUBMINOR 3 #define DRV_MODULE_NAME "ena" #ifndef DRV_MODULE_VERSION From 8015d93ebd27484418d4952284fd02172fa4b0b2 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 11 Feb 2019 13:06:14 -0800 Subject: [PATCH 1079/1436] net_sched: fix a race condition in tcindex_destroy() tcindex_destroy() invokes tcindex_destroy_element() via a walker to delete each filter result in its perfect hash table, and tcindex_destroy_element() calls tcindex_delete() which schedules tcf RCU works to do the final deletion work. Unfortunately this races with the RCU callback __tcindex_destroy(), which could lead to use-after-free as reported by Adrian. Fix this by migrating this RCU callback to tcf RCU work too, as that workqueue is ordered, we will not have use-after-free. Note, we don't need to hold netns refcnt because we don't call tcf_exts_destroy() here. Fixes: 27ce4f05e2ab ("net_sched: use tcf_queue_work() in tcindex filter") Reported-by: Adrian Cc: Ben Hutchings Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 9ccc93f257db..79b52a637dda 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -48,7 +48,7 @@ struct tcindex_data { u32 hash; /* hash table size; 0 if undefined */ u32 alloc_hash; /* allocated size */ u32 fall_through; /* 0: only classify if explicit match */ - struct rcu_head rcu; + struct rcu_work rwork; }; static inline int tcindex_filter_is_set(struct tcindex_filter_result *r) @@ -229,9 +229,11 @@ static int tcindex_destroy_element(struct tcf_proto *tp, return tcindex_delete(tp, arg, &last, NULL); } -static void __tcindex_destroy(struct rcu_head *head) +static void tcindex_destroy_work(struct work_struct *work) { - struct tcindex_data *p = container_of(head, struct tcindex_data, rcu); + struct tcindex_data *p = container_of(to_rcu_work(work), + struct tcindex_data, + rwork); kfree(p->perfect); kfree(p->h); @@ -258,9 +260,11 @@ static int tcindex_filter_result_init(struct tcindex_filter_result *r) return tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); } -static void __tcindex_partial_destroy(struct rcu_head *head) +static void tcindex_partial_destroy_work(struct work_struct *work) { - struct tcindex_data *p = container_of(head, struct tcindex_data, rcu); + struct tcindex_data *p = container_of(to_rcu_work(work), + struct tcindex_data, + rwork); kfree(p->perfect); kfree(p); @@ -478,7 +482,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } if (oldp) - call_rcu(&oldp->rcu, __tcindex_partial_destroy); + tcf_queue_work(&oldp->rwork, tcindex_partial_destroy_work); return 0; errout_alloc: @@ -570,7 +574,7 @@ static void tcindex_destroy(struct tcf_proto *tp, walker.fn = tcindex_destroy_element; tcindex_walk(tp, &walker); - call_rcu(&p->rcu, __tcindex_destroy); + tcf_queue_work(&p->rwork, tcindex_destroy_work); } From 033b228e7f26b29ae37f8bfa1bc6b209a5365e9f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 11 Feb 2019 13:06:15 -0800 Subject: [PATCH 1080/1436] net_sched: fix a memory leak in cls_tcindex When tcindex_destroy() destroys all the filter results in the perfect hash table, it invokes the walker to delete each of them. However, results with class==0 are skipped in either tcindex_walk() or tcindex_delete(), which causes a memory leak reported by kmemleak. This patch fixes it by skipping the walker and directly deleting these filter results so we don't miss any filter result. As a result of this change, we have to initialize exts->net properly in tcindex_alloc_perfect_hash(). For net-next, we need to consider whether we should initialize ->net in tcf_exts_init() instead, before that just directly test CONFIG_NET_CLS_ACT=y. Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 46 +++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 79b52a637dda..70ea5b1a7889 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -221,14 +221,6 @@ found: return 0; } -static int tcindex_destroy_element(struct tcf_proto *tp, - void *arg, struct tcf_walker *walker) -{ - bool last; - - return tcindex_delete(tp, arg, &last, NULL); -} - static void tcindex_destroy_work(struct work_struct *work) { struct tcindex_data *p = container_of(to_rcu_work(work), @@ -279,7 +271,7 @@ static void tcindex_free_perfect_hash(struct tcindex_data *cp) kfree(cp->perfect); } -static int tcindex_alloc_perfect_hash(struct tcindex_data *cp) +static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp) { int i, err = 0; @@ -293,6 +285,9 @@ static int tcindex_alloc_perfect_hash(struct tcindex_data *cp) TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) goto errout; +#ifdef CONFIG_NET_CLS_ACT + cp->perfect[i].exts.net = net; +#endif } return 0; @@ -341,7 +336,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (p->perfect) { int i; - if (tcindex_alloc_perfect_hash(cp) < 0) + if (tcindex_alloc_perfect_hash(net, cp) < 0) goto errout; for (i = 0; i < cp->hash; i++) cp->perfect[i].res = p->perfect[i].res; @@ -410,7 +405,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = -ENOMEM; if (!cp->perfect && !cp->h) { if (valid_perfect_hash(cp)) { - if (tcindex_alloc_perfect_hash(cp) < 0) + if (tcindex_alloc_perfect_hash(net, cp) < 0) goto errout_alloc; balloc = 1; } else { @@ -566,13 +561,32 @@ static void tcindex_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcf_walker walker; + int i; pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p); - walker.count = 0; - walker.skip = 0; - walker.fn = tcindex_destroy_element; - tcindex_walk(tp, &walker); + + if (p->perfect) { + for (i = 0; i < p->hash; i++) { + struct tcindex_filter_result *r = p->perfect + i; + + tcf_unbind_filter(tp, &r->res); + if (tcf_exts_get_net(&r->exts)) + tcf_queue_work(&r->rwork, + tcindex_destroy_rexts_work); + else + __tcindex_destroy_rexts(r); + } + } + + for (i = 0; p->h && i < p->hash; i++) { + struct tcindex_filter *f, *next; + bool last; + + for (f = rtnl_dereference(p->h[i]); f; f = next) { + next = rtnl_dereference(f->next); + tcindex_delete(tp, &f->result, &last, NULL); + } + } tcf_queue_work(&p->rwork, tcindex_destroy_work); } From 1db817e75f5b9387b8db11e37d5f0624eb9223e0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 11 Feb 2019 13:06:16 -0800 Subject: [PATCH 1081/1436] net_sched: fix two more memory leaks in cls_tcindex struct tcindex_filter_result contains two parts: struct tcf_exts and struct tcf_result. For the local variable 'cr', its exts part is never used but initialized without being released properly on success path. So just completely remove the exts part to fix this leak. For the local variable 'new_filter_result', it is never properly released if not used by 'r' on success path. Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 70ea5b1a7889..38bb882bb958 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -304,9 +304,9 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct nlattr *est, bool ovr, struct netlink_ext_ack *extack) { struct tcindex_filter_result new_filter_result, *old_r = r; - struct tcindex_filter_result cr; struct tcindex_data *cp = NULL, *oldp; struct tcindex_filter *f = NULL; /* make gcc behave */ + struct tcf_result cr = {}; int err, balloc = 0; struct tcf_exts e; @@ -345,13 +345,10 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, cp->h = p->h; err = tcindex_filter_result_init(&new_filter_result); - if (err < 0) - goto errout1; - err = tcindex_filter_result_init(&cr); if (err < 0) goto errout1; if (old_r) - cr.res = r->res; + cr = r->res; if (tb[TCA_TCINDEX_HASH]) cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); @@ -442,8 +439,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } if (tb[TCA_TCINDEX_CLASSID]) { - cr.res.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]); - tcf_bind_filter(tp, &cr.res, base); + cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]); + tcf_bind_filter(tp, &cr, base); } if (old_r && old_r != r) { @@ -455,7 +452,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } oldp = p; - r->res = cr.res; + r->res = cr; tcf_exts_change(&r->exts, &e); rcu_assign_pointer(tp->root, cp); @@ -474,6 +471,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, ; /* nothing */ rcu_assign_pointer(*fp, f); + } else { + tcf_exts_destroy(&new_filter_result.exts); } if (oldp) @@ -486,7 +485,6 @@ errout_alloc: else if (balloc == 2) kfree(cp->h); errout1: - tcf_exts_destroy(&cr.exts); tcf_exts_destroy(&new_filter_result.exts); errout: kfree(cp); From 2fdeee2549231b1f989f011bb18191f5660d3745 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 11 Feb 2019 21:59:51 -0800 Subject: [PATCH 1082/1436] team: avoid complex list operations in team_nl_cmd_options_set() The current opt_inst_list operations inside team_nl_cmd_options_set() is too complex to track: LIST_HEAD(opt_inst_list); nla_for_each_nested(...) { list_for_each_entry(opt_inst, &team->option_inst_list, list) { if (__team_option_inst_tmp_find(&opt_inst_list, opt_inst)) continue; list_add(&opt_inst->tmp_list, &opt_inst_list); } } team_nl_send_event_options_get(team, &opt_inst_list); as while we retrieve 'opt_inst' from team->option_inst_list, it could be added to the local 'opt_inst_list' for multiple times. The __team_option_inst_tmp_find() doesn't work, as the setter team_mode_option_set() still calls team->ops.exit() which uses ->tmp_list too in __team_options_change_check(). Simplify the list operations by moving the 'opt_inst_list' and team_nl_send_event_options_get() into the nla_for_each_nested() loop so that it can be guranteed that we won't insert a same list entry for multiple times. Therefore, __team_option_inst_tmp_find() can be removed too. Fixes: 4fb0534fb7bb ("team: avoid adding twice the same option to the event list") Fixes: 2fcdb2c9e659 ("team: allow to send multiple set events in one message") Reported-by: syzbot+4d4af685432dc0e56c91@syzkaller.appspotmail.com Reported-by: syzbot+68ee510075cf64260cc4@syzkaller.appspotmail.com Cc: Jiri Pirko Cc: Paolo Abeni Signed-off-by: Cong Wang Acked-by: Jiri Pirko Reviewed-by: Paolo Abeni Signed-off-by: David S. Miller --- drivers/net/team/team.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index afd9d25d1992..958f1cf67282 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -256,17 +256,6 @@ static void __team_option_inst_mark_removed_port(struct team *team, } } -static bool __team_option_inst_tmp_find(const struct list_head *opts, - const struct team_option_inst *needle) -{ - struct team_option_inst *opt_inst; - - list_for_each_entry(opt_inst, opts, tmp_list) - if (opt_inst == needle) - return true; - return false; -} - static int __team_options_register(struct team *team, const struct team_option *option, size_t option_count) @@ -2460,7 +2449,6 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) int err = 0; int i; struct nlattr *nl_option; - LIST_HEAD(opt_inst_list); rtnl_lock(); @@ -2480,6 +2468,7 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) struct nlattr *opt_attrs[TEAM_ATTR_OPTION_MAX + 1]; struct nlattr *attr; struct nlattr *attr_data; + LIST_HEAD(opt_inst_list); enum team_option_type opt_type; int opt_port_ifindex = 0; /* != 0 for per-port options */ u32 opt_array_index = 0; @@ -2584,23 +2573,17 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) if (err) goto team_put; opt_inst->changed = true; - - /* dumb/evil user-space can send us duplicate opt, - * keep only the last one - */ - if (__team_option_inst_tmp_find(&opt_inst_list, - opt_inst)) - continue; - list_add(&opt_inst->tmp_list, &opt_inst_list); } if (!opt_found) { err = -ENOENT; goto team_put; } - } - err = team_nl_send_event_options_get(team, &opt_inst_list); + err = team_nl_send_event_options_get(team, &opt_inst_list); + if (err) + break; + } team_put: team_nl_team_put(team); From a4cb5bdb754afe21f3e9e7164213e8600cf69427 Mon Sep 17 00:00:00 2001 From: Nicolas Morey-Chaisemartin Date: Tue, 5 Feb 2019 18:21:02 +0100 Subject: [PATCH 1083/1436] xprtrdma: Make sure Send CQ is allocated on an existing compvec Make sure the device has at least 2 completion vectors before allocating to compvec#1 Fixes: a4699f5647f3 (xprtrdma: Put Send CQ in IB_POLL_WORKQUEUE mode) Signed-off-by: Nicolas Morey-Chaisemartin Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 4994e75945b8..21113bfd4eca 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -527,7 +527,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, sendcq = ib_alloc_cq(ia->ri_device, NULL, ep->rep_attr.cap.max_send_wr + 1, - 1, IB_POLL_WORKQUEUE); + ia->ri_device->num_comp_vectors > 1 ? 1 : 0, + IB_POLL_WORKQUEUE); if (IS_ERR(sendcq)) { rc = PTR_ERR(sendcq); goto out1; From ad6fef776927b4172e55a1bd97b3cd441a0c261c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 12 Feb 2019 19:27:34 +0100 Subject: [PATCH 1084/1436] rpc: properly check debugfs dentry before using it debugfs can now report an error code if something went wrong instead of just NULL. So if the return value is to be used as a "real" dentry, it needs to be checked if it is an error before dereferencing it. This is now happening because of ff9fb72bc077 ("debugfs: return error values, not NULL"), but why debugfs files are not being created properly is an older issue, probably one that has always been there and should probably be looked at... Cc: "J. Bruce Fields" Cc: Jeff Layton Cc: Trond Myklebust Cc: Anna Schumaker Cc: linux-nfs@vger.kernel.org Cc: netdev@vger.kernel.org Reported-by: David Howells Tested-by: David Howells Signed-off-by: Greg Kroah-Hartman Signed-off-by: Anna Schumaker --- net/sunrpc/debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index 45a033329cd4..19bb356230ed 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -146,7 +146,7 @@ rpc_clnt_debugfs_register(struct rpc_clnt *clnt) rcu_read_lock(); xprt = rcu_dereference(clnt->cl_xprt); /* no "debugfs" dentry? Don't bother with the symlink. */ - if (!xprt->debugfs) { + if (IS_ERR_OR_NULL(xprt->debugfs)) { rcu_read_unlock(); return; } From d2ceb7e57086750ea6198a31fd942d98099a0786 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 6 Feb 2019 06:09:43 -0500 Subject: [PATCH 1085/1436] NFS: Don't use page_file_mapping after removing the page If nfs_page_async_flush() removes the page from the mapping, then we can't use page_file_mapping() on it as nfs_updatepate() is wont to do when receiving an error. Instead, push the mapping to the stack before the page is possibly truncated. Fixes: 8fc75bed96bb ("NFS: Fix up return value on fatal errors in nfs_page_async_flush()") Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f12cb31a41e5..d09c9f878141 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -238,9 +238,9 @@ out: } /* A writeback failed: mark the page as bad, and invalidate the page cache */ -static void nfs_set_pageerror(struct page *page) +static void nfs_set_pageerror(struct address_space *mapping) { - nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page)); + nfs_zap_mapping(mapping->host, mapping); } /* @@ -994,7 +994,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) nfs_list_remove_request(req); if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes < bytes)) { - nfs_set_pageerror(req->wb_page); + nfs_set_pageerror(page_file_mapping(req->wb_page)); nfs_context_set_write_error(req->wb_context, hdr->error); goto remove_req; } @@ -1348,7 +1348,8 @@ int nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsigned int count) { struct nfs_open_context *ctx = nfs_file_open_context(file); - struct inode *inode = page_file_mapping(page)->host; + struct address_space *mapping = page_file_mapping(page); + struct inode *inode = mapping->host; int status = 0; nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); @@ -1366,7 +1367,7 @@ int nfs_updatepage(struct file *file, struct page *page, status = nfs_writepage_setup(ctx, page, offset, count); if (status < 0) - nfs_set_pageerror(page); + nfs_set_pageerror(mapping); else __set_page_dirty_nobuffers(page); out: From bd3606c29fccd0fe3d1061a61811cabf1abf7110 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 12 Feb 2019 13:39:05 -0800 Subject: [PATCH 1086/1436] rocker: Remove port_attr_bridge_flags_get assignment After 610d2b601bba ("rocker: Remove getting PORT_BRIDGE_FLAGS") we no longer have a port_attr_bridge_flags_get member in the rocker_world_ops structre, fix that. Fixes: 610d2b601bba ("rocker: Remove getting PORT_BRIDGE_FLAGS") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker_ofdpa.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 9c07f6040720..fa296a7c255d 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -2813,7 +2813,6 @@ struct rocker_world_ops rocker_ofdpa_ops = { .port_stop = ofdpa_port_stop, .port_attr_stp_state_set = ofdpa_port_attr_stp_state_set, .port_attr_bridge_flags_set = ofdpa_port_attr_bridge_flags_set, - .port_attr_bridge_flags_get = ofdpa_port_attr_bridge_flags_get, .port_attr_bridge_flags_support_get = ofdpa_port_attr_bridge_flags_support_get, .port_attr_bridge_ageing_time_set = ofdpa_port_attr_bridge_ageing_time_set, .port_obj_vlan_add = ofdpa_port_obj_vlan_add, From dfcc34c99f3ebc16b787b118763bf9cb6b1efc7a Mon Sep 17 00:00:00 2001 From: Nate Dailey Date: Thu, 7 Feb 2019 14:19:01 -0500 Subject: [PATCH 1087/1436] md/raid1: don't clear bitmap bits on interrupted recovery. sync_request_write no longer submits writes to a Faulty device. This has the unfortunate side effect that bitmap bits can be incorrectly cleared if a recovery is interrupted (previously, end_sync_write would have prevented this). This means the next recovery may not copy everything it should, potentially corrupting data. Add a function for doing the proper md_bitmap_end_sync, called from end_sync_write and the Faulty case in sync_request_write. backport note to 4.14: s/md_bitmap_end_sync/bitmap_end_sync Cc: stable@vger.kernel.org 4.14+ Fixes: 0c9d5b127f69 ("md/raid1: avoid reusing a resync bio after error handling.") Reviewed-by: Jack Wang Tested-by: Jack Wang Signed-off-by: Nate Dailey Signed-off-by: Song Liu --- drivers/md/raid1.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 1d54109071cc..fa47249fa3e4 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1863,6 +1863,20 @@ static void end_sync_read(struct bio *bio) reschedule_retry(r1_bio); } +static void abort_sync_write(struct mddev *mddev, struct r1bio *r1_bio) +{ + sector_t sync_blocks = 0; + sector_t s = r1_bio->sector; + long sectors_to_go = r1_bio->sectors; + + /* make sure these bits don't get cleared. */ + do { + md_bitmap_end_sync(mddev->bitmap, s, &sync_blocks, 1); + s += sync_blocks; + sectors_to_go -= sync_blocks; + } while (sectors_to_go > 0); +} + static void end_sync_write(struct bio *bio) { int uptodate = !bio->bi_status; @@ -1874,15 +1888,7 @@ static void end_sync_write(struct bio *bio) struct md_rdev *rdev = conf->mirrors[find_bio_disk(r1_bio, bio)].rdev; if (!uptodate) { - sector_t sync_blocks = 0; - sector_t s = r1_bio->sector; - long sectors_to_go = r1_bio->sectors; - /* make sure these bits doesn't get cleared. */ - do { - md_bitmap_end_sync(mddev->bitmap, s, &sync_blocks, 1); - s += sync_blocks; - sectors_to_go -= sync_blocks; - } while (sectors_to_go > 0); + abort_sync_write(mddev, r1_bio); set_bit(WriteErrorSeen, &rdev->flags); if (!test_and_set_bit(WantReplacement, &rdev->flags)) set_bit(MD_RECOVERY_NEEDED, & @@ -2172,8 +2178,10 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) (i == r1_bio->read_disk || !test_bit(MD_RECOVERY_SYNC, &mddev->recovery)))) continue; - if (test_bit(Faulty, &conf->mirrors[i].rdev->flags)) + if (test_bit(Faulty, &conf->mirrors[i].rdev->flags)) { + abort_sync_write(mddev, r1_bio); continue; + } bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); if (test_bit(FailFast, &conf->mirrors[i].rdev->flags)) From 69056ee6a8a3d576ed31e38b3b14c70d6c74edcc Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 12 Feb 2019 15:35:51 -0800 Subject: [PATCH 1088/1436] Revert "mm: don't reclaim inodes with many attached pages" This reverts commit a76cf1a474d7d ("mm: don't reclaim inodes with many attached pages"). This change causes serious changes to page cache and inode cache behaviour and balance, resulting in major performance regressions when combining worklaods such as large file copies and kernel compiles. https://bugzilla.kernel.org/show_bug.cgi?id=202441 This change is a hack to work around the problems introduced by changing how agressive shrinkers are on small caches in commit 172b06c32b94 ("mm: slowly shrink slabs with a relatively small number of objects"). It creates more problems than it solves, wasn't adequately reviewed or tested, so it needs to be reverted. Link: http://lkml.kernel.org/r/20190130041707.27750-2-david@fromorbit.com Fixes: a76cf1a474d7d ("mm: don't reclaim inodes with many attached pages") Signed-off-by: Dave Chinner Cc: Wolfgang Walter Cc: Roman Gushchin Cc: Spock Cc: Rik van Riel Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 0cd47fe0dbe5..73432e64f874 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -730,11 +730,8 @@ static enum lru_status inode_lru_isolate(struct list_head *item, return LRU_REMOVED; } - /* - * Recently referenced inodes and inodes with many attached pages - * get one more pass. - */ - if (inode->i_state & I_REFERENCED || inode->i_data.nrpages > 1) { + /* recently referenced inodes get one more pass */ + if (inode->i_state & I_REFERENCED) { inode->i_state &= ~I_REFERENCED; spin_unlock(&inode->i_lock); return LRU_ROTATE; From a9a238e83fbb0df31c3b9b67003f8f9d1d1b6c96 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 12 Feb 2019 15:35:55 -0800 Subject: [PATCH 1089/1436] Revert "mm: slowly shrink slabs with a relatively small number of objects" This reverts commit 172b06c32b9497 ("mm: slowly shrink slabs with a relatively small number of objects"). This change changes the agressiveness of shrinker reclaim, causing small cache and low priority reclaim to greatly increase scanning pressure on small caches. As a result, light memory pressure has a disproportionate affect on small caches, and causes large caches to be reclaimed much faster than previously. As a result, it greatly perturbs the delicate balance of the VFS caches (dentry/inode vs file page cache) such that the inode/dentry caches are reclaimed much, much faster than the page cache and this drives us into several other caching imbalance related problems. As such, this is a bad change and needs to be reverted. [ Needs some massaging to retain the later seekless shrinker modifications.] Link: http://lkml.kernel.org/r/20190130041707.27750-3-david@fromorbit.com Fixes: 172b06c32b9497 ("mm: slowly shrink slabs with a relatively small number of objects") Signed-off-by: Dave Chinner Cc: Wolfgang Walter Cc: Roman Gushchin Cc: Spock Cc: Rik van Riel Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index a714c4f800e9..e979705bbf32 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -491,16 +491,6 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, delta = freeable / 2; } - /* - * Make sure we apply some minimal pressure on default priority - * even on small cgroups. Stale objects are not only consuming memory - * by themselves, but can also hold a reference to a dying cgroup, - * preventing it from being reclaimed. A dying cgroup with all - * corresponding structures like per-cpu stats and kmem caches - * can be really big, so it may lead to a significant waste of memory. - */ - delta = max_t(unsigned long long, delta, min(freeable, batch_size)); - total_scan += delta; if (total_scan < 0) { pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n", From 414fd080d125408cb15d04ff4907e1dd8145c8c7 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Tue, 12 Feb 2019 15:35:58 -0800 Subject: [PATCH 1090/1436] mm/gup: fix gup_pmd_range() for dax For dax pmd, pmd_trans_huge() returns false but pmd_huge() returns true on x86. So the function works as long as hugetlb is configured. However, dax doesn't depend on hugetlb. Link: http://lkml.kernel.org/r/20190111034033.601-1-yuzhao@google.com Signed-off-by: Yu Zhao Reviewed-by: Jan Kara Cc: Dan Williams Cc: Huang Ying Cc: Matthew Wilcox Cc: Keith Busch Cc: "Michael S . Tsirkin" Cc: John Hubbard Cc: Wei Yang Cc: Mike Rapoport Cc: Andrea Arcangeli Cc: "Kirill A . Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/gup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/gup.c b/mm/gup.c index 05acd7e2eb22..75029649baca 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1674,7 +1674,8 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, if (!pmd_present(pmd)) return 0; - if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) { + if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) || + pmd_devmap(pmd))) { /* * NUMA hinting faults need to be handled in the GUP * slowpath for accounting purposes and so that they From 2f1ee0913ce58efe7f18fbd518bd54c598559b89 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Tue, 12 Feb 2019 15:36:03 -0800 Subject: [PATCH 1091/1436] Revert "mm: use early_pfn_to_nid in page_ext_init" This reverts commit fe53ca54270a ("mm: use early_pfn_to_nid in page_ext_init"). When booting a system with "page_owner=on", start_kernel page_ext_init invoke_init_callbacks init_section_page_ext init_page_owner init_early_allocated_pages init_zones_in_node init_pages_in_zone lookup_page_ext page_to_nid The issue here is that page_to_nid() will not work since some page flags have no node information until later in page_alloc_init_late() due to DEFERRED_STRUCT_PAGE_INIT. Hence, it could trigger an out-of-bounds access with an invalid nid. UBSAN: Undefined behaviour in ./include/linux/mm.h:1104:50 index 7 is out of range for type 'zone [5]' Also, kernel will panic since flags were poisoned earlier with, CONFIG_DEBUG_VM_PGFLAGS=y CONFIG_NODE_NOT_IN_PAGE_FLAGS=n start_kernel setup_arch pagetable_init paging_init sparse_init sparse_init_nid memblock_alloc_try_nid_raw It did not handle it well in init_pages_in_zone() which ends up calling page_to_nid(). page:ffffea0004200000 is uninitialized and poisoned raw: ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff raw: ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) page_owner info is not active (free page?) kernel BUG at include/linux/mm.h:990! RIP: 0010:init_page_owner+0x486/0x520 This means that assumptions behind commit fe53ca54270a ("mm: use early_pfn_to_nid in page_ext_init") are incomplete. Therefore, revert the commit for now. A proper way to move the page_owner initialization to sooner is to hook into memmap initialization. Link: http://lkml.kernel.org/r/20190115202812.75820-1-cai@lca.pw Signed-off-by: Qian Cai Acked-by: Michal Hocko Cc: Pasha Tatashin Cc: Mel Gorman Cc: Yang Shi Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 3 ++- mm/page_ext.c | 4 +--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/init/main.c b/init/main.c index e2e80ca3165a..c86a1c8f19f4 100644 --- a/init/main.c +++ b/init/main.c @@ -695,7 +695,6 @@ asmlinkage __visible void __init start_kernel(void) initrd_start = 0; } #endif - page_ext_init(); kmemleak_init(); setup_per_cpu_pageset(); numa_policy_init(); @@ -1131,6 +1130,8 @@ static noinline void __init kernel_init_freeable(void) sched_init_smp(); page_alloc_init_late(); + /* Initialize page ext after all struct pages are initialized. */ + page_ext_init(); do_basic_setup(); diff --git a/mm/page_ext.c b/mm/page_ext.c index ae44f7adbe07..8c78b8d45117 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -398,10 +398,8 @@ void __init page_ext_init(void) * We know some arch can have a nodes layout such as * -------------pfn--------------> * N0 | N1 | N2 | N0 | N1 | N2|.... - * - * Take into account DEFERRED_STRUCT_PAGE_INIT. */ - if (early_pfn_to_nid(pfn) != nid) + if (pfn_to_nid(pfn) != nid) continue; if (init_section_page_ext(pfn, nid)) goto oom; From 76ce2a80a28ef5caa8cc0cd41ad04138fb7ffeed Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 12 Feb 2019 15:36:06 -0800 Subject: [PATCH 1092/1436] Rename include/{uapi => }/asm-generic/shmparam.h really Commit 36c0f7f0f899 ("arch: unexport asm/shmparam.h for all architectures") is different from the patch I submitted. My patch is this: https://lore.kernel.org/lkml/1546904307-11124-1-git-send-email-yamada.masahiro@socionext.com/T/#u The file renaming part: rename include/{uapi => }/asm-generic/shmparam.h (100%) was lost when it was picked up. I think it was an accident because Andrew did not say anything. Link: http://lkml.kernel.org/r/1549158277-24558-1-git-send-email-yamada.masahiro@socionext.com Fixes: 36c0f7f0f899 ("arch: unexport asm/shmparam.h for all architectures") Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/{uapi => }/asm-generic/shmparam.h | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename include/{uapi => }/asm-generic/shmparam.h (100%) diff --git a/include/uapi/asm-generic/shmparam.h b/include/asm-generic/shmparam.h similarity index 100% rename from include/uapi/asm-generic/shmparam.h rename to include/asm-generic/shmparam.h From 27dd768ed8db48beefc4d9e006c58e7a00342bde Mon Sep 17 00:00:00 2001 From: Sandeep Patil Date: Tue, 12 Feb 2019 15:36:11 -0800 Subject: [PATCH 1093/1436] mm: proc: smaps_rollup: fix pss_locked calculation The 'pss_locked' field of smaps_rollup was being calculated incorrectly. It accumulated the current pss everytime a locked VMA was found. Fix that by adding to 'pss_locked' the same time as that of 'pss' if the vma being walked is locked. Link: http://lkml.kernel.org/r/20190203065425.14650-1-sspatil@android.com Fixes: 493b0e9d945f ("mm: add /proc/pid/smaps_rollup") Signed-off-by: Sandeep Patil Acked-by: Vlastimil Babka Reviewed-by: Joel Fernandes (Google) Cc: Alexey Dobriyan Cc: Daniel Colascione Cc: [4.14.x, 4.19.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index f0ec9edab2f3..85b0ef890b28 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -423,7 +423,7 @@ struct mem_size_stats { }; static void smaps_account(struct mem_size_stats *mss, struct page *page, - bool compound, bool young, bool dirty) + bool compound, bool young, bool dirty, bool locked) { int i, nr = compound ? 1 << compound_order(page) : 1; unsigned long size = nr * PAGE_SIZE; @@ -450,24 +450,31 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, else mss->private_clean += size; mss->pss += (u64)size << PSS_SHIFT; + if (locked) + mss->pss_locked += (u64)size << PSS_SHIFT; return; } for (i = 0; i < nr; i++, page++) { int mapcount = page_mapcount(page); + unsigned long pss = (PAGE_SIZE << PSS_SHIFT); if (mapcount >= 2) { if (dirty || PageDirty(page)) mss->shared_dirty += PAGE_SIZE; else mss->shared_clean += PAGE_SIZE; - mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; + mss->pss += pss / mapcount; + if (locked) + mss->pss_locked += pss / mapcount; } else { if (dirty || PageDirty(page)) mss->private_dirty += PAGE_SIZE; else mss->private_clean += PAGE_SIZE; - mss->pss += PAGE_SIZE << PSS_SHIFT; + mss->pss += pss; + if (locked) + mss->pss_locked += pss; } } } @@ -490,6 +497,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; + bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; if (pte_present(*pte)) { @@ -532,7 +540,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, if (!page) return; - smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte)); + smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -541,6 +549,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; + bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page; /* FOLL_DUMP will return -EFAULT on huge zero page */ @@ -555,7 +564,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, /* pass */; else VM_BUG_ON_PAGE(1, page); - smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd)); + smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked); } #else static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, @@ -737,11 +746,8 @@ static void smap_gather_stats(struct vm_area_struct *vma, } } #endif - /* mmap_sem is held in m_start */ walk_page_vma(vma, &smaps_walk); - if (vma->vm_flags & VM_LOCKED) - mss->pss_locked += mss->pss; } #define SEQ_PUT_DEC(str, val) \ From 0f56623dc48aaee2ead9d61098fc6c2a2d864c78 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 12 Feb 2019 15:39:58 -0800 Subject: [PATCH 1094/1436] mlxsw: spectrum_switchdev: Remove unused variables After 1ecb195753a1 ("mlxsw: spectrum_switchdev: Remove getting PORT_BRIDGE_FLAGS") we are not accessing any driver private data structure, so the mlxsw_sp_port and mlxsw_sp variables are unused. Fixes: 1ecb195753a1 ("mlxsw: spectrum_switchdev: Remove getting PORT_BRIDGE_FLAGS") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 4c5780f8f4b2..1f492b7dbea8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -434,9 +434,6 @@ static void mlxsw_sp_bridge_vlan_put(struct mlxsw_sp_bridge_vlan *bridge_vlan) static int mlxsw_sp_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) { - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD | From fd80a14363eec09cef018dc8a1074efc298a0a07 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 12 Feb 2019 15:39:59 -0800 Subject: [PATCH 1095/1436] staging: fsl-dpaa2: ethsw: Remove unused port_priv variable After 1b8b589d9103 ("staging: fsl-dpaa2: ethsw: Remove getting PORT_BRIDGE_FLAGS") we are not accessing any driver private data anymore, remove port_priv which is unused. Fixes: 1b8b589d9103 ("staging: fsl-dpaa2: ethsw: Remove getting PORT_BRIDGE_FLAGS") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c index e5a14c7c777f..1b3943b71254 100644 --- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c +++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c @@ -643,8 +643,6 @@ static void ethsw_teardown_irqs(struct fsl_mc_device *sw_dev) static int swdev_port_attr_get(struct net_device *netdev, struct switchdev_attr *attr) { - struct ethsw_port_priv *port_priv = netdev_priv(netdev); - switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD; From 62eebea655d4be5a20fd563abfd7656724cdcd00 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Fri, 11 Jan 2019 12:48:25 +0800 Subject: [PATCH 1096/1436] csky: Fixup _PAGE_GLOBAL bit for 610 tlb entry C-SKY CPU 8xx's _PAGE_GLOBAL is BIT(0), but 610's _PAGE_GLOBAL is BIT(6). Use _PAGE_GLOBAL macro instead of bad magic number. Signed-off-by: Guo Ren --- arch/csky/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index edfcbb25fd9f..af7a7faa1010 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -45,8 +45,8 @@ ((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset_t(address)) #define pmd_page(pmd) (pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT)) #define pte_clear(mm, addr, ptep) set_pte((ptep), \ - (((unsigned int)addr&0x80000000)?__pte(1):__pte(0))) -#define pte_none(pte) (!(pte_val(pte)&0xfffffffe)) + (((unsigned int) addr & PAGE_OFFSET) ? __pte(_PAGE_GLOBAL) : __pte(0))) +#define pte_none(pte) (!(pte_val(pte) & ~_PAGE_GLOBAL)) #define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT) #define pte_pfn(x) ((unsigned long)((x).pte_low >> PAGE_SHIFT)) #define pfn_pte(pfn, prot) __pte(((unsigned long long)(pfn) << PAGE_SHIFT) \ From 9216cd7231c12a8c391bb2c904d13695398d3453 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Thu, 24 Jan 2019 22:16:31 +0800 Subject: [PATCH 1097/1436] csky: Fixup wrong pt_regs size The bug is from commit 2054f4af1957 ("csky: bugfix gdb coredump error.") We change the ELF_NGREG to ELF_NGREG - 2 to fit gdb&gcc define, but forgot modify ptrace regset. Now coredump use ELF_NRGEG to parse GPRs and ptrace use pt_regs_regset, so there are two different reg_sets for userspace. Signed-off-by: Guo Ren --- arch/csky/kernel/ptrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/csky/kernel/ptrace.c b/arch/csky/kernel/ptrace.c index 57f1afe19a52..f2f12fff36f7 100644 --- a/arch/csky/kernel/ptrace.c +++ b/arch/csky/kernel/ptrace.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -159,7 +160,7 @@ static int fpr_set(struct task_struct *target, static const struct user_regset csky_regsets[] = { [REGSET_GPR] = { .core_note_type = NT_PRSTATUS, - .n = ELF_NGREG, + .n = sizeof(struct pt_regs) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), .get = &gpr_get, From 0f231dcfc664aaafa75a006ee10e55f3ae0c9b3c Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Thu, 24 Jan 2019 22:43:58 +0800 Subject: [PATCH 1098/1436] csky: coding convention: Use task_stack_page Use task_stack_page instead of p->stack to get stack. Follow the coding convention style. Also for init_stack, the same with other archs. Signed-off-by: Guo Ren --- arch/csky/include/asm/processor.h | 4 ++-- arch/csky/kernel/smp.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/csky/include/asm/processor.h b/arch/csky/include/asm/processor.h index 8f454810514f..21e0bd5293dd 100644 --- a/arch/csky/include/asm/processor.h +++ b/arch/csky/include/asm/processor.h @@ -49,7 +49,7 @@ struct thread_struct { }; #define INIT_THREAD { \ - .ksp = (unsigned long) init_thread_union.stack + THREAD_SIZE, \ + .ksp = sizeof(init_stack) + (unsigned long) &init_stack, \ .sr = DEFAULT_PSR_VALUE, \ } @@ -95,7 +95,7 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) (task_pt_regs(tsk)->usp) #define task_pt_regs(p) \ - ((struct pt_regs *)(THREAD_SIZE + p->stack) - 1) + ((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1) #define cpu_relax() barrier() diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c index ddc4dd79f282..b07a534b3062 100644 --- a/arch/csky/kernel/smp.c +++ b/arch/csky/kernel/smp.c @@ -160,7 +160,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) { unsigned long mask = 1 << cpu; - secondary_stack = (unsigned int)tidle->stack + THREAD_SIZE - 8; + secondary_stack = + (unsigned int) task_stack_page(tidle) + THREAD_SIZE - 8; secondary_hint = mfcr("cr31"); secondary_ccr = mfcr("cr18"); From 76d21d186a65523b08ea5f70302e2c29ee8f6a8d Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 30 Jan 2019 20:13:11 +0800 Subject: [PATCH 1099/1436] csky: Fixup io-range page attribute for mmap("/dev/mem") Some user space drivers need accessing IO address and IO remap need SO(strong order) page-attribute to make IO operation correct. So we need add SO-page-attr for all non-memory address. Signed-off-by: Guo Ren Reported-by: Fan Xiaodong --- arch/csky/include/asm/pgtable.h | 5 +++++ arch/csky/mm/ioremap.c | 14 ++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index af7a7faa1010..dcea277c09ae 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -241,6 +241,11 @@ static inline pte_t pte_mkyoung(pte_t pte) #define pgd_index(address) ((address) >> PGDIR_SHIFT) +#define __HAVE_PHYS_MEM_ACCESS_PROT +struct file; +extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); + /* * Macro to make mark a page protection value as "uncacheable". Note * that "protection" is really a misnomer here as the protection value diff --git a/arch/csky/mm/ioremap.c b/arch/csky/mm/ioremap.c index cb7c03e5cd21..8473b6bdf512 100644 --- a/arch/csky/mm/ioremap.c +++ b/arch/csky/mm/ioremap.c @@ -46,3 +46,17 @@ void iounmap(void __iomem *addr) vunmap((void *)((unsigned long)addr & PAGE_MASK)); } EXPORT_SYMBOL(iounmap); + +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot) +{ + if (!pfn_valid(pfn)) { + vma_prot.pgprot |= _PAGE_SO; + return pgprot_noncached(vma_prot); + } else if (file->f_flags & O_SYNC) { + return pgprot_noncached(vma_prot); + } + + return vma_prot; +} +EXPORT_SYMBOL(phys_mem_access_prot); From 131aee8b9807bc98379fa5a0270389dbc7dcec90 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Thu, 31 Jan 2019 14:34:37 +0800 Subject: [PATCH 1100/1436] csky: Fixup dead loop in show_stack When STACKTRACE is enabled, we must pass fp as stack for unwind, otherwise random value in stack will casue a dead loop. Signed-off-by: Guo Ren Reported-by: Lu Baoquan --- arch/csky/kernel/dumpstack.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/csky/kernel/dumpstack.c b/arch/csky/kernel/dumpstack.c index 659253e9989c..d67f9777cfd9 100644 --- a/arch/csky/kernel/dumpstack.c +++ b/arch/csky/kernel/dumpstack.c @@ -38,7 +38,11 @@ void show_stack(struct task_struct *task, unsigned long *stack) if (task) stack = (unsigned long *)thread_saved_fp(task); else +#ifdef CONFIG_STACKTRACE + asm volatile("mov %0, r8\n":"=r"(stack)::"memory"); +#else stack = (unsigned long *)&stack; +#endif } show_trace(stack); From e4a056987c86f402f1286e050b1dee3f4ce7c7eb Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 12 Feb 2019 08:05:25 -0800 Subject: [PATCH 1101/1436] scsi: sd: fix entropy gathering for most rotational disks The problem is that the default for MQ is not to gather entropy, whereas the default for the legacy queue was always to gather it. The original attempt to fix entropy gathering for rotational disks under MQ added an else branch in sd_read_block_characteristics(). Unfortunately, the entire check isn't reached if the device has no characteristics VPD page. Since this page was only introduced in SBC-3 and its optional anyway, most less expensive rotational disks don't have one, meaning they all stopped gathering entropy when we made MQ the default. In a wholly unrelated change, openssl and openssh won't function until the random number generator is initialised, meaning lots of people have been seeing large delays before they could log into systems with default MQ kernels due to this lack of entropy, because it now can take tens of minutes to initialise the kernel random number generator. The fix is to set the non-rotational and add-randomness flags unconditionally early on in the disk initialization path, so they can be reset only if the device actually reports being non-rotational via the VPD page. Reported-by: Mikael Pettersson Fixes: 83e32a591077 ("scsi: sd: Contribute to randomness when running rotational device") Cc: stable@vger.kernel.org Signed-off-by: James Bottomley Reviewed-by: Jens Axboe Reviewed-by: Xuewei Zhang Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index b2da8a00ec33..5464d467e23e 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2951,9 +2951,6 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp) if (rot == 1) { blk_queue_flag_set(QUEUE_FLAG_NONROT, q); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); - } else { - blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); - blk_queue_flag_set(QUEUE_FLAG_ADD_RANDOM, q); } if (sdkp->device->type == TYPE_ZBC) { @@ -3090,6 +3087,15 @@ static int sd_revalidate_disk(struct gendisk *disk) if (sdkp->media_present) { sd_read_capacity(sdkp, buffer); + /* + * set the default to rotational. All non-rotational devices + * support the block characteristics VPD page, which will + * cause this to be updated correctly and any device which + * doesn't support it should be treated as rotational. + */ + blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); + blk_queue_flag_set(QUEUE_FLAG_ADD_RANDOM, q); + if (scsi_device_supports_vpd(sdp)) { sd_read_block_provisioning(sdkp); sd_read_block_limits(sdkp); From 388a49959ee4e4e99f160241d9599efa62cd4299 Mon Sep 17 00:00:00 2001 From: Bill Kuzeja Date: Tue, 12 Feb 2019 09:29:50 -0500 Subject: [PATCH 1102/1436] scsi: qla2xxx: Fix panic from use after free in qla2x00_async_tm_cmd In qla2x00_async_tm_cmd, we reference off sp after it has been freed. This caused a panic on a system running a slub debug kernel. Since fcport is passed in anyways, just use that instead. Signed-off-by: Bill Kuzeja Acked-by: Giridhar Malavali Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 364bb52ed2a6..109587e62983 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1785,13 +1785,13 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, /* Issue Marker IOCB */ qla2x00_marker(vha, vha->hw->req_q_map[0], - vha->hw->rsp_q_map[0], sp->fcport->loop_id, lun, + vha->hw->rsp_q_map[0], fcport->loop_id, lun, flags == TCF_LUN_RESET ? MK_SYNC_ID_LUN : MK_SYNC_ID); } done_free_sp: sp->free(sp); - sp->fcport->flags &= ~FCF_ASYNC_SENT; + fcport->flags &= ~FCF_ASYNC_SENT; done: return rval; } From 528871b456026e6127d95b1b2bd8e3a003dc1614 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 13 Feb 2019 07:57:02 +0100 Subject: [PATCH 1103/1436] perf/core: Fix impossible ring-buffer sizes warning The following commit: 9dff0aa95a32 ("perf/core: Don't WARN() for impossible ring-buffer sizes") results in perf recording failures with larger mmap areas: root@skl:/tmp# perf record -g -a failed to mmap with 12 (Cannot allocate memory) The root cause is that the following condition is buggy: if (order_base_2(size) >= MAX_ORDER) goto fail; The problem is that @size is in bytes and MAX_ORDER is in pages, so the right test is: if (order_base_2(size) >= PAGE_SHIFT+MAX_ORDER) goto fail; Fix it. Reported-by: "Jin, Yao" Bisected-by: Borislav Petkov Analyzed-by: Peter Zijlstra Cc: Julien Thierry Cc: Mark Rutland Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Fixes: 9dff0aa95a32 ("perf/core: Don't WARN() for impossible ring-buffer sizes") Signed-off-by: Ingo Molnar --- kernel/events/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 309ef5a64af5..5ab4fe3b1dcc 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -734,7 +734,7 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) size = sizeof(struct ring_buffer); size += nr_pages * sizeof(void *); - if (order_base_2(size) >= MAX_ORDER) + if (order_base_2(size) >= PAGE_SHIFT+MAX_ORDER) goto fail; rb = kzalloc(size, GFP_KERNEL); From 83e418a805d880a8b18add07f94d19b2a5a80307 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 9 Feb 2019 01:58:50 +0100 Subject: [PATCH 1104/1436] mmc: meson-gx: fix interrupt name Commit bb364890323cca ("mmc: meson-gx: Free irq in release() callback") changed the _probe code to use request_threaded_irq() instead of devm_request_threaded_irq(). Unfortunately this removes a fallback for the interrupt name: devm_request_threaded_irq() uses the device name as fallback if the given IRQ name is NULL. request_threaded_irq() has no such fallback, thus /proc/interrupts shows "(null)" instead. Explicitly pass the dev_name() so we get the IRQ name shown in /proc/interrupts again. While here, also fix the indentation of the request_threaded_irq() parameter list. Fixes: bb364890323cca ("mmc: meson-gx: Free irq in release() callback") Signed-off-by: Martin Blumenstingl Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-gx-mmc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index f19ec60bcbdc..2eba507790e4 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -1338,7 +1338,8 @@ static int meson_mmc_probe(struct platform_device *pdev) host->regs + SD_EMMC_IRQ_EN); ret = request_threaded_irq(host->irq, meson_mmc_irq, - meson_mmc_irq_thread, IRQF_SHARED, NULL, host); + meson_mmc_irq_thread, IRQF_SHARED, + dev_name(&pdev->dev), host); if (ret) goto err_init_clk; From 7fc38225363dd8f19e667ad7c77b63bc4a5c065d Mon Sep 17 00:00:00 2001 From: Alin Nastac Date: Wed, 13 Feb 2019 09:14:53 +0100 Subject: [PATCH 1105/1436] netfilter: reject: skip csum verification for protocols that don't support it Some protocols have other means to verify the payload integrity (AH, ESP, SCTP) while others are incompatible with nf_ip(6)_checksum implementation because checksum is either optional or might be partial (UDPLITE, DCCP, GRE). Because nf_ip(6)_checksum was used to validate the packets, ip(6)tables REJECT rules were not capable to generate ICMP(v6) errors for the protocols mentioned above. This commit also fixes the incorrect pseudo-header protocol used for IPv4 packets that carry other transport protocols than TCP or UDP (pseudo-header used protocol 0 iso the proper value). Signed-off-by: Alin Nastac Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_reject.h | 1 + include/net/netfilter/ipv6/nf_reject.h | 1 + include/net/netfilter/nf_reject.h | 27 ++++++++++++++++++++++++ net/bridge/netfilter/nft_reject_bridge.c | 10 ++++----- net/ipv4/netfilter/nf_reject_ipv4.c | 9 ++------ net/ipv6/netfilter/nf_reject_ipv6.c | 3 +++ 6 files changed, 39 insertions(+), 12 deletions(-) create mode 100644 include/net/netfilter/nf_reject.h diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h index 2eb43fcefc50..40e0e0623f46 100644 --- a/include/net/netfilter/ipv4/nf_reject.h +++ b/include/net/netfilter/ipv4/nf_reject.h @@ -5,6 +5,7 @@ #include #include #include +#include void nf_send_unreach(struct sk_buff *skb_in, int code, int hook); void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook); diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h index 3a5a9a36a0b2..4a3ef9ebdf6f 100644 --- a/include/net/netfilter/ipv6/nf_reject.h +++ b/include/net/netfilter/ipv6/nf_reject.h @@ -3,6 +3,7 @@ #define _IPV6_NF_REJECT_H #include +#include void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code, unsigned int hooknum); diff --git a/include/net/netfilter/nf_reject.h b/include/net/netfilter/nf_reject.h new file mode 100644 index 000000000000..221f877f29d1 --- /dev/null +++ b/include/net/netfilter/nf_reject.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_REJECT_H +#define _NF_REJECT_H + +static inline bool nf_reject_verify_csum(__u8 proto) +{ + /* Skip protocols that don't use 16-bit one's complement checksum + * of the entire payload. + */ + switch (proto) { + /* Protocols with other integrity checks. */ + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_SCTP: + + /* Protocols with partial checksums. */ + case IPPROTO_UDPLITE: + case IPPROTO_DCCP: + + /* Protocols with optional checksums. */ + case IPPROTO_GRE: + return false; + } + return true; +} + +#endif /* _NF_REJECT_H */ diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 419e8edf23ba..1b1856744c80 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -125,13 +125,10 @@ static void nft_reject_br_send_v4_unreach(struct net *net, if (pskb_trim_rcsum(oldskb, ntohs(ip_hdr(oldskb)->tot_len))) return; - if (ip_hdr(oldskb)->protocol == IPPROTO_TCP || - ip_hdr(oldskb)->protocol == IPPROTO_UDP) - proto = ip_hdr(oldskb)->protocol; - else - proto = 0; + proto = ip_hdr(oldskb)->protocol; if (!skb_csum_unnecessary(oldskb) && + nf_reject_verify_csum(proto) && nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto)) return; @@ -234,6 +231,9 @@ static bool reject6_br_csum_ok(struct sk_buff *skb, int hook) if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) return false; + if (!nf_reject_verify_csum(proto)) + return true; + return nf_ip6_checksum(skb, hook, thoff, proto) == 0; } diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index aa8304c618b8..7dc3c324b911 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -173,21 +173,16 @@ EXPORT_SYMBOL_GPL(nf_send_reset); void nf_send_unreach(struct sk_buff *skb_in, int code, int hook) { struct iphdr *iph = ip_hdr(skb_in); - u8 proto; + u8 proto = iph->protocol; if (iph->frag_off & htons(IP_OFFSET)) return; - if (skb_csum_unnecessary(skb_in)) { + if (skb_csum_unnecessary(skb_in) || !nf_reject_verify_csum(proto)) { icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); return; } - if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP) - proto = iph->protocol; - else - proto = 0; - if (nf_ip_checksum(skb_in, hook, ip_hdrlen(skb_in), proto) == 0) icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); } diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index b9c8a763c863..02e9228641e0 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -233,6 +233,9 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook) if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) return false; + if (!nf_reject_verify_csum(proto)) + return true; + return nf_ip6_checksum(skb, hook, thoff, proto) == 0; } From 10970e1b4be9c74fce8ab6e3c34a7d718f063f2c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 12 Feb 2019 14:28:03 +0100 Subject: [PATCH 1106/1436] x86/a.out: Clear the dump structure initially dump_thread32() in aout_core_dump() does not clear the user32 structure allocated on the stack as the first thing on function entry. As a result, the dump.u_comm, dump.u_ar0 and dump.signal which get assigned before the clearing, get overwritten. Rename that function to fill_dump() to make it clear what it does and call it first thing. This was caught while staring at a patch by Derek Robson . Signed-off-by: Borislav Petkov Cc: Derek Robson Cc: Linus Torvalds Cc: Michael Matz Cc: x86@kernel.org Cc: Link: https://lkml.kernel.org/r/20190202005512.3144-1-robsonde@gmail.com --- arch/x86/ia32/ia32_aout.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index f65b78d32f5e..7dbbe9ffda17 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -51,7 +51,7 @@ static unsigned long get_dr(int n) /* * fill in the user structure for a core dump.. */ -static void dump_thread32(struct pt_regs *regs, struct user32 *dump) +static void fill_dump(struct pt_regs *regs, struct user32 *dump) { u32 fs, gs; memset(dump, 0, sizeof(*dump)); @@ -157,10 +157,12 @@ static int aout_core_dump(struct coredump_params *cprm) fs = get_fs(); set_fs(KERNEL_DS); has_dumped = 1; + + fill_dump(cprm->regs, &dump); + strncpy(dump.u_comm, current->comm, sizeof(current->comm)); dump.u_ar0 = offsetof(struct user32, regs); dump.signal = cprm->siginfo->si_signo; - dump_thread32(cprm->regs, &dump); /* * If the size of the dump file exceeds the rlimit, then see From cf43a757fd49442bc38f76088b70c2299eed2c2f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 11 Feb 2019 23:27:42 -0600 Subject: [PATCH 1107/1436] signal: Restore the stop PTRACE_EVENT_EXIT In the middle of do_exit() there is there is a call "ptrace_event(PTRACE_EVENT_EXIT, code);" That call places the process in TACKED_TRACED aka "(TASK_WAKEKILL | __TASK_TRACED)" and waits for for the debugger to release the task or SIGKILL to be delivered. Skipping past dequeue_signal when we know a fatal signal has already been delivered resulted in SIGKILL remaining pending and TIF_SIGPENDING remaining set. This in turn caused the scheduler to not sleep in PTACE_EVENT_EXIT as it figured a fatal signal was pending. This also caused ptrace_freeze_traced in ptrace_check_attach to fail because it left a per thread SIGKILL pending which is what fatal_signal_pending tests for. This difference in signal state caused strace to report strace: Exit of unknown pid NNNNN ignored Therefore update the signal handling state like dequeue_signal would when removing a per thread SIGKILL, by removing SIGKILL from the per thread signal mask and clearing TIF_SIGPENDING. Acked-by: Oleg Nesterov Reported-by: Oleg Nesterov Reported-by: Ivan Delalande Cc: stable@vger.kernel.org Fixes: 35634ffa1751 ("signal: Always notice exiting tasks") Signed-off-by: "Eric W. Biederman" --- kernel/signal.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 99fa8ff06fd9..57b7771e20d7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2436,9 +2436,12 @@ relock: } /* Has this task already been marked for death? */ - ksig->info.si_signo = signr = SIGKILL; - if (signal_group_exit(signal)) + if (signal_group_exit(signal)) { + ksig->info.si_signo = signr = SIGKILL; + sigdelset(¤t->pending.signal, SIGKILL); + recalc_sigpending(); goto fatal; + } for (;;) { struct k_sigaction *ka; From 1d69511e49b0107c0a60ff5ef488f5a2512a50ae Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 12 Feb 2019 09:54:31 -0500 Subject: [PATCH 1108/1436] drm/amdgpu/psp11: TA firmware is optional (v3) Don't warn or fail if it's missing. v2: handle xgmi case more gracefully. v3: handle older kernels properly Reviewed-by: Hawking Zhang Tested-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 9 +++++++-- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 26 ++++++++++++++----------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 8fab0d637ee5..3a9b48b227ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -90,8 +90,10 @@ static int psp_sw_fini(void *handle) adev->psp.sos_fw = NULL; release_firmware(adev->psp.asd_fw); adev->psp.asd_fw = NULL; - release_firmware(adev->psp.ta_fw); - adev->psp.ta_fw = NULL; + if (adev->psp.ta_fw) { + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; + } return 0; } @@ -435,6 +437,9 @@ static int psp_xgmi_initialize(struct psp_context *psp) struct ta_xgmi_shared_memory *xgmi_cmd; int ret; + if (!psp->adev->psp.ta_fw) + return -ENOENT; + if (!psp->xgmi_context.initialized) { ret = psp_xgmi_init_shared_buf(psp); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 0c6e7f9b143f..189fcb004579 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -152,18 +152,22 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); - if (err) - goto out2; + if (err) { + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; + dev_info(adev->dev, + "psp v11.0: Failed to load firmware \"%s\"\n", fw_name); + } else { + err = amdgpu_ucode_validate(adev->psp.ta_fw); + if (err) + goto out2; - err = amdgpu_ucode_validate(adev->psp.ta_fw); - if (err) - goto out2; - - ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; - adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version); - adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes); - adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr + - le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; + adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version); + adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes); + adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr + + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + } return 0; From 753c111f655e38bbd52fc01321266633f022ebe2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 13 Feb 2019 13:03:53 +0100 Subject: [PATCH 1109/1436] netfilter: nft_compat: use-after-free when deleting targets Fetch pointer to module before target object is released. Fixes: 29e3880109e3 ("netfilter: nf_tables: fix use-after-free when deleting compat expressions") Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index fe64df848365..0a4bad55a8aa 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -315,6 +315,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct xt_target *target = expr->ops->data; void *info = nft_expr_priv(expr); + struct module *me = target->me; struct xt_tgdtor_param par; par.net = ctx->net; @@ -325,7 +326,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) par.target->destroy(&par); if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) - module_put(target->me); + module_put(me); } static int nft_extension_dump_info(struct sk_buff *skb, int attr, From 224d71ccc07c6b221e4fdff7cd3d04c6033228c5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 11 Feb 2019 13:56:07 +0200 Subject: [PATCH 1110/1436] net/mlx5: Align ODP capability function with netdev coding style Update newly introduced function to be aligned to netdev coding style. Fixes: 46861e3e88be ("net/mlx5: Set ODP SRQ support in firmware") Reported-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e38aa206ab6d..6d45518edbdc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -461,9 +461,9 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) static int handle_hca_cap_odp(struct mlx5_core_dev *dev) { - void *set_ctx; void *set_hca_cap; - int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + void *set_ctx; + int set_sz; int err; if (!MLX5_CAP_GEN(dev, pg)) @@ -473,15 +473,12 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev) if (err) return err; - /** - * If all bits are cleared we shouldn't try to set it - * or we might fail while trying to access a reserved bit. - */ if (!(MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive) || MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive) || MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive))) return 0; + set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); set_ctx = kzalloc(set_sz, GFP_KERNEL); if (!set_ctx) return -ENOMEM; @@ -492,13 +489,13 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev) /* set ODP SRQ support for RC/UD and XRC transports */ MLX5_SET(odp_cap, set_hca_cap, ud_odp_caps.srq_receive, - (MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive))); + MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive)); MLX5_SET(odp_cap, set_hca_cap, rc_odp_caps.srq_receive, - (MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive))); + MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive)); MLX5_SET(odp_cap, set_hca_cap, xrc_odp_caps.srq_receive, - (MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive))); + MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive)); err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ODP); From bc44121190aea96de171408310db3d3c87e2cc11 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 12 Feb 2019 16:42:23 -0800 Subject: [PATCH 1111/1436] KVM: nVMX: Restore a preemption timer consistency check A recently added preemption timer consistency check was unintentionally dropped when the consistency checks were being reorganized to match the SDM's ordering. Fixes: 461b4ba4c7ad ("KVM: nVMX: Move the checks for VM-Execution Control Fields to a separate helper function") Cc: Krish Sadhukhan Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 8b45205b4e1d..d737a51a53ca 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2473,6 +2473,10 @@ static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu, (nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id)) return -EINVAL; + if (!nested_cpu_has_preemption_timer(vmcs12) && + nested_cpu_has_save_preemption_timer(vmcs12)) + return -EINVAL; + if (nested_cpu_has_ept(vmcs12) && !valid_ept_address(vcpu, vmcs12->ept_pointer)) return -EINVAL; From fb35c534b7881c0f7f94b01ddd95a9b17483252f Mon Sep 17 00:00:00 2001 From: Maria Pasechnik Date: Sun, 3 Feb 2019 17:55:09 +0200 Subject: [PATCH 1112/1436] net/mlx5e: Fix NULL pointer derefernce in set channels error flow New channels are applied to the priv channels only after they are successfully opened. Then, the indirection table should be built according to the new number of channels. Currently, such build is preformed independently of whether the channels opening is successful, and is not reverted on failure. The bug is caused due to removal of rss params from channels struct and moving it to priv struct. That change cause to independency between channels and rss params. This causes a crash on a later point, when accessing rqn of a non existing channel. This patch fixes it by moving the indirection table build right before switching the priv channels to new channels struct, after the new set of channels was successfully opened. Fixes: bbeb53b8b2c9 ("net/mlx5e: Move RSS params to a dedicated struct") Signed-off-by: Maria Pasechnik Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 3bbccead2f63..47233b9a4f81 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -354,9 +354,6 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, new_channels.params = priv->channels.params; new_channels.params.num_channels = count; - if (!netif_is_rxfh_configured(priv->netdev)) - mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, count); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; @@ -372,6 +369,10 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, if (arfs_enabled) mlx5e_arfs_disable(priv); + if (!netif_is_rxfh_configured(priv->netdev)) + mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, count); + /* Switch to new channels, set new parameters and close old ones */ mlx5e_switch_priv_channels(priv, &new_channels, NULL); From 4cab346bcf74f44665d57726ec2bccff6e679619 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Thu, 7 Feb 2019 09:22:56 -0600 Subject: [PATCH 1113/1436] net/mlx5: No command allowed when command interface is not ready When EEH is injected and PCI bus stalls, mlx5's pci error detect function is called to deactivate the command interface and tear down the device. The issue is that there can be a thread that already passed MLX5_DEVICE_STATE_INTERNAL_ERROR check, it will send the command and stuck in the wait_func. Solution: Add function mlx5_cmd_flush to disable command interface and clear all the pending commands. When device state is set to MLX5_DEVICE_STATE_INTERNAL_ERROR, call mlx5_cmd_flush to ensure all pending threads waiting for firmware commands completion are terminated. Fixes: c1d4d2e92ad6 ("net/mlx5: Avoid calling sleeping function by the health poll thread") Signed-off-by: Huy Nguyen Reviewed-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 18 ++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/health.c | 2 +- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 1 + 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 3e0fa8a8077b..e267ff93e8a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1583,6 +1583,24 @@ no_trig: spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); } +void mlx5_cmd_flush(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + for (i = 0; i < cmd->max_reg_cmds; i++) + while (down_trylock(&cmd->sem)) + mlx5_cmd_trigger_completions(dev); + + while (down_trylock(&cmd->pages_sem)) + mlx5_cmd_trigger_completions(dev); + + /* Unlock cmdif */ + up(&cmd->pages_sem); + for (i = 0; i < cmd->max_reg_cmds; i++) + up(&cmd->sem); +} + static int status_to_err(u8 status) { return status ? -1 : 0; /* TBD more meaningful codes */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 196c07383082..cb9fa3430c53 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -103,7 +103,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) mlx5_core_err(dev, "start\n"); if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; - mlx5_cmd_trigger_completions(dev); + mlx5_cmd_flush(dev); } mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 5300b0b6d836..4fdac020b795 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -126,6 +126,7 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, struct ptp_system_timestamp *sts); void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev); +void mlx5_cmd_flush(struct mlx5_core_dev *dev); int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); From 5400261e4d293d741c5b71a07f6eaabe2c8d3f1b Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 22 Jan 2019 14:18:04 +0200 Subject: [PATCH 1114/1436] net/mlx5: Fix a compilation warning in events.c Eliminate the following compilation warning: drivers/net/ethernet/mellanox/mlx5/core/events.c: warning: 'error_str' may be used uninitialized in this function [-Wuninitialized]: => 238:3 Fixes: c2fb3db22d35 ("net/mlx5: Rework handling of port module events") Signed-off-by: Tariq Toukan Reviewed-by: Mikhael Goikhman Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/events.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index fbc42b7252a9..503035469d2d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -211,11 +211,10 @@ static int port_module(struct notifier_block *nb, unsigned long type, void *data enum port_module_event_status_type module_status; enum port_module_event_error_type error_type; struct mlx5_eqe_port_module *module_event_eqe; - const char *status_str, *error_str; + const char *status_str; u8 module_num; module_event_eqe = &eqe->data.port_module; - module_num = module_event_eqe->module; module_status = module_event_eqe->module_status & PORT_MODULE_EVENT_MODULE_STATUS_MASK; error_type = module_event_eqe->error_type & @@ -223,25 +222,27 @@ static int port_module(struct notifier_block *nb, unsigned long type, void *data if (module_status < MLX5_MODULE_STATUS_NUM) events->pme_stats.status_counters[module_status]++; - status_str = mlx5_pme_status_to_string(module_status); - if (module_status == MLX5_MODULE_STATUS_ERROR) { + if (module_status == MLX5_MODULE_STATUS_ERROR) if (error_type < MLX5_MODULE_EVENT_ERROR_NUM) events->pme_stats.error_counters[error_type]++; - error_str = mlx5_pme_error_to_string(error_type); - } if (!printk_ratelimit()) return NOTIFY_OK; - if (module_status == MLX5_MODULE_STATUS_ERROR) + module_num = module_event_eqe->module; + status_str = mlx5_pme_status_to_string(module_status); + if (module_status == MLX5_MODULE_STATUS_ERROR) { + const char *error_str = mlx5_pme_error_to_string(error_type); + mlx5_core_err(events->dev, "Port module event[error]: module %u, %s, %s\n", module_num, status_str, error_str); - else + } else { mlx5_core_info(events->dev, "Port module event: module %u, %s\n", module_num, status_str); + } return NOTIFY_OK; } From 407e17b1a69a51ba9a512a04342da56c1f931df4 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 11 Feb 2019 16:27:02 -0800 Subject: [PATCH 1115/1436] net/mlx5e: XDP, fix redirect resources availability check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently mlx5 driver creates xdp redirect hw queues unconditionally on netdevice open, This is great until someone starts redirecting XDP traffic via ndo_xdp_xmit on mlx5 device and changes the device configuration at the same time, this might cause crashes, since the other device's napi is not aware of the mlx5 state change (resources un-availability). To fix this we must synchronize with other devices napi's on the system. Added a new flag under mlx5e_priv to determine XDP TX resources are available, set/clear it up when necessary and use synchronize_rcu() when the flag is turned off, so other napi's are in-sync with it, before we actually cleanup the hw resources. The flag is tested prior to committing to transmit on mlx5e_xdp_xmit, and it is sufficient to determine if it safe to transmit or not. The other two internal flags (MLX5E_STATE_OPENED and MLX5E_SQ_STATE_ENABLED) become unnecessary. Thus, they are removed from data path. Fixes: 58b99ee3e3eb ("net/mlx5e: Add support for XDP_REDIRECT in device-out side") Reported-by: Toke Høiland-Jørgensen Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en/xdp.c | 6 ++---- .../net/ethernet/mellanox/mlx5/core/en/xdp.h | 17 +++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 2 ++ 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 8fa8fdd30b85..448a92561567 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -657,6 +657,7 @@ struct mlx5e_channel_stats { enum { MLX5E_STATE_OPENED, MLX5E_STATE_DESTROYING, + MLX5E_STATE_XDP_TX_ENABLED, }; struct mlx5e_rqt { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 3740177eed09..03b2a9f9c589 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -365,7 +365,8 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, int sq_num; int i; - if (unlikely(!test_bit(MLX5E_STATE_OPENED, &priv->state))) + /* this flag is sufficient, no need to test internal sq state */ + if (unlikely(!mlx5e_xdp_tx_is_enabled(priv))) return -ENETDOWN; if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) @@ -378,9 +379,6 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, sq = &priv->channels.c[sq_num]->xdpsq; - if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) - return -ENETDOWN; - for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; struct mlx5e_xdp_info xdpi; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 3a67cb3cd179..ee27a7c8cd87 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -50,6 +50,23 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq); int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags); +static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv) +{ + set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); +} + +static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv) +{ + clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); + /* let other device's napi(s) see our new state */ + synchronize_rcu(); +} + +static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv) +{ + return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); +} + static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) { if (sq->doorbell_cseg) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 01819e5c9975..93e50ccd44c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2938,6 +2938,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_build_tx2sq_maps(priv); mlx5e_activate_channels(&priv->channels); + mlx5e_xdp_tx_enable(priv); netif_tx_start_all_queues(priv->netdev); if (mlx5e_is_vport_rep(priv)) @@ -2959,6 +2960,7 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) */ netif_tx_stop_all_queues(priv->netdev); netif_tx_disable(priv->netdev); + mlx5e_xdp_tx_disable(priv); mlx5e_deactivate_channels(&priv->channels); } From a4eaed9f9a895b16bb2c54e0ff6b3c99404fec92 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Mon, 11 Feb 2019 15:25:26 +0100 Subject: [PATCH 1116/1436] net: phy: Mask-out non-compatible modes when setting the max-speed When setting a PHY's max speed using either the max-speed DT property or ethtool, we should mask-out all non-compatible modes according to the settings table, instead of just the 10/100BASET modes. Signed-off-by: Maxime Chevallier Suggested-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy-core.c | 45 ++++++++++++++++++++++++++++++ drivers/net/phy/phy_device.c | 53 ------------------------------------ include/linux/phy.h | 1 + 3 files changed, 46 insertions(+), 53 deletions(-) diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index cdea028d1328..855abf487279 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -4,6 +4,7 @@ */ #include #include +#include const char *phy_speed_to_str(int speed) { @@ -338,6 +339,50 @@ size_t phy_speeds(unsigned int *speeds, size_t size, return count; } +static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) +{ + const struct phy_setting *p; + int i; + + for (i = 0, p = settings; i < ARRAY_SIZE(settings); i++, p++) { + if (p->speed > max_speed) + linkmode_clear_bit(p->bit, phydev->supported); + else + break; + } + + return 0; +} + +int phy_set_max_speed(struct phy_device *phydev, u32 max_speed) +{ + int err; + + err = __set_phy_supported(phydev, max_speed); + if (err) + return err; + + linkmode_copy(phydev->advertising, phydev->supported); + + return 0; +} +EXPORT_SYMBOL(phy_set_max_speed); + +void of_set_phy_supported(struct phy_device *phydev) +{ + struct device_node *node = phydev->mdio.dev.of_node; + u32 max_speed; + + if (!IS_ENABLED(CONFIG_OF_MDIO)) + return; + + if (!node) + return; + + if (!of_property_read_u32(node, "max-speed", &max_speed)) + __set_phy_supported(phydev, max_speed); +} + /** * phy_resolve_aneg_linkmode - resolve the advertisements into phy settings * @phydev: The phy_device struct diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 2c61282a2726..64497ec293e1 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1949,44 +1949,6 @@ int genphy_loopback(struct phy_device *phydev, bool enable) } EXPORT_SYMBOL(genphy_loopback); -static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) -{ - switch (max_speed) { - case SPEED_10: - linkmode_clear_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, - phydev->supported); - linkmode_clear_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, - phydev->supported); - /* fall through */ - case SPEED_100: - linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, - phydev->supported); - linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, - phydev->supported); - break; - case SPEED_1000: - break; - default: - return -ENOTSUPP; - } - - return 0; -} - -int phy_set_max_speed(struct phy_device *phydev, u32 max_speed) -{ - int err; - - err = __set_phy_supported(phydev, max_speed); - if (err) - return err; - - linkmode_copy(phydev->advertising, phydev->supported); - - return 0; -} -EXPORT_SYMBOL(phy_set_max_speed); - /** * phy_remove_link_mode - Remove a supported link mode * @phydev: phy_device structure to remove link mode from @@ -2117,21 +2079,6 @@ bool phy_validate_pause(struct phy_device *phydev, } EXPORT_SYMBOL(phy_validate_pause); -static void of_set_phy_supported(struct phy_device *phydev) -{ - struct device_node *node = phydev->mdio.dev.of_node; - u32 max_speed; - - if (!IS_ENABLED(CONFIG_OF_MDIO)) - return; - - if (!node) - return; - - if (!of_property_read_u32(node, "max-speed", &max_speed)) - __set_phy_supported(phydev, max_speed); -} - static void of_set_phy_eee_broken(struct phy_device *phydev) { struct device_node *node = phydev->mdio.dev.of_node; diff --git a/include/linux/phy.h b/include/linux/phy.h index 378da9a6165e..20344c7744d8 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -673,6 +673,7 @@ phy_lookup_setting(int speed, int duplex, const unsigned long *mask, bool exact); size_t phy_speeds(unsigned int *speeds, size_t size, unsigned long *mask); +void of_set_phy_supported(struct phy_device *phydev); static inline bool __phy_is_started(struct phy_device *phydev) { From 3feb9b23bf4cbf9f34568035170c6f1c25416523 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Mon, 11 Feb 2019 15:25:27 +0100 Subject: [PATCH 1117/1436] net: phy: Move of_set_phy_eee_broken to phy-core.c Since of_set_phy_supported was moved to phy-core.c, we can also move of_set_phy_eee_broken to the same location, so that we have all OF functions in the same place. This patch doesn't intend to introduce any change in behaviour. Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy-core.c | 27 +++++++++++++++++++++++++++ drivers/net/phy/phy_device.c | 28 ---------------------------- include/linux/phy.h | 1 + 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 855abf487279..de58a59815d5 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -383,6 +383,33 @@ void of_set_phy_supported(struct phy_device *phydev) __set_phy_supported(phydev, max_speed); } +void of_set_phy_eee_broken(struct phy_device *phydev) +{ + struct device_node *node = phydev->mdio.dev.of_node; + u32 broken = 0; + + if (!IS_ENABLED(CONFIG_OF_MDIO)) + return; + + if (!node) + return; + + if (of_property_read_bool(node, "eee-broken-100tx")) + broken |= MDIO_EEE_100TX; + if (of_property_read_bool(node, "eee-broken-1000t")) + broken |= MDIO_EEE_1000T; + if (of_property_read_bool(node, "eee-broken-10gt")) + broken |= MDIO_EEE_10GT; + if (of_property_read_bool(node, "eee-broken-1000kx")) + broken |= MDIO_EEE_1000KX; + if (of_property_read_bool(node, "eee-broken-10gkx4")) + broken |= MDIO_EEE_10GKX4; + if (of_property_read_bool(node, "eee-broken-10gkr")) + broken |= MDIO_EEE_10GKR; + + phydev->eee_broken_modes = broken; +} + /** * phy_resolve_aneg_linkmode - resolve the advertisements into phy settings * @phydev: The phy_device struct diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 64497ec293e1..a752de2fff5e 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -30,7 +30,6 @@ #include #include #include -#include MODULE_DESCRIPTION("PHY library"); MODULE_AUTHOR("Andy Fleming"); @@ -2079,33 +2078,6 @@ bool phy_validate_pause(struct phy_device *phydev, } EXPORT_SYMBOL(phy_validate_pause); -static void of_set_phy_eee_broken(struct phy_device *phydev) -{ - struct device_node *node = phydev->mdio.dev.of_node; - u32 broken = 0; - - if (!IS_ENABLED(CONFIG_OF_MDIO)) - return; - - if (!node) - return; - - if (of_property_read_bool(node, "eee-broken-100tx")) - broken |= MDIO_EEE_100TX; - if (of_property_read_bool(node, "eee-broken-1000t")) - broken |= MDIO_EEE_1000T; - if (of_property_read_bool(node, "eee-broken-10gt")) - broken |= MDIO_EEE_10GT; - if (of_property_read_bool(node, "eee-broken-1000kx")) - broken |= MDIO_EEE_1000KX; - if (of_property_read_bool(node, "eee-broken-10gkx4")) - broken |= MDIO_EEE_10GKX4; - if (of_property_read_bool(node, "eee-broken-10gkr")) - broken |= MDIO_EEE_10GKR; - - phydev->eee_broken_modes = broken; -} - static bool phy_drv_supports_irq(struct phy_driver *phydrv) { return phydrv->config_intr && phydrv->ack_interrupt; diff --git a/include/linux/phy.h b/include/linux/phy.h index 20344c7744d8..1a1d93a2a906 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -674,6 +674,7 @@ phy_lookup_setting(int speed, int duplex, const unsigned long *mask, size_t phy_speeds(unsigned int *speeds, size_t size, unsigned long *mask); void of_set_phy_supported(struct phy_device *phydev); +void of_set_phy_eee_broken(struct phy_device *phydev); static inline bool __phy_is_started(struct phy_device *phydev) { From ac3f5533343f6ec7fa24a27f0ae22bbfd27e0b23 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Mon, 11 Feb 2019 15:25:28 +0100 Subject: [PATCH 1118/1436] net: phy: Extract genphy_c45_pma_read_abilities from marvell10g Marvell 10G PHY driver has a generic way of initializing the supported link modes by reading the PHY's C45 PMA abilities. This can be made generic, since these registers are part of the 802.3 specifications. This commit extracts the config_init link_mode initialization code from marvell10g and uses it to introduce the genphy_c45_pma_read_abilities function. Only PMA modes are read, it's still up to the caller to set the Pause parameters. Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 78 ++++-------------------------------- drivers/net/phy/phy-c45.c | 74 ++++++++++++++++++++++++++++++++++ include/linux/phy.h | 1 + 3 files changed, 83 insertions(+), 70 deletions(-) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 08362dc657cd..496805c0ddfe 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -233,8 +233,7 @@ static int mv3310_resume(struct phy_device *phydev) static int mv3310_config_init(struct phy_device *phydev) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, }; - int val; + int ret, val; /* Check that the PHY interface type is compatible */ if (phydev->interface != PHY_INTERFACE_MODE_SGMII && @@ -243,8 +242,8 @@ static int mv3310_config_init(struct phy_device *phydev) phydev->interface != PHY_INTERFACE_MODE_10GKR) return -ENODEV; - __set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported); - __set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported); + __set_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported); + __set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->supported); if (phydev->c45_ids.devices_in_package & MDIO_DEVS_AN) { val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1); @@ -252,74 +251,13 @@ static int mv3310_config_init(struct phy_device *phydev) return val; if (val & MDIO_AN_STAT1_ABLE) - __set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, supported); + __set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + phydev->supported); } - val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_STAT2); - if (val < 0) - return val; - - /* Ethtool does not support the WAN mode bits */ - if (val & (MDIO_PMA_STAT2_10GBSR | MDIO_PMA_STAT2_10GBLR | - MDIO_PMA_STAT2_10GBER | MDIO_PMA_STAT2_10GBLX4 | - MDIO_PMA_STAT2_10GBSW | MDIO_PMA_STAT2_10GBLW | - MDIO_PMA_STAT2_10GBEW)) - __set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, supported); - if (val & MDIO_PMA_STAT2_10GBSR) - __set_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, supported); - if (val & MDIO_PMA_STAT2_10GBLR) - __set_bit(ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, supported); - if (val & MDIO_PMA_STAT2_10GBER) - __set_bit(ETHTOOL_LINK_MODE_10000baseER_Full_BIT, supported); - - if (val & MDIO_PMA_STAT2_EXTABLE) { - val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_EXTABLE); - if (val < 0) - return val; - - if (val & (MDIO_PMA_EXTABLE_10GBT | MDIO_PMA_EXTABLE_1000BT | - MDIO_PMA_EXTABLE_100BTX | MDIO_PMA_EXTABLE_10BT)) - __set_bit(ETHTOOL_LINK_MODE_TP_BIT, supported); - if (val & MDIO_PMA_EXTABLE_10GBLRM) - __set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, supported); - if (val & (MDIO_PMA_EXTABLE_10GBKX4 | MDIO_PMA_EXTABLE_10GBKR | - MDIO_PMA_EXTABLE_1000BKX)) - __set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, supported); - if (val & MDIO_PMA_EXTABLE_10GBLRM) - __set_bit(ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT, - supported); - if (val & MDIO_PMA_EXTABLE_10GBT) - __set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, - supported); - if (val & MDIO_PMA_EXTABLE_10GBKX4) - __set_bit(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, - supported); - if (val & MDIO_PMA_EXTABLE_10GBKR) - __set_bit(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, - supported); - if (val & MDIO_PMA_EXTABLE_1000BT) - __set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, - supported); - if (val & MDIO_PMA_EXTABLE_1000BKX) - __set_bit(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, - supported); - if (val & MDIO_PMA_EXTABLE_100BTX) { - __set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, - supported); - __set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, - supported); - } - if (val & MDIO_PMA_EXTABLE_10BT) { - __set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, - supported); - __set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, - supported); - } - } - - linkmode_copy(phydev->supported, supported); - linkmode_and(phydev->advertising, phydev->advertising, - phydev->supported); + ret = genphy_c45_pma_read_abilities(phydev); + if (ret) + return ret; return 0; } diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index eff9e5a4d831..6f028de4dae1 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -271,6 +271,80 @@ int genphy_c45_read_mdix(struct phy_device *phydev) } EXPORT_SYMBOL_GPL(genphy_c45_read_mdix); +/** + * genphy_c45_pma_read_abilities - read supported link modes from PMA + * @phydev: target phy_device struct + * + * Read the supported link modes from the PMA Status 2 (1.8) register. If bit + * 1.8.9 is set, the list of supported modes is build using the values in the + * PMA Extended Abilities (1.11) register, indicating 1000BASET an 10G related + * modes. If bit 1.11.14 is set, then the list is also extended with the modes + * in the 2.5G/5G PMA Extended register (1.21), indicating if 2.5GBASET and + * 5GBASET are supported. + */ +int genphy_c45_pma_read_abilities(struct phy_device *phydev) +{ + int val; + + val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_STAT2); + if (val < 0) + return val; + + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, + phydev->supported, + val & MDIO_PMA_STAT2_10GBSR); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, + phydev->supported, + val & MDIO_PMA_STAT2_10GBLR); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseER_Full_BIT, + phydev->supported, + val & MDIO_PMA_STAT2_10GBER); + + if (val & MDIO_PMA_STAT2_EXTABLE) { + val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_EXTABLE); + if (val < 0) + return val; + + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_10GBLRM); + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_10GBT); + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_10GBKX4); + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_10GBKR); + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_1000BT); + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_1000BKX); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_100BTX); + linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_100BTX); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_10BT); + linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_10BT); + } + + return 0; +} +EXPORT_SYMBOL_GPL(genphy_c45_pma_read_abilities); + /* The gen10g_* functions are the old Clause 45 stub */ int gen10g_config_aneg(struct phy_device *phydev) diff --git a/include/linux/phy.h b/include/linux/phy.h index 1a1d93a2a906..177a330d84e5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1116,6 +1116,7 @@ int genphy_c45_read_pma(struct phy_device *phydev); int genphy_c45_pma_setup_forced(struct phy_device *phydev); int genphy_c45_an_disable_aneg(struct phy_device *phydev); int genphy_c45_read_mdix(struct phy_device *phydev); +int genphy_c45_pma_read_abilities(struct phy_device *phydev); /* The gen10g_* functions are the old Clause 45 stub */ int gen10g_config_aneg(struct phy_device *phydev); From 7fd8afa8933a095a97995885740999f174e61b60 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Mon, 11 Feb 2019 15:25:29 +0100 Subject: [PATCH 1119/1436] net: phy: Add generic support for 2.5GBaseT and 5GBaseT The 802.3bz specification, based on previous by the NBASET alliance, defines the 2.5GBaseT and 5GBaseT link modes for ethernet traffic on cat5e, cat6 and cat7 cables. These mode integrate with the already defined C45 MDIO PMA/PMD registers set that added 10G support, by defining some previously reserved bits, and adding a new register (2.5G/5G Extended abilities). This commit adds the required definitions in include/uapi/linux/mdio.h to support these modes, and detect when a link-partner advertises them. It also adds support for these mode in the generic C45 PHY infrastructure. Signed-off-by: Maxime Chevallier Reviewed-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy-c45.c | 37 +++++++++++++++++++++++++++++++++++++ include/uapi/linux/mdio.h | 16 ++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 6f028de4dae1..7af5fa81daf6 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -47,6 +47,16 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev) /* Assume 1000base-T */ ctrl2 |= MDIO_PMA_CTRL2_1000BT; break; + case SPEED_2500: + ctrl1 |= MDIO_CTRL1_SPEED2_5G; + /* Assume 2.5Gbase-T */ + ctrl2 |= MDIO_PMA_CTRL2_2_5GBT; + break; + case SPEED_5000: + ctrl1 |= MDIO_CTRL1_SPEED5G; + /* Assume 5Gbase-T */ + ctrl2 |= MDIO_PMA_CTRL2_5GBT; + break; case SPEED_10000: ctrl1 |= MDIO_CTRL1_SPEED10G; /* Assume 10Gbase-T */ @@ -194,6 +204,12 @@ int genphy_c45_read_lpa(struct phy_device *phydev) if (val < 0) return val; + if (val & MDIO_AN_10GBT_STAT_LP2_5G) + linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + phydev->lp_advertising); + if (val & MDIO_AN_10GBT_STAT_LP5G) + linkmode_set_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + phydev->lp_advertising); if (val & MDIO_AN_10GBT_STAT_LP10G) linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, phydev->lp_advertising); @@ -224,6 +240,12 @@ int genphy_c45_read_pma(struct phy_device *phydev) case MDIO_PMA_CTRL1_SPEED1000: phydev->speed = SPEED_1000; break; + case MDIO_CTRL1_SPEED2_5G: + phydev->speed = SPEED_2500; + break; + case MDIO_CTRL1_SPEED5G: + phydev->speed = SPEED_5000; + break; case MDIO_CTRL1_SPEED10G: phydev->speed = SPEED_10000; break; @@ -339,6 +361,21 @@ int genphy_c45_pma_read_abilities(struct phy_device *phydev) linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, phydev->supported, val & MDIO_PMA_EXTABLE_10BT); + + if (val & MDIO_PMA_EXTABLE_NBT) { + val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, + MDIO_PMA_NG_EXTABLE); + if (val < 0) + return val; + + linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + phydev->supported, + val & MDIO_PMA_NG_EXTABLE_2_5GBT); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + phydev->supported, + val & MDIO_PMA_NG_EXTABLE_5GBT); + } } return 0; diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index 0e012b168e4d..0a552061ff1c 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -45,6 +45,7 @@ #define MDIO_AN_ADVERTISE 16 /* AN advertising (base page) */ #define MDIO_AN_LPA 19 /* AN LP abilities (base page) */ #define MDIO_PCS_EEE_ABLE 20 /* EEE Capability register */ +#define MDIO_PMA_NG_EXTABLE 21 /* 2.5G/5G PMA/PMD extended ability */ #define MDIO_PCS_EEE_WK_ERR 22 /* EEE wake error counter */ #define MDIO_PHYXS_LNSTAT 24 /* PHY XGXS lane state */ #define MDIO_AN_EEE_ADV 60 /* EEE advertisement */ @@ -92,6 +93,10 @@ #define MDIO_CTRL1_SPEED10G (MDIO_CTRL1_SPEEDSELEXT | 0x00) /* 10PASS-TS/2BASE-TL */ #define MDIO_CTRL1_SPEED10P2B (MDIO_CTRL1_SPEEDSELEXT | 0x04) +/* 2.5 Gb/s */ +#define MDIO_CTRL1_SPEED2_5G (MDIO_CTRL1_SPEEDSELEXT | 0x18) +/* 5 Gb/s */ +#define MDIO_CTRL1_SPEED5G (MDIO_CTRL1_SPEEDSELEXT | 0x1c) /* Status register 1. */ #define MDIO_STAT1_LPOWERABLE 0x0002 /* Low-power ability */ @@ -145,6 +150,8 @@ #define MDIO_PMA_CTRL2_1000BKX 0x000d /* 1000BASE-KX type */ #define MDIO_PMA_CTRL2_100BTX 0x000e /* 100BASE-TX type */ #define MDIO_PMA_CTRL2_10BT 0x000f /* 10BASE-T type */ +#define MDIO_PMA_CTRL2_2_5GBT 0x0030 /* 2.5GBaseT type */ +#define MDIO_PMA_CTRL2_5GBT 0x0031 /* 5GBaseT type */ #define MDIO_PCS_CTRL2_TYPE 0x0003 /* PCS type selection */ #define MDIO_PCS_CTRL2_10GBR 0x0000 /* 10GBASE-R type */ #define MDIO_PCS_CTRL2_10GBX 0x0001 /* 10GBASE-X type */ @@ -198,6 +205,7 @@ #define MDIO_PMA_EXTABLE_1000BKX 0x0040 /* 1000BASE-KX ability */ #define MDIO_PMA_EXTABLE_100BTX 0x0080 /* 100BASE-TX ability */ #define MDIO_PMA_EXTABLE_10BT 0x0100 /* 10BASE-T ability */ +#define MDIO_PMA_EXTABLE_NBT 0x4000 /* 2.5/5GBASE-T ability */ /* PHY XGXS lane state register. */ #define MDIO_PHYXS_LNSTAT_SYNC0 0x0001 @@ -234,9 +242,13 @@ #define MDIO_PCS_10GBRT_STAT2_BER 0x3f00 /* AN 10GBASE-T control register. */ +#define MDIO_AN_10GBT_CTRL_ADV2_5G 0x0080 /* Advertise 2.5GBASE-T */ +#define MDIO_AN_10GBT_CTRL_ADV5G 0x0100 /* Advertise 5GBASE-T */ #define MDIO_AN_10GBT_CTRL_ADV10G 0x1000 /* Advertise 10GBASE-T */ /* AN 10GBASE-T status register. */ +#define MDIO_AN_10GBT_STAT_LP2_5G 0x0020 /* LP is 2.5GBT capable */ +#define MDIO_AN_10GBT_STAT_LP5G 0x0040 /* LP is 5GBT capable */ #define MDIO_AN_10GBT_STAT_LPTRR 0x0200 /* LP training reset req. */ #define MDIO_AN_10GBT_STAT_LPLTABLE 0x0400 /* LP loop timing ability */ #define MDIO_AN_10GBT_STAT_LP10G 0x0800 /* LP is 10GBT capable */ @@ -265,6 +277,10 @@ #define MDIO_EEE_10GKX4 0x0020 /* 10G KX4 EEE cap */ #define MDIO_EEE_10GKR 0x0040 /* 10G KR EEE cap */ +/* 2.5G/5G Extended abilities register. */ +#define MDIO_PMA_NG_EXTABLE_2_5GBT 0x0001 /* 2.5GBASET ability */ +#define MDIO_PMA_NG_EXTABLE_5GBT 0x0002 /* 5GBASET ability */ + /* LASI RX_ALARM control/status registers. */ #define MDIO_PMA_LASI_RX_PHYXSLFLT 0x0001 /* PHY XS RX local fault */ #define MDIO_PMA_LASI_RX_PCSLFLT 0x0008 /* PCS RX local fault */ From fc228abc2347e106a44c0e9b29ab70b712c4ca51 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 12 Feb 2019 18:47:30 +0800 Subject: [PATCH 1120/1436] sctp: call gso_reset_checksum when computing checksum in sctp_gso_segment Jianlin reported a panic when running sctp gso over gre over vlan device: [ 84.772930] RIP: 0010:do_csum+0x6d/0x170 [ 84.790605] Call Trace: [ 84.791054] csum_partial+0xd/0x20 [ 84.791657] gre_gso_segment+0x2c3/0x390 [ 84.792364] inet_gso_segment+0x161/0x3e0 [ 84.793071] skb_mac_gso_segment+0xb8/0x120 [ 84.793846] __skb_gso_segment+0x7e/0x180 [ 84.794581] validate_xmit_skb+0x141/0x2e0 [ 84.795297] __dev_queue_xmit+0x258/0x8f0 [ 84.795949] ? eth_header+0x26/0xc0 [ 84.796581] ip_finish_output2+0x196/0x430 [ 84.797295] ? skb_gso_validate_network_len+0x11/0x80 [ 84.798183] ? ip_finish_output+0x169/0x270 [ 84.798875] ip_output+0x6c/0xe0 [ 84.799413] ? ip_append_data.part.50+0xc0/0xc0 [ 84.800145] iptunnel_xmit+0x144/0x1c0 [ 84.800814] ip_tunnel_xmit+0x62d/0x930 [ip_tunnel] [ 84.801699] gre_tap_xmit+0xac/0xf0 [ip_gre] [ 84.802395] dev_hard_start_xmit+0xa5/0x210 [ 84.803086] sch_direct_xmit+0x14f/0x340 [ 84.803733] __dev_queue_xmit+0x799/0x8f0 [ 84.804472] ip_finish_output2+0x2e0/0x430 [ 84.805255] ? skb_gso_validate_network_len+0x11/0x80 [ 84.806154] ip_output+0x6c/0xe0 [ 84.806721] ? ip_append_data.part.50+0xc0/0xc0 [ 84.807516] sctp_packet_transmit+0x716/0xa10 [sctp] [ 84.808337] sctp_outq_flush+0xd7/0x880 [sctp] It was caused by SKB_GSO_CB(skb)->csum_start not set in sctp_gso_segment. sctp_gso_segment() calls skb_segment() with 'feature | NETIF_F_HW_CSUM', which causes SKB_GSO_CB(skb)->csum_start not to be set in skb_segment(). For TCP/UDP, when feature supports HW_CSUM, CHECKSUM_PARTIAL will be set and gso_reset_checksum will be called to set SKB_GSO_CB(skb)->csum_start. So SCTP should do the same as TCP/UDP, to call gso_reset_checksum() when computing checksum in sctp_gso_segment. Reported-by: Jianlin Shi Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/offload.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 123e9f2dc226..edfcf16e704c 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -36,6 +36,7 @@ static __le32 sctp_gso_make_checksum(struct sk_buff *skb) { skb->ip_summed = CHECKSUM_NONE; skb->csum_not_inet = 0; + gso_reset_checksum(skb, ~0); return sctp_compute_cksum(skb, skb_transport_offset(skb)); } From af98c5a78517c04adb5fd68bb64b1ad6fe3d473f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 12 Feb 2019 18:51:01 +0800 Subject: [PATCH 1121/1436] sctp: set stream ext to NULL after freeing it in sctp_stream_outq_migrate In sctp_stream_init(), after sctp_stream_outq_migrate() freed the surplus streams' ext, but sctp_stream_alloc_out() returns -ENOMEM, stream->outcnt will not be set to 'outcnt'. With the bigger value on stream->outcnt, when closing the assoc and freeing its streams, the ext of those surplus streams will be freed again since those stream exts were not set to NULL after freeing in sctp_stream_outq_migrate(). Then the invalid-free issue reported by syzbot would be triggered. We fix it by simply setting them to NULL after freeing. Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") Reported-by: syzbot+58e480e7b28f2d890bfd@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index f24633114dfd..2936ed17bf9e 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -144,8 +144,10 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, } } - for (i = outcnt; i < stream->outcnt; i++) + for (i = outcnt; i < stream->outcnt; i++) { kfree(SCTP_SO(stream, i)->ext); + SCTP_SO(stream, i)->ext = NULL; + } } static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, From 3e0bd37ce0e4a574df6d87a901e13bcb46e10301 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 13 Feb 2019 11:53:35 -0800 Subject: [PATCH 1122/1436] bpf: add plumbing for BPF_LWT_ENCAP_IP in bpf_lwt_push_encap This patch adds all needed plumbing in preparation to allowing bpf programs to do IP encapping via bpf_lwt_push_encap. Actual implementation is added in the next patch in the patchset. Of note: - bpf_lwt_push_encap can now be called from BPF_PROG_TYPE_LWT_XMIT prog types in addition to BPF_PROG_TYPE_LWT_IN; - if the skb being encapped has GSO set, encapsulation is limited to IPIP/IP+GRE/IP+GUE (both IPv4 and IPv6); - as route lookups are different for ingress vs egress, the single external bpf_lwt_push_encap BPF helper is routed internally to either bpf_lwt_in_push_encap or bpf_lwt_xmit_push_encap BPF_CALLs, depending on prog type. v8 changes: fixed a typo. Signed-off-by: Peter Oskolkov Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 26 ++++++++++++++++++++-- net/core/filter.c | 48 +++++++++++++++++++++++++++++++++++----- 2 files changed, 67 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 25c8c0e62ecf..bcdd2474eee7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2016,6 +2016,19 @@ union bpf_attr { * Only works if *skb* contains an IPv6 packet. Insert a * Segment Routing Header (**struct ipv6_sr_hdr**) inside * the IPv6 header. + * **BPF_LWT_ENCAP_IP** + * IP encapsulation (GRE/GUE/IPIP/etc). The outer header + * must be IPv4 or IPv6, followed by zero or more + * additional headers, up to LWT_BPF_MAX_HEADROOM total + * bytes in all prepended headers. Please note that + * if skb_is_gso(skb) is true, no more than two headers + * can be prepended, and the inner header, if present, + * should be either GRE or UDP/GUE. + * + * BPF_LWT_ENCAP_SEG6*** types can be called by bpf programs of + * type BPF_PROG_TYPE_LWT_IN; BPF_LWT_ENCAP_IP type can be called + * by bpf programs of types BPF_PROG_TYPE_LWT_IN and + * BPF_PROG_TYPE_LWT_XMIT. * * A call to this helper is susceptible to change the underlaying * packet buffer. Therefore, at load time, all checks on pointers @@ -2517,7 +2530,8 @@ enum bpf_hdr_start_off { /* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */ enum bpf_lwt_encap_mode { BPF_LWT_ENCAP_SEG6, - BPF_LWT_ENCAP_SEG6_INLINE + BPF_LWT_ENCAP_SEG6_INLINE, + BPF_LWT_ENCAP_IP, }; #define __bpf_md_ptr(type, name) \ @@ -2606,7 +2620,15 @@ enum bpf_ret_code { BPF_DROP = 2, /* 3-6 reserved */ BPF_REDIRECT = 7, - /* >127 are reserved for prog type specific return codes */ + /* >127 are reserved for prog type specific return codes. + * + * BPF_LWT_REROUTE: used by BPF_PROG_TYPE_LWT_IN and + * BPF_PROG_TYPE_LWT_XMIT to indicate that skb had been + * changed and should be routed based on its new L3 header. + * (This is an L3 redirect, as opposed to L2 redirect + * represented by BPF_REDIRECT above). + */ + BPF_LWT_REROUTE = 128, }; struct bpf_sock { diff --git a/net/core/filter.c b/net/core/filter.c index 353735575204..12c88c21b6b8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4815,7 +4815,15 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len } #endif /* CONFIG_IPV6_SEG6_BPF */ -BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr, +#if IS_ENABLED(CONFIG_LWTUNNEL_BPF) +static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, + bool ingress) +{ + return -EINVAL; /* Implemented in the next patch. */ +} +#endif + +BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr, u32, len) { switch (type) { @@ -4823,14 +4831,41 @@ BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr, case BPF_LWT_ENCAP_SEG6: case BPF_LWT_ENCAP_SEG6_INLINE: return bpf_push_seg6_encap(skb, type, hdr, len); +#endif +#if IS_ENABLED(CONFIG_LWTUNNEL_BPF) + case BPF_LWT_ENCAP_IP: + return bpf_push_ip_encap(skb, hdr, len, true /* ingress */); #endif default: return -EINVAL; } } -static const struct bpf_func_proto bpf_lwt_push_encap_proto = { - .func = bpf_lwt_push_encap, +BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type, + void *, hdr, u32, len) +{ + switch (type) { +#if IS_ENABLED(CONFIG_LWTUNNEL_BPF) + case BPF_LWT_ENCAP_IP: + return bpf_push_ip_encap(skb, hdr, len, false /* egress */); +#endif + default: + return -EINVAL; + } +} + +static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = { + .func = bpf_lwt_in_push_encap, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_MEM, + .arg4_type = ARG_CONST_SIZE +}; + +static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = { + .func = bpf_lwt_xmit_push_encap, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, @@ -5417,7 +5452,8 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_lwt_seg6_adjust_srh || func == bpf_lwt_seg6_action || #endif - func == bpf_lwt_push_encap) + func == bpf_lwt_in_push_encap || + func == bpf_lwt_xmit_push_encap) return true; return false; @@ -5815,7 +5851,7 @@ lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_lwt_push_encap: - return &bpf_lwt_push_encap_proto; + return &bpf_lwt_in_push_encap_proto; default: return lwt_out_func_proto(func_id, prog); } @@ -5851,6 +5887,8 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_l4_csum_replace_proto; case BPF_FUNC_set_hash_invalid: return &bpf_set_hash_invalid_proto; + case BPF_FUNC_lwt_push_encap: + return &bpf_lwt_xmit_push_encap_proto; default: return lwt_out_func_proto(func_id, prog); } From 52f278774e796a553be0c869dcaaee6f259ca795 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 13 Feb 2019 11:53:36 -0800 Subject: [PATCH 1123/1436] bpf: implement BPF_LWT_ENCAP_IP mode in bpf_lwt_push_encap Implement BPF_LWT_ENCAP_IP mode in bpf_lwt_push_encap BPF helper. It enables BPF programs (specifically, BPF_PROG_TYPE_LWT_IN and BPF_PROG_TYPE_LWT_XMIT prog types) to add IP encapsulation headers to packets (e.g. IP/GRE, GUE, IPIP). This is useful when thousands of different short-lived flows should be encapped, each with different and dynamically determined destination. Although lwtunnels can be used in some of these scenarios, the ability to dynamically generate encap headers adds more flexibility, e.g. when routing depends on the state of the host (reflected in global bpf maps). v7 changes: - added a call skb_clear_hash(); - removed calls to skb_set_transport_header(); - refuse to encap GSO-enabled packets. v8 changes: - fix build errors when LWT is not enabled. Note: the next patch in the patchset with deal with GSO-enabled packets, which are currently rejected at encapping attempt. Signed-off-by: Peter Oskolkov Signed-off-by: Alexei Starovoitov --- include/net/lwtunnel.h | 2 ++ net/core/filter.c | 3 +- net/core/lwt_bpf.c | 65 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 1 deletion(-) diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 33fd9ba7e0e5..671113bcb2cc 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -126,6 +126,8 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b); int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb); int lwtunnel_input(struct sk_buff *skb); int lwtunnel_xmit(struct sk_buff *skb); +int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, + bool ingress); static inline void lwtunnel_set_redirect(struct dst_entry *dst) { diff --git a/net/core/filter.c b/net/core/filter.c index 12c88c21b6b8..a78deb2656e1 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -73,6 +73,7 @@ #include #include #include +#include /** * sk_filter_trim_cap - run a packet through a socket filter @@ -4819,7 +4820,7 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress) { - return -EINVAL; /* Implemented in the next patch. */ + return bpf_lwt_push_ip_encap(skb, hdr, len, ingress); } #endif diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index a648568c5e8f..e5a9850d9f48 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -390,6 +390,71 @@ static const struct lwtunnel_encap_ops bpf_encap_ops = { .owner = THIS_MODULE, }; +static int handle_gso_encap(struct sk_buff *skb, bool ipv4, int encap_len) +{ + /* Handling of GSO-enabled packets is added in the next patch. */ + return -EOPNOTSUPP; +} + +int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress) +{ + struct iphdr *iph; + bool ipv4; + int err; + + if (unlikely(len < sizeof(struct iphdr) || len > LWT_BPF_MAX_HEADROOM)) + return -EINVAL; + + /* validate protocol and length */ + iph = (struct iphdr *)hdr; + if (iph->version == 4) { + ipv4 = true; + if (unlikely(len < iph->ihl * 4)) + return -EINVAL; + } else if (iph->version == 6) { + ipv4 = false; + if (unlikely(len < sizeof(struct ipv6hdr))) + return -EINVAL; + } else { + return -EINVAL; + } + + if (ingress) + err = skb_cow_head(skb, len + skb->mac_len); + else + err = skb_cow_head(skb, + len + LL_RESERVED_SPACE(skb_dst(skb)->dev)); + if (unlikely(err)) + return err; + + /* push the encap headers and fix pointers */ + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + skb_push(skb, len); + if (ingress) + skb_postpush_rcsum(skb, iph, len); + skb_reset_network_header(skb); + memcpy(skb_network_header(skb), hdr, len); + bpf_compute_data_pointers(skb); + skb_clear_hash(skb); + + if (ipv4) { + skb->protocol = htons(ETH_P_IP); + iph = ip_hdr(skb); + + if (!iph->check) + iph->check = ip_fast_csum((unsigned char *)iph, + iph->ihl); + } else { + skb->protocol = htons(ETH_P_IPV6); + } + + if (skb_is_gso(skb)) + return handle_gso_encap(skb, ipv4, len); + + return 0; +} + static int __init bpf_lwt_init(void) { return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF); From ca78801a81e04a31f8088e96b2649a9cbace5499 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 13 Feb 2019 11:53:37 -0800 Subject: [PATCH 1124/1436] bpf: handle GSO in bpf_lwt_push_encap This patch adds handling of GSO packets in bpf_lwt_push_ip_encap() (called from bpf_lwt_push_encap): * IPIP, GRE, and UDP encapsulation types are deduced by looking into iphdr->protocol or ipv6hdr->next_header; * SCTP GSO packets are not supported (as bpf_skb_proto_4_to_6 and similar do); * UDP_L4 GSO packets are also not supported (although they are not blocked in bpf_skb_proto_4_to_6 and similar), as skb_decrease_gso_size() will break it; * SKB_GSO_DODGY bit is set. Note: it may be possible to support SCTP and UDP_L4 gso packets; but as these cases seem to be not well handled by other tunneling/encapping code paths, the solution should be generic enough to apply to all tunneling/encapping code. v8 changes: - make sure that if GRE or UDP encap is detected, there is enough of pushed bytes to cover both IP[v6] + GRE|UDP headers; - do not reject double-encapped packets; - whitelist TCP GSO packets rather than block SCTP GSO and UDP GSO. Signed-off-by: Peter Oskolkov Signed-off-by: Alexei Starovoitov --- net/core/lwt_bpf.c | 67 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index e5a9850d9f48..079871fc020f 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -16,6 +16,7 @@ #include #include #include +#include struct bpf_lwt_prog { struct bpf_prog *prog; @@ -390,10 +391,72 @@ static const struct lwtunnel_encap_ops bpf_encap_ops = { .owner = THIS_MODULE, }; +static int handle_gso_type(struct sk_buff *skb, unsigned int gso_type, + int encap_len) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + + gso_type |= SKB_GSO_DODGY; + shinfo->gso_type |= gso_type; + skb_decrease_gso_size(shinfo, encap_len); + shinfo->gso_segs = 0; + return 0; +} + static int handle_gso_encap(struct sk_buff *skb, bool ipv4, int encap_len) { - /* Handling of GSO-enabled packets is added in the next patch. */ - return -EOPNOTSUPP; + int next_hdr_offset; + void *next_hdr; + __u8 protocol; + + /* SCTP and UDP_L4 gso need more nuanced handling than what + * handle_gso_type() does above: skb_decrease_gso_size() is not enough. + * So at the moment only TCP GSO packets are let through. + */ + if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + return -ENOTSUPP; + + if (ipv4) { + protocol = ip_hdr(skb)->protocol; + next_hdr_offset = sizeof(struct iphdr); + next_hdr = skb_network_header(skb) + next_hdr_offset; + } else { + protocol = ipv6_hdr(skb)->nexthdr; + next_hdr_offset = sizeof(struct ipv6hdr); + next_hdr = skb_network_header(skb) + next_hdr_offset; + } + + switch (protocol) { + case IPPROTO_GRE: + next_hdr_offset += sizeof(struct gre_base_hdr); + if (next_hdr_offset > encap_len) + return -EINVAL; + + if (((struct gre_base_hdr *)next_hdr)->flags & GRE_CSUM) + return handle_gso_type(skb, SKB_GSO_GRE_CSUM, + encap_len); + return handle_gso_type(skb, SKB_GSO_GRE, encap_len); + + case IPPROTO_UDP: + next_hdr_offset += sizeof(struct udphdr); + if (next_hdr_offset > encap_len) + return -EINVAL; + + if (((struct udphdr *)next_hdr)->check) + return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL_CSUM, + encap_len); + return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL, encap_len); + + case IPPROTO_IP: + case IPPROTO_IPV6: + if (ipv4) + return handle_gso_type(skb, SKB_GSO_IPXIP4, encap_len); + else + return handle_gso_type(skb, SKB_GSO_IPXIP6, encap_len); + + default: + return -EPROTONOSUPPORT; + } } int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress) From 9b0a6a9dbab0ae092d033e67dc2701e8a7b09cdb Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 13 Feb 2019 11:53:38 -0800 Subject: [PATCH 1125/1436] ipv6_stub: add ipv6_route_input stub/proxy. Proxy ip6_route_input via ipv6_stub, for later use by lwt bpf ip encap (see the next patch in the patchset). Signed-off-by: Peter Oskolkov Signed-off-by: Alexei Starovoitov --- include/net/addrconf.h | 1 + net/ipv6/addrconf_core.c | 6 ++++++ net/ipv6/af_inet6.c | 7 +++++++ 3 files changed, 14 insertions(+) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 20d523ee2fec..269ec27385e9 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -248,6 +248,7 @@ struct ipv6_stub { const struct in6_addr *addr); int (*ipv6_dst_lookup)(struct net *net, struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); + int (*ipv6_route_input)(struct sk_buff *skb); struct fib6_table *(*fib6_get_table)(struct net *net, u32 id); struct fib6_info *(*fib6_lookup)(struct net *net, int oif, diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 5cd0029d930e..6c79af056d9b 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -134,6 +134,11 @@ static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1, return -EAFNOSUPPORT; } +static int eafnosupport_ipv6_route_input(struct sk_buff *skb) +{ + return -EAFNOSUPPORT; +} + static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id) { return NULL; @@ -170,6 +175,7 @@ eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr, const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { .ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup, + .ipv6_route_input = eafnosupport_ipv6_route_input, .fib6_get_table = eafnosupport_fib6_get_table, .fib6_table_lookup = eafnosupport_fib6_table_lookup, .fib6_lookup = eafnosupport_fib6_lookup, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d99753b5e39b..2f45d2a3e3a3 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -900,10 +900,17 @@ static struct pernet_operations inet6_net_ops = { .exit = inet6_net_exit, }; +static int ipv6_route_input(struct sk_buff *skb) +{ + ip6_route_input(skb); + return skb_dst(skb)->error; +} + static const struct ipv6_stub ipv6_stub_impl = { .ipv6_sock_mc_join = ipv6_sock_mc_join, .ipv6_sock_mc_drop = ipv6_sock_mc_drop, .ipv6_dst_lookup = ip6_dst_lookup, + .ipv6_route_input = ipv6_route_input, .fib6_get_table = fib6_get_table, .fib6_table_lookup = fib6_table_lookup, .fib6_lookup = fib6_lookup, From 3bd0b15281af776650e1550be4ea655b8cfa10c8 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 13 Feb 2019 11:53:39 -0800 Subject: [PATCH 1126/1436] bpf: add handling of BPF_LWT_REROUTE to lwt_bpf.c This patch builds on top of the previous patch in the patchset, which added BPF_LWT_ENCAP_IP mode to bpf_lwt_push_encap. As the encapping can result in the skb needing to go via a different interface/route/dst, bpf programs can indicate this by returning BPF_LWT_REROUTE, which triggers a new route lookup for the skb. v8 changes: fix kbuild errors when LWTUNNEL_BPF is builtin, but IPV6 is a module: as LWTUNNEL_BPF can only be either Y or N, call IPV6 routing functions only if they are built-in. v9 changes: - fixed a kbuild test robot compiler warning; - call IPV6 routing functions via ipv6_stub. v10 changes: removed unnecessary IS_ENABLED and pr_warn_once. v11 changes: fixed a potential dst leak. Signed-off-by: Peter Oskolkov Signed-off-by: Alexei Starovoitov --- net/core/lwt_bpf.c | 126 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 124 insertions(+), 2 deletions(-) diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 079871fc020f..32251f3fcda0 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -17,6 +17,7 @@ #include #include #include +#include struct bpf_lwt_prog { struct bpf_prog *prog; @@ -56,6 +57,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, switch (ret) { case BPF_OK: + case BPF_LWT_REROUTE: break; case BPF_REDIRECT: @@ -88,6 +90,30 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, return ret; } +static int bpf_lwt_input_reroute(struct sk_buff *skb) +{ + int err = -EINVAL; + + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *iph = ip_hdr(skb); + + err = ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb_dst(skb)->dev); + } else if (skb->protocol == htons(ETH_P_IPV6)) { + err = ipv6_stub->ipv6_route_input(skb); + } else { + err = -EAFNOSUPPORT; + } + + if (err) + goto err; + return dst_input(skb); + +err: + kfree_skb(skb); + return err; +} + static int bpf_input(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -99,11 +125,11 @@ static int bpf_input(struct sk_buff *skb) ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT); if (ret < 0) return ret; + if (ret == BPF_LWT_REROUTE) + return bpf_lwt_input_reroute(skb); } if (unlikely(!dst->lwtstate->orig_input)) { - pr_warn_once("orig_input not set on dst for prog %s\n", - bpf->out.name); kfree_skb(skb); return -EINVAL; } @@ -148,6 +174,91 @@ static int xmit_check_hhlen(struct sk_buff *skb) return 0; } +static int bpf_lwt_xmit_reroute(struct sk_buff *skb) +{ + struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev); + int oif = l3mdev ? l3mdev->ifindex : 0; + struct dst_entry *dst = NULL; + struct sock *sk; + struct net *net; + bool ipv4; + int err; + + if (skb->protocol == htons(ETH_P_IP)) + ipv4 = true; + else if (skb->protocol == htons(ETH_P_IPV6)) + ipv4 = false; + else + return -EAFNOSUPPORT; + + sk = sk_to_full_sk(skb->sk); + if (sk) { + if (sk->sk_bound_dev_if) + oif = sk->sk_bound_dev_if; + net = sock_net(sk); + } else { + net = dev_net(skb_dst(skb)->dev); + } + + if (ipv4) { + struct iphdr *iph = ip_hdr(skb); + struct flowi4 fl4 = {}; + struct rtable *rt; + + fl4.flowi4_oif = oif; + fl4.flowi4_mark = skb->mark; + fl4.flowi4_uid = sock_net_uid(net, sk); + fl4.flowi4_tos = RT_TOS(iph->tos); + fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; + fl4.flowi4_proto = iph->protocol; + fl4.daddr = iph->daddr; + fl4.saddr = iph->saddr; + + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) + return -EINVAL; + dst = &rt->dst; + } else { + struct ipv6hdr *iph6 = ipv6_hdr(skb); + struct flowi6 fl6 = {}; + + fl6.flowi6_oif = oif; + fl6.flowi6_mark = skb->mark; + fl6.flowi6_uid = sock_net_uid(net, sk); + fl6.flowlabel = ip6_flowinfo(iph6); + fl6.flowi6_proto = iph6->nexthdr; + fl6.daddr = iph6->daddr; + fl6.saddr = iph6->saddr; + + err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6); + if (err || IS_ERR(dst)) + return -EINVAL; + } + if (unlikely(dst->error)) { + dst_release(dst); + return -EINVAL; + } + + /* Although skb header was reserved in bpf_lwt_push_ip_encap(), it + * was done for the previous dst, so we are doing it here again, in + * case the new dst needs much more space. The call below is a noop + * if there is enough header space in skb. + */ + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + if (unlikely(err)) + return err; + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb); + if (unlikely(err)) + return err; + + /* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */ + return LWTUNNEL_XMIT_DONE; +} + static int bpf_xmit(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -155,11 +266,20 @@ static int bpf_xmit(struct sk_buff *skb) bpf = bpf_lwt_lwtunnel(dst->lwtstate); if (bpf->xmit.prog) { + __be16 proto = skb->protocol; int ret; ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT); switch (ret) { case BPF_OK: + /* If the header changed, e.g. via bpf_lwt_push_encap, + * BPF_LWT_REROUTE below should have been used if the + * protocol was also changed. + */ + if (skb->protocol != proto) { + kfree_skb(skb); + return -EINVAL; + } /* If the header was expanded, headroom might be too * small for L2 header to come, expand as needed. */ @@ -170,6 +290,8 @@ static int bpf_xmit(struct sk_buff *skb) return LWTUNNEL_XMIT_CONTINUE; case BPF_REDIRECT: return LWTUNNEL_XMIT_DONE; + case BPF_LWT_REROUTE: + return bpf_lwt_xmit_reroute(skb); default: return ret; } From 755db4771c96cf49e60e188a2c90c4d2a1ea063d Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 13 Feb 2019 11:53:40 -0800 Subject: [PATCH 1127/1436] bpf: sync /include/.../bpf.h with tools/include/.../bpf.h This patch copies changes in bpf.h done by a previous patch in this patchset from the kernel uapi include dir into tools uapi include dir. Signed-off-by: Peter Oskolkov Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 25c8c0e62ecf..bcdd2474eee7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2016,6 +2016,19 @@ union bpf_attr { * Only works if *skb* contains an IPv6 packet. Insert a * Segment Routing Header (**struct ipv6_sr_hdr**) inside * the IPv6 header. + * **BPF_LWT_ENCAP_IP** + * IP encapsulation (GRE/GUE/IPIP/etc). The outer header + * must be IPv4 or IPv6, followed by zero or more + * additional headers, up to LWT_BPF_MAX_HEADROOM total + * bytes in all prepended headers. Please note that + * if skb_is_gso(skb) is true, no more than two headers + * can be prepended, and the inner header, if present, + * should be either GRE or UDP/GUE. + * + * BPF_LWT_ENCAP_SEG6*** types can be called by bpf programs of + * type BPF_PROG_TYPE_LWT_IN; BPF_LWT_ENCAP_IP type can be called + * by bpf programs of types BPF_PROG_TYPE_LWT_IN and + * BPF_PROG_TYPE_LWT_XMIT. * * A call to this helper is susceptible to change the underlaying * packet buffer. Therefore, at load time, all checks on pointers @@ -2517,7 +2530,8 @@ enum bpf_hdr_start_off { /* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */ enum bpf_lwt_encap_mode { BPF_LWT_ENCAP_SEG6, - BPF_LWT_ENCAP_SEG6_INLINE + BPF_LWT_ENCAP_SEG6_INLINE, + BPF_LWT_ENCAP_IP, }; #define __bpf_md_ptr(type, name) \ @@ -2606,7 +2620,15 @@ enum bpf_ret_code { BPF_DROP = 2, /* 3-6 reserved */ BPF_REDIRECT = 7, - /* >127 are reserved for prog type specific return codes */ + /* >127 are reserved for prog type specific return codes. + * + * BPF_LWT_REROUTE: used by BPF_PROG_TYPE_LWT_IN and + * BPF_PROG_TYPE_LWT_XMIT to indicate that skb had been + * changed and should be routed based on its new L3 header. + * (This is an L3 redirect, as opposed to L2 redirect + * represented by BPF_REDIRECT above). + */ + BPF_LWT_REROUTE = 128, }; struct bpf_sock { From 0fde56e4385b09a67dd25321f607d4c942282de2 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 13 Feb 2019 11:53:41 -0800 Subject: [PATCH 1128/1436] selftests: bpf: add test_lwt_ip_encap selftest This patch adds a bpf self-test to cover BPF_LWT_ENCAP_IP mode in bpf_lwt_push_encap. Covered: - encapping in LWT_IN and LWT_XMIT - IPv4 and IPv6 A follow-up patch will add GSO and VRF-enabled tests. Signed-off-by: Peter Oskolkov Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 3 +- .../selftests/bpf/progs/test_lwt_ip_encap.c | 85 +++++ .../selftests/bpf/test_lwt_ip_encap.sh | 311 ++++++++++++++++++ 3 files changed, 398 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c create mode 100755 tools/testing/selftests/bpf/test_lwt_ip_encap.sh diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index c3edf47da05d..ccffaa0a0787 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -50,7 +50,8 @@ TEST_PROGS := test_kmod.sh \ test_lirc_mode2.sh \ test_skb_cgroup_id.sh \ test_flow_dissector.sh \ - test_xdp_vlan.sh + test_xdp_vlan.sh \ + test_lwt_ip_encap.sh TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh \ diff --git a/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c b/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c new file mode 100644 index 000000000000..c957d6dfe6d7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include "bpf_helpers.h" +#include "bpf_endian.h" + +struct grehdr { + __be16 flags; + __be16 protocol; +}; + +SEC("encap_gre") +int bpf_lwt_encap_gre(struct __sk_buff *skb) +{ + struct encap_hdr { + struct iphdr iph; + struct grehdr greh; + } hdr; + int err; + + memset(&hdr, 0, sizeof(struct encap_hdr)); + + hdr.iph.ihl = 5; + hdr.iph.version = 4; + hdr.iph.ttl = 0x40; + hdr.iph.protocol = 47; /* IPPROTO_GRE */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + hdr.iph.saddr = 0x640110ac; /* 172.16.1.100 */ + hdr.iph.daddr = 0x641010ac; /* 172.16.16.100 */ +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + hdr.iph.saddr = 0xac100164; /* 172.16.1.100 */ + hdr.iph.daddr = 0xac101064; /* 172.16.16.100 */ +#else +#error "Fix your compiler's __BYTE_ORDER__?!" +#endif + hdr.iph.tot_len = bpf_htons(skb->len + sizeof(struct encap_hdr)); + + hdr.greh.protocol = skb->protocol; + + err = bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr, + sizeof(struct encap_hdr)); + if (err) + return BPF_DROP; + + return BPF_LWT_REROUTE; +} + +SEC("encap_gre6") +int bpf_lwt_encap_gre6(struct __sk_buff *skb) +{ + struct encap_hdr { + struct ipv6hdr ip6hdr; + struct grehdr greh; + } hdr; + int err; + + memset(&hdr, 0, sizeof(struct encap_hdr)); + + hdr.ip6hdr.version = 6; + hdr.ip6hdr.payload_len = bpf_htons(skb->len + sizeof(struct grehdr)); + hdr.ip6hdr.nexthdr = 47; /* IPPROTO_GRE */ + hdr.ip6hdr.hop_limit = 0x40; + /* fb01::1 */ + hdr.ip6hdr.saddr.s6_addr[0] = 0xfb; + hdr.ip6hdr.saddr.s6_addr[1] = 1; + hdr.ip6hdr.saddr.s6_addr[15] = 1; + /* fb10::1 */ + hdr.ip6hdr.daddr.s6_addr[0] = 0xfb; + hdr.ip6hdr.daddr.s6_addr[1] = 0x10; + hdr.ip6hdr.daddr.s6_addr[15] = 1; + + hdr.greh.protocol = skb->protocol; + + err = bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr, + sizeof(struct encap_hdr)); + if (err) + return BPF_DROP; + + return BPF_LWT_REROUTE; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh new file mode 100755 index 000000000000..4ca714e23ab0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh @@ -0,0 +1,311 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Setup/topology: +# +# NS1 NS2 NS3 +# veth1 <---> veth2 veth3 <---> veth4 (the top route) +# veth5 <---> veth6 veth7 <---> veth8 (the bottom route) +# +# each vethN gets IPv[4|6]_N address +# +# IPv*_SRC = IPv*_1 +# IPv*_DST = IPv*_4 +# +# all tests test pings from IPv*_SRC to IPv*_DST +# +# by default, routes are configured to allow packets to go +# IP*_1 <=> IP*_2 <=> IP*_3 <=> IP*_4 (the top route) +# +# a GRE device is installed in NS3 with IPv*_GRE, and +# NS1/NS2 are configured to route packets to IPv*_GRE via IP*_8 +# (the bottom route) +# +# Tests: +# +# 1. routes NS2->IPv*_DST are brought down, so the only way a ping +# from IP*_SRC to IP*_DST can work is via IPv*_GRE +# +# 2a. in an egress test, a bpf LWT_XMIT program is installed on veth1 +# that encaps the packets with an IP/GRE header to route to IPv*_GRE +# +# ping: SRC->[encap at veth1:egress]->GRE:decap->DST +# ping replies go DST->SRC directly +# +# 2b. in an ingress test, a bpf LWT_IN program is installed on veth2 +# that encaps the packets with an IP/GRE header to route to IPv*_GRE +# +# ping: SRC->[encap at veth2:ingress]->GRE:decap->DST +# ping replies go DST->SRC directly + +set -e # exit on error + +if [[ $EUID -ne 0 ]]; then + echo "This script must be run as root" + echo "FAIL" + exit 1 +fi + +readonly NS1="ns1-$(mktemp -u XXXXXX)" +readonly NS2="ns2-$(mktemp -u XXXXXX)" +readonly NS3="ns3-$(mktemp -u XXXXXX)" + +readonly IPv4_1="172.16.1.100" +readonly IPv4_2="172.16.2.100" +readonly IPv4_3="172.16.3.100" +readonly IPv4_4="172.16.4.100" +readonly IPv4_5="172.16.5.100" +readonly IPv4_6="172.16.6.100" +readonly IPv4_7="172.16.7.100" +readonly IPv4_8="172.16.8.100" +readonly IPv4_GRE="172.16.16.100" + +readonly IPv4_SRC=$IPv4_1 +readonly IPv4_DST=$IPv4_4 + +readonly IPv6_1="fb01::1" +readonly IPv6_2="fb02::1" +readonly IPv6_3="fb03::1" +readonly IPv6_4="fb04::1" +readonly IPv6_5="fb05::1" +readonly IPv6_6="fb06::1" +readonly IPv6_7="fb07::1" +readonly IPv6_8="fb08::1" +readonly IPv6_GRE="fb10::1" + +readonly IPv6_SRC=$IPv6_1 +readonly IPv6_DST=$IPv6_4 + +setup() { +set -e # exit on error + # create devices and namespaces + ip netns add "${NS1}" + ip netns add "${NS2}" + ip netns add "${NS3}" + + ip link add veth1 type veth peer name veth2 + ip link add veth3 type veth peer name veth4 + ip link add veth5 type veth peer name veth6 + ip link add veth7 type veth peer name veth8 + + ip netns exec ${NS2} sysctl -wq net.ipv4.ip_forward=1 + ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.forwarding=1 + + ip link set veth1 netns ${NS1} + ip link set veth2 netns ${NS2} + ip link set veth3 netns ${NS2} + ip link set veth4 netns ${NS3} + ip link set veth5 netns ${NS1} + ip link set veth6 netns ${NS2} + ip link set veth7 netns ${NS2} + ip link set veth8 netns ${NS3} + + # configure addesses: the top route (1-2-3-4) + ip -netns ${NS1} addr add ${IPv4_1}/24 dev veth1 + ip -netns ${NS2} addr add ${IPv4_2}/24 dev veth2 + ip -netns ${NS2} addr add ${IPv4_3}/24 dev veth3 + ip -netns ${NS3} addr add ${IPv4_4}/24 dev veth4 + ip -netns ${NS1} -6 addr add ${IPv6_1}/128 nodad dev veth1 + ip -netns ${NS2} -6 addr add ${IPv6_2}/128 nodad dev veth2 + ip -netns ${NS2} -6 addr add ${IPv6_3}/128 nodad dev veth3 + ip -netns ${NS3} -6 addr add ${IPv6_4}/128 nodad dev veth4 + + # configure addresses: the bottom route (5-6-7-8) + ip -netns ${NS1} addr add ${IPv4_5}/24 dev veth5 + ip -netns ${NS2} addr add ${IPv4_6}/24 dev veth6 + ip -netns ${NS2} addr add ${IPv4_7}/24 dev veth7 + ip -netns ${NS3} addr add ${IPv4_8}/24 dev veth8 + ip -netns ${NS1} -6 addr add ${IPv6_5}/128 nodad dev veth5 + ip -netns ${NS2} -6 addr add ${IPv6_6}/128 nodad dev veth6 + ip -netns ${NS2} -6 addr add ${IPv6_7}/128 nodad dev veth7 + ip -netns ${NS3} -6 addr add ${IPv6_8}/128 nodad dev veth8 + + + ip -netns ${NS1} link set dev veth1 up + ip -netns ${NS2} link set dev veth2 up + ip -netns ${NS2} link set dev veth3 up + ip -netns ${NS3} link set dev veth4 up + ip -netns ${NS1} link set dev veth5 up + ip -netns ${NS2} link set dev veth6 up + ip -netns ${NS2} link set dev veth7 up + ip -netns ${NS3} link set dev veth8 up + + # configure routes: IP*_SRC -> veth1/IP*_2 (= top route) default; + # the bottom route to specific bottom addresses + + # NS1 + # top route + ip -netns ${NS1} route add ${IPv4_2}/32 dev veth1 + ip -netns ${NS1} route add default dev veth1 via ${IPv4_2} # go top by default + ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1 + ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2} # go top by default + # bottom route + ip -netns ${NS1} route add ${IPv4_6}/32 dev veth5 + ip -netns ${NS1} route add ${IPv4_7}/32 dev veth5 via ${IPv4_6} + ip -netns ${NS1} route add ${IPv4_8}/32 dev veth5 via ${IPv4_6} + ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5 + ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6} + ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6} + + # NS2 + # top route + ip -netns ${NS2} route add ${IPv4_1}/32 dev veth2 + ip -netns ${NS2} route add ${IPv4_4}/32 dev veth3 + ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2 + ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3 + # bottom route + ip -netns ${NS2} route add ${IPv4_5}/32 dev veth6 + ip -netns ${NS2} route add ${IPv4_8}/32 dev veth7 + ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6 + ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7 + + # NS3 + # top route + ip -netns ${NS3} route add ${IPv4_3}/32 dev veth4 + ip -netns ${NS3} route add ${IPv4_1}/32 dev veth4 via ${IPv4_3} + ip -netns ${NS3} route add ${IPv4_2}/32 dev veth4 via ${IPv4_3} + ip -netns ${NS3} -6 route add ${IPv6_3}/128 dev veth4 + ip -netns ${NS3} -6 route add ${IPv6_1}/128 dev veth4 via ${IPv6_3} + ip -netns ${NS3} -6 route add ${IPv6_2}/128 dev veth4 via ${IPv6_3} + # bottom route + ip -netns ${NS3} route add ${IPv4_7}/32 dev veth8 + ip -netns ${NS3} route add ${IPv4_5}/32 dev veth8 via ${IPv4_7} + ip -netns ${NS3} route add ${IPv4_6}/32 dev veth8 via ${IPv4_7} + ip -netns ${NS3} -6 route add ${IPv6_7}/128 dev veth8 + ip -netns ${NS3} -6 route add ${IPv6_5}/128 dev veth8 via ${IPv6_7} + ip -netns ${NS3} -6 route add ${IPv6_6}/128 dev veth8 via ${IPv6_7} + + # configure IPv4 GRE device in NS3, and a route to it via the "bottom" route + ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local ${IPv4_GRE} ttl 255 + ip -netns ${NS3} link set gre_dev up + ip -netns ${NS3} addr add ${IPv4_GRE} dev gre_dev + ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6} + ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8} + + + # configure IPv6 GRE device in NS3, and a route to it via the "bottom" route + ip -netns ${NS3} -6 tunnel add name gre6_dev mode ip6gre remote ${IPv6_1} local ${IPv6_GRE} ttl 255 + ip -netns ${NS3} link set gre6_dev up + ip -netns ${NS3} -6 addr add ${IPv6_GRE} nodad dev gre6_dev + ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} + ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} + + # rp_filter gets confused by what these tests are doing, so disable it + ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0 +} + +cleanup() { + ip netns del ${NS1} 2> /dev/null + ip netns del ${NS2} 2> /dev/null + ip netns del ${NS3} 2> /dev/null +} + +trap cleanup EXIT + +test_ping() { + local readonly PROTO=$1 + local readonly EXPECTED=$2 + local RET=0 + + set +e + if [ "${PROTO}" == "IPv4" ] ; then + ip netns exec ${NS1} ping -c 1 -W 1 -I ${IPv4_SRC} ${IPv4_DST} 2>&1 > /dev/null + RET=$? + elif [ "${PROTO}" == "IPv6" ] ; then + ip netns exec ${NS1} ping6 -c 1 -W 6 -I ${IPv6_SRC} ${IPv6_DST} 2>&1 > /dev/null + RET=$? + else + echo "test_ping: unknown PROTO: ${PROTO}" + exit 1 + fi + set -e + + if [ "0" != "${RET}" ]; then + RET=1 + fi + + if [ "${EXPECTED}" != "${RET}" ] ; then + echo "FAIL: test_ping: ${RET}" + exit 1 + fi +} + +test_egress() { + local readonly ENCAP=$1 + echo "starting egress ${ENCAP} encap test" + setup + + # need to wait a bit for IPv6 to autoconf, otherwise + # ping6 sometimes fails with "unable to bind to address" + + # by default, pings work + test_ping IPv4 0 + test_ping IPv6 0 + + # remove NS2->DST routes, ping fails + ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 + ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 + test_ping IPv4 1 + test_ping IPv6 1 + + # install replacement routes (LWT/eBPF), pings succeed + if [ "${ENCAP}" == "IPv4" ] ; then + ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre dev veth1 + ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre dev veth1 + elif [ "${ENCAP}" == "IPv6" ] ; then + ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1 + ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1 + else + echo "FAIL: unknown encap ${ENCAP}" + fi + test_ping IPv4 0 + test_ping IPv6 0 + + cleanup + echo "PASS" +} + +test_ingress() { + local readonly ENCAP=$1 + echo "starting ingress ${ENCAP} encap test" + setup + + # need to wait a bit for IPv6 to autoconf, otherwise + # ping6 sometimes fails with "unable to bind to address" + + # by default, pings work + test_ping IPv4 0 + test_ping IPv6 0 + + # remove NS2->DST routes, pings fail + ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 + ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 + test_ping IPv4 1 + test_ping IPv6 1 + + # install replacement routes (LWT/eBPF), pings succeed + if [ "${ENCAP}" == "IPv4" ] ; then + ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre dev veth2 + ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre dev veth2 + elif [ "${ENCAP}" == "IPv6" ] ; then + ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre6 dev veth2 + ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre6 dev veth2 + else + echo "FAIL: unknown encap ${ENCAP}" + fi + test_ping IPv4 0 + test_ping IPv6 0 + + cleanup + echo "PASS" +} + +test_egress IPv4 +test_egress IPv6 + +test_ingress IPv4 +test_ingress IPv6 + +echo "all tests passed" From b79555d5d8d32643e9d7193341dcaff13bf9ffcd Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 12 Feb 2019 19:56:15 +0100 Subject: [PATCH 1129/1436] net: phy: fix interrupt handling in non-started states phylib enables interrupts before phy_start() has been called, and if we receive an interrupt in a non-started state, the interrupt handler returns IRQ_NONE. This causes problems with at least one Marvell chip as reported by Andrew. Fix this by handling interrupts the same as in phy_mac_interrupt(), basically always running the phylib state machine. It knows when it has to do something and when not. This change allows to handle interrupts gracefully even if they occur in a non-started state. Fixes: 2b3e88ea6528 ("net: phy: improve phy state checking") Reported-by: Andrew Lunn Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 189cd2048c3a..ca5e0c0f018c 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -762,9 +762,6 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) { struct phy_device *phydev = phy_dat; - if (!phy_is_started(phydev)) - return IRQ_NONE; /* It can't be ours. */ - if (phydev->drv->did_interrupt && !phydev->drv->did_interrupt(phydev)) return IRQ_NONE; From 7c0db24cc431e2196d98a5d5ddaa9088e2fcbfe5 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 11 Feb 2019 13:40:21 -0500 Subject: [PATCH 1130/1436] dsa: mv88e6xxx: Ensure all pending interrupts are handled prior to exit The GPIO interrupt controller on the espressobin board only supports edge interrupts. If one enables the use of hardware interrupts in the device tree for the 88E6341, it is possible to miss an edge. When this happens, the INTn pin on the Marvell switch is stuck low and no further interrupts occur. I found after adding debug statements to mv88e6xxx_g1_irq_thread_work() that there is a race in handling device interrupts (e.g. PHY link interrupts). Some interrupts are directly cleared by reading the Global 1 status register. However, the device interrupt flag, for example, is not cleared until all the unmasked SERDES and PHY ports are serviced. This is done by reading the relevant SERDES and PHY status register. The code only services interrupts whose status bit is set at the time of reading its status register. If an interrupt event occurs after its status is read and before all interrupts are serviced, then this event will not be serviced and the INTn output pin will remain low. This is not a problem with polling or level interrupts since the handler will be called again to process the event. However, it's a big problem when using level interrupts. The fix presented here is to add a loop around the code servicing switch interrupts. If any pending interrupts remain after the current set has been handled, we loop and process the new set. If there are no pending interrupts after servicing, we are sure that INTn has gone high and we will get an edge when a new event occurs. Tested on espressobin board. Fixes: dc30c35be720 ("net: dsa: mv88e6xxx: Implement interrupt support.") Signed-off-by: John David Anglin Tested-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 8dca2c949e73..12fd7ce3f1ff 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -261,6 +261,7 @@ static irqreturn_t mv88e6xxx_g1_irq_thread_work(struct mv88e6xxx_chip *chip) unsigned int sub_irq; unsigned int n; u16 reg; + u16 ctl1; int err; mutex_lock(&chip->reg_lock); @@ -270,13 +271,28 @@ static irqreturn_t mv88e6xxx_g1_irq_thread_work(struct mv88e6xxx_chip *chip) if (err) goto out; - for (n = 0; n < chip->g1_irq.nirqs; ++n) { - if (reg & (1 << n)) { - sub_irq = irq_find_mapping(chip->g1_irq.domain, n); - handle_nested_irq(sub_irq); - ++nhandled; + do { + for (n = 0; n < chip->g1_irq.nirqs; ++n) { + if (reg & (1 << n)) { + sub_irq = irq_find_mapping(chip->g1_irq.domain, + n); + handle_nested_irq(sub_irq); + ++nhandled; + } } - } + + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_CTL1, &ctl1); + if (err) + goto unlock; + err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STS, ®); +unlock: + mutex_unlock(&chip->reg_lock); + if (err) + goto out; + ctl1 &= GENMASK(chip->g1_irq.nirqs, 0); + } while (reg & ctl1); + out: return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE); } From 46befd3249d72fb8dfce6d1c019511c247245ba6 Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Tue, 12 Feb 2019 23:47:31 +0800 Subject: [PATCH 1131/1436] net: neterion: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/neterion/s2io.c | 2 +- drivers/net/ethernet/neterion/vxge/vxge-main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index 82be90075695..feda9644289d 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -3055,7 +3055,7 @@ static void tx_intr_handler(struct fifo_info *fifo_data) /* Updating the statistics block */ swstats->mem_freed += skb->truesize; - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); get_info.offset++; if (get_info.offset == get_info.fifo_len + 1) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 0da7393b2ef3..b877acec5cde 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -114,7 +114,7 @@ static inline void VXGE_COMPLETE_VPATH_TX(struct vxge_fifo *fifo) /* free SKBs */ for (temp = completed; temp != skb_ptr; temp++) - dev_kfree_skb_irq(*temp); + dev_consume_skb_irq(*temp); } while (more); } From eae15bdc2a0c13e5d908e5c41702fdf0076e218a Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Tue, 12 Feb 2019 23:49:57 +0800 Subject: [PATCH 1132/1436] net: qualcomm: emac: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in emac_mac_tx_process() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 8d790313ee3d..20d2400ad300 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -1204,7 +1204,7 @@ void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q) if (tpbuf->skb) { pkts_compl++; bytes_compl += tpbuf->skb->len; - dev_kfree_skb_irq(tpbuf->skb); + dev_consume_skb_irq(tpbuf->skb); tpbuf->skb = NULL; } From d270f67d325ca709f3f98d5965e401235e6e4422 Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Tue, 12 Feb 2019 23:51:45 +0800 Subject: [PATCH 1133/1436] net: atheros: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 2 +- drivers/net/ethernet/atheros/atlx/atl1.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 3164aad29bcf..9dfe6a9431e6 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -1259,7 +1259,7 @@ static bool atl1e_clean_tx_irq(struct atl1e_adapter *adapter) } if (tx_buffer->skb) { - dev_kfree_skb_irq(tx_buffer->skb); + dev_consume_skb_irq(tx_buffer->skb); tx_buffer->skb = NULL; } diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index 63edc5706c09..9e07b469066a 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -2088,7 +2088,7 @@ static int atl1_intr_tx(struct atl1_adapter *adapter) } if (buffer_info->skb) { - dev_kfree_skb_irq(buffer_info->skb); + dev_consume_skb_irq(buffer_info->skb); buffer_info->skb = NULL; } From 5f5a8c75daed965a3631ab98a783846260d2729d Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Tue, 12 Feb 2019 23:52:53 +0800 Subject: [PATCH 1134/1436] net: apple: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in mace_interrupt() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/apple/mace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c index 68b9ee489489..4d9819d2894d 100644 --- a/drivers/net/ethernet/apple/mace.c +++ b/drivers/net/ethernet/apple/mace.c @@ -764,7 +764,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) dev->stats.tx_bytes += mp->tx_bufs[i]->len; ++dev->stats.tx_packets; } - dev_kfree_skb_irq(mp->tx_bufs[i]); + dev_consume_skb_irq(mp->tx_bufs[i]); --mp->tx_active; if (++i >= N_TX_RING) i = 0; From 412261d5c901472b025c8dcea173997209f28ab2 Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Tue, 12 Feb 2019 23:56:00 +0800 Subject: [PATCH 1135/1436] net: moxa: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in moxart_tx_finished() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/moxa/moxart_ether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index 00dec0ffb11b..e1651756bf9d 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -300,7 +300,7 @@ static void moxart_tx_finished(struct net_device *ndev) ndev->stats.tx_packets++; ndev->stats.tx_bytes += priv->tx_skb[tx_tail]->len; - dev_kfree_skb_irq(priv->tx_skb[tx_tail]); + dev_consume_skb_irq(priv->tx_skb[tx_tail]); priv->tx_skb[tx_tail] = NULL; tx_tail = TX_NEXT(tx_tail); From 8f5eeb9097fc46bd68a95870cbf2d04de3c4f968 Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Tue, 12 Feb 2019 23:56:53 +0800 Subject: [PATCH 1136/1436] net: fealnx: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in intr_handler() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/fealnx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index ae55da60ed0e..c24fd56a2c71 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -1531,7 +1531,7 @@ static irqreturn_t intr_handler(int irq, void *dev_instance) /* Free the original skb. */ pci_unmap_single(np->pci_dev, np->cur_tx->buffer, np->cur_tx->skbuff->len, PCI_DMA_TODEVICE); - dev_kfree_skb_irq(np->cur_tx->skbuff); + dev_consume_skb_irq(np->cur_tx->skbuff); np->cur_tx->skbuff = NULL; --np->really_tx_count; if (np->cur_tx->control & TXLD) { From e78042eb6e2d831f9e616d7cfa1dce3021504b5d Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Tue, 12 Feb 2019 23:59:04 +0800 Subject: [PATCH 1137/1436] net: sis: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/sis/sis190.c | 2 +- drivers/net/ethernet/sis/sis900.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c index 808cf9816673..5b351beb78cb 100644 --- a/drivers/net/ethernet/sis/sis190.c +++ b/drivers/net/ethernet/sis/sis190.c @@ -714,7 +714,7 @@ static void sis190_tx_interrupt(struct net_device *dev, sis190_unmap_tx_skb(tp->pci_dev, skb, txd); tp->Tx_skbuff[entry] = NULL; - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); } if (tp->dirty_tx != dirty_tx) { diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 4bb89f74742c..6073387511f8 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -1927,7 +1927,7 @@ static void sis900_finish_xmit (struct net_device *net_dev) pci_unmap_single(sis_priv->pci_dev, sis_priv->tx_ring[entry].bufptr, skb->len, PCI_DMA_TODEVICE); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); sis_priv->tx_skbuff[entry] = NULL; sis_priv->tx_ring[entry].bufptr = 0; sis_priv->tx_ring[entry].cmdsts = 0; From b9560a22a440a8813e613226c730d38cbbdb39eb Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Wed, 13 Feb 2019 00:00:02 +0800 Subject: [PATCH 1138/1436] net: macb: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in at91ether_interrupt() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Reviewed-by: Claudiu Beznea Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 4d1509f431d7..0d6288dd609a 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3763,7 +3763,7 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id) dev->stats.tx_errors++; if (lp->skb) { - dev_kfree_skb_irq(lp->skb); + dev_consume_skb_irq(lp->skb); lp->skb = NULL; dma_unmap_single(&lp->pdev->dev, lp->skb_physaddr, lp->skb_length, DMA_TO_DEVICE); From 88e425843ee26f80b82d984e8ec4b662c38eb0ed Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Wed, 13 Feb 2019 00:01:11 +0800 Subject: [PATCH 1139/1436] net: ixp4xx_eth: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in eth_txdone_irq() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/xscale/ixp4xx_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index aee55c03def0..ed6623a9801e 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -139,7 +139,7 @@ #ifdef __ARMEB__ typedef struct sk_buff buffer_t; #define free_buffer dev_kfree_skb -#define free_buffer_irq dev_kfree_skb_irq +#define free_buffer_irq dev_consume_skb_irq #else typedef void buffer_t; #define free_buffer kfree From 01e3497163eadaed0bd404bf6d36210cecf591d5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 12 Feb 2019 16:01:53 +0000 Subject: [PATCH 1140/1436] qed: fix indentation issue with statements in an if-block There are some statements in an if-block that are not correctly indented. Fix these. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 35c9f484eb9f..e61d1d905415 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -2135,12 +2135,12 @@ int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn, u32 rdma_tasks) struct qed_eth_pf_params *p_params = &p_hwfn->pf_params.eth_pf_params; - if (!p_params->num_vf_cons) - p_params->num_vf_cons = - ETH_PF_PARAMS_VF_CONS_DEFAULT; - qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_ETH, - p_params->num_cons, - p_params->num_vf_cons); + if (!p_params->num_vf_cons) + p_params->num_vf_cons = + ETH_PF_PARAMS_VF_CONS_DEFAULT; + qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_ETH, + p_params->num_cons, + p_params->num_vf_cons); p_hwfn->p_cxt_mngr->arfs_count = p_params->num_arfs_filters; break; } From cba2bf7a22acaaa18e9030596464b11ee57e077e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 12 Feb 2019 16:08:07 +0000 Subject: [PATCH 1141/1436] qlge: fix some indentation issues There are some statements that are indented incorrectly. Fix these. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c | 4 ++-- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c b/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c index 5edbd532127d..a6886cc5654c 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c @@ -249,8 +249,8 @@ static void ql_update_stats(struct ql_adapter *qdev) spin_lock(&qdev->stats_lock); if (ql_sem_spinlock(qdev, qdev->xg_sem_mask)) { - netif_err(qdev, drv, qdev->ndev, - "Couldn't get xgmac sem.\n"); + netif_err(qdev, drv, qdev->ndev, + "Couldn't get xgmac sem.\n"); goto quit; } /* diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 059ba9429e51..096515c27263 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -1159,10 +1159,10 @@ static void ql_update_lbq(struct ql_adapter *qdev, struct rx_ring *rx_ring) map = lbq_desc->p.pg_chunk.map + lbq_desc->p.pg_chunk.offset; - dma_unmap_addr_set(lbq_desc, mapaddr, map); + dma_unmap_addr_set(lbq_desc, mapaddr, map); dma_unmap_len_set(lbq_desc, maplen, rx_ring->lbq_buf_size); - *lbq_desc->addr = cpu_to_le64(map); + *lbq_desc->addr = cpu_to_le64(map); pci_dma_sync_single_for_device(qdev->pdev, map, rx_ring->lbq_buf_size, From 5bf325a53202b8728cf7013b72688c46071e212e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Feb 2019 12:26:27 -0800 Subject: [PATCH 1142/1436] net: fix possible overflow in __sk_mem_raise_allocated() With many active TCP sockets, fat TCP sockets could fool __sk_mem_raise_allocated() thanks to an overflow. They would increase their share of the memory, instead of decreasing it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- net/core/sock.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 2b229f7be8eb..f43f935cb113 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1277,7 +1277,7 @@ static inline void sk_sockets_allocated_inc(struct sock *sk) percpu_counter_inc(sk->sk_prot->sockets_allocated); } -static inline int +static inline u64 sk_sockets_allocated_read_positive(struct sock *sk) { return percpu_counter_read_positive(sk->sk_prot->sockets_allocated); diff --git a/net/core/sock.c b/net/core/sock.c index 6aa2e7e0b4fb..bc3512f230a3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2380,7 +2380,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) } if (sk_has_memory_pressure(sk)) { - int alloc; + u64 alloc; if (!sk_under_memory_pressure(sk)) return 1; From 1f15bb4f399ceccd85522b24af31198f53516f9b Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 12 Feb 2019 23:39:06 +0200 Subject: [PATCH 1143/1436] net: sched: flower: only return error from hw offload if skip_sw Recently introduced tc_setup_flow_action() can fail when parsing tcf_exts on some unsupported action commands. However, this should not affect the case when user did not explicitly request hw offload by setting skip_sw flag. Modify tc_setup_flow_action() callers to only propagate the error if skip_sw flag is set for filter that is being offloaded, and set extack error message in that case. Signed-off-by: Vlad Buslov Fixes: 3a7b68617de7 ("cls_api: add translator to flow_action representation") Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 32fa3e20adc5..640f83e7f93f 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -396,7 +396,11 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); if (err) { kfree(cls_flower.rule); - return err; + if (skip_sw) { + NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); + return err; + } + return 0; } err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); @@ -1503,7 +1507,11 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, &f->exts); if (err) { kfree(cls_flower.rule); - return err; + if (tc_skip_sw(f->flags)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); + return err; + } + continue; } cls_flower.classid = f->res.classid; From 9f9dc493f724b311c84d88b8e77dad14abd06bcc Mon Sep 17 00:00:00 2001 From: John Hurley Date: Wed, 13 Feb 2019 00:23:52 +0000 Subject: [PATCH 1144/1436] flow_offload: fix block stats With the introduction of flow_stats_update(), drivers now update the stats fields of the passed tc_cls_flower_offload struct, rather than call tcf_exts_stats_update() directly to update the stats of offloaded TC flower rules. However, if multiple qdiscs are registered to a TC shared block and a flower rule is applied, then, when getting stats for the rule, multiple callbacks may be made. Take this into consideration by modifying flow_stats_update to gather the stats from all callbacks. Currently, the values in tc_cls_flower_offload only account for the last stats callback in the list. Fixes: 3b1903ef97c0 ("flow_offload: add statistics retrieval infrastructure and use it") Signed-off-by: John Hurley Reviewed-by: Jakub Kicinski Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/net/flow_offload.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index a307ccb18015..d035183c8d03 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -195,9 +195,9 @@ struct flow_stats { static inline void flow_stats_update(struct flow_stats *flow_stats, u64 bytes, u64 pkts, u64 lastused) { - flow_stats->pkts = pkts; - flow_stats->bytes = bytes; - flow_stats->lastused = lastused; + flow_stats->pkts += pkts; + flow_stats->bytes += bytes; + flow_stats->lastused = max_t(u64, flow_stats->lastused, lastused); } #endif /* _NET_FLOW_OFFLOAD_H */ From fb14b096355b8c947a256e8e5259a4ebaca00866 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 13 Feb 2019 01:42:00 +0000 Subject: [PATCH 1145/1436] net: sched: remove duplicated include from cls_api.c Remove duplicated include. Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- net/sched/cls_api.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 266fcb34fefe..9ad53895e604 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -38,7 +38,6 @@ #include #include #include -#include extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; From c25fff7171bebb76243ccc77f0f04aafa3db87be Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 13 Feb 2019 02:55:40 +0100 Subject: [PATCH 1146/1436] mm: add dma_addr_t to struct page The page_pool API is using page->private to store DMA addresses. As pointed out by David Miller we can't use that on 32-bit architectures with 64-bit DMA This patch adds a new dma_addr_t struct to allow storing DMA addresses Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Ilias Apalodimas Acked-by: Andrew Morton Signed-off-by: David S. Miller --- include/linux/mm_types.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 2c471a2c43fa..0a36a22228e7 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -95,6 +95,13 @@ struct page { */ unsigned long private; }; + struct { /* page_pool used by netstack */ + /** + * @dma_addr: might require a 64-bit value even on + * 32-bit architectures. + */ + dma_addr_t dma_addr; + }; struct { /* slab, slob and slub */ union { struct list_head slab_list; /* uses lru */ From 1567b85eb8ad2d0fe8d77e50118e876df565d9c7 Mon Sep 17 00:00:00 2001 From: Ilias Apalodimas Date: Wed, 13 Feb 2019 02:55:45 +0100 Subject: [PATCH 1147/1436] net: page_pool: don't use page->private to store dma_addr_t As pointed out by David Miller the current page_pool implementation stores dma_addr_t in page->private. This won't work on 32-bit platforms with 64-bit DMA addresses since the page->private is an unsigned long and the dma_addr_t a u64. A previous patch is adding dma_addr_t on struct page to accommodate this. This patch adapts the page_pool related functions to use the newly added struct for storing and retrieving DMA addresses from network drivers. Signed-off-by: Ilias Apalodimas Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/core/page_pool.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 43a932cb609b..897a69a1477e 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -136,7 +136,9 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, if (!(pool->p.flags & PP_FLAG_DMA_MAP)) goto skip_dma_map; - /* Setup DMA mapping: use page->private for DMA-addr + /* Setup DMA mapping: use 'struct page' area for storing DMA-addr + * since dma_addr_t can be either 32 or 64 bits and does not always fit + * into page private data (i.e 32bit cpu with 64bit DMA caps) * This mapping is kept for lifetime of page, until leaving pool. */ dma = dma_map_page(pool->p.dev, page, 0, @@ -146,7 +148,7 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, put_page(page); return NULL; } - set_page_private(page, dma); /* page->private = dma; */ + page->dma_addr = dma; skip_dma_map: /* When page just alloc'ed is should/must have refcnt 1. */ @@ -175,13 +177,16 @@ EXPORT_SYMBOL(page_pool_alloc_pages); static void __page_pool_clean_page(struct page_pool *pool, struct page *page) { + dma_addr_t dma; + if (!(pool->p.flags & PP_FLAG_DMA_MAP)) return; + dma = page->dma_addr; /* DMA unmap */ - dma_unmap_page(pool->p.dev, page_private(page), + dma_unmap_page(pool->p.dev, dma, PAGE_SIZE << pool->p.order, pool->p.dma_dir); - set_page_private(page, 0); + page->dma_addr = 0; } /* Return a page to the page allocator, cleaning up our state */ From 13f16d9d4ab7fccc918aafb146ea043be9574d49 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 13 Feb 2019 02:55:50 +0100 Subject: [PATCH 1148/1436] page_pool: use DMA_ATTR_SKIP_CPU_SYNC for DMA mappings As pointed out by Alexander Duyck, the DMA mapping done in page_pool needs to use the DMA attribute DMA_ATTR_SKIP_CPU_SYNC. As the principle behind page_pool keeping the pages mapped is that the driver takes over the DMA-sync steps. Reported-by: Alexander Duyck Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Ilias Apalodimas Signed-off-by: David S. Miller --- net/core/page_pool.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 897a69a1477e..5b2252c6d49b 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -141,9 +141,9 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, * into page private data (i.e 32bit cpu with 64bit DMA caps) * This mapping is kept for lifetime of page, until leaving pool. */ - dma = dma_map_page(pool->p.dev, page, 0, - (PAGE_SIZE << pool->p.order), - pool->p.dma_dir); + dma = dma_map_page_attrs(pool->p.dev, page, 0, + (PAGE_SIZE << pool->p.order), + pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC); if (dma_mapping_error(pool->p.dev, dma)) { put_page(page); return NULL; @@ -184,8 +184,9 @@ static void __page_pool_clean_page(struct page_pool *pool, dma = page->dma_addr; /* DMA unmap */ - dma_unmap_page(pool->p.dev, dma, - PAGE_SIZE << pool->p.order, pool->p.dma_dir); + dma_unmap_page_attrs(pool->p.dev, dma, + PAGE_SIZE << pool->p.order, pool->p.dma_dir, + DMA_ATTR_SKIP_CPU_SYNC); page->dma_addr = 0; } From f8b1f9f6459c8274865ef819bdedead577096254 Mon Sep 17 00:00:00 2001 From: Vishal Kulkarni Date: Wed, 13 Feb 2019 10:48:52 +0530 Subject: [PATCH 1149/1436] cxgb4vf: Few more link management changes. CR4_QSFP 10G Speed technology should be 10000baseKR_Full And also report available FEC modes. Signed-off-by: Vishal Kulkarni Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 84be3313fede..3300b69a42b3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -1408,7 +1408,7 @@ static void fw_caps_to_lmm(enum fw_port_type port_type, case FW_PORT_TYPE_CR4_QSFP: SET_LMM(FIBRE); FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full); - FW_CAPS_TO_LMM(SPEED_10G, 10000baseSR_Full); + FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full); FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full); FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full); FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full); @@ -1419,6 +1419,13 @@ static void fw_caps_to_lmm(enum fw_port_type port_type, break; } + if (fw_caps & FW_PORT_CAP32_FEC_V(FW_PORT_CAP32_FEC_M)) { + FW_CAPS_TO_LMM(FEC_RS, FEC_RS); + FW_CAPS_TO_LMM(FEC_BASER_RS, FEC_BASER); + } else { + SET_LMM(FEC_NONE); + } + FW_CAPS_TO_LMM(ANEG, Autoneg); FW_CAPS_TO_LMM(802_3_PAUSE, Pause); FW_CAPS_TO_LMM(802_3_ASM_DIR, Asym_Pause); From 8d6ea932856c7087ce8c3d0e79494b7d5386f962 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Feb 2019 11:23:04 +0300 Subject: [PATCH 1150/1436] net: dsa: bcm_sf2: potential array overflow in bcm_sf2_sw_suspend() The value of ->num_ports comes from bcm_sf2_sw_probe() and it is less than or equal to DSA_MAX_PORTS. The ds->ports[] array is used inside the dsa_is_user_port() and dsa_is_cpu_port() functions. The ds->ports[] array is allocated in dsa_switch_alloc() and it has ds->num_ports elements so this leads to a static checker warning about a potential out of bounds read. Fixes: 8cfa94984c9c ("net: dsa: bcm_sf2: add suspend/resume callbacks") Signed-off-by: Dan Carpenter Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 361fbde76654..17ec32b0a1cc 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -690,7 +690,7 @@ static int bcm_sf2_sw_suspend(struct dsa_switch *ds) * port, the other ones have already been disabled during * bcm_sf2_sw_setup */ - for (port = 0; port < DSA_MAX_PORTS; port++) { + for (port = 0; port < ds->num_ports; port++) { if (dsa_is_user_port(ds, port) || dsa_is_cpu_port(ds, port)) bcm_sf2_port_disable(ds, port, NULL); } From 4446eb8dbeb223031fb652dd1e58e59d3d4b4aba Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Feb 2019 11:56:50 +0300 Subject: [PATCH 1151/1436] lib: objagg: Fix an error code in objagg_hints_get() We need to set the error code on this path otherwise we return ERR_PTR(0) which would result in a NULL dereference in the caller. Fixes: 9069a3817d82 ("lib: objagg: implement optimization hints assembly and use hints for object creation") Signed-off-by: Dan Carpenter Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- lib/objagg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/objagg.c b/lib/objagg.c index 781f41c3c47d..d552ec9c60ed 100644 --- a/lib/objagg.c +++ b/lib/objagg.c @@ -968,8 +968,10 @@ struct objagg_hints *objagg_hints_get(struct objagg *objagg, if (err) goto err_fillup_hints; - if (WARN_ON(objagg_hints->node_count != objagg->obj_count)) + if (WARN_ON(objagg_hints->node_count != objagg->obj_count)) { + err = -EINVAL; goto err_node_count_check; + } return objagg_hints; From 951d3d6fcd72f10b50e2a1182fb533dc2c9da0e5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Feb 2019 11:58:20 +0300 Subject: [PATCH 1152/1436] test_objagg: Test the correct variable There is a typo here. We intended to check "objagg2" but we instead test "objagg" which is not an error pointer. Fixes: 9069a3817d82 ("lib: objagg: implement optimization hints assembly and use hints for object creation") Signed-off-by: Dan Carpenter Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- lib/test_objagg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/test_objagg.c b/lib/test_objagg.c index 3744573b6365..3dd45777b13c 100644 --- a/lib/test_objagg.c +++ b/lib/test_objagg.c @@ -952,8 +952,8 @@ static int test_hints_case(const struct hints_case *hints_case) } objagg2 = objagg_create(&delta_ops, hints, &world2); - if (IS_ERR(objagg)) - return PTR_ERR(objagg); + if (IS_ERR(objagg2)) + return PTR_ERR(objagg2); for (i = 0; i < hints_case->key_ids_count; i++) { objagg_obj = world_obj_get(&world2, objagg2, From e7c2e3b570442d2a50d43ba0da951329ed0c3c19 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Feb 2019 11:59:31 +0300 Subject: [PATCH 1153/1436] test_objagg: Uninitialized variable in error handling We need to set the error message on this path otherwise some of the callers, such as test_hints_case(), print from an uninitialized pointer. We had a similar bug earlier and set "errmsg" to NULL in the caller, test_delta_action_item(). That code is no longer required so I have removed it. Fixes: 9069a3817d82 ("lib: objagg: implement optimization hints assembly and use hints for object creation") Signed-off-by: Dan Carpenter Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- lib/test_objagg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/test_objagg.c b/lib/test_objagg.c index 3dd45777b13c..72c1abfa154d 100644 --- a/lib/test_objagg.c +++ b/lib/test_objagg.c @@ -745,8 +745,10 @@ static int check_expect_stats(struct objagg *objagg, int err; stats = objagg_stats_get(objagg); - if (IS_ERR(stats)) + if (IS_ERR(stats)) { + *errmsg = "objagg_stats_get() failed."; return PTR_ERR(stats); + } err = __check_expect_stats(stats, expect_stats, errmsg); objagg_stats_put(stats); return err; @@ -786,7 +788,6 @@ static int test_delta_action_item(struct world *world, if (err) goto errout; - errmsg = NULL; err = check_expect_stats(objagg, &action_item->expect_stats, &errmsg); if (err) { pr_err("Key %u: Stats: %s\n", action_item->key_id, errmsg); From d517ee7ca8ff8dfe2b200fd7b3087f00ede5f56f Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 13 Feb 2019 11:28:44 +0000 Subject: [PATCH 1154/1436] mlxsw: spectrum: Move QSFP EEPROM definitions to common location Move QSFP EEPROM definitions to common location from the spectrum driver in order to make them available for other mlxsw modules. They are common for all kind of chips and have relation to SFF specifications 8024, 8436, 8472, 8636, rather than to chip type. Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 32 +++++++++- .../net/ethernet/mellanox/mlxsw/spectrum.c | 62 +++++++------------ 2 files changed, 52 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 227720ce3982..1190a6a501a5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -8055,13 +8055,41 @@ MLXSW_ITEM32(reg, mcia, device_address, 0x04, 0, 16); */ MLXSW_ITEM32(reg, mcia, size, 0x08, 0, 16); -#define MLXSW_SP_REG_MCIA_EEPROM_SIZE 48 +#define MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH 256 +#define MLXSW_REG_MCIA_EEPROM_SIZE 48 +#define MLXSW_REG_MCIA_I2C_ADDR_LOW 0x50 +#define MLXSW_REG_MCIA_I2C_ADDR_HIGH 0x51 +#define MLXSW_REG_MCIA_PAGE0_LO_OFF 0xa0 +#define MLXSW_REG_MCIA_TH_ITEM_SIZE 2 +#define MLXSW_REG_MCIA_TH_PAGE_NUM 3 +#define MLXSW_REG_MCIA_PAGE0_LO 0 +#define MLXSW_REG_MCIA_TH_PAGE_OFF 0x80 + +enum mlxsw_reg_mcia_eeprom_module_info_rev_id { + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_UNSPC = 0x00, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8436 = 0x01, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8636 = 0x03, +}; + +enum mlxsw_reg_mcia_eeprom_module_info_id { + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP = 0x03, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP = 0x0C, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS = 0x0D, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28 = 0x11, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD = 0x18, +}; + +enum mlxsw_reg_mcia_eeprom_module_info { + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE, +}; /* reg_mcia_eeprom * Bytes to read/write. * Access: RW */ -MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_SP_REG_MCIA_EEPROM_SIZE); +MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_REG_MCIA_EEPROM_SIZE); static inline void mlxsw_reg_mcia_pack(char *payload, u8 module, u8 lock, u8 page_number, u16 device_addr, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 9686d3822b92..abac923a8d04 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2723,23 +2723,23 @@ static int mlxsw_sp_query_module_eeprom(struct mlxsw_sp_port *mlxsw_sp_port, unsigned int *p_read_size) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char eeprom_tmp[MLXSW_SP_REG_MCIA_EEPROM_SIZE]; + char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE]; char mcia_pl[MLXSW_REG_MCIA_LEN]; u16 i2c_addr; int status; int err; - size = min_t(u16, size, MLXSW_SP_REG_MCIA_EEPROM_SIZE); + size = min_t(u16, size, MLXSW_REG_MCIA_EEPROM_SIZE); - if (offset < MLXSW_SP_EEPROM_PAGE_LENGTH && - offset + size > MLXSW_SP_EEPROM_PAGE_LENGTH) + if (offset < MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH && + offset + size > MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) /* Cross pages read, read until offset 256 in low page */ - size = MLXSW_SP_EEPROM_PAGE_LENGTH - offset; + size = MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH - offset; - i2c_addr = MLXSW_SP_I2C_ADDR_LOW; - if (offset >= MLXSW_SP_EEPROM_PAGE_LENGTH) { - i2c_addr = MLXSW_SP_I2C_ADDR_HIGH; - offset -= MLXSW_SP_EEPROM_PAGE_LENGTH; + i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_LOW; + if (offset >= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) { + i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_HIGH; + offset -= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH; } mlxsw_reg_mcia_pack(mcia_pl, mlxsw_sp_port->mapping.module, @@ -2760,55 +2760,37 @@ static int mlxsw_sp_query_module_eeprom(struct mlxsw_sp_port *mlxsw_sp_port, return 0; } -enum mlxsw_sp_eeprom_module_info_rev_id { - MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_UNSPC = 0x00, - MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8436 = 0x01, - MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8636 = 0x03, -}; - -enum mlxsw_sp_eeprom_module_info_id { - MLXSW_SP_EEPROM_MODULE_INFO_ID_SFP = 0x03, - MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP = 0x0C, - MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP_PLUS = 0x0D, - MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28 = 0x11, -}; - -enum mlxsw_sp_eeprom_module_info { - MLXSW_SP_EEPROM_MODULE_INFO_ID, - MLXSW_SP_EEPROM_MODULE_INFO_REV_ID, - MLXSW_SP_EEPROM_MODULE_INFO_SIZE, -}; - static int mlxsw_sp_get_module_info(struct net_device *netdev, struct ethtool_modinfo *modinfo) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); - u8 module_info[MLXSW_SP_EEPROM_MODULE_INFO_SIZE]; + u8 module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE]; + u16 offset = MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE; u8 module_rev_id, module_id; unsigned int read_size; int err; - err = mlxsw_sp_query_module_eeprom(mlxsw_sp_port, 0, - MLXSW_SP_EEPROM_MODULE_INFO_SIZE, + err = mlxsw_sp_query_module_eeprom(mlxsw_sp_port, 0, offset, module_info, &read_size); if (err) return err; - if (read_size < MLXSW_SP_EEPROM_MODULE_INFO_SIZE) + if (read_size < offset) return -EIO; - module_rev_id = module_info[MLXSW_SP_EEPROM_MODULE_INFO_REV_ID]; - module_id = module_info[MLXSW_SP_EEPROM_MODULE_INFO_ID]; + module_rev_id = module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID]; + module_id = module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID]; switch (module_id) { - case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP: + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP: modinfo->type = ETH_MODULE_SFF_8436; modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; break; - case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP_PLUS: - case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28: - if (module_id == MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28 || - module_rev_id >= MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8636) { + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS: /* fall-through */ + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28: + if (module_id == MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28 || + module_rev_id >= + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8636) { modinfo->type = ETH_MODULE_SFF_8636; modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; } else { @@ -2816,7 +2798,7 @@ static int mlxsw_sp_get_module_info(struct net_device *netdev, modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; } break; - case MLXSW_SP_EEPROM_MODULE_INFO_ID_SFP: + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP: modinfo->type = ETH_MODULE_SFF_8472; modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; break; From 5f28ef71a5ce7f3995c11e765683202fb10c1fbd Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 13 Feb 2019 11:28:45 +0000 Subject: [PATCH 1155/1436] mlxsw: reg: Add Management Temperature Bulk Register Add MTBR (Management Temperature Bulk Register), which is used for port temperature reading in a bulk mode. Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 75 +++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 1190a6a501a5..13856f40b8a7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -8001,6 +8001,80 @@ static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp, mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name); } +/* MTBR - Management Temperature Bulk Register + * ------------------------------------------- + * This register is used for bulk temperature reading. + */ +#define MLXSW_REG_MTBR_ID 0x900F +#define MLXSW_REG_MTBR_BASE_LEN 0x10 /* base length, without records */ +#define MLXSW_REG_MTBR_REC_LEN 0x04 /* record length */ +#define MLXSW_REG_MTBR_REC_MAX_COUNT 47 /* firmware limitation */ +#define MLXSW_REG_MTBR_LEN (MLXSW_REG_MTBR_BASE_LEN + \ + MLXSW_REG_MTBR_REC_LEN * \ + MLXSW_REG_MTBR_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(mtbr, MLXSW_REG_MTBR_ID, MLXSW_REG_MTBR_LEN); + +/* reg_mtbr_base_sensor_index + * Base sensors index to access (0 - ASIC sensor, 1-63 - ambient sensors, + * 64-127 are mapped to the SFP+/QSFP modules sequentially). + * Access: Index + */ +MLXSW_ITEM32(reg, mtbr, base_sensor_index, 0x00, 0, 7); + +/* reg_mtbr_num_rec + * Request: Number of records to read + * Response: Number of records read + * See above description for more details. + * Range 1..255 + * Access: RW + */ +MLXSW_ITEM32(reg, mtbr, num_rec, 0x04, 0, 8); + +/* reg_mtbr_rec_max_temp + * The highest measured temperature from the sensor. + * When the bit mte is cleared, the field max_temperature is reserved. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtbr, rec_max_temp, MLXSW_REG_MTBR_BASE_LEN, 16, + 16, MLXSW_REG_MTBR_REC_LEN, 0x00, false); + +/* reg_mtbr_rec_temp + * Temperature reading from the sensor. Reading is in 0..125 Celsius + * degrees units. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtbr, rec_temp, MLXSW_REG_MTBR_BASE_LEN, 0, 16, + MLXSW_REG_MTBR_REC_LEN, 0x00, false); + +static inline void mlxsw_reg_mtbr_pack(char *payload, u8 base_sensor_index, + u8 num_rec) +{ + MLXSW_REG_ZERO(mtbr, payload); + mlxsw_reg_mtbr_base_sensor_index_set(payload, base_sensor_index); + mlxsw_reg_mtbr_num_rec_set(payload, num_rec); +} + +/* Error codes from temperatute reading */ +enum mlxsw_reg_mtbr_temp_status { + MLXSW_REG_MTBR_NO_CONN = 0x8000, + MLXSW_REG_MTBR_NO_TEMP_SENS = 0x8001, + MLXSW_REG_MTBR_INDEX_NA = 0x8002, + MLXSW_REG_MTBR_BAD_SENS_INFO = 0x8003, +}; + +/* Base index for reading modules temperature */ +#define MLXSW_REG_MTBR_BASE_MODULE_INDEX 64 + +static inline void mlxsw_reg_mtbr_temp_unpack(char *payload, int rec_ind, + u16 *p_temp, u16 *p_max_temp) +{ + if (p_temp) + *p_temp = mlxsw_reg_mtbr_rec_temp_get(payload, rec_ind); + if (p_max_temp) + *p_max_temp = mlxsw_reg_mtbr_rec_max_temp_get(payload, rec_ind); +} + /* MCIA - Management Cable Info Access * ----------------------------------- * MCIA register is used to access the SFP+ and QSFP connector's EPROM. @@ -9779,6 +9853,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mfsl), MLXSW_REG(mtcap), MLXSW_REG(mtmp), + MLXSW_REG(mtbr), MLXSW_REG(mcia), MLXSW_REG(mpat), MLXSW_REG(mpar), From 3760c2b99e20ee16cd328aca890ac0aca3c72ac5 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 13 Feb 2019 11:28:46 +0000 Subject: [PATCH 1156/1436] mlxsw: reg: Add Fan Out of Range Event Register Add FORE (Fan Out of Range Event Register), which is used for fan fault reading. Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 30 +++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 13856f40b8a7..cbd0193ec3f6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -7875,6 +7875,35 @@ static inline void mlxsw_reg_mfsl_unpack(char *payload, u8 tacho, *p_tach_max = mlxsw_reg_mfsl_tach_max_get(payload); } +/* FORE - Fan Out of Range Event Register + * -------------------------------------- + * This register reports the status of the controlled fans compared to the + * range defined by the MFSL register. + */ +#define MLXSW_REG_FORE_ID 0x9007 +#define MLXSW_REG_FORE_LEN 0x0C + +MLXSW_REG_DEFINE(fore, MLXSW_REG_FORE_ID, MLXSW_REG_FORE_LEN); + +/* fan_under_limit + * Fan speed is below the low limit defined in MFSL register. Each bit relates + * to a single tachometer and indicates the specific tachometer reading is + * below the threshold. + * Access: RO + */ +MLXSW_ITEM32(reg, fore, fan_under_limit, 0x00, 16, 10); + +static inline void mlxsw_reg_fore_unpack(char *payload, u8 tacho, + bool *fault) +{ + u16 limit; + + if (fault) { + limit = mlxsw_reg_fore_fan_under_limit_get(payload); + *fault = limit & BIT(tacho); + } +} + /* MTCAP - Management Temperature Capabilities * ------------------------------------------- * This register exposes the capabilities of the device and @@ -9851,6 +9880,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mfsc), MLXSW_REG(mfsm), MLXSW_REG(mfsl), + MLXSW_REG(fore), MLXSW_REG(mtcap), MLXSW_REG(mtmp), MLXSW_REG(mtbr), From d93c19a1d95c90b6ded1dc3c8ccfe0e46e220af8 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 13 Feb 2019 11:28:47 +0000 Subject: [PATCH 1157/1436] mlxsw: core: Add API for QSFP module temperature thresholds reading Add new API to read QSFP module's temperature thresholds - warning and critical. New internal API reads the temperature thresholds from the modules, which are equipped with the thermal sensor. These thresholds will be exposed via hwmon subsystem. Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/Makefile | 2 +- .../net/ethernet/mellanox/mlxsw/core_env.c | 117 ++++++++++++++++++ .../net/ethernet/mellanox/mlxsw/core_env.h | 10 ++ 3 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mellanox/mlxsw/core_env.c create mode 100644 drivers/net/ethernet/mellanox/mlxsw/core_env.h diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index bbf45f10c208..a01d15546e37 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_MLXSW_CORE) += mlxsw_core.o mlxsw_core-objs := core.o core_acl_flex_keys.o \ - core_acl_flex_actions.o + core_acl_flex_actions.o core_env.o mlxsw_core-$(CONFIG_MLXSW_CORE_HWMON) += core_hwmon.o mlxsw_core-$(CONFIG_MLXSW_CORE_THERMAL) += core_thermal.o obj-$(CONFIG_MLXSW_PCI) += mlxsw_pci.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c new file mode 100644 index 000000000000..160d6cd164f4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#include +#include + +#include "core.h" +#include "core_env.h" +#include "item.h" +#include "reg.h" + +static int mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id, + bool *qsfp) +{ + char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE]; + char mcia_pl[MLXSW_REG_MCIA_LEN]; + u8 ident; + int err; + + mlxsw_reg_mcia_pack(mcia_pl, id, 0, MLXSW_REG_MCIA_PAGE0_LO_OFF, 0, 1, + MLXSW_REG_MCIA_I2C_ADDR_LOW); + err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl); + if (err) + return err; + mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp); + ident = eeprom_tmp[0]; + switch (ident) { + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP: + *qsfp = false; + break; + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP: /* fall-through */ + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS: /* fall-through */ + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28: /* fall-through */ + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD: + *qsfp = true; + break; + default: + return -EINVAL; + } + + return 0; +} + +int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, + int off, int *temp) +{ + char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE]; + union { + u8 buf[MLXSW_REG_MCIA_TH_ITEM_SIZE]; + u16 temp; + } temp_thresh; + char mcia_pl[MLXSW_REG_MCIA_LEN] = {0}; + char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0}; + u16 module_temp; + bool qsfp; + int err; + + mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, + 1); + err = mlxsw_reg_query(core, MLXSW_REG(mtbr), mtbr_pl); + if (err) + return err; + + /* Don't read temperature thresholds for module with no valid info. */ + mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &module_temp, NULL); + switch (module_temp) { + case MLXSW_REG_MTBR_BAD_SENS_INFO: /* fall-through */ + case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ + case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ + case MLXSW_REG_MTBR_INDEX_NA: + *temp = 0; + return 0; + default: + /* Do not consider thresholds for zero temperature. */ + if (!MLXSW_REG_MTMP_TEMP_TO_MC(module_temp)) { + *temp = 0; + return 0; + } + break; + } + + /* Read Free Side Device Temperature Thresholds from page 03h + * (MSB at lower byte address). + * Bytes: + * 128-129 - Temp High Alarm (SFP_TEMP_HIGH_ALARM); + * 130-131 - Temp Low Alarm (SFP_TEMP_LOW_ALARM); + * 132-133 - Temp High Warning (SFP_TEMP_HIGH_WARN); + * 134-135 - Temp Low Warning (SFP_TEMP_LOW_WARN); + */ + + /* Validate module identifier value. */ + err = mlxsw_env_validate_cable_ident(core, module, &qsfp); + if (err) + return err; + + if (qsfp) + mlxsw_reg_mcia_pack(mcia_pl, module, 0, + MLXSW_REG_MCIA_TH_PAGE_NUM, + MLXSW_REG_MCIA_TH_PAGE_OFF + off, + MLXSW_REG_MCIA_TH_ITEM_SIZE, + MLXSW_REG_MCIA_I2C_ADDR_LOW); + else + mlxsw_reg_mcia_pack(mcia_pl, module, 0, + MLXSW_REG_MCIA_PAGE0_LO, + off, MLXSW_REG_MCIA_TH_ITEM_SIZE, + MLXSW_REG_MCIA_I2C_ADDR_HIGH); + + err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl); + if (err) + return err; + + mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp); + memcpy(temp_thresh.buf, eeprom_tmp, MLXSW_REG_MCIA_TH_ITEM_SIZE); + *temp = temp_thresh.temp * 1000; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.h b/drivers/net/ethernet/mellanox/mlxsw/core_env.h new file mode 100644 index 000000000000..6dbdf63f3ee1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_CORE_ENV_H +#define _MLXSW_CORE_ENV_H + +int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, + int off, int *temp); + +#endif From 3dcfe179570550c888ecfcd3d443d4de90a2d855 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 13 Feb 2019 11:28:48 +0000 Subject: [PATCH 1158/1436] mlxsw: core: Set different thermal polling time based on bus frequency capability Add low frequency bus capability in order to allow core functionality separation based on bus type. Driver could run over PCIe, which is considered as high frequency bus or I2C, which is considered as low frequency bus. In the last case time setting, for example, for thermal polling interval, should be increased. Use different thermal monitoring based on bus type. For I2C bus time is set to 20 seconds, while for PCIe 1 second polling interval is used. Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.h | 1 + drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 10 ++++++++-- drivers/net/ethernet/mellanox/mlxsw/i2c.c | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index c8e16a305969..cd0c6aa0dff9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -344,6 +344,7 @@ struct mlxsw_bus_info { struct mlxsw_fw_rev fw_rev; u8 vsd[MLXSW_CMD_BOARDINFO_VSD_LEN]; u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN]; + u8 low_frequency; }; struct mlxsw_hwmon; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 61f897b40f82..b1f9b459766c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -13,6 +13,7 @@ #include "core.h" #define MLXSW_THERMAL_POLL_INT 1000 /* ms */ +#define MLXSW_THERMAL_SLOW_POLL_INT 20000 /* ms */ #define MLXSW_THERMAL_MAX_TEMP 110000 /* 110C */ #define MLXSW_THERMAL_MAX_STATE 10 #define MLXSW_THERMAL_MAX_DUTY 255 @@ -76,6 +77,7 @@ struct mlxsw_thermal { struct mlxsw_core *core; const struct mlxsw_bus_info *bus_info; struct thermal_zone_device *tzdev; + int polling_delay; struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX]; u8 cooling_levels[MLXSW_THERMAL_MAX_STATE + 1]; struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; @@ -172,7 +174,7 @@ static int mlxsw_thermal_set_mode(struct thermal_zone_device *tzdev, mutex_lock(&tzdev->lock); if (mode == THERMAL_DEVICE_ENABLED) - tzdev->polling_delay = MLXSW_THERMAL_POLL_INT; + tzdev->polling_delay = thermal->polling_delay; else tzdev->polling_delay = 0; @@ -423,13 +425,17 @@ int mlxsw_thermal_init(struct mlxsw_core *core, thermal->cooling_levels[i] = max(MLXSW_THERMAL_SPEED_MIN_LEVEL, i); + thermal->polling_delay = bus_info->low_frequency ? + MLXSW_THERMAL_SLOW_POLL_INT : + MLXSW_THERMAL_POLL_INT; + thermal->tzdev = thermal_zone_device_register("mlxsw", MLXSW_THERMAL_NUM_TRIPS, MLXSW_THERMAL_TRIP_MASK, thermal, &mlxsw_thermal_ops, NULL, 0, - MLXSW_THERMAL_POLL_INT); + thermal->polling_delay); if (IS_ERR(thermal->tzdev)) { err = PTR_ERR(thermal->tzdev); dev_err(dev, "Failed to register thermal zone\n"); diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 798bd5aca384..a87ca6b6580d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -503,6 +503,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client, mlxsw_i2c->bus_info.device_kind = id->name; mlxsw_i2c->bus_info.device_name = client->name; mlxsw_i2c->bus_info.dev = &client->dev; + mlxsw_i2c->bus_info.low_frequency = true; mlxsw_i2c->dev = &client->dev; err = mlxsw_core_bus_device_register(&mlxsw_i2c->bus_info, From 69115b7d01c4715faf7803fea54017d76f195155 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 13 Feb 2019 11:28:50 +0000 Subject: [PATCH 1159/1436] mlxsw: core: Modify thermal zone definition Modify thermal zone trip points setting for better alignment with system thermal requirement. Add hysteresis thresholds for thermal trips in order to avoid throttling around thermal trip point. If hysteresis temperature is not considered, PWM can have side effect of flip up/down on thermal trip point boundary. Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/core_thermal.c | 37 ++++++++++++++----- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index b1f9b459766c..dbf9a845084a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -15,6 +15,7 @@ #define MLXSW_THERMAL_POLL_INT 1000 /* ms */ #define MLXSW_THERMAL_SLOW_POLL_INT 20000 /* ms */ #define MLXSW_THERMAL_MAX_TEMP 110000 /* 110C */ +#define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ #define MLXSW_THERMAL_MAX_STATE 10 #define MLXSW_THERMAL_MAX_DUTY 255 /* Minimum and maximum fan allowed speed in percent: from 20% to 100%. Values @@ -30,6 +31,7 @@ struct mlxsw_thermal_trip { int type; int temp; + int hyst; int min_state; int max_state; }; @@ -38,25 +40,22 @@ static const struct mlxsw_thermal_trip default_thermal_trips[] = { { /* In range - 0-40% PWM */ .type = THERMAL_TRIP_ACTIVE, .temp = 75000, + .hyst = MLXSW_THERMAL_HYSTERESIS_TEMP, .min_state = 0, .max_state = (4 * MLXSW_THERMAL_MAX_STATE) / 10, }, - { /* High - 40-100% PWM */ + { + /* In range - 40-100% PWM */ .type = THERMAL_TRIP_ACTIVE, .temp = 80000, + .hyst = MLXSW_THERMAL_HYSTERESIS_TEMP, .min_state = (4 * MLXSW_THERMAL_MAX_STATE) / 10, .max_state = MLXSW_THERMAL_MAX_STATE, }, - { - /* Very high - 100% PWM */ - .type = THERMAL_TRIP_ACTIVE, - .temp = 85000, - .min_state = MLXSW_THERMAL_MAX_STATE, - .max_state = MLXSW_THERMAL_MAX_STATE, - }, { /* Warning */ .type = THERMAL_TRIP_HOT, - .temp = 105000, + .temp = 85000, + .hyst = MLXSW_THERMAL_HYSTERESIS_TEMP, .min_state = MLXSW_THERMAL_MAX_STATE, .max_state = MLXSW_THERMAL_MAX_STATE, }, @@ -246,6 +245,24 @@ static int mlxsw_thermal_set_trip_temp(struct thermal_zone_device *tzdev, return 0; } +static int mlxsw_thermal_get_trip_hyst(struct thermal_zone_device *tzdev, + int trip, int *p_hyst) +{ + struct mlxsw_thermal *thermal = tzdev->devdata; + + *p_hyst = thermal->trips[trip].hyst; + return 0; +} + +static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev, + int trip, int hyst) +{ + struct mlxsw_thermal *thermal = tzdev->devdata; + + thermal->trips[trip].hyst = hyst; + return 0; +} + static struct thermal_zone_device_ops mlxsw_thermal_ops = { .bind = mlxsw_thermal_bind, .unbind = mlxsw_thermal_unbind, @@ -255,6 +272,8 @@ static struct thermal_zone_device_ops mlxsw_thermal_ops = { .get_trip_type = mlxsw_thermal_get_trip_type, .get_trip_temp = mlxsw_thermal_get_trip_temp, .set_trip_temp = mlxsw_thermal_set_trip_temp, + .get_trip_hyst = mlxsw_thermal_get_trip_hyst, + .set_trip_hyst = mlxsw_thermal_set_trip_hyst, }; static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev, From 41e760841d262a88c39b16808e16d57e7a13cc66 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 13 Feb 2019 11:28:51 +0000 Subject: [PATCH 1160/1436] mlxsw: core: Replace thermal temperature trips with defines Replace thermal hardcoded temperature trip values with defines. Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/core_thermal.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index dbf9a845084a..b9fb33417a82 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -14,7 +14,10 @@ #define MLXSW_THERMAL_POLL_INT 1000 /* ms */ #define MLXSW_THERMAL_SLOW_POLL_INT 20000 /* ms */ -#define MLXSW_THERMAL_MAX_TEMP 110000 /* 110C */ +#define MLXSW_THERMAL_ASIC_TEMP_NORM 75000 /* 75C */ +#define MLXSW_THERMAL_ASIC_TEMP_HIGH 85000 /* 85C */ +#define MLXSW_THERMAL_ASIC_TEMP_HOT 105000 /* 105C */ +#define MLXSW_THERMAL_ASIC_TEMP_CRIT 110000 /* 110C */ #define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ #define MLXSW_THERMAL_MAX_STATE 10 #define MLXSW_THERMAL_MAX_DUTY 255 @@ -39,7 +42,7 @@ struct mlxsw_thermal_trip { static const struct mlxsw_thermal_trip default_thermal_trips[] = { { /* In range - 0-40% PWM */ .type = THERMAL_TRIP_ACTIVE, - .temp = 75000, + .temp = MLXSW_THERMAL_ASIC_TEMP_NORM, .hyst = MLXSW_THERMAL_HYSTERESIS_TEMP, .min_state = 0, .max_state = (4 * MLXSW_THERMAL_MAX_STATE) / 10, @@ -47,21 +50,21 @@ static const struct mlxsw_thermal_trip default_thermal_trips[] = { { /* In range - 40-100% PWM */ .type = THERMAL_TRIP_ACTIVE, - .temp = 80000, + .temp = MLXSW_THERMAL_ASIC_TEMP_HIGH, .hyst = MLXSW_THERMAL_HYSTERESIS_TEMP, .min_state = (4 * MLXSW_THERMAL_MAX_STATE) / 10, .max_state = MLXSW_THERMAL_MAX_STATE, }, { /* Warning */ .type = THERMAL_TRIP_HOT, - .temp = 85000, + .temp = MLXSW_THERMAL_ASIC_TEMP_HOT, .hyst = MLXSW_THERMAL_HYSTERESIS_TEMP, .min_state = MLXSW_THERMAL_MAX_STATE, .max_state = MLXSW_THERMAL_MAX_STATE, }, { /* Critical - soft poweroff */ .type = THERMAL_TRIP_CRITICAL, - .temp = MLXSW_THERMAL_MAX_TEMP, + .temp = MLXSW_THERMAL_ASIC_TEMP_CRIT, .min_state = MLXSW_THERMAL_MAX_STATE, .max_state = MLXSW_THERMAL_MAX_STATE, } @@ -238,7 +241,7 @@ static int mlxsw_thermal_set_trip_temp(struct thermal_zone_device *tzdev, struct mlxsw_thermal *thermal = tzdev->devdata; if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS || - temp > MLXSW_THERMAL_MAX_TEMP) + temp > MLXSW_THERMAL_ASIC_TEMP_CRIT) return -EINVAL; thermal->trips[trip].temp = temp; From 2ee1165118140055bc43d22c6311acc60caf0890 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 13 Feb 2019 11:28:52 +0000 Subject: [PATCH 1161/1436] mlxsw: core: Rename cooling device Rename cooling device from "Fan" to "mlxsw_fan". Name "Fan" is too common name, and such name is misleading, while it's interpreted by user. For example name "Fan" could be used by ACPI. Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index b9fb33417a82..8b71706f7a27 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -431,8 +431,9 @@ int mlxsw_thermal_init(struct mlxsw_core *core, if (pwm_active & BIT(i)) { struct thermal_cooling_device *cdev; - cdev = thermal_cooling_device_register("Fan", thermal, - &mlxsw_cooling_ops); + cdev = thermal_cooling_device_register("mlxsw_fan", + thermal, + &mlxsw_cooling_ops); if (IS_ERR(cdev)) { err = PTR_ERR(cdev); dev_err(dev, "Failed to register cooling device\n"); From 2c6a33cd334baada7d9c1faeb0623407f37273f0 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 13 Feb 2019 11:28:53 +0000 Subject: [PATCH 1162/1436] mlxsw: core: Extend hwmon interface with fan fault attribute Add new fan hwmon attribute for exposing fan faults (fault indication is read from Fan Out of Range Event Register). Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/core_hwmon.c | 34 ++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index e04e8162aa14..40e13093b62d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -121,6 +121,27 @@ static ssize_t mlxsw_hwmon_fan_rpm_show(struct device *dev, return sprintf(buf, "%u\n", mlxsw_reg_mfsm_rpm_get(mfsm_pl)); } +static ssize_t mlxsw_hwmon_fan_fault_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char fore_pl[MLXSW_REG_FORE_LEN]; + bool fault; + int err; + + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(fore), fore_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query fan\n"); + return err; + } + mlxsw_reg_fore_unpack(fore_pl, mlwsw_hwmon_attr->type_index, &fault); + + return sprintf(buf, "%u\n", fault); +} + static ssize_t mlxsw_hwmon_pwm_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -172,6 +193,7 @@ enum mlxsw_hwmon_attr_type { MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, MLXSW_HWMON_ATTR_TYPE_TEMP_RST, MLXSW_HWMON_ATTR_TYPE_FAN_RPM, + MLXSW_HWMON_ATTR_TYPE_FAN_FAULT, MLXSW_HWMON_ATTR_TYPE_PWM, }; @@ -209,6 +231,12 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), "fan%u_input", num + 1); break; + case MLXSW_HWMON_ATTR_TYPE_FAN_FAULT: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_fan_fault_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "fan%u_fault", num + 1); + break; case MLXSW_HWMON_ATTR_TYPE_PWM: mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_pwm_show; mlxsw_hwmon_attr->dev_attr.store = mlxsw_hwmon_pwm_store; @@ -280,10 +308,14 @@ static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon) mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active); num = 0; for (type_index = 0; type_index < MLXSW_MFCR_TACHOS_MAX; type_index++) { - if (tacho_active & BIT(type_index)) + if (tacho_active & BIT(type_index)) { mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_FAN_RPM, + type_index, num); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_FAN_FAULT, type_index, num++); + } } num = 0; for (type_index = 0; type_index < MLXSW_MFCR_PWMS_MAX; type_index++) { From 5c42eaa07bd040d965b7a41b9596be5741016d1a Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 13 Feb 2019 11:28:54 +0000 Subject: [PATCH 1163/1436] mlxsw: core: Extend hwmon interface with QSFP module temperature attributes Add new attributes to hwmon object for exposing QSFP module temperature input, fault indication, critical and emergency thresholds. Temperature input and fault indication are read from Management Temperature Bulk Register. Temperature thresholds are read from Management Cable Info Access Register. Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/core_hwmon.c | 218 +++++++++++++++++- 1 file changed, 215 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 40e13093b62d..504f6bba8874 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -7,8 +7,10 @@ #include #include #include +#include #include "core.h" +#include "core_env.h" #define MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT 127 #define MLXSW_HWMON_ATTR_COUNT (MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT * 4 + \ @@ -30,6 +32,7 @@ struct mlxsw_hwmon { struct attribute *attrs[MLXSW_HWMON_ATTR_COUNT + 1]; struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT]; unsigned int attrs_count; + u8 sensor_count; }; static ssize_t mlxsw_hwmon_temp_show(struct device *dev, @@ -188,6 +191,136 @@ static ssize_t mlxsw_hwmon_pwm_store(struct device *dev, return len; } +static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0}; + u16 temp; + u8 module; + int err; + + module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, + 1); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl); + if (err) { + dev_err(dev, "Failed to query module temprature sensor\n"); + return err; + } + + mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL); + /* Update status and temperature cache. */ + switch (temp) { + case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ + case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ + case MLXSW_REG_MTBR_INDEX_NA: + temp = 0; + break; + case MLXSW_REG_MTBR_BAD_SENS_INFO: + /* Untrusted cable is connected. Reading temperature from its + * sensor is faulty. + */ + temp = 0; + break; + default: + temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + break; + } + + return sprintf(buf, "%u\n", temp); +} + +static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0}; + u8 module, fault; + u16 temp; + int err; + + module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, + 1); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl); + if (err) { + dev_err(dev, "Failed to query module temprature sensor\n"); + return err; + } + + mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL); + + /* Update status and temperature cache. */ + switch (temp) { + case MLXSW_REG_MTBR_BAD_SENS_INFO: + /* Untrusted cable is connected. Reading temperature from its + * sensor is faulty. + */ + fault = 1; + break; + case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ + case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ + case MLXSW_REG_MTBR_INDEX_NA: + default: + fault = 0; + break; + } + + return sprintf(buf, "%u\n", fault); +} + +static ssize_t +mlxsw_hwmon_module_temp_critical_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + int temp; + u8 module; + int err; + + module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module, + SFP_TEMP_HIGH_WARN, &temp); + if (err) { + dev_err(dev, "Failed to query module temprature thresholds\n"); + return err; + } + + return sprintf(buf, "%u\n", temp); +} + +static ssize_t +mlxsw_hwmon_module_temp_emergency_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + u8 module; + int temp; + int err; + + module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module, + SFP_TEMP_HIGH_ALARM, &temp); + if (err) { + dev_err(dev, "Failed to query module temprature thresholds\n"); + return err; + } + + return sprintf(buf, "%u\n", temp); +} + enum mlxsw_hwmon_attr_type { MLXSW_HWMON_ATTR_TYPE_TEMP, MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, @@ -195,6 +328,10 @@ enum mlxsw_hwmon_attr_type { MLXSW_HWMON_ATTR_TYPE_FAN_RPM, MLXSW_HWMON_ATTR_TYPE_FAN_FAULT, MLXSW_HWMON_ATTR_TYPE_PWM, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG, }; static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, @@ -244,6 +381,33 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), "pwm%u", num + 1); break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_module_temp_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_input", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_module_temp_fault_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_fault", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_module_temp_critical_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_crit", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_module_temp_emergency_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_emergency", num + 1); + break; default: WARN_ON(1); } @@ -261,7 +425,6 @@ static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon) { char mtcap_pl[MLXSW_REG_MTCAP_LEN] = {0}; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - u8 sensor_count; int i; int err; @@ -270,8 +433,8 @@ static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon) dev_err(mlxsw_hwmon->bus_info->dev, "Failed to get number of temp sensors\n"); return err; } - sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl); - for (i = 0; i < sensor_count; i++) { + mlxsw_hwmon->sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl); + for (i = 0; i < mlxsw_hwmon->sensor_count; i++) { mlxsw_reg_mtmp_pack(mtmp_pl, i, true, true); err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); @@ -327,6 +490,50 @@ static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon) return 0; } +static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon) +{ + unsigned int module_count = mlxsw_core_max_ports(mlxsw_hwmon->core); + char pmlp_pl[MLXSW_REG_PMLP_LEN] = {0}; + int i, index; + u8 width; + int err; + + /* Add extra attributes for module temperature. Sensor index is + * assigned to sensor_count value, while all indexed before + * sensor_count are already utilized by the sensors connected through + * mtmp register by mlxsw_hwmon_temp_init(). + */ + index = mlxsw_hwmon->sensor_count; + for (i = 1; i < module_count; i++) { + mlxsw_reg_pmlp_pack(pmlp_pl, i); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(pmlp), + pmlp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to read module index %d\n", + i); + return err; + } + width = mlxsw_reg_pmlp_width_get(pmlp_pl); + if (!width) + continue; + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, index, + index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT, + index, index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, + index, index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG, + index, index); + index++; + } + + return 0; +} + int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info, struct mlxsw_hwmon **p_hwmon) @@ -349,6 +556,10 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, if (err) goto err_fans_init; + err = mlxsw_hwmon_module_init(mlxsw_hwmon); + if (err) + goto err_temp_module_init; + mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group; mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs; @@ -365,6 +576,7 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, return 0; err_hwmon_register: +err_temp_module_init: err_fans_init: err_temp_init: kfree(mlxsw_hwmon); From a53779de6a0ea6f25d3d5b60567ffc3671f1da7b Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 13 Feb 2019 11:28:55 +0000 Subject: [PATCH 1164/1436] mlxsw: core: Add QSFP module temperature label attribute to hwmon Add label attribute to hwmon object for exposing QSFP module's temperature sensor name. Modules are labeled as "front panel xxx". The label is used by utilities such as "sensors": front panel 001: +0.0C (crit = +0.0C, emerg = +0.0C) .. front panel 020: +31.0C (crit = +70.0C, emerg = +80.0C) .. front panel 056: +41.0C (crit = +70.0C, emerg = +80.0C) Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/core_hwmon.c | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 504f6bba8874..f1ada4cdbd6b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -321,6 +321,18 @@ mlxsw_hwmon_module_temp_emergency_show(struct device *dev, return sprintf(buf, "%u\n", temp); } +static ssize_t +mlxsw_hwmon_module_temp_label_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + + return sprintf(buf, "front panel %03u\n", + mlwsw_hwmon_attr->type_index); +} + enum mlxsw_hwmon_attr_type { MLXSW_HWMON_ATTR_TYPE_TEMP, MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, @@ -332,6 +344,7 @@ enum mlxsw_hwmon_attr_type { MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL, }; static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, @@ -408,6 +421,13 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), "temp%u_emergency", num + 1); break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_module_temp_label_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_label", num + 1); + break; default: WARN_ON(1); } @@ -528,6 +548,9 @@ static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon) mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG, index, index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL, + index, index); index++; } From 97cd342ae41fd524906e5acb0b760fd8331f3b1b Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 13 Feb 2019 11:28:56 +0000 Subject: [PATCH 1165/1436] mlxsw: core: Allow thermal zone binding to an external cooling device Allow thermal zone binding to an external cooling device from the cooling devices white list. It provides support for Mellanox next generation systems on which cooling device logic is not controlled through the switch registers. Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 8b71706f7a27..821fef2e2230 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -31,6 +31,11 @@ #define MLXSW_THERMAL_SPEED_MAX (MLXSW_THERMAL_MAX_STATE * 2) #define MLXSW_THERMAL_SPEED_MIN_LEVEL 2 /* 20% */ +/* External cooling devices, allowed for binding to mlxsw thermal zones. */ +static char * const mlxsw_thermal_external_allowed_cdev[] = { + "mlxreg_fan", +}; + struct mlxsw_thermal_trip { int type; int temp; @@ -107,6 +112,13 @@ static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal, if (thermal->cdevs[i] == cdev) return i; + /* Allow mlxsw thermal zone binding to an external cooling device */ + for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev); i++) { + if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i], + sizeof(cdev->type))) + return 0; + } + return -ENODEV; } From 8cd8f0ce0d6aafe661cb3d6781c8b82bc696c04d Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Thu, 14 Feb 2019 17:27:08 +0530 Subject: [PATCH 1166/1436] x86/CPU: Add Icelake model number Add the CPUID model number of Icelake (ICL) mobile processors to the Intel family list. Icelake U/Y series uses model number 0x7E. Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Borislav Petkov Cc: Andy Shevchenko Cc: Dave Hansen Cc: "David E. Box" Cc: dvhart@infradead.org Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Cc: platform-driver-x86@vger.kernel.org Cc: Qiuxu Zhuo Cc: Srinivas Pandruvada Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190214115712.19642-2-rajneesh.bhardwaj@linux.intel.com --- arch/x86/include/asm/intel-family.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index d9a9993af882..9f15384c504a 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -52,6 +52,8 @@ #define INTEL_FAM6_CANNONLAKE_MOBILE 0x66 +#define INTEL_FAM6_ICELAKE_MOBILE 0x7E + /* "Small Core" Processors (Atom) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ From c112b5f50232a257056903040c66d97efb536889 Mon Sep 17 00:00:00 2001 From: Luwei Kang Date: Thu, 14 Feb 2019 10:48:07 +0800 Subject: [PATCH 1167/1436] KVM: x86: Recompute PID.ON when clearing PID.SN Some Posted-Interrupts from passthrough devices may be lost or overwritten when the vCPU is in runnable state. The SN (Suppress Notification) of PID (Posted Interrupt Descriptor) will be set when the vCPU is preempted (vCPU in KVM_MP_STATE_RUNNABLE state but not running on physical CPU). If a posted interrupt comes at this time, the irq remapping facility will set the bit of PIR (Posted Interrupt Requests) but not ON (Outstanding Notification). Then, the interrupt will not be seen by KVM, which always expects PID.ON=1 if PID.PIR=1 as documented in the Intel processor SDM but not in the VT-d specification. To fix this, restore the invariant after PID.SN is cleared. Signed-off-by: Luwei Kang Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 26 +++++++++++--------------- arch/x86/kvm/vmx/vmx.h | 12 ++++++------ arch/x86/kvm/x86.c | 2 +- 3 files changed, 18 insertions(+), 22 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 95d618045001..b9a27fc7c0be 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1193,21 +1193,6 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) return; - /* - * First handle the simple case where no cmpxchg is necessary; just - * allow posting non-urgent interrupts. - * - * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change - * PI.NDST: pi_post_block will do it for us and the wakeup_handler - * expects the VCPU to be on the blocked_vcpu_list that matches - * PI.NDST. - */ - if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || - vcpu->cpu == cpu) { - pi_clear_sn(pi_desc); - return; - } - /* The full case. */ do { old.control = new.control = pi_desc->control; @@ -1222,6 +1207,17 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) new.sn = 0; } while (cmpxchg64(&pi_desc->control, old.control, new.control) != old.control); + + /* + * Clear SN before reading the bitmap. The VT-d firmware + * writes the bitmap and reads SN atomically (5.2.3 in the + * spec), so it doesn't really have a memory barrier that + * pairs with this, but we cannot do that and we need one. + */ + smp_mb__after_atomic(); + + if (!bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS)) + pi_set_on(pi_desc); } /* diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 99328954c2fc..0ac0a64c7790 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -337,18 +337,18 @@ static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); } -static inline void pi_clear_sn(struct pi_desc *pi_desc) -{ - return clear_bit(POSTED_INTR_SN, - (unsigned long *)&pi_desc->control); -} - static inline void pi_set_sn(struct pi_desc *pi_desc) { return set_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); } +static inline void pi_set_on(struct pi_desc *pi_desc) +{ + set_bit(POSTED_INTR_ON, + (unsigned long *)&pi_desc->control); +} + static inline void pi_clear_on(struct pi_desc *pi_desc) { clear_bit(POSTED_INTR_ON, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e67ecf25e690..941f932373d0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7801,7 +7801,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) * 1) We should set ->mode before checking ->requests. Please see * the comment in kvm_vcpu_exiting_guest_mode(). * - * 2) For APICv, we should set ->mode before checking PIR.ON. This + * 2) For APICv, we should set ->mode before checking PID.ON. This * pairs with the memory barrier implicit in pi_test_and_set_on * (see vmx_deliver_posted_interrupt). * From 98ae70cc476e833332a2c6bb72f941a25f0de226 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 14 Feb 2019 12:08:58 +0800 Subject: [PATCH 1168/1436] kvm: vmx: Fix entry number check for add_atomic_switch_msr() Commit ca83b4a7f2d068da79a0 ("x86/KVM/VMX: Add find_msr() helper function") introduces the helper function find_msr(), which returns -ENOENT when not find the msr in vmx->msr_autoload.guest/host. Correct checking contion of no more available entry in vmx->msr_autoload. Fixes: ca83b4a7f2d0 ("x86/KVM/VMX: Add find_msr() helper function") Cc: stable@vger.kernel.org Signed-off-by: Xiaoyao Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b9a27fc7c0be..30a6bcd735ec 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -863,7 +863,8 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, if (!entry_only) j = find_msr(&m->host, msr); - if (i == NR_AUTOLOAD_MSRS || j == NR_AUTOLOAD_MSRS) { + if ((i < 0 && m->guest.nr == NR_AUTOLOAD_MSRS) || + (j < 0 && m->host.nr == NR_AUTOLOAD_MSRS)) { printk_once(KERN_WARNING "Not enough msr switch entries. " "Can't add msr %x\n", msr); return; From 415d39427317d1532d7f097471cc8aca5f229b69 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 11 Feb 2019 10:49:48 -0800 Subject: [PATCH 1169/1436] bonding: check slave set command firstly This patch is a little improvement. If user use the command shown as below, we should print the info [1] instead of [2]. The eth0 exists actually, and it may confuse user. $ echo "eth0" > /sys/class/net/bond4/bonding/slaves [1] "bond4: no command found in slaves file - use +ifname or -ifname" [2] "write error: No such device" Signed-off-by: Tonghao Zhang Signed-off-by: David S. Miller --- drivers/net/bonding/bond_options.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 4d5d01cb8141..da1fc17295d9 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1375,6 +1375,7 @@ static int bond_option_slaves_set(struct bonding *bond, sscanf(newval->string, "%16s", command); /* IFNAMSIZ*/ ifname = command + 1; if ((strlen(command) <= 1) || + (command[0] != '+' && command[0] != '-') || !dev_valid_name(ifname)) goto err_no_cmd; @@ -1398,6 +1399,7 @@ static int bond_option_slaves_set(struct bonding *bond, break; default: + /* should not run here. */ goto err_no_cmd; } From fdd41ec21e150113e8fe169783db9b2030f5491b Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 12 Feb 2019 14:23:58 -0600 Subject: [PATCH 1170/1436] devlink: Return right error code in case of errors for region read devlink_nl_cmd_region_read_dumpit() misses to return right error code on most error conditions. Return the right error code on such errors. Fixes: 4e54795a27f5 ("devlink: Add support for region snapshot read command") Signed-off-by: Parav Pandit Signed-off-by: David S. Miller --- net/core/devlink.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 283c3ed9f25e..312084f76a0a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3646,26 +3646,34 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, goto out_free; devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs); - if (IS_ERR(devlink)) + if (IS_ERR(devlink)) { + err = PTR_ERR(devlink); goto out_free; + } mutex_lock(&devlink_mutex); mutex_lock(&devlink->lock); if (!attrs[DEVLINK_ATTR_REGION_NAME] || - !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) + !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) { + err = -EINVAL; goto out_unlock; + } region_name = nla_data(attrs[DEVLINK_ATTR_REGION_NAME]); region = devlink_region_get_by_name(devlink, region_name); - if (!region) + if (!region) { + err = -EINVAL; goto out_unlock; + } hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI, DEVLINK_CMD_REGION_READ); - if (!hdr) + if (!hdr) { + err = -EMSGSIZE; goto out_unlock; + } err = devlink_nl_put_handle(skb, devlink); if (err) @@ -3676,8 +3684,10 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, goto nla_put_failure; chunks_attr = nla_nest_start(skb, DEVLINK_ATTR_REGION_CHUNKS); - if (!chunks_attr) + if (!chunks_attr) { + err = -EMSGSIZE; goto nla_put_failure; + } if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] && attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) { @@ -3700,8 +3710,10 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, goto nla_put_failure; /* Check if there was any progress done to prevent infinite loop */ - if (ret_offset == start_offset) + if (ret_offset == start_offset) { + err = -EINVAL; goto nla_put_failure; + } *((u64 *)&cb->args[0]) = ret_offset; @@ -3720,7 +3732,7 @@ out_unlock: mutex_unlock(&devlink_mutex); out_free: kfree(attrs); - return 0; + return err; } struct devlink_info_req { From dac7c08f988bbc7208fc7403a5d75d4bb791139c Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 12 Feb 2019 14:24:08 -0600 Subject: [PATCH 1171/1436] devlink: Fix list access without lock while reading region While finding the devlink device during region reading, devlink device list is accessed and devlink device is returned without holding a lock. This could lead to use-after-free accesses. While at it, add lockdep assert to ensure that all future callers hold the lock when calling devlink_get_from_attrs(). Fixes: 4e54795a27f5 ("devlink: Add support for region snapshot read command") Signed-off-by: Parav Pandit Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 312084f76a0a..1d7502a5a651 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -116,6 +116,8 @@ static struct devlink *devlink_get_from_attrs(struct net *net, busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]); devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]); + lockdep_assert_held(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { if (strcmp(devlink->dev->bus->name, busname) == 0 && strcmp(dev_name(devlink->dev), devname) == 0 && @@ -3645,13 +3647,13 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, if (err) goto out_free; + mutex_lock(&devlink_mutex); devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs); if (IS_ERR(devlink)) { err = PTR_ERR(devlink); - goto out_free; + goto out_dev; } - mutex_lock(&devlink_mutex); mutex_lock(&devlink->lock); if (!attrs[DEVLINK_ATTR_REGION_NAME] || @@ -3729,6 +3731,7 @@ nla_put_failure: genlmsg_cancel(skb, hdr); out_unlock: mutex_unlock(&devlink->lock); +out_dev: mutex_unlock(&devlink_mutex); out_free: kfree(attrs); From ca5e9aba753ed15d173c7a7b88e4d402b7ca8121 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Tue, 12 Feb 2019 19:26:03 -0800 Subject: [PATCH 1172/1436] time: Add time_types.h sys/time.h is the mandated include for many time related defines. However, linux/time.h overlaps sys/time.h significantly and this makes including both from userspace or one from the other impossible. This also means that userspace can get away with including sys/time.h whenever it needs linux/time.h and this is what's been happening in the user world usually. But, we have new data types that we plan to use in the uapi time interfaces also defined in the linux/time.h. But, we are unable to use these types when sys/time.h is included. Hence, move the new types to a new header, time_types.h. We intend to eventually have all the uapi defines that the kernel uses defined in this header. Note that the plan is to replace uapi interfaces with timeval to use __kernel_old_timeval, timespec to use __kernel_old_timespec etc. Reported-by: Ran Rozenstein Fixes: 9718475e6908 ("socket: Add SO_TIMESTAMPING_NEW") Signed-off-by: Deepa Dinamani Signed-off-by: David S. Miller --- include/uapi/linux/time.h | 36 +---------------------------- include/uapi/linux/time_types.h | 40 +++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 35 deletions(-) create mode 100644 include/uapi/linux/time_types.h diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h index b8ad1b86b942..958932effc5e 100644 --- a/include/uapi/linux/time.h +++ b/include/uapi/linux/time.h @@ -3,7 +3,7 @@ #define _UAPI_LINUX_TIME_H #include - +#include #ifndef _STRUCT_TIMESPEC #define _STRUCT_TIMESPEC @@ -23,7 +23,6 @@ struct timezone { int tz_dsttime; /* type of dst correction */ }; - /* * Names of the interval timers, and structure * defining a timer setting: @@ -42,39 +41,6 @@ struct itimerval { struct timeval it_value; /* current value */ }; -#ifndef __kernel_timespec -struct __kernel_timespec { - __kernel_time64_t tv_sec; /* seconds */ - long long tv_nsec; /* nanoseconds */ -}; -#endif - -#ifndef __kernel_itimerspec -struct __kernel_itimerspec { - struct __kernel_timespec it_interval; /* timer period */ - struct __kernel_timespec it_value; /* timer expiration */ -}; -#endif - -/* - * legacy timeval structure, only embedded in structures that - * traditionally used 'timeval' to pass time intervals (not absolute - * times). Do not add new users. If user space fails to compile - * here, this is probably because it is not y2038 safe and needs to - * be changed to use another interface. - */ -#ifndef __kernel_old_timeval -struct __kernel_old_timeval { - __kernel_long_t tv_sec; - __kernel_long_t tv_usec; -}; -#endif - -struct __kernel_sock_timeval { - __s64 tv_sec; - __s64 tv_usec; -}; - /* * The IDs of the various system clocks (for POSIX.1b interval timers): */ diff --git a/include/uapi/linux/time_types.h b/include/uapi/linux/time_types.h new file mode 100644 index 000000000000..459070c61d47 --- /dev/null +++ b/include/uapi/linux/time_types.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_TIME_TYPES_H +#define _UAPI_LINUX_TIME_TYPES_H + +#include + +#ifndef __kernel_timespec +struct __kernel_timespec { + __kernel_time64_t tv_sec; /* seconds */ + long long tv_nsec; /* nanoseconds */ +}; +#endif + +#ifndef __kernel_itimerspec +struct __kernel_itimerspec { + struct __kernel_timespec it_interval; /* timer period */ + struct __kernel_timespec it_value; /* timer expiration */ +}; +#endif + +/* + * legacy timeval structure, only embedded in structures that + * traditionally used 'timeval' to pass time intervals (not absolute + * times). Do not add new users. If user space fails to compile + * here, this is probably because it is not y2038 safe and needs to + * be changed to use another interface. + */ +#ifndef __kernel_old_timeval +struct __kernel_old_timeval { + __kernel_long_t tv_sec; + __kernel_long_t tv_usec; +}; +#endif + +struct __kernel_sock_timeval { + __s64 tv_sec; + __s64 tv_usec; +}; + +#endif /* _UAPI_LINUX_TIME_TYPES_H */ From 460a2db0273efe15488a3e4b88da2678eb403178 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Tue, 12 Feb 2019 19:26:04 -0800 Subject: [PATCH 1173/1436] errqueue.h: Include time_types.h Now that we have a separate header for struct __kernel_timespec, include it directly without relying on userspace to do it. Reported-by: Ran Rozenstein Signed-off-by: Deepa Dinamani Signed-off-by: David S. Miller --- include/uapi/linux/errqueue.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index d955b9e32288..28491dac074b 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -3,6 +3,7 @@ #define _UAPI_LINUX_ERRQUEUE_H #include +#include struct sock_extended_err { __u32 ee_errno; From 76cba8fd9c7d8dd2d18c8bee581c4176383441c6 Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Wed, 13 Feb 2019 23:12:02 +0800 Subject: [PATCH 1174/1436] net: dlink: sundance: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in intr_handler() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Remove a redundant blank line in intr_handler(). Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/dlink/sundance.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index 1a27176381fb..4a37a69764ce 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -1193,7 +1193,6 @@ static irqreturn_t intr_handler(int irq, void *dev_instance) int handled = 0; int i; - do { int intr_status = ioread16(ioaddr + IntrStatus); iowrite16(intr_status, ioaddr + IntrStatus); @@ -1286,7 +1285,7 @@ static irqreturn_t intr_handler(int irq, void *dev_instance) dma_unmap_single(&np->pci_dev->dev, le32_to_cpu(np->tx_ring[entry].frag[0].addr), skb->len, DMA_TO_DEVICE); - dev_kfree_skb_irq (np->tx_skbuff[entry]); + dev_consume_skb_irq(np->tx_skbuff[entry]); np->tx_skbuff[entry] = NULL; np->tx_ring[entry].frag[0].addr = 0; np->tx_ring[entry].frag[0].length = 0; @@ -1305,7 +1304,7 @@ static irqreturn_t intr_handler(int irq, void *dev_instance) dma_unmap_single(&np->pci_dev->dev, le32_to_cpu(np->tx_ring[entry].frag[0].addr), skb->len, DMA_TO_DEVICE); - dev_kfree_skb_irq (np->tx_skbuff[entry]); + dev_consume_skb_irq(np->tx_skbuff[entry]); np->tx_skbuff[entry] = NULL; np->tx_ring[entry].frag[0].addr = 0; np->tx_ring[entry].frag[0].length = 0; From fc67ade130382e21a43eca81c5ed098d8a82eedd Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Wed, 13 Feb 2019 23:14:54 +0800 Subject: [PATCH 1175/1436] net: amd: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/lance.c | 2 +- drivers/net/ethernet/amd/ni65.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c index b56d84c7df46..f90b454b1642 100644 --- a/drivers/net/ethernet/amd/lance.c +++ b/drivers/net/ethernet/amd/lance.c @@ -1084,7 +1084,7 @@ static irqreturn_t lance_interrupt(int irq, void *dev_id) /* We must free the original skb if it's not a data-only copy in the bounce buffer. */ if (lp->tx_skbuff[entry]) { - dev_kfree_skb_irq(lp->tx_skbuff[entry]); + dev_consume_skb_irq(lp->tx_skbuff[entry]); lp->tx_skbuff[entry] = NULL; } dirty_tx++; diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c index 8931ce6bab7b..87ff5d6d1b22 100644 --- a/drivers/net/ethernet/amd/ni65.c +++ b/drivers/net/ethernet/amd/ni65.c @@ -1028,7 +1028,7 @@ static void ni65_xmit_intr(struct net_device *dev,int csr0) #ifdef XMT_VIA_SKB if(p->tmd_skb[p->tmdlast]) { - dev_kfree_skb_irq(p->tmd_skb[p->tmdlast]); + dev_consume_skb_irq(p->tmd_skb[p->tmdlast]); p->tmd_skb[p->tmdlast] = NULL; } #endif From 5fbc136b487852b5ec06855497efb688b7aa8b4e Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Wed, 13 Feb 2019 23:15:43 +0800 Subject: [PATCH 1176/1436] net: myri10ge: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in myri10ge_tx_done() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 19ce0e605096..e0340f778d8f 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -1408,7 +1408,7 @@ myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index) if (skb) { ss->stats.tx_bytes += skb->len; ss->stats.tx_packets++; - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); if (len) pci_unmap_single(pdev, dma_unmap_addr(&tx->info[idx], From d1a096c2c72d605ddcbb53a5f71f96cf9c6d384c Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Wed, 13 Feb 2019 23:17:06 +0800 Subject: [PATCH 1177/1436] net: sgi: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/sgi/ioc3-eth.c | 2 +- drivers/net/ethernet/sgi/meth.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index 3140999642ba..358e66b81926 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -666,7 +666,7 @@ static inline void ioc3_tx(struct net_device *dev) packets++; skb = ip->tx_skbs[o_entry]; bytes += skb->len; - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); ip->tx_skbs[o_entry] = NULL; o_entry = (o_entry + 1) & 127; /* Next */ diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c index 67954a9e3675..f425ab528224 100644 --- a/drivers/net/ethernet/sgi/meth.c +++ b/drivers/net/ethernet/sgi/meth.c @@ -525,7 +525,7 @@ static void meth_tx_cleanup(struct net_device* dev, unsigned long int_status) DPRINTK("RPTR points us here, but packet not done?\n"); break; } - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); priv->tx_skbs[priv->tx_read] = NULL; priv->tx_ring[priv->tx_read].header.raw = 0; priv->tx_read = (priv->tx_read+1)&(TX_RING_ENTRIES-1); From 105cfb064bcd60fdd74ac29a02f09efbc6a303d8 Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Wed, 13 Feb 2019 23:18:09 +0800 Subject: [PATCH 1178/1436] net: micrel: ks8695net: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in ks8695_tx_irq() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8695net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/micrel/ks8695net.c b/drivers/net/ethernet/micrel/ks8695net.c index b881f5d4a7f9..6006d47707cb 100644 --- a/drivers/net/ethernet/micrel/ks8695net.c +++ b/drivers/net/ethernet/micrel/ks8695net.c @@ -391,7 +391,7 @@ ks8695_tx_irq(int irq, void *dev_id) ksp->tx_buffers[buff_n].dma_ptr, ksp->tx_buffers[buff_n].length, DMA_TO_DEVICE); - dev_kfree_skb_irq(ksp->tx_buffers[buff_n].skb); + dev_consume_skb_irq(ksp->tx_buffers[buff_n].skb); ksp->tx_buffers[buff_n].skb = NULL; ksp->tx_ring_used--; } From 380ab7e3d59d790e05d39f3c1d4a6039ef0e3901 Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Wed, 13 Feb 2019 23:19:14 +0800 Subject: [PATCH 1179/1436] net: natsemi: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/natsemi.c | 2 +- drivers/net/ethernet/natsemi/ns83820.c | 2 +- drivers/net/ethernet/natsemi/sonic.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index b9a1a9f999ea..1a2634cbbb69 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -2173,7 +2173,7 @@ static void netdev_tx_done(struct net_device *dev) np->tx_skbuff[entry]->len, PCI_DMA_TODEVICE); /* Free the original skb. */ - dev_kfree_skb_irq(np->tx_skbuff[entry]); + dev_consume_skb_irq(np->tx_skbuff[entry]); np->tx_skbuff[entry] = NULL; } if (netif_queue_stopped(dev) && diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 958fced4dacf..000009e18ff8 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -1003,7 +1003,7 @@ static void do_tx_done(struct net_device *ndev) addr, len, PCI_DMA_TODEVICE); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); atomic_dec(&dev->nr_tx_skbs); } else pci_unmap_page(dev->pci_dev, diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index c805dcbebd02..aaec00912ea0 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -328,7 +328,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) } /* We must free the original skb */ - dev_kfree_skb_irq(lp->tx_skb[entry]); + dev_consume_skb_irq(lp->tx_skb[entry]); lp->tx_skb[entry] = NULL; /* and unmap DMA buffer */ dma_unmap_single(lp->device, lp->tx_laddr[entry], lp->tx_len[entry], DMA_TO_DEVICE); From ae6279ecb786d9132da0315700580dc23f939986 Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Wed, 13 Feb 2019 23:21:02 +0800 Subject: [PATCH 1180/1436] net: nuvoton: w90p910_ether: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in w90p910_ether_start_xmit() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/nuvoton/w90p910_ether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/nuvoton/w90p910_ether.c b/drivers/net/ethernet/nuvoton/w90p910_ether.c index c662c6f5bee3..67bf02b0763a 100644 --- a/drivers/net/ethernet/nuvoton/w90p910_ether.c +++ b/drivers/net/ethernet/nuvoton/w90p910_ether.c @@ -630,7 +630,7 @@ static int w90p910_ether_start_xmit(struct sk_buff *skb, struct net_device *dev) if (!(w90p910_send_frame(dev, skb->data, skb->len))) { ether->skb = skb; - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); return 0; } return -EAGAIN; From a37d45b6fa1113f96200abdfdb0d8c454c97a30e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 13 Feb 2019 08:55:02 -0800 Subject: [PATCH 1181/1436] net: ipvlan_l3s: fix kconfig dependency warning Fix the kconfig warning in IPVLAN_L3S when neither INET nor IPV6 is enabled: WARNING: unmet direct dependencies detected for NET_L3_MASTER_DEV Depends on [n]: NET [=y] && (INET [=n] || IPV6 [=n]) Selected by [y]: - IPVLAN_L3S [=y] && NETDEVICES [=y] && NET_CORE [=y] && NETFILTER [=y] Signed-off-by: Randy Dunlap Cc: Mahesh Bandewar Cc: Daniel Borkmann Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- drivers/net/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 7f9727f64f55..6f561767b45e 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -147,6 +147,7 @@ config MACVTAP config IPVLAN_L3S depends on NETFILTER + depends on IPVLAN def_bool y select NET_L3_MASTER_DEV From 39c133196237335e8ee9e3694ef7921241cf6a41 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Wed, 13 Feb 2019 09:09:13 -0800 Subject: [PATCH 1182/1436] selftests: fix timestamping Makefile The clean target in the makefile conflicts with the generic kselftests lib.mk, and fails to properly remove the compiled test programs. Remove the redundant rule, the TEST_GEN_FILES will be already removed by the CLEAN macro in lib.mk. Signed-off-by: Deepa Dinamani Acked-by: Shuah Khan Signed-off-by: David S. Miller --- tools/testing/selftests/networking/timestamping/Makefile | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile index 9050eeea5f5f..1de8bd8ccf5d 100644 --- a/tools/testing/selftests/networking/timestamping/Makefile +++ b/tools/testing/selftests/networking/timestamping/Makefile @@ -9,6 +9,3 @@ all: $(TEST_PROGS) top_srcdir = ../../../../.. KSFT_KHDR_INSTALL := 1 include ../../lib.mk - -clean: - rm -fr $(TEST_GEN_FILES) From a2fc9d7e36f6d484d9be4a0a204400aaf6059544 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 13 Feb 2019 20:11:40 +0100 Subject: [PATCH 1183/1436] net: phy: don't use locking in phy_is_started Russell suggested to remove the locking from phy_is_started() because the read is atomic anyway and actually the locking may be more misleading. Fixes: 2b3e88ea6528 ("net: phy: improve phy state checking") Suggested-by: Russell King - ARM Linux admin Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 11 +++++------ include/linux/phy.h | 15 +-------------- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index ca5e0c0f018c..602816d70281 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -553,7 +553,7 @@ int phy_start_aneg(struct phy_device *phydev) if (err < 0) goto out_unlock; - if (__phy_is_started(phydev)) { + if (phy_is_started(phydev)) { if (phydev->autoneg == AUTONEG_ENABLE) { err = phy_check_link_status(phydev); } else { @@ -709,7 +709,7 @@ void phy_stop_machine(struct phy_device *phydev) cancel_delayed_work_sync(&phydev->state_queue); mutex_lock(&phydev->lock); - if (__phy_is_started(phydev)) + if (phy_is_started(phydev)) phydev->state = PHY_UP; mutex_unlock(&phydev->lock); } @@ -839,15 +839,14 @@ EXPORT_SYMBOL(phy_stop_interrupts); */ void phy_stop(struct phy_device *phydev) { - mutex_lock(&phydev->lock); - - if (!__phy_is_started(phydev)) { + if (!phy_is_started(phydev)) { WARN(1, "called from state %s\n", phy_state_to_str(phydev->state)); - mutex_unlock(&phydev->lock); return; } + mutex_lock(&phydev->lock); + if (phy_interrupt_is_valid(phydev)) phy_disable_interrupts(phydev); diff --git a/include/linux/phy.h b/include/linux/phy.h index ef20aeea10cc..127fcc9c3778 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -674,26 +674,13 @@ phy_lookup_setting(int speed, int duplex, const unsigned long *mask, size_t phy_speeds(unsigned int *speeds, size_t size, unsigned long *mask); -static inline bool __phy_is_started(struct phy_device *phydev) -{ - WARN_ON(!mutex_is_locked(&phydev->lock)); - - return phydev->state >= PHY_UP; -} - /** * phy_is_started - Convenience function to check whether PHY is started * @phydev: The phy_device struct */ static inline bool phy_is_started(struct phy_device *phydev) { - bool started; - - mutex_lock(&phydev->lock); - started = __phy_is_started(phydev); - mutex_unlock(&phydev->lock); - - return started; + return phydev->state >= PHY_UP; } void phy_resolve_aneg_linkmode(struct phy_device *phydev); From a20049071796691cf99eb6433968fc3c27632b95 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 13 Feb 2019 20:12:54 +0100 Subject: [PATCH 1184/1436] net: phy: fix potential race in the phylib state machine Russell reported the following race in the phylib state machine (quoting from his mail): if (phy_polling_mode(phydev) && phy_is_started(phydev)) phy_queue_state_machine(phydev, PHY_STATE_TIME); state = PHY_UP thread 0 thread 1 phy_disconnect() +-phy_is_started() phy_is_started() | `-phy_stop() +-phydev->state = PHY_HALTED `-phy_stop_machine() `-cancel_delayed_work_sync() phy_queue_state_machine() `-mod_delayed_work() At this point, the phydev->state_queue() has been added back onto the system workqueue despite phy_stop_machine() having been called and cancel_delayed_work_sync() called on it. Fix this by protecting the complete operation in thread 0. Fixes: 2b3e88ea6528 ("net: phy: improve phy state checking") Reported-by: Russell King - ARM Linux admin Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 602816d70281..c5675df5fc6f 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -985,8 +985,10 @@ void phy_state_machine(struct work_struct *work) * state machine would be pointless and possibly error prone when * called from phy_disconnect() synchronously. */ + mutex_lock(&phydev->lock); if (phy_polling_mode(phydev) && phy_is_started(phydev)) phy_queue_state_machine(phydev, PHY_STATE_TIME); + mutex_unlock(&phydev->lock); } /** From 2c2ade81741c66082f8211f0b96cf509cc4c0218 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 13 Feb 2019 22:45:59 +0100 Subject: [PATCH 1185/1436] mm: page_alloc: fix ref bias in page_frag_alloc() for 1-byte allocs The basic idea behind ->pagecnt_bias is: If we pre-allocate the maximum number of references that we might need to create in the fastpath later, the bump-allocation fastpath only has to modify the non-atomic bias value that tracks the number of extra references we hold instead of the atomic refcount. The maximum number of allocations we can serve (under the assumption that no allocation is made with size 0) is nc->size, so that's the bias used. However, even when all memory in the allocation has been given away, a reference to the page is still held; and in the `offset < 0` slowpath, the page may be reused if everyone else has dropped their references. This means that the necessary number of references is actually `nc->size+1`. Luckily, from a quick grep, it looks like the only path that can call page_frag_alloc(fragsz=1) is TAP with the IFF_NAPI_FRAGS flag, which requires CAP_NET_ADMIN in the init namespace and is only intended to be used for kernel testing and fuzzing. To test for this issue, put a `WARN_ON(page_ref_count(page) == 0)` in the `offset < 0` path, below the virt_to_page() call, and then repeatedly call writev() on a TAP device with IFF_TAP|IFF_NO_PI|IFF_NAPI_FRAGS|IFF_NAPI, with a vector consisting of 15 elements containing 1 byte each. Signed-off-by: Jann Horn Signed-off-by: David S. Miller --- mm/page_alloc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 35fdde041f5c..46285d28e43b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4675,11 +4675,11 @@ refill: /* Even if we own the page, we do not use atomic_set(). * This would break get_page_unless_zero() users. */ - page_ref_add(page, size - 1); + page_ref_add(page, size); /* reset page count bias and offset to start of new frag */ nc->pfmemalloc = page_is_pfmemalloc(page); - nc->pagecnt_bias = size; + nc->pagecnt_bias = size + 1; nc->offset = size; } @@ -4695,10 +4695,10 @@ refill: size = nc->size; #endif /* OK, page count is 0, we can safely set it */ - set_page_count(page, size); + set_page_count(page, size + 1); /* reset page count bias and offset to start of new frag */ - nc->pagecnt_bias = size; + nc->pagecnt_bias = size + 1; offset = size - fragsz; } From 189a10f0108a8ebaf1e2f3d8bf68aeb5ebd30109 Mon Sep 17 00:00:00 2001 From: Huang Zijiang Date: Thu, 14 Feb 2019 14:39:59 +0800 Subject: [PATCH 1186/1436] isdn:hisax: Replace dev_kfree_skb_any by dev_consume_skb_any The skb should be freed by dev_consume_skb_any() in hfcpci_fill_fifo() when bcs->tx_skb is still used. The bcs->tx_skb is be replaced by skb_dequeue(&bcs->squeue), so the original bcs->tx_skb should be consumed(not drop). Signed-off-by: Huang Zijiang Signed-off-by: David S. Miller --- drivers/isdn/hisax/hfc_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c index 81dd465afcf4..71a8312592d6 100644 --- a/drivers/isdn/hisax/hfc_pci.c +++ b/drivers/isdn/hisax/hfc_pci.c @@ -656,7 +656,7 @@ hfcpci_fill_fifo(struct BCState *bcs) schedule_event(bcs, B_ACKPENDING); } - dev_kfree_skb_any(bcs->tx_skb); + dev_consume_skb_any(bcs->tx_skb); bcs->tx_skb = skb_dequeue(&bcs->squeue); /* fetch next data */ } test_and_clear_bit(BC_FLG_BUSY, &bcs->Flag); From edc307bb7e4897df53248bd92f94221371c596ac Mon Sep 17 00:00:00 2001 From: Huang Zijiang Date: Thu, 14 Feb 2019 14:40:31 +0800 Subject: [PATCH 1187/1436] net:dl2k: Modify the code style escaping the warning modify the code style in order to removing the following warning when excute the script checkpatch.pl WARNING: space prohibited between function name and open parenthesis '(' Signed-off-by: Huang Zijiang Signed-off-by: David S. Miller --- drivers/net/ethernet/dlink/dl2k.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index d8d423f22c4f..d4ab63f07aef 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -843,9 +843,9 @@ rio_free_tx (struct net_device *dev, int irq) desc_to_dma(&np->tx_ring[entry]), skb->len, PCI_DMA_TODEVICE); if (irq) - dev_kfree_skb_irq (skb); + dev_kfree_skb_irq(skb); else - dev_kfree_skb (skb); + dev_kfree_skb(skb); np->tx_skbuff[entry] = NULL; entry = (entry + 1) % TX_RING_SIZE; From 62f2589883405081f3e0a4aae69f3fe8932348a3 Mon Sep 17 00:00:00 2001 From: Huang Zijiang Date: Thu, 14 Feb 2019 14:40:56 +0800 Subject: [PATCH 1188/1436] net:dl2k: Replace dev_kfree_skb_irq by dev_consume_skb_irq dev_consume_skb_irq() should be called when skb xmit done.It makes drop profiles more friendly. Signed-off-by: Huang Zijiang Signed-off-by: David S. Miller --- drivers/net/ethernet/dlink/dl2k.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index d4ab63f07aef..cfcdfee718b0 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -843,7 +843,7 @@ rio_free_tx (struct net_device *dev, int irq) desc_to_dma(&np->tx_ring[entry]), skb->len, PCI_DMA_TODEVICE); if (irq) - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); else dev_kfree_skb(skb); From f3e5c07002b08f0e001015fd221187efb772afae Mon Sep 17 00:00:00 2001 From: Huang Zijiang Date: Thu, 14 Feb 2019 14:41:18 +0800 Subject: [PATCH 1189/1436] net:ethernet:cadence: Replace dev_kfree_skb_any by dev_consume_skb_any The skb should be freed by dev_consume_skb_any() in macb_pad_and_fcs() when *skb is still used. The *skb is be replaced by nskb, so the original *skb should be consumed(not drop). Signed-off-by: Huang Zijiang Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 0d6288dd609a..f2915f2fe21a 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1734,7 +1734,7 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev) if (!nskb) return -ENOMEM; - dev_kfree_skb_any(*skb); + dev_consume_skb_any(*skb); *skb = nskb; } From f694be27b7c0f31e926277dbde4395d56e79f36e Mon Sep 17 00:00:00 2001 From: Huang Zijiang Date: Thu, 14 Feb 2019 14:42:13 +0800 Subject: [PATCH 1190/1436] sfc: Replace dev_kfree_skb_any by dev_consume_skb_any The skb should be freed by dev_consume_skb_any() in efx_tx_tso_fallback() when skb is still used. The skb will be replaced by segments, so the original skb should be consumed(not drop). Signed-off-by: Huang Zijiang Acked-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 22eb059086f7..06c8f282263f 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -471,7 +471,7 @@ static int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue, if (IS_ERR(segments)) return PTR_ERR(segments); - dev_kfree_skb_any(skb); + dev_consume_skb_any(skb); skb = segments; while (skb) { From c969c6e7ab8cb42b5c787c567615474fdbad9d6a Mon Sep 17 00:00:00 2001 From: Huang Zijiang Date: Thu, 14 Feb 2019 14:41:45 +0800 Subject: [PATCH 1191/1436] net: hns: Fix object reference leaks in hns_dsaf_roce_reset() The of_find_device_by_node() takes a reference to the underlying device structure, we should release that reference. Signed-off-by: Huang Zijiang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 3b9e74be5fbd..b8155f5e71b4 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -3081,6 +3081,7 @@ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset) dsaf_dev = dev_get_drvdata(&pdev->dev); if (!dsaf_dev) { dev_err(&pdev->dev, "dsaf_dev is NULL\n"); + put_device(&pdev->dev); return -ENODEV; } @@ -3088,6 +3089,7 @@ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset) if (AE_IS_VER1(dsaf_dev->dsaf_ver)) { dev_err(dsaf_dev->dev, "%s v1 chip doesn't support RoCE!\n", dsaf_dev->ae_dev.name); + put_device(&pdev->dev); return -ENODEV; } From 3bf6b57ec2ec945e5a6edf5c202a754f1e852ecd Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 14 Feb 2019 12:33:19 -0500 Subject: [PATCH 1192/1436] Revert "nfsd4: return default lease period" This reverts commit d6ebf5088f09472c1136cd506bdc27034a6763f8. I forgot that the kernel's default lease period should never be decreased! After a kernel upgrade, the kernel has no way of knowing on its own what the previous lease time was. Unless userspace tells it otherwise, it will assume the previous lease period was the same. So if we decrease this value in a kernel upgrade, we end up enforcing a grace period that's too short, and clients will fail to reclaim state in time. Symptoms may include EIO and log messages like "NFS: nfs4_reclaim_open_state: Lock reclaim failed!" There was no real justification for the lease period decrease anyway. Reported-by: Donald Buczek Fixes: d6ebf5088f09 "nfsd4: return default lease period" Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index b33f9785b756..72a7681f4046 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1239,8 +1239,8 @@ static __net_init int nfsd_init_net(struct net *net) retval = nfsd_idmap_init(net); if (retval) goto out_idmap_error; - nn->nfsd4_lease = 45; /* default lease time */ - nn->nfsd4_grace = 45; + nn->nfsd4_lease = 90; /* default lease time */ + nn->nfsd4_grace = 90; nn->somebody_reclaimed = false; nn->clverifier_counter = prandom_u32(); nn->clientid_counter = prandom_u32(); From d429005fdf2c9da19429c8b343eea61bd55b7c00 Mon Sep 17 00:00:00 2001 From: Vishal Kulkarni Date: Thu, 14 Feb 2019 18:19:15 +0530 Subject: [PATCH 1193/1436] cxgb4/cxgb4vf: Add support for SGE doorbell queue timer T6 introduced a Timer Mechanism in SGE called the SGE Doorbell Queue Timer. With this we can now configure TX Queues to get CIDX Updates when: Time(CIDX == PIDX) >= Timer Previously we rely on TX Queue Status Page updates by hardware for DMA completions. This will make Hardware/Firmware actually deliver the CIDX Updates as Ingress Queue messages with commensurate Interrupts. So we now have a new RX Path component for processing CIDX Updates and reclaiming TX Descriptors faster. Original work by: Casey Leedom Signed-off-by: Vishal Kulkarni Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 10 +- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 19 +- drivers/net/ethernet/chelsio/cxgb4/sge.c | 328 ++++++++++++++---- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 41 +++ drivers/net/ethernet/chelsio/cxgb4/t4_hw.h | 1 + .../net/ethernet/chelsio/cxgb4/t4_values.h | 6 + drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 24 +- drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 27 +- 8 files changed, 378 insertions(+), 78 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 568715a13b5c..68d0d453eb06 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -617,6 +617,7 @@ enum { /* adapter flags */ FW_OFLD_CONN = (1 << 9), ROOT_NO_RELAXED_ORDERING = (1 << 10), SHUTTING_DOWN = (1 << 11), + SGE_DBQ_TIMER = (1 << 12), }; enum { @@ -756,6 +757,8 @@ struct sge_eth_txq { /* state for an SGE Ethernet Tx queue */ #ifdef CONFIG_CHELSIO_T4_DCB u8 dcb_prio; /* DCB Priority bound to queue */ #endif + u8 dbqt; /* SGE Doorbell Queue Timer in use */ + unsigned int dbqtimerix; /* SGE Doorbell Queue Timer Index */ unsigned long tso; /* # of TSO requests */ unsigned long tx_cso; /* # of Tx checksum offloads */ unsigned long vlan_ins; /* # of Tx VLAN insertions */ @@ -816,6 +819,7 @@ struct sge { u16 nqs_per_uld; /* # of Rx queues per ULD */ u16 timer_val[SGE_NTIMERS]; u8 counter_val[SGE_NCOUNTERS]; + u16 dbqtimer_val[SGE_NDBQTIMERS]; u32 fl_pg_order; /* large page allocation size */ u32 stat_len; /* length of status page at ring end */ u32 pktshift; /* padding between CPL & packet data */ @@ -1402,7 +1406,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, rspq_flush_handler_t flush_handler, int cong); int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, struct net_device *dev, struct netdev_queue *netdevq, - unsigned int iqid); + unsigned int iqid, u8 dbqt); int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, struct net_device *dev, unsigned int iqid, unsigned int cmplqid); @@ -1415,6 +1419,8 @@ irqreturn_t t4_sge_intr_msix(int irq, void *cookie); int t4_sge_init(struct adapter *adap); void t4_sge_start(struct adapter *adap); void t4_sge_stop(struct adapter *adap); +int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *q, + int maxreclaim); void cxgb4_set_ethtool_ops(struct net_device *netdev); int cxgb4_write_rss(const struct port_info *pi, const u16 *queues); enum cpl_tx_tnl_lso_type cxgb_encap_offload_supported(struct sk_buff *skb); @@ -1821,6 +1827,8 @@ int t4_ctrl_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, int t4_ofld_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int eqid); int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox, int ctxt_type); +int t4_read_sge_dbqtimers(struct adapter *adap, unsigned int ndbqtimers, + u16 *dbqtimers); void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl); int t4_update_port_info(struct port_info *pi); int t4_get_link_params(struct port_info *pi, unsigned int *link_okp, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index adf75d16e6d3..bdd11a617579 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -575,7 +575,7 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, struct sge_eth_txq *eq; eq = container_of(txq, struct sge_eth_txq, q); - netif_tx_wake_queue(eq->txq); + t4_sge_eth_txq_egress_update(q->adap, eq, -1); } else { struct sge_uld_txq *oq; @@ -933,10 +933,13 @@ static int setup_sge_queues(struct adapter *adap) q->rspq.idx = j; memset(&q->stats, 0, sizeof(q->stats)); } - for (j = 0; j < pi->nqsets; j++, t++) { + + q = &s->ethrxq[pi->first_qset]; + for (j = 0; j < pi->nqsets; j++, t++, q++) { err = t4_sge_alloc_eth_txq(adap, t, dev, netdev_get_tx_queue(dev, j), - s->fw_evtq.cntxt_id); + q->rspq.cntxt_id, + !!(adap->flags & SGE_DBQ_TIMER)); if (err) goto freeout; } @@ -958,7 +961,7 @@ static int setup_sge_queues(struct adapter *adap) if (!is_t4(adap->params.chip)) { err = t4_sge_alloc_eth_txq(adap, &s->ptptxq, adap->port[0], netdev_get_tx_queue(adap->port[0], 0) - , s->fw_evtq.cntxt_id); + , s->fw_evtq.cntxt_id, false); if (err) goto freeout; } @@ -4325,6 +4328,14 @@ static int adap_init0(struct adapter *adap) if (ret < 0) goto bye; + /* Grab the SGE Doorbell Queue Timer values. If successful, that + * indicates that the Firmware and Hardware support this. + */ + ret = t4_read_sge_dbqtimers(adap, ARRAY_SIZE(adap->sge.dbqtimer_val), + adap->sge.dbqtimer_val); + if (!ret) + adap->flags |= SGE_DBQ_TIMER; + if (is_bypass_device(adap->pdev->device)) adap->params.bypass = 1; diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index fc0bc6458e84..f18493fb32aa 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -80,9 +80,10 @@ * Max number of Tx descriptors we clean up at a time. Should be modest as * freeing skbs isn't cheap and it happens while holding locks. We just need * to free packets faster than they arrive, we eventually catch up and keep - * the amortized cost reasonable. Must be >= 2 * TXQ_STOP_THRES. + * the amortized cost reasonable. Must be >= 2 * TXQ_STOP_THRES. It should + * also match the CIDX Flush Threshold. */ -#define MAX_TX_RECLAIM 16 +#define MAX_TX_RECLAIM 32 /* * Max number of Rx buffers we replenish at a time. Again keep this modest, @@ -400,6 +401,39 @@ static inline int reclaimable(const struct sge_txq *q) return hw_cidx < 0 ? hw_cidx + q->size : hw_cidx; } +/** + * reclaim_completed_tx - reclaims completed TX Descriptors + * @adap: the adapter + * @q: the Tx queue to reclaim completed descriptors from + * @maxreclaim: the maximum number of TX Descriptors to reclaim or -1 + * @unmap: whether the buffers should be unmapped for DMA + * + * Reclaims Tx Descriptors that the SGE has indicated it has processed, + * and frees the associated buffers if possible. If @max == -1, then + * we'll use a defaiult maximum. Called with the TX Queue locked. + */ +static inline int reclaim_completed_tx(struct adapter *adap, struct sge_txq *q, + int maxreclaim, bool unmap) +{ + int reclaim = reclaimable(q); + + if (reclaim) { + /* + * Limit the amount of clean up work we do at a time to keep + * the Tx lock hold time O(1). + */ + if (maxreclaim < 0) + maxreclaim = MAX_TX_RECLAIM; + if (reclaim > maxreclaim) + reclaim = maxreclaim; + + free_tx_desc(adap, q, reclaim, unmap); + q->in_use -= reclaim; + } + + return reclaim; +} + /** * cxgb4_reclaim_completed_tx - reclaims completed Tx descriptors * @adap: the adapter @@ -410,22 +444,10 @@ static inline int reclaimable(const struct sge_txq *q) * and frees the associated buffers if possible. Called with the Tx * queue locked. */ -inline void cxgb4_reclaim_completed_tx(struct adapter *adap, struct sge_txq *q, - bool unmap) +void cxgb4_reclaim_completed_tx(struct adapter *adap, struct sge_txq *q, + bool unmap) { - int avail = reclaimable(q); - - if (avail) { - /* - * Limit the amount of clean up work we do at a time to keep - * the Tx lock hold time O(1). - */ - if (avail > MAX_TX_RECLAIM) - avail = MAX_TX_RECLAIM; - - free_tx_desc(adap, q, avail, unmap); - q->in_use -= avail; - } + (void)reclaim_completed_tx(adap, q, -1, unmap); } EXPORT_SYMBOL(cxgb4_reclaim_completed_tx); @@ -1287,6 +1309,44 @@ static inline void t6_fill_tnl_lso(struct sk_buff *skb, tnl_lso->EthLenOffset_Size = htonl(CPL_TX_TNL_LSO_SIZE_V(skb->len)); } +/** + * t4_sge_eth_txq_egress_update - handle Ethernet TX Queue update + * @adap: the adapter + * @eq: the Ethernet TX Queue + * @maxreclaim: the maximum number of TX Descriptors to reclaim or -1 + * + * We're typically called here to update the state of an Ethernet TX + * Queue with respect to the hardware's progress in consuming the TX + * Work Requests that we've put on that Egress Queue. This happens + * when we get Egress Queue Update messages and also prophylactically + * in regular timer-based Ethernet TX Queue maintenance. + */ +int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *eq, + int maxreclaim) +{ + struct sge_txq *q = &eq->q; + unsigned int reclaimed; + + if (!q->in_use || !__netif_tx_trylock(eq->txq)) + return 0; + + /* Reclaim pending completed TX Descriptors. */ + reclaimed = reclaim_completed_tx(adap, &eq->q, maxreclaim, true); + + /* If the TX Queue is currently stopped and there's now more than half + * the queue available, restart it. Otherwise bail out since the rest + * of what we want do here is with the possibility of shipping any + * currently buffered Coalesced TX Work Request. + */ + if (netif_tx_queue_stopped(eq->txq) && txq_avail(q) > (q->size / 2)) { + netif_tx_wake_queue(eq->txq); + eq->q.restarts++; + } + + __netif_tx_unlock(eq->txq); + return reclaimed; +} + /** * cxgb4_eth_xmit - add a packet to an Ethernet Tx queue * @skb: the packet @@ -1357,7 +1417,7 @@ out_free: dev_kfree_skb_any(skb); } skb_tx_timestamp(skb); - cxgb4_reclaim_completed_tx(adap, &q->q, true); + reclaim_completed_tx(adap, &q->q, -1, true); cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F; #ifdef CONFIG_CHELSIO_T4_FCOE @@ -1400,8 +1460,25 @@ out_free: dev_kfree_skb_any(skb); wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2)); if (unlikely(credits < ETHTXQ_STOP_THRES)) { + /* After we're done injecting the Work Request for this + * packet, we'll be below our "stop threshold" so stop the TX + * Queue now and schedule a request for an SGE Egress Queue + * Update message. The queue will get started later on when + * the firmware processes this Work Request and sends us an + * Egress Queue Status Update message indicating that space + * has opened up. + */ eth_txq_stop(q); - wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + + /* If we're using the SGE Doorbell Queue Timer facility, we + * don't need to ask the Firmware to send us Egress Queue CIDX + * Updates: the Hardware will do this automatically. And + * since we send the Ingress Queue CIDX Updates to the + * corresponding Ethernet Response Queue, we'll get them very + * quickly. + */ + if (!q->dbqt) + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; } wr = (void *)&q->q.desc[q->q.pidx]; @@ -1671,7 +1748,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb, /* Take this opportunity to reclaim any TX Descriptors whose DMA * transfers have completed. */ - cxgb4_reclaim_completed_tx(adapter, &txq->q, true); + reclaim_completed_tx(adapter, &txq->q, -1, true); /* Calculate the number of flits and TX Descriptors we're going to * need along with how many TX Descriptors will be left over after @@ -1715,7 +1792,16 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb, * has opened up. */ eth_txq_stop(txq); - wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + + /* If we're using the SGE Doorbell Queue Timer facility, we + * don't need to ask the Firmware to send us Egress Queue CIDX + * Updates: the Hardware will do this automatically. And + * since we send the Ingress Queue CIDX Updates to the + * corresponding Ethernet Response Queue, we'll get them very + * quickly. + */ + if (!txq->dbqt) + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; } /* Start filling in our Work Request. Note that we do _not_ handle @@ -2793,6 +2879,74 @@ static int t4_tx_hststamp(struct adapter *adapter, struct sk_buff *skb, return 1; } +/** + * t4_tx_completion_handler - handle CPL_SGE_EGR_UPDATE messages + * @rspq: Ethernet RX Response Queue associated with Ethernet TX Queue + * @rsp: Response Entry pointer into Response Queue + * @gl: Gather List pointer + * + * For adapters which support the SGE Doorbell Queue Timer facility, + * we configure the Ethernet TX Queues to send CIDX Updates to the + * Associated Ethernet RX Response Queue with CPL_SGE_EGR_UPDATE + * messages. This adds a small load to PCIe Link RX bandwidth and, + * potentially, higher CPU Interrupt load, but allows us to respond + * much more quickly to the CIDX Updates. This is important for + * Upper Layer Software which isn't willing to have a large amount + * of TX Data outstanding before receiving DMA Completions. + */ +static void t4_tx_completion_handler(struct sge_rspq *rspq, + const __be64 *rsp, + const struct pkt_gl *gl) +{ + u8 opcode = ((const struct rss_header *)rsp)->opcode; + struct port_info *pi = netdev_priv(rspq->netdev); + struct adapter *adapter = rspq->adap; + struct sge *s = &adapter->sge; + struct sge_eth_txq *txq; + + /* skip RSS header */ + rsp++; + + /* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG. + */ + if (unlikely(opcode == CPL_FW4_MSG && + ((const struct cpl_fw4_msg *)rsp)->type == + FW_TYPE_RSSCPL)) { + rsp++; + opcode = ((const struct rss_header *)rsp)->opcode; + rsp++; + } + + if (unlikely(opcode != CPL_SGE_EGR_UPDATE)) { + pr_info("%s: unexpected FW4/CPL %#x on Rx queue\n", + __func__, opcode); + return; + } + + txq = &s->ethtxq[pi->first_qset + rspq->idx]; + + /* We've got the Hardware Consumer Index Update in the Egress Update + * message. If we're using the SGE Doorbell Queue Timer mechanism, + * these Egress Update messages will be our sole CIDX Updates we get + * since we don't want to chew up PCIe bandwidth for both Ingress + * Messages and Status Page writes. However, The code which manages + * reclaiming successfully DMA'ed TX Work Requests uses the CIDX value + * stored in the Status Page at the end of the TX Queue. It's easiest + * to simply copy the CIDX Update value from the Egress Update message + * to the Status Page. Also note that no Endian issues need to be + * considered here since both are Big Endian and we're just copying + * bytes consistently ... + */ + if (txq->dbqt) { + struct cpl_sge_egr_update *egr; + + egr = (struct cpl_sge_egr_update *)rsp; + WRITE_ONCE(txq->q.stat->cidx, egr->cidx); + } + + t4_sge_eth_txq_egress_update(adapter, txq, -1); +} + /** * t4_ethrx_handler - process an ingress ethernet packet * @q: the response queue that received the packet @@ -2816,6 +2970,15 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp, struct port_info *pi; int ret = 0; + /* If we're looking at TX Queue CIDX Update, handle that separately + * and return. + */ + if (unlikely((*(u8 *)rsp == CPL_FW4_MSG) || + (*(u8 *)rsp == CPL_SGE_EGR_UPDATE))) { + t4_tx_completion_handler(q, rsp, si); + return 0; + } + if (unlikely(*(u8 *)rsp == cpl_trace_pkt)) return handle_trace_pkt(q->adap, si); @@ -3289,10 +3452,10 @@ done: static void sge_tx_timer_cb(struct timer_list *t) { - unsigned long m; - unsigned int i, budget; struct adapter *adap = from_timer(adap, t, sge.tx_timer); struct sge *s = &adap->sge; + unsigned long m, period; + unsigned int i, budget; for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++) for (m = s->txq_maperr[i]; m; m &= m - 1) { @@ -3320,29 +3483,29 @@ static void sge_tx_timer_cb(struct timer_list *t) budget = MAX_TIMER_TX_RECLAIM; i = s->ethtxq_rover; do { - struct sge_eth_txq *q = &s->ethtxq[i]; - - if (q->q.in_use && - time_after_eq(jiffies, q->txq->trans_start + HZ / 100) && - __netif_tx_trylock(q->txq)) { - int avail = reclaimable(&q->q); - - if (avail) { - if (avail > budget) - avail = budget; - - free_tx_desc(adap, &q->q, avail, true); - q->q.in_use -= avail; - budget -= avail; - } - __netif_tx_unlock(q->txq); - } + budget -= t4_sge_eth_txq_egress_update(adap, &s->ethtxq[i], + budget); + if (!budget) + break; if (++i >= s->ethqsets) i = 0; - } while (budget && i != s->ethtxq_rover); + } while (i != s->ethtxq_rover); s->ethtxq_rover = i; - mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2)); + + if (budget == 0) { + /* If we found too many reclaimable packets schedule a timer + * in the near future to continue where we left off. + */ + period = 2; + } else { + /* We reclaimed all reclaimable TX Descriptors, so reschedule + * at the normal period. + */ + period = TX_QCHECK_PERIOD; + } + + mod_timer(&s->tx_timer, jiffies + period); } /** @@ -3421,7 +3584,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, : FW_IQ_IQTYPE_OFLD)); if (fl) { - enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip); + unsigned int chip_ver = + CHELSIO_CHIP_VERSION(adap->params.chip); /* Allocate the ring for the hardware free list (with space * for its status page) along with the associated software @@ -3459,10 +3623,10 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, * the smaller 64-byte value there). */ c.fl0dcaen_to_fl0cidxfthresh = - htons(FW_IQ_CMD_FL0FBMIN_V(chip <= CHELSIO_T5 ? + htons(FW_IQ_CMD_FL0FBMIN_V(chip_ver <= CHELSIO_T5 ? FETCHBURSTMIN_128B_X : - FETCHBURSTMIN_64B_X) | - FW_IQ_CMD_FL0FBMAX_V((chip <= CHELSIO_T5) ? + FETCHBURSTMIN_64B_T6_X) | + FW_IQ_CMD_FL0FBMAX_V((chip_ver <= CHELSIO_T5) ? FETCHBURSTMAX_512B_X : FETCHBURSTMAX_256B_X)); c.fl0size = htons(flsz); @@ -3584,14 +3748,24 @@ static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id) adap->sge.egr_map[id - adap->sge.egr_start] = q; } +/** + * t4_sge_alloc_eth_txq - allocate an Ethernet TX Queue + * @adap: the adapter + * @txq: the SGE Ethernet TX Queue to initialize + * @dev: the Linux Network Device + * @netdevq: the corresponding Linux TX Queue + * @iqid: the Ingress Queue to which to deliver CIDX Update messages + * @dbqt: whether this TX Queue will use the SGE Doorbell Queue Timers + */ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, struct net_device *dev, struct netdev_queue *netdevq, - unsigned int iqid) + unsigned int iqid, u8 dbqt) { - int ret, nentries; - struct fw_eq_eth_cmd c; - struct sge *s = &adap->sge; + unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip); struct port_info *pi = netdev_priv(dev); + struct sge *s = &adap->sge; + struct fw_eq_eth_cmd c; + int ret, nentries; /* Add status entries */ nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc); @@ -3610,19 +3784,47 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, FW_EQ_ETH_CMD_VFN_V(0)); c.alloc_to_len16 = htonl(FW_EQ_ETH_CMD_ALLOC_F | FW_EQ_ETH_CMD_EQSTART_F | FW_LEN16(c)); - c.viid_pkd = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F | - FW_EQ_ETH_CMD_VIID_V(pi->viid)); + + /* For TX Ethernet Queues using the SGE Doorbell Queue Timer + * mechanism, we use Ingress Queue messages for Hardware Consumer + * Index Updates on the TX Queue. Otherwise we have the Hardware + * write the CIDX Updates into the Status Page at the end of the + * TX Queue. + */ + c.autoequiqe_to_viid = htonl((dbqt + ? FW_EQ_ETH_CMD_AUTOEQUIQE_F + : FW_EQ_ETH_CMD_AUTOEQUEQE_F) | + FW_EQ_ETH_CMD_VIID_V(pi->viid)); + c.fetchszm_to_iqid = - htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) | + htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(dbqt + ? HOSTFCMODE_INGRESS_QUEUE_X + : HOSTFCMODE_STATUS_PAGE_X) | FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) | FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid)); + + /* Note that the CIDX Flush Threshold should match MAX_TX_RECLAIM. */ c.dcaen_to_eqsize = - htonl(FW_EQ_ETH_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) | + htonl(FW_EQ_ETH_CMD_FBMIN_V(chip_ver <= CHELSIO_T5 + ? FETCHBURSTMIN_64B_X + : FETCHBURSTMIN_64B_T6_X) | FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) | FW_EQ_ETH_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) | FW_EQ_ETH_CMD_EQSIZE_V(nentries)); + c.eqaddr = cpu_to_be64(txq->q.phys_addr); + /* If we're using the SGE Doorbell Queue Timer mechanism, pass in the + * currently configured Timer Index. THis can be changed later via an + * ethtool -C tx-usecs {Timer Val} command. Note that the SGE + * Doorbell Queue mode is currently automatically enabled in the + * Firmware by setting either AUTOEQUEQE or AUTOEQUIQE ... + */ + if (dbqt) + c.timeren_timerix = + cpu_to_be32(FW_EQ_ETH_CMD_TIMEREN_F | + FW_EQ_ETH_CMD_TIMERIX_V(txq->dbqtimerix)); + ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c); if (ret) { kfree(txq->q.sdesc); @@ -3639,6 +3841,8 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, txq->txq = netdevq; txq->tso = txq->tx_cso = txq->vlan_ins = 0; txq->mapping_err = 0; + txq->dbqt = dbqt; + return 0; } @@ -3646,10 +3850,11 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, struct net_device *dev, unsigned int iqid, unsigned int cmplqid) { - int ret, nentries; - struct fw_eq_ctrl_cmd c; - struct sge *s = &adap->sge; + unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip); struct port_info *pi = netdev_priv(dev); + struct sge *s = &adap->sge; + struct fw_eq_ctrl_cmd c; + int ret, nentries; /* Add status entries */ nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc); @@ -3673,7 +3878,9 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, FW_EQ_CTRL_CMD_PCIECHN_V(pi->tx_chan) | FW_EQ_CTRL_CMD_FETCHRO_F | FW_EQ_CTRL_CMD_IQID_V(iqid)); c.dcaen_to_eqsize = - htonl(FW_EQ_CTRL_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) | + htonl(FW_EQ_CTRL_CMD_FBMIN_V(chip_ver <= CHELSIO_T5 + ? FETCHBURSTMIN_64B_X + : FETCHBURSTMIN_64B_T6_X) | FW_EQ_CTRL_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) | FW_EQ_CTRL_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) | FW_EQ_CTRL_CMD_EQSIZE_V(nentries)); @@ -3713,6 +3920,7 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq, struct net_device *dev, unsigned int iqid, unsigned int uld_type) { + unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip); int ret, nentries; struct fw_eq_ofld_cmd c; struct sge *s = &adap->sge; @@ -3743,7 +3951,9 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq, FW_EQ_OFLD_CMD_PCIECHN_V(pi->tx_chan) | FW_EQ_OFLD_CMD_FETCHRO_F | FW_EQ_OFLD_CMD_IQID_V(iqid)); c.dcaen_to_eqsize = - htonl(FW_EQ_OFLD_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) | + htonl(FW_EQ_OFLD_CMD_FBMIN_V(chip_ver <= CHELSIO_T5 + ? FETCHBURSTMIN_64B_X + : FETCHBURSTMIN_64B_T6_X) | FW_EQ_OFLD_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) | FW_EQ_OFLD_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) | FW_EQ_OFLD_CMD_EQSIZE_V(nentries)); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index c5e5466ee38b..27af347be4af 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -6712,6 +6712,47 @@ int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox, int ctxt_type) return ret; } +/** + * t4_read_sge_dbqtimers - reag SGE Doorbell Queue Timer values + * @adap - the adapter + * @ndbqtimers: size of the provided SGE Doorbell Queue Timer table + * @dbqtimers: SGE Doorbell Queue Timer table + * + * Reads the SGE Doorbell Queue Timer values into the provided table. + * Returns 0 on success (Firmware and Hardware support this feature), + * an error on failure. + */ +int t4_read_sge_dbqtimers(struct adapter *adap, unsigned int ndbqtimers, + u16 *dbqtimers) +{ + int ret, dbqtimerix; + + ret = 0; + dbqtimerix = 0; + while (dbqtimerix < ndbqtimers) { + int nparams, param; + u32 params[7], vals[7]; + + nparams = ndbqtimers - dbqtimerix; + if (nparams > ARRAY_SIZE(params)) + nparams = ARRAY_SIZE(params); + + for (param = 0; param < nparams; param++) + params[param] = + (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_DBQ_TIMER) | + FW_PARAMS_PARAM_Y_V(dbqtimerix + param)); + ret = t4_query_params(adap, adap->mbox, adap->pf, 0, + nparams, params, vals); + if (ret) + break; + + for (param = 0; param < nparams; param++) + dbqtimers[dbqtimerix++] = vals[param]; + } + return ret; +} + /** * t4_fw_hello - establish communication with FW * @adap: the adapter diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h index 361d5032c288..002fc62ea726 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h @@ -91,6 +91,7 @@ enum { SGE_CTXT_SIZE = 24, /* size of SGE context */ SGE_NTIMERS = 6, /* # of interrupt holdoff timer values */ SGE_NCOUNTERS = 4, /* # of interrupt packet counter values */ + SGE_NDBQTIMERS = 8, /* # of Doorbell Queue Timer values */ SGE_MAX_IQ_SIZE = 65520, SGE_TIMER_RSTRT_CNTR = 6, /* restart RX packet threshold counter */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h index f6558cbfc54e..eb1aa82149db 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h @@ -71,12 +71,18 @@ #define FETCHBURSTMIN_64B_X 2 #define FETCHBURSTMIN_128B_X 3 +/* T6 and later use a single-bit encoding for FetchBurstMin */ +#define FETCHBURSTMIN_64B_T6_X 0 +#define FETCHBURSTMIN_128B_T6_X 1 + #define FETCHBURSTMAX_256B_X 2 #define FETCHBURSTMAX_512B_X 3 +#define HOSTFCMODE_INGRESS_QUEUE_X 1 #define HOSTFCMODE_STATUS_PAGE_X 2 #define CIDXFLUSHTHRESH_32_X 5 +#define CIDXFLUSHTHRESH_128_X 7 #define UPDATEDELIVERY_INTERRUPT_X 1 diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 1d9b3e1e5f94..631f1663f4e0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1254,6 +1254,8 @@ enum fw_params_param_dev { FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21, FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR = 0x24, FW_PARAMS_PARAM_DEV_OPAQUE_VIID_SMT_EXTN = 0x27, + FW_PARAMS_PARAM_DEV_DBQ_TIMER = 0x29, + FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK = 0x2A, }; /* @@ -1322,6 +1324,7 @@ enum fw_params_param_dmaq { FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL = 0x11, FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH = 0x12, FW_PARAMS_PARAM_DMAQ_EQ_DCBPRIO_ETH = 0x13, + FW_PARAMS_PARAM_DMAQ_EQ_TIMERIX = 0x15, FW_PARAMS_PARAM_DMAQ_CONM_CTXT = 0x20, }; @@ -1751,8 +1754,8 @@ struct fw_eq_eth_cmd { __be32 fetchszm_to_iqid; __be32 dcaen_to_eqsize; __be64 eqaddr; - __be32 viid_pkd; - __be32 r8_lo; + __be32 autoequiqe_to_viid; + __be32 timeren_timerix; __be64 r9; }; @@ -1847,6 +1850,10 @@ struct fw_eq_eth_cmd { #define FW_EQ_ETH_CMD_EQSIZE_S 0 #define FW_EQ_ETH_CMD_EQSIZE_V(x) ((x) << FW_EQ_ETH_CMD_EQSIZE_S) +#define FW_EQ_ETH_CMD_AUTOEQUIQE_S 31 +#define FW_EQ_ETH_CMD_AUTOEQUIQE_V(x) ((x) << FW_EQ_ETH_CMD_AUTOEQUIQE_S) +#define FW_EQ_ETH_CMD_AUTOEQUIQE_F FW_EQ_ETH_CMD_AUTOEQUIQE_V(1U) + #define FW_EQ_ETH_CMD_AUTOEQUEQE_S 30 #define FW_EQ_ETH_CMD_AUTOEQUEQE_V(x) ((x) << FW_EQ_ETH_CMD_AUTOEQUEQE_S) #define FW_EQ_ETH_CMD_AUTOEQUEQE_F FW_EQ_ETH_CMD_AUTOEQUEQE_V(1U) @@ -1854,6 +1861,19 @@ struct fw_eq_eth_cmd { #define FW_EQ_ETH_CMD_VIID_S 16 #define FW_EQ_ETH_CMD_VIID_V(x) ((x) << FW_EQ_ETH_CMD_VIID_S) +#define FW_EQ_ETH_CMD_TIMEREN_S 3 +#define FW_EQ_ETH_CMD_TIMEREN_M 0x1 +#define FW_EQ_ETH_CMD_TIMEREN_V(x) ((x) << FW_EQ_ETH_CMD_TIMEREN_S) +#define FW_EQ_ETH_CMD_TIMEREN_G(x) \ + (((x) >> FW_EQ_ETH_CMD_TIMEREN_S) & FW_EQ_ETH_CMD_TIMEREN_M) +#define FW_EQ_ETH_CMD_TIMEREN_F FW_EQ_ETH_CMD_TIMEREN_V(1U) + +#define FW_EQ_ETH_CMD_TIMERIX_S 0 +#define FW_EQ_ETH_CMD_TIMERIX_M 0x7 +#define FW_EQ_ETH_CMD_TIMERIX_V(x) ((x) << FW_EQ_ETH_CMD_TIMERIX_S) +#define FW_EQ_ETH_CMD_TIMERIX_G(x) \ + (((x) >> FW_EQ_ETH_CMD_TIMERIX_S) & FW_EQ_ETH_CMD_TIMERIX_M) + struct fw_eq_ctrl_cmd { __be32 op_to_vfn; __be32 alloc_to_len16; diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 1d534f0baa69..11d2ba0a2bf5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -2268,7 +2268,7 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, cmd.iqaddr = cpu_to_be64(rspq->phys_addr); if (fl) { - enum chip_type chip = + unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); /* * Allocate the ring for the hardware free list (with space @@ -2319,10 +2319,10 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, */ cmd.fl0dcaen_to_fl0cidxfthresh = cpu_to_be16( - FW_IQ_CMD_FL0FBMIN_V(chip <= CHELSIO_T5 ? - FETCHBURSTMIN_128B_X : - FETCHBURSTMIN_64B_X) | - FW_IQ_CMD_FL0FBMAX_V((chip <= CHELSIO_T5) ? + FW_IQ_CMD_FL0FBMIN_V(chip_ver <= CHELSIO_T5 + ? FETCHBURSTMIN_128B_X + : FETCHBURSTMIN_64B_T6_X) | + FW_IQ_CMD_FL0FBMAX_V((chip_ver <= CHELSIO_T5) ? FETCHBURSTMAX_512B_X : FETCHBURSTMAX_256B_X)); cmd.fl0size = cpu_to_be16(flsz); @@ -2411,10 +2411,11 @@ int t4vf_sge_alloc_eth_txq(struct adapter *adapter, struct sge_eth_txq *txq, struct net_device *dev, struct netdev_queue *devq, unsigned int iqid) { + unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); + struct port_info *pi = netdev_priv(dev); + struct fw_eq_eth_cmd cmd, rpl; struct sge *s = &adapter->sge; int ret, nentries; - struct fw_eq_eth_cmd cmd, rpl; - struct port_info *pi = netdev_priv(dev); /* * Calculate the size of the hardware TX Queue (including the Status @@ -2448,17 +2449,19 @@ int t4vf_sge_alloc_eth_txq(struct adapter *adapter, struct sge_eth_txq *txq, cmd.alloc_to_len16 = cpu_to_be32(FW_EQ_ETH_CMD_ALLOC_F | FW_EQ_ETH_CMD_EQSTART_F | FW_LEN16(cmd)); - cmd.viid_pkd = cpu_to_be32(FW_EQ_ETH_CMD_AUTOEQUEQE_F | - FW_EQ_ETH_CMD_VIID_V(pi->viid)); + cmd.autoequiqe_to_viid = cpu_to_be32(FW_EQ_ETH_CMD_AUTOEQUEQE_F | + FW_EQ_ETH_CMD_VIID_V(pi->viid)); cmd.fetchszm_to_iqid = cpu_to_be32(FW_EQ_ETH_CMD_HOSTFCMODE_V(SGE_HOSTFCMODE_STPG) | FW_EQ_ETH_CMD_PCIECHN_V(pi->port_id) | FW_EQ_ETH_CMD_IQID_V(iqid)); cmd.dcaen_to_eqsize = - cpu_to_be32(FW_EQ_ETH_CMD_FBMIN_V(SGE_FETCHBURSTMIN_64B) | - FW_EQ_ETH_CMD_FBMAX_V(SGE_FETCHBURSTMAX_512B) | + cpu_to_be32(FW_EQ_ETH_CMD_FBMIN_V(chip_ver <= CHELSIO_T5 + ? FETCHBURSTMIN_64B_X + : FETCHBURSTMIN_64B_T6_X) | + FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) | FW_EQ_ETH_CMD_CIDXFTHRESH_V( - SGE_CIDXFLUSHTHRESH_32) | + CIDXFLUSHTHRESH_32_X) | FW_EQ_ETH_CMD_EQSIZE_V(nentries)); cmd.eqaddr = cpu_to_be64(txq->q.phys_addr); From 543a1b85e734d2ec62e5df61641e8ea7349de4a8 Mon Sep 17 00:00:00 2001 From: Vishal Kulkarni Date: Thu, 14 Feb 2019 18:19:16 +0530 Subject: [PATCH 1194/1436] cxgb4: Add capability to get/set SGE Doorbell Queue Timer Tick This patch gets/sets SGE Doorbell Queue timer ticks via ethtool Original work by: Casey Leedom Signed-off-by: Vishal Kulkarni Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 + .../ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 189 +++++++++++++++++- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 14 +- 3 files changed, 198 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 68d0d453eb06..b7b0eb104430 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -819,6 +819,7 @@ struct sge { u16 nqs_per_uld; /* # of Rx queues per ULD */ u16 timer_val[SGE_NTIMERS]; u8 counter_val[SGE_NCOUNTERS]; + u16 dbqtimer_tick; u16 dbqtimer_val[SGE_NDBQTIMERS]; u32 fl_pg_order; /* large page allocation size */ u32 stat_len; /* length of status page at ring end */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 796043544fc3..65b8dc706c1d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -932,11 +932,190 @@ static int get_adaptive_rx_setting(struct net_device *dev) return q->rspq.adaptive_rx; } -static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c) +/* Return the current global Adapter SGE Doorbell Queue Timer Tick for all + * Ethernet TX Queues. + */ +static int get_dbqtimer_tick(struct net_device *dev) { - set_adaptive_rx_setting(dev, c->use_adaptive_rx_coalesce); - return set_rx_intr_params(dev, c->rx_coalesce_usecs, - c->rx_max_coalesced_frames); + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + + if (!(adap->flags & SGE_DBQ_TIMER)) + return 0; + + return adap->sge.dbqtimer_tick; +} + +/* Return the SGE Doorbell Queue Timer Value for the Ethernet TX Queues + * associated with a Network Device. + */ +static int get_dbqtimer(struct net_device *dev) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + struct sge_eth_txq *txq; + + txq = &adap->sge.ethtxq[pi->first_qset]; + + if (!(adap->flags & SGE_DBQ_TIMER)) + return 0; + + /* all of the TX Queues use the same Timer Index */ + return adap->sge.dbqtimer_val[txq->dbqtimerix]; +} + +/* Set the global Adapter SGE Doorbell Queue Timer Tick for all Ethernet TX + * Queues. This is the fundamental "Tick" that sets the scale of values which + * can be used. Individual Ethernet TX Queues index into a relatively small + * array of Tick Multipliers. Changing the base Tick will thus change all of + * the resulting Timer Values associated with those multipliers for all + * Ethernet TX Queues. + */ +static int set_dbqtimer_tick(struct net_device *dev, int usecs) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + struct sge *s = &adap->sge; + u32 param, val; + int ret; + + if (!(adap->flags & SGE_DBQ_TIMER)) + return 0; + + /* return early if it's the same Timer Tick we're already using */ + if (s->dbqtimer_tick == usecs) + return 0; + + /* attempt to set the new Timer Tick value */ + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK)); + val = usecs; + ret = t4_set_params(adap, adap->mbox, adap->pf, 0, 1, ¶m, &val); + if (ret) + return ret; + s->dbqtimer_tick = usecs; + + /* if successful, reread resulting dependent Timer values */ + ret = t4_read_sge_dbqtimers(adap, ARRAY_SIZE(s->dbqtimer_val), + s->dbqtimer_val); + return ret; +} + +/* Set the SGE Doorbell Queue Timer Value for the Ethernet TX Queues + * associated with a Network Device. There is a relatively small array of + * possible Timer Values so we need to pick the closest value available. + */ +static int set_dbqtimer(struct net_device *dev, int usecs) +{ + int qix, timerix, min_timerix, delta, min_delta; + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + struct sge *s = &adap->sge; + struct sge_eth_txq *txq; + u32 param, val; + int ret; + + if (!(adap->flags & SGE_DBQ_TIMER)) + return 0; + + /* Find the SGE Doorbell Timer Value that's closest to the requested + * value. + */ + min_delta = INT_MAX; + min_timerix = 0; + for (timerix = 0; timerix < ARRAY_SIZE(s->dbqtimer_val); timerix++) { + delta = s->dbqtimer_val[timerix] - usecs; + if (delta < 0) + delta = -delta; + if (delta < min_delta) { + min_delta = delta; + min_timerix = timerix; + } + } + + /* Return early if it's the same Timer Index we're already using. + * We use the same Timer Index for all of the TX Queues for an + * interface so it's only necessary to check the first one. + */ + txq = &s->ethtxq[pi->first_qset]; + if (txq->dbqtimerix == min_timerix) + return 0; + + for (qix = 0; qix < pi->nqsets; qix++, txq++) { + if (adap->flags & FULL_INIT_DONE) { + param = + (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_EQ_TIMERIX) | + FW_PARAMS_PARAM_YZ_V(txq->q.cntxt_id)); + val = min_timerix; + ret = t4_set_params(adap, adap->mbox, adap->pf, 0, + 1, ¶m, &val); + if (ret) + return ret; + } + txq->dbqtimerix = min_timerix; + } + return 0; +} + +/* Set the global Adapter SGE Doorbell Queue Timer Tick for all Ethernet TX + * Queues and the Timer Value for the Ethernet TX Queues associated with a + * Network Device. Since changing the global Tick changes all of the + * available Timer Values, we need to do this first before selecting the + * resulting closest Timer Value. Moreover, since the Tick is global, + * changing it affects the Timer Values for all Network Devices on the + * adapter. So, before changing the Tick, we grab all of the current Timer + * Values for other Network Devices on this Adapter and then attempt to select + * new Timer Values which are close to the old values ... + */ +static int set_dbqtimer_tickval(struct net_device *dev, + int tick_usecs, int timer_usecs) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + int timer[MAX_NPORTS]; + unsigned int port; + int ret; + + /* Grab the other adapter Network Interface current timers and fill in + * the new one for this Network Interface. + */ + for_each_port(adap, port) + if (port == pi->port_id) + timer[port] = timer_usecs; + else + timer[port] = get_dbqtimer(adap->port[port]); + + /* Change the global Tick first ... */ + ret = set_dbqtimer_tick(dev, tick_usecs); + if (ret) + return ret; + + /* ... and then set all of the Network Interface Timer Values ... */ + for_each_port(adap, port) { + ret = set_dbqtimer(adap->port[port], timer[port]); + if (ret) + return ret; + } + + return 0; +} + +static int set_coalesce(struct net_device *dev, + struct ethtool_coalesce *coalesce) +{ + int ret; + + set_adaptive_rx_setting(dev, coalesce->use_adaptive_rx_coalesce); + + ret = set_rx_intr_params(dev, coalesce->rx_coalesce_usecs, + coalesce->rx_max_coalesced_frames); + if (ret) + return ret; + + return set_dbqtimer_tickval(dev, + coalesce->tx_coalesce_usecs_irq, + coalesce->tx_coalesce_usecs); } static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c) @@ -949,6 +1128,8 @@ static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c) c->rx_max_coalesced_frames = (rq->intr_params & QINTR_CNT_EN_F) ? adap->sge.counter_val[rq->pktcnt_idx] : 0; c->use_adaptive_rx_coalesce = get_adaptive_rx_setting(dev); + c->tx_coalesce_usecs_irq = get_dbqtimer_tick(dev); + c->tx_coalesce_usecs = get_dbqtimer(dev); return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index bdd11a617579..bcbac247a73d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4331,8 +4331,18 @@ static int adap_init0(struct adapter *adap) /* Grab the SGE Doorbell Queue Timer values. If successful, that * indicates that the Firmware and Hardware support this. */ - ret = t4_read_sge_dbqtimers(adap, ARRAY_SIZE(adap->sge.dbqtimer_val), - adap->sge.dbqtimer_val); + params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK)); + ret = t4_query_params(adap, adap->mbox, adap->pf, 0, + 1, params, val); + + if (!ret) { + adap->sge.dbqtimer_tick = val[0]; + ret = t4_read_sge_dbqtimers(adap, + ARRAY_SIZE(adap->sge.dbqtimer_val), + adap->sge.dbqtimer_val); + } + if (!ret) adap->flags |= SGE_DBQ_TIMER; From fa8ba2cba7f9c75b84f82d174658d959d25d4561 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 14 Feb 2019 15:39:07 +0100 Subject: [PATCH 1195/1436] lib: objagg: fix handling of object with 0 users when assembling hints It is possible that there might be an originally parent object with 0 direct users that is in hints no longer considered as parent. Then the weight of this object is 0 and current code ignores him. That's why the total amount of hint objects might be lower than for the original objagg and WARN_ON is hit. Fix this be considering 0 weight valid. Fixes: 9069a3817d82 ("lib: objagg: implement optimization hints assembly and use hints for object creation") Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- lib/objagg.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/objagg.c b/lib/objagg.c index d552ec9c60ed..576be22e86de 100644 --- a/lib/objagg.c +++ b/lib/objagg.c @@ -744,8 +744,6 @@ static unsigned int objagg_tmp_graph_node_weight(struct objagg_tmp_graph *graph, * that this node can represent with delta. */ - if (node->crossed_out) - return 0; for (j = 0; j < graph->nodes_count; j++) { if (!objagg_tmp_graph_is_edge(graph, index, j)) continue; @@ -759,14 +757,18 @@ static unsigned int objagg_tmp_graph_node_weight(struct objagg_tmp_graph *graph, static int objagg_tmp_graph_node_max_weight(struct objagg_tmp_graph *graph) { + struct objagg_tmp_node *node; unsigned int max_weight = 0; unsigned int weight; int max_index = -1; int i; for (i = 0; i < graph->nodes_count; i++) { + node = &graph->nodes[i]; + if (node->crossed_out) + continue; weight = objagg_tmp_graph_node_weight(graph, i); - if (weight > max_weight) { + if (weight >= max_weight) { max_weight = weight; max_index = i; } From baff7b09ffa9055d54a366f26b02269ea579b268 Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Thu, 14 Feb 2019 22:45:38 +0800 Subject: [PATCH 1196/1436] net: i825xx: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in i596_interrupt() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/i825xx/lib82596.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c index 2f7ae118217f..1274ad24d6af 100644 --- a/drivers/net/ethernet/i825xx/lib82596.c +++ b/drivers/net/ethernet/i825xx/lib82596.c @@ -1194,7 +1194,7 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id) dma_unmap_single(dev->dev.parent, tx_cmd->dma_addr, skb->len, DMA_TO_DEVICE); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); tx_cmd->cmd.command = 0; /* Mark free */ break; From d1441d4782f2d3d7b9596df8076a2f213a34dcf5 Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Thu, 14 Feb 2019 22:50:58 +0800 Subject: [PATCH 1197/1436] net: xilinx: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/ll_temac_main.c | 2 +- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 2 +- drivers/net/ethernet/xilinx/xilinx_emaclite.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 15bb058db392..44efffbe7970 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -630,7 +630,7 @@ static void temac_start_xmit_done(struct net_device *ndev) dma_unmap_single(ndev->dev.parent, cur_p->phys, cur_p->len, DMA_TO_DEVICE); if (cur_p->app4) - dev_kfree_skb_irq((struct sk_buff *)cur_p->app4); + dev_consume_skb_irq((struct sk_buff *)cur_p->app4); cur_p->app0 = 0; cur_p->app1 = 0; cur_p->app2 = 0; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 0789d8af7d72..ec7e7ec24ff9 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -595,7 +595,7 @@ static void axienet_start_xmit_done(struct net_device *ndev) (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK), DMA_TO_DEVICE); if (cur_p->app4) - dev_kfree_skb_irq((struct sk_buff *)cur_p->app4); + dev_consume_skb_irq((struct sk_buff *)cur_p->app4); /*cur_p->phys = 0;*/ cur_p->app0 = 0; cur_p->app1 = 0; diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 639e3e99af46..b03a417d0073 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -581,7 +581,7 @@ static void xemaclite_tx_handler(struct net_device *dev) return; dev->stats.tx_bytes += lp->deferred_skb->len; - dev_kfree_skb_irq(lp->deferred_skb); + dev_consume_skb_irq(lp->deferred_skb); lp->deferred_skb = NULL; netif_trans_update(dev); /* prevent tx timeout */ netif_wake_queue(dev); From 1bba6de1a67f10270dd0f0951050d0470514c2a5 Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Thu, 14 Feb 2019 22:52:28 +0800 Subject: [PATCH 1198/1436] net: packetengines: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/packetengines/hamachi.c | 2 +- drivers/net/ethernet/packetengines/yellowfin.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index c9529c29a0a7..eee883a2aa8d 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -1337,7 +1337,7 @@ static irqreturn_t hamachi_interrupt(int irq, void *dev_instance) leXX_to_cpu(hmp->tx_ring[entry].addr), skb->len, PCI_DMA_TODEVICE); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); hmp->tx_skbuff[entry] = NULL; } hmp->tx_ring[entry].status_n_length = 0; diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index 54224d1822e3..6f8d6584f809 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -925,7 +925,7 @@ static irqreturn_t yellowfin_interrupt(int irq, void *dev_instance) /* Free the original skb. */ pci_unmap_single(yp->pci_dev, le32_to_cpu(yp->tx_ring[entry].addr), skb->len, PCI_DMA_TODEVICE); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); yp->tx_skbuff[entry] = NULL; } if (yp->tx_full && @@ -983,7 +983,7 @@ static irqreturn_t yellowfin_interrupt(int irq, void *dev_instance) pci_unmap_single(yp->pci_dev, yp->tx_ring[entry<<1].addr, skb->len, PCI_DMA_TODEVICE); - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); yp->tx_skbuff[entry] = 0; /* Mark status as empty. */ yp->tx_status[entry].tx_errs = 0; From 67633e78646c7be5e2e4bd1255da22dd4381c1e1 Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Thu, 14 Feb 2019 22:53:30 +0800 Subject: [PATCH 1199/1436] net: arc_emac: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in arc_emac_tx_clean() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/arc/emac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index 4406325fdd9f..ff3d68532f5f 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -148,7 +148,7 @@ static void arc_emac_tx_clean(struct net_device *ndev) dma_unmap_len(tx_buff, len), DMA_TO_DEVICE); /* return the sk_buff to system */ - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); txbd->data = 0; txbd->info = 0; From 467d2fceafb6c1dc3cf444c86266a6c0334b1f3b Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Thu, 14 Feb 2019 22:55:14 +0800 Subject: [PATCH 1200/1436] net: 3com: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/3com/3c515.c | 4 ++-- drivers/net/ethernet/3com/3c59x.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index b648e3f95c01..808abb6b3671 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -1177,7 +1177,7 @@ static irqreturn_t corkscrew_interrupt(int irq, void *dev_id) if (inl(ioaddr + DownListPtr) == isa_virt_to_bus(&lp->tx_ring[entry])) break; /* It still hasn't been processed. */ if (lp->tx_skbuff[entry]) { - dev_kfree_skb_irq(lp->tx_skbuff[entry]); + dev_consume_skb_irq(lp->tx_skbuff[entry]); lp->tx_skbuff[entry] = NULL; } dirty_tx++; @@ -1192,7 +1192,7 @@ static irqreturn_t corkscrew_interrupt(int irq, void *dev_id) #ifdef VORTEX_BUS_MASTER if (status & DMADone) { outw(0x1000, ioaddr + Wn7_MasterStatus); /* Ack the event. */ - dev_kfree_skb_irq(lp->tx_skb); /* Release the transferred buffer */ + dev_consume_skb_irq(lp->tx_skb); /* Release the transferred buffer */ netif_wake_queue(dev); } #endif diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 40f421dbdf57..147051404194 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -2307,7 +2307,7 @@ _vortex_interrupt(int irq, struct net_device *dev) dma_unmap_single(vp->gendev, vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, DMA_TO_DEVICE); pkts_compl++; bytes_compl += vp->tx_skb->len; - dev_kfree_skb_irq(vp->tx_skb); /* Release the transferred buffer */ + dev_consume_skb_irq(vp->tx_skb); /* Release the transferred buffer */ if (ioread16(ioaddr + TxFree) > 1536) { /* * AKPM: FIXME: I don't think we need this. If the queue was stopped due to @@ -2449,7 +2449,7 @@ _boomerang_interrupt(int irq, struct net_device *dev) #endif pkts_compl++; bytes_compl += skb->len; - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); vp->tx_skbuff[entry] = NULL; } else { pr_debug("boomerang_interrupt: no skb!\n"); From e772261b53bc45f53693ae4514658cd376da9e56 Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Thu, 14 Feb 2019 23:06:40 +0800 Subject: [PATCH 1201/1436] net: adaptec: starfire: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles dev_consume_skb_irq() should be called in intr_handler() when skb xmit done. It makes drop profiles(dropwatch, perf) more friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/adaptec/starfire.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 097467f44b0d..816540e6beac 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -1390,7 +1390,7 @@ static irqreturn_t intr_handler(int irq, void *dev_instance) } } - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); } np->tx_done_q[np->tx_done].status = 0; np->tx_done = (np->tx_done + 1) % DONE_Q_SIZE; From f9bcc9f3ee4fbbe8f11dfec76745476f5780517e Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 14 Feb 2019 11:15:35 -0500 Subject: [PATCH 1202/1436] net: ethernet: freescale: set FEC ethtool regs version Currently the ethtool_regs version is set to 0 for FEC devices. Use this field to store the register dump version exposed by the kernel. The choosen version 2 corresponds to the kernel compile test: #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_COMPILE_TEST) and version 1 corresponds to the opposite. Binaries of ethtool unaware of this version will dump the whole set as usual. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 2370dc204202..697c2427f2b7 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2098,6 +2098,7 @@ static int fec_enet_get_regs_len(struct net_device *ndev) #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \ defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) || \ defined(CONFIG_ARM64) || defined(CONFIG_COMPILE_TEST) +static __u32 fec_enet_register_version = 2; static u32 fec_enet_register_offset[] = { FEC_IEVENT, FEC_IMASK, FEC_R_DES_ACTIVE_0, FEC_X_DES_ACTIVE_0, FEC_ECNTRL, FEC_MII_DATA, FEC_MII_SPEED, FEC_MIB_CTRLSTAT, FEC_R_CNTRL, @@ -2128,6 +2129,7 @@ static u32 fec_enet_register_offset[] = { IEEE_R_FDXFC, IEEE_R_OCTETS_OK }; #else +static __u32 fec_enet_register_version = 1; static u32 fec_enet_register_offset[] = { FEC_ECNTRL, FEC_IEVENT, FEC_IMASK, FEC_IVEC, FEC_R_DES_ACTIVE_0, FEC_R_DES_ACTIVE_1, FEC_R_DES_ACTIVE_2, FEC_X_DES_ACTIVE_0, @@ -2149,6 +2151,8 @@ static void fec_enet_get_regs(struct net_device *ndev, u32 *buf = (u32 *)regbuf; u32 i, off; + regs->version = fec_enet_register_version; + memset(buf, 0, regs->len); for (i = 0; i < ARRAY_SIZE(fec_enet_register_offset); i++) { From 50f444aa50a4f3fab35a04f56d6bb83dc1e8c875 Mon Sep 17 00:00:00 2001 From: Robert Stonehouse Date: Thu, 14 Feb 2019 17:27:43 +0000 Subject: [PATCH 1203/1436] sfc: ensure recovery after allocation failures After failing to allocate a receive buffer the driver may fail to ever request additional allocations. EF10 NICs require new receive buffers to be pushed in batches of eight or more. The test for whether a slow fill should be scheduled failed to take account of this. There is little downside to *always* requesting a slow fill if we failed to allocate a buffer, so the condition has been removed completely. The timer that triggers the request for a refill has also been shortened. Signed-off-by: Robert Stonehouse Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx.c | 2 +- drivers/net/ethernet/sfc/rx.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 3643015a55cf..bc655ffc9e02 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -915,7 +915,7 @@ rollback: void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue) { - mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(100)); + mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10)); } static bool efx_default_channel_want_txqs(struct efx_channel *channel) diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 396ff01298cd..8702ab44d80b 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -360,8 +360,7 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic) rc = efx_init_rx_buffers(rx_queue, atomic); if (unlikely(rc)) { /* Ensure that we don't leave the rx queue empty */ - if (rx_queue->added_count == rx_queue->removed_count) - efx_schedule_slow_fill(rx_queue); + efx_schedule_slow_fill(rx_queue); goto out; } } while ((space -= batch_size) >= batch_size); From 23e93c9b2cde73f9912d0d8534adbddd3dcc48f4 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 13 Feb 2019 15:12:17 -0500 Subject: [PATCH 1204/1436] Revert "gfs2: read journal in large chunks to locate the head" This reverts commit 2a5f14f279f59143139bcd1606903f2f80a34241. This patch causes xfstests generic/311 to fail. Reverting this for now until we have a proper fix. Signed-off-by: Abhi Das Signed-off-by: Bob Peterson Signed-off-by: Linus Torvalds --- fs/gfs2/glops.c | 1 - fs/gfs2/log.c | 4 +- fs/gfs2/lops.c | 190 ++----------------------------------------- fs/gfs2/lops.h | 4 +- fs/gfs2/ops_fstype.c | 1 - fs/gfs2/recovery.c | 123 ++++++++++++++++++++++++++++ fs/gfs2/recovery.h | 2 + fs/gfs2/super.c | 1 - 8 files changed, 134 insertions(+), 192 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index f15b4c57c4bd..78510ab91835 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -28,7 +28,6 @@ #include "util.h" #include "trans.h" #include "dir.h" -#include "lops.h" struct workqueue_struct *gfs2_freeze_wq; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 5bfaf381921a..b8830fda51e8 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -733,7 +733,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, lh->lh_crc = cpu_to_be32(crc); gfs2_log_write(sdp, page, sb->s_blocksize, 0, addr); - gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE | op_flags); + gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE, op_flags); log_flush_wait(sdp); } @@ -810,7 +810,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) gfs2_ordered_write(sdp); lops_before_commit(sdp, tr); - gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE); + gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE, 0); if (sdp->sd_log_head != sdp->sd_log_flush_head) { log_flush_wait(sdp); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 94dcab655bc0..2295042bc625 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -17,9 +17,7 @@ #include #include #include -#include -#include "bmap.h" #include "dir.h" #include "gfs2.h" #include "incore.h" @@ -195,6 +193,7 @@ static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec, /** * gfs2_end_log_write - end of i/o to the log * @bio: The bio + * @error: Status of i/o request * * Each bio_vec contains either data from the pagecache or data * relating to the log itself. Here we iterate over the bio_vec @@ -231,19 +230,20 @@ static void gfs2_end_log_write(struct bio *bio) /** * gfs2_log_submit_bio - Submit any pending log bio * @biop: Address of the bio pointer - * @opf: REQ_OP | op_flags + * @op: REQ_OP + * @op_flags: req_flag_bits * * Submit any pending part-built or full bio to the block device. If * there is no pending bio, then this is a no-op. */ -void gfs2_log_submit_bio(struct bio **biop, int opf) +void gfs2_log_submit_bio(struct bio **biop, int op, int op_flags) { struct bio *bio = *biop; if (bio) { struct gfs2_sbd *sdp = bio->bi_private; atomic_inc(&sdp->sd_log_in_flight); - bio->bi_opf = opf; + bio_set_op_attrs(bio, op, op_flags); submit_bio(bio); *biop = NULL; } @@ -304,7 +304,7 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno, nblk >>= sdp->sd_fsb2bb_shift; if (blkno == nblk && !flush) return bio; - gfs2_log_submit_bio(biop, op); + gfs2_log_submit_bio(biop, op, 0); } *biop = gfs2_log_alloc_bio(sdp, blkno, end_io); @@ -375,184 +375,6 @@ void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page) gfs2_log_bmap(sdp)); } -/** - * gfs2_end_log_read - end I/O callback for reads from the log - * @bio: The bio - * - * Simply unlock the pages in the bio. The main thread will wait on them and - * process them in order as necessary. - */ - -static void gfs2_end_log_read(struct bio *bio) -{ - struct page *page; - struct bio_vec *bvec; - int i; - - bio_for_each_segment_all(bvec, bio, i) { - page = bvec->bv_page; - if (bio->bi_status) { - int err = blk_status_to_errno(bio->bi_status); - - SetPageError(page); - mapping_set_error(page->mapping, err); - } - unlock_page(page); - } - - bio_put(bio); -} - -/** - * gfs2_jhead_pg_srch - Look for the journal head in a given page. - * @jd: The journal descriptor - * @page: The page to look in - * - * Returns: 1 if found, 0 otherwise. - */ - -static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd, - struct gfs2_log_header_host *head, - struct page *page) -{ - struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - struct gfs2_log_header_host uninitialized_var(lh); - void *kaddr = kmap_atomic(page); - unsigned int offset; - bool ret = false; - - for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) { - if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) { - if (lh.lh_sequence > head->lh_sequence) - *head = lh; - else { - ret = true; - break; - } - } - } - kunmap_atomic(kaddr); - return ret; -} - -/** - * gfs2_jhead_process_page - Search/cleanup a page - * @jd: The journal descriptor - * @index: Index of the page to look into - * @done: If set, perform only cleanup, else search and set if found. - * - * Find the page with 'index' in the journal's mapping. Search the page for - * the journal head if requested (cleanup == false). Release refs on the - * page so the page cache can reclaim it (put_page() twice). We grabbed a - * reference on this page two times, first when we did a find_or_create_page() - * to obtain the page to add it to the bio and second when we do a - * find_get_page() here to get the page to wait on while I/O on it is being - * completed. - * This function is also used to free up a page we might've grabbed but not - * used. Maybe we added it to a bio, but not submitted it for I/O. Or we - * submitted the I/O, but we already found the jhead so we only need to drop - * our references to the page. - */ - -static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index, - struct gfs2_log_header_host *head, - bool *done) -{ - struct page *page; - - page = find_get_page(jd->jd_inode->i_mapping, index); - wait_on_page_locked(page); - - if (PageError(page)) - *done = true; - - if (!*done) - *done = gfs2_jhead_pg_srch(jd, head, page); - - put_page(page); /* Once for find_get_page */ - put_page(page); /* Once more for find_or_create_page */ -} - -/** - * gfs2_find_jhead - find the head of a log - * @jd: The journal descriptor - * @head: The log descriptor for the head of the log is returned here - * - * Do a search of a journal by reading it in large chunks using bios and find - * the valid log entry with the highest sequence number. (i.e. the log head) - * - * Returns: 0 on success, errno otherwise - */ - -int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) -{ - struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - struct address_space *mapping = jd->jd_inode->i_mapping; - struct gfs2_journal_extent *je; - u32 block, read_idx = 0, submit_idx = 0, index = 0; - int shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift; - int blocks_per_page = 1 << shift, sz, ret = 0; - struct bio *bio = NULL; - struct page *page; - bool done = false; - errseq_t since; - - memset(head, 0, sizeof(*head)); - if (list_empty(&jd->extent_list)) - gfs2_map_journal_extents(sdp, jd); - - since = filemap_sample_wb_err(mapping); - list_for_each_entry(je, &jd->extent_list, list) { - for (block = 0; block < je->blocks; block += blocks_per_page) { - index = (je->lblock + block) >> shift; - - page = find_or_create_page(mapping, index, GFP_NOFS); - if (!page) { - ret = -ENOMEM; - done = true; - goto out; - } - - if (bio) { - sz = bio_add_page(bio, page, PAGE_SIZE, 0); - if (sz == PAGE_SIZE) - goto page_added; - submit_idx = index; - submit_bio(bio); - bio = NULL; - } - - bio = gfs2_log_alloc_bio(sdp, - je->dblock + (index << shift), - gfs2_end_log_read); - bio->bi_opf = REQ_OP_READ; - sz = bio_add_page(bio, page, PAGE_SIZE, 0); - gfs2_assert_warn(sdp, sz == PAGE_SIZE); - -page_added: - if (submit_idx <= read_idx + BIO_MAX_PAGES) { - /* Keep at least one bio in flight */ - continue; - } - - gfs2_jhead_process_page(jd, read_idx++, head, &done); - if (done) - goto out; /* found */ - } - } - -out: - if (bio) - submit_bio(bio); - while (read_idx <= index) - gfs2_jhead_process_page(jd, read_idx++, head, &done); - - if (!ret) - ret = filemap_check_wb_err(mapping, since); - - return ret; -} - static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type, u32 ld_length, u32 ld_data1) { diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 331160fc568b..711c4d89c063 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -30,10 +30,8 @@ extern u64 gfs2_log_bmap(struct gfs2_sbd *sdp); extern void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page, unsigned size, unsigned offset, u64 blkno); extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page); -extern void gfs2_log_submit_bio(struct bio **biop, int opf); +extern void gfs2_log_submit_bio(struct bio **biop, int op, int op_flags); extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); -extern int gfs2_find_jhead(struct gfs2_jdesc *jd, - struct gfs2_log_header_host *head); static inline unsigned int buf_limit(struct gfs2_sbd *sdp) { diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 1179763f6370..b041cb8ae383 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -41,7 +41,6 @@ #include "dir.h" #include "meta_io.h" #include "trace_gfs2.h" -#include "lops.h" #define DO 0 #define UNDO 1 diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 7389e445a7a7..2dac43065382 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -181,6 +181,129 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk, return error; } +/** + * find_good_lh - find a good log header + * @jd: the journal + * @blk: the segment to start searching from + * @lh: the log header to fill in + * @forward: if true search forward in the log, else search backward + * + * Call get_log_header() to get a log header for a segment, but if the + * segment is bad, either scan forward or backward until we find a good one. + * + * Returns: errno + */ + +static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk, + struct gfs2_log_header_host *head) +{ + unsigned int orig_blk = *blk; + int error; + + for (;;) { + error = get_log_header(jd, *blk, head); + if (error <= 0) + return error; + + if (++*blk == jd->jd_blocks) + *blk = 0; + + if (*blk == orig_blk) { + gfs2_consist_inode(GFS2_I(jd->jd_inode)); + return -EIO; + } + } +} + +/** + * jhead_scan - make sure we've found the head of the log + * @jd: the journal + * @head: this is filled in with the log descriptor of the head + * + * At this point, seg and lh should be either the head of the log or just + * before. Scan forward until we find the head. + * + * Returns: errno + */ + +static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) +{ + unsigned int blk = head->lh_blkno; + struct gfs2_log_header_host lh; + int error; + + for (;;) { + if (++blk == jd->jd_blocks) + blk = 0; + + error = get_log_header(jd, blk, &lh); + if (error < 0) + return error; + if (error == 1) + continue; + + if (lh.lh_sequence == head->lh_sequence) { + gfs2_consist_inode(GFS2_I(jd->jd_inode)); + return -EIO; + } + if (lh.lh_sequence < head->lh_sequence) + break; + + *head = lh; + } + + return 0; +} + +/** + * gfs2_find_jhead - find the head of a log + * @jd: the journal + * @head: the log descriptor for the head of the log is returned here + * + * Do a binary search of a journal and find the valid log entry with the + * highest sequence number. (i.e. the log head) + * + * Returns: errno + */ + +int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) +{ + struct gfs2_log_header_host lh_1, lh_m; + u32 blk_1, blk_2, blk_m; + int error; + + blk_1 = 0; + blk_2 = jd->jd_blocks - 1; + + for (;;) { + blk_m = (blk_1 + blk_2) / 2; + + error = find_good_lh(jd, &blk_1, &lh_1); + if (error) + return error; + + error = find_good_lh(jd, &blk_m, &lh_m); + if (error) + return error; + + if (blk_1 == blk_m || blk_m == blk_2) + break; + + if (lh_1.lh_sequence <= lh_m.lh_sequence) + blk_1 = blk_m; + else + blk_2 = blk_m; + } + + error = jhead_scan(jd, &lh_1); + if (error) + return error; + + *head = lh_1; + + return error; +} + /** * foreach_descriptor - go through the active part of the log * @jd: the journal diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index 99575ab81202..11d81248be85 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h @@ -27,6 +27,8 @@ extern int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where) extern int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); extern void gfs2_revoke_clean(struct gfs2_jdesc *jd); +extern int gfs2_find_jhead(struct gfs2_jdesc *jd, + struct gfs2_log_header_host *head); extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait); extern void gfs2_recover_func(struct work_struct *work); extern int __get_log_header(struct gfs2_sbd *sdp, diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index d4b11c903971..ca71163ff7cf 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -45,7 +45,6 @@ #include "util.h" #include "sys.h" #include "xattr.h" -#include "lops.h" #define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x) From 20bbf22a622178db71fb8bea5f9000d6f346185a Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:32 -0800 Subject: [PATCH 1205/1436] net/mlx5: Use void pointer as the type in address_of macro Better to use void * and avoid unnecessary casts. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Reviewed-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 0845a227a7b2..46223efa1877 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -67,7 +67,7 @@ #define MLX5_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8) #define MLX5_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32) #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8) -#define MLX5_ADDR_OF(typ, p, fld) ((char *)(p) + MLX5_BYTE_OFF(typ, fld)) +#define MLX5_ADDR_OF(typ, p, fld) ((void *)((uint8_t *)(p) + MLX5_BYTE_OFF(typ, fld))) /* insert a value to a struct */ #define MLX5_SET(typ, p, fld, v) do { \ From 7e4c4330a3bc7f34f82b5bc370821e1e16d425fb Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:33 -0800 Subject: [PATCH 1206/1436] net/mlx5: Use consistent vport num argument type Use u16 for vport number, which matches how hardware refers to this argument throughout commands. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 32 +++++++++---------- .../net/ethernet/mellanox/mlx5/core/vport.c | 8 ++--- include/linux/mlx5/vport.h | 8 ++--- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index a44ea7b85614..d7382892e81c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -52,7 +52,7 @@ enum { struct vport_addr { struct l2addr_node node; u8 action; - u32 vport; + u16 vport; struct mlx5_flow_handle *flow_rule; bool mpfs; /* UC MAC was added to MPFs */ /* A flag indicating that mac was added due to mc promiscuous vport */ @@ -115,7 +115,7 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } -static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, +static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, u16 vlan, u8 qos, u8 set_flags) { u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0}; @@ -152,7 +152,7 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, /* E-Switch FDB */ static struct mlx5_flow_handle * -__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, +__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u16 vport, bool rx_rule, u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN]) { int match_header = (is_zero_ether_addr(mac_c) ? 0 : @@ -215,7 +215,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, } static struct mlx5_flow_handle * -esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) +esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u16 vport) { u8 mac_c[ETH_ALEN]; @@ -224,7 +224,7 @@ esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) } static struct mlx5_flow_handle * -esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport) +esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u16 vport) { u8 mac_c[ETH_ALEN]; u8 mac_v[ETH_ALEN]; @@ -237,7 +237,7 @@ esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport) } static struct mlx5_flow_handle * -esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) +esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u16 vport) { u8 mac_c[ETH_ALEN]; u8 mac_v[ETH_ALEN]; @@ -377,7 +377,7 @@ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; int err; /* Skip mlx5_mpfs_add_mac for PFs, @@ -409,7 +409,7 @@ fdb_add: static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; int err = 0; /* Skip mlx5_mpfs_del_mac for PFs, @@ -438,7 +438,7 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw, struct esw_mc_addr *esw_mc) { u8 *mac = vaddr->node.addr; - u32 vport_idx = 0; + u16 vport_idx = 0; for (vport_idx = 0; vport_idx < esw->total_vports; vport_idx++) { struct mlx5_vport *vport = &esw->vports[vport_idx]; @@ -485,7 +485,7 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) struct hlist_head *hash = esw->mc_table; struct esw_mc_addr *esw_mc; u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; if (!esw->fdb_table.legacy.fdb) return 0; @@ -525,7 +525,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) struct hlist_head *hash = esw->mc_table; struct esw_mc_addr *esw_mc; u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; if (!esw->fdb_table.legacy.fdb) return 0; @@ -564,7 +564,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) /* Apply vport UC/MC list to HW l2 table and FDB table */ static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, - u32 vport_num, int list_type) + u16 vport_num, int list_type) { struct mlx5_vport *vport = &esw->vports[vport_num]; bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; @@ -599,7 +599,7 @@ static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, /* Sync vport UC/MC list from vport context */ static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, - u32 vport_num, int list_type) + u16 vport_num, int list_type) { struct mlx5_vport *vport = &esw->vports[vport_num]; bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; @@ -686,7 +686,7 @@ out: /* Sync vport UC/MC list from vport context * Must be called after esw_update_vport_addr_list */ -static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num) +static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_vport *vport = &esw->vports[vport_num]; struct l2addr_node *node; @@ -721,7 +721,7 @@ static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num) } /* Apply vport rx mode to HW FDB table */ -static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num, +static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num, bool promisc, bool mc_promisc) { struct esw_mc_addr *allmulti_addr = &esw->mc_promisc; @@ -764,7 +764,7 @@ promisc: } /* Sync vport rx mode from vport context */ -static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num) +static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_vport *vport = &esw->vports[vport_num]; int promisc_all = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 9b150ce9d315..9a928eb48522 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -255,7 +255,7 @@ int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu) EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mtu); int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, enum mlx5_list_type list_type, u8 addr_list[][ETH_ALEN], int *list_size) @@ -373,7 +373,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, u16 vlans[], int *size) { @@ -526,7 +526,7 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid); int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, - u32 vport, u64 node_guid) + u16 vport, u64 node_guid) { int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); void *nic_vport_context; @@ -827,7 +827,7 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, - u32 vport, + u16 vport, int *promisc_uc, int *promisc_mc, int *promisc_all) diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 9c694808c212..1654b911cdb2 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -60,7 +60,7 @@ int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, u64 *system_image_guid); int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, - u32 vport, u64 node_guid); + u16 vport, u64 node_guid); int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, u16 *qkey_viol_cntr); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, @@ -78,7 +78,7 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, u64 *node_guid); int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, enum mlx5_list_type list_type, u8 addr_list[][ETH_ALEN], int *list_size); @@ -87,7 +87,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, u8 addr_list[][ETH_ALEN], int list_size); int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, - u32 vport, + u16 vport, int *promisc_uc, int *promisc_mc, int *promisc_all); @@ -96,7 +96,7 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, int promisc_mc, int promisc_all); int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, u16 vlans[], int *size); int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, From f0666f1f22b5748e7caf87481a0869689f9f513d Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:34 -0800 Subject: [PATCH 1207/1436] IB/mlx5: Use unified register/load function for uplink and VF vports IB driver maintains different registration and load function calls for uplink and VF vports. This is not necessary as they only differ with each other on their profiles. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/ib_rep.c | 73 +++++++---------------------- drivers/infiniband/hw/mlx5/ib_rep.h | 10 ++-- drivers/infiniband/hw/mlx5/main.c | 26 +++++----- 3 files changed, 37 insertions(+), 72 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 46a9ddc8ca56..3fb22967c098 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -6,7 +6,7 @@ #include "ib_rep.h" #include "srq.h" -static const struct mlx5_ib_profile rep_profile = { +static const struct mlx5_ib_profile vf_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, mlx5_ib_stage_init_init, mlx5_ib_stage_init_cleanup), @@ -45,31 +45,18 @@ static const struct mlx5_ib_profile rep_profile = { NULL), }; -static int -mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) -{ - struct mlx5_ib_dev *ibdev; - - ibdev = mlx5_ib_rep_to_dev(rep); - if (!__mlx5_ib_add(ibdev, ibdev->profile)) - return -EINVAL; - return 0; -} - -static void -mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep) -{ - struct mlx5_ib_dev *ibdev; - - ibdev = mlx5_ib_rep_to_dev(rep); - __mlx5_ib_remove(ibdev, ibdev->profile, MLX5_IB_STAGE_MAX); -} - static int mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { +#define FDB_UPLINK_VPORT 0xffff + const struct mlx5_ib_profile *profile; struct mlx5_ib_dev *ibdev; + if (rep->vport == FDB_UPLINK_VPORT) + profile = &uplink_rep_profile; + else + profile = &vf_rep_profile; + ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev)); if (!ibdev) return -ENOMEM; @@ -78,7 +65,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) ibdev->mdev = dev; ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports), MLX5_CAP_GEN(dev, num_vhca_ports)); - if (!__mlx5_ib_add(ibdev, &rep_profile)) + if (!__mlx5_ib_add(ibdev, profile)) return -EINVAL; rep->rep_if[REP_IB].priv = ibdev; @@ -105,15 +92,14 @@ static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) return mlx5_ib_rep_to_dev(rep); } -static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev) +void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) { - struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev); + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vports = MLX5_TOTAL_VPORTS(mdev); + struct mlx5_eswitch_rep_if rep_if = {}; int vport; - for (vport = 1; vport < total_vfs; vport++) { - struct mlx5_eswitch_rep_if rep_if = {}; - + for (vport = 0; vport < total_vports; vport++) { rep_if.load = mlx5_ib_vport_rep_load; rep_if.unload = mlx5_ib_vport_rep_unload; rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev; @@ -121,39 +107,16 @@ static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev) } } -static void mlx5_ib_rep_unregister_vf_vports(struct mlx5_ib_dev *dev) +void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) { - struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev); + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vports = MLX5_TOTAL_VPORTS(mdev); int vport; - for (vport = 1; vport < total_vfs; vport++) + for (vport = total_vports - 1; vport >= 0; vport--) mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB); } -void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) -{ - struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; - struct mlx5_eswitch_rep_if rep_if = {}; - - rep_if.load = mlx5_ib_nic_rep_load; - rep_if.unload = mlx5_ib_nic_rep_unload; - rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev; - rep_if.priv = dev; - - mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_IB); - - mlx5_ib_rep_register_vf_vports(dev); -} - -void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) -{ - struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; - - mlx5_ib_rep_unregister_vf_vports(dev); /* VFs vports */ - mlx5_eswitch_unregister_vport_rep(esw, 0, REP_IB); /* UPLINK PF*/ -} - u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) { return mlx5_eswitch_mode(esw); diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index 2ba73636a2fb..798d41e61fb4 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -10,14 +10,16 @@ #include "mlx5_ib.h" #ifdef CONFIG_MLX5_ESWITCH +extern const struct mlx5_ib_profile uplink_rep_profile; + u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw); struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, int vport_index); struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw); struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport_index); -void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev); -void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev); +void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev); +void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev); int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq); struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, @@ -48,8 +50,8 @@ struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, return NULL; } -static inline void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) {} -static inline void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) {} +static inline void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) {} +static inline void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) {} static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq) { diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 94fe253d4956..87ce62e44898 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6386,7 +6386,7 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_ib_stage_delay_drop_cleanup), }; -static const struct mlx5_ib_profile nic_rep_profile = { +const struct mlx5_ib_profile uplink_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, mlx5_ib_stage_init_init, mlx5_ib_stage_init_cleanup), @@ -6479,6 +6479,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) printk_once(KERN_INFO "%s", mlx5_version); + if (MLX5_ESWITCH_MANAGER(mdev) && + mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { + mlx5_ib_register_vport_reps(mdev); + return mdev; + } + port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); @@ -6493,14 +6499,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports), MLX5_CAP_GEN(mdev, num_vhca_ports)); - if (MLX5_ESWITCH_MANAGER(mdev) && - mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { - dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0); - dev->profile = &nic_rep_profile; - mlx5_ib_register_vport_reps(dev); - return dev; - } - return __mlx5_ib_add(dev, &pf_profile); } @@ -6509,6 +6507,11 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) struct mlx5_ib_multiport_info *mpi; struct mlx5_ib_dev *dev; + if (MLX5_ESWITCH_MANAGER(mdev) && context == mdev) { + mlx5_ib_unregister_vport_reps(mdev); + return; + } + if (mlx5_core_is_mp_slave(mdev)) { mpi = context; mutex_lock(&mlx5_ib_multiport_mutex); @@ -6520,10 +6523,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) } dev = context; - if (dev->profile == &nic_rep_profile) - mlx5_ib_unregister_vport_reps(dev); - else - __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); + __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); ib_dealloc_device((struct ib_device *)dev); } From 591905ba96796e3b677b14fa79f27127bfaab4ab Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:35 -0800 Subject: [PATCH 1208/1436] net/mlx5: Introduce Mellanox SmartNIC and modify page management logic Mellanox's SmartNIC combines embedded CPU(e.g, ARM) processing power with advanced network offloads to accelerate a multitude of security, networking and storage applications. With the introduction of the SmartNIC, there is a new PCI function called Embedded CPU Physical Function(ECPF). And it's possible for a PF to get its ICM pages from the ECPF PCI function. Driver shall identify if it is running on such a function by reading a bit in the initialization segment. When firmware asks for pages, it would issue a page request event specifying how many pages it requests and for which function. That driver responds with a manage_pages command providing the requested pages along with an indication for which function it is providing these pages. The encoding before this patch was as follows: function_id == 0: pages are requested for the function receiving the EQE. function_id != 0: pages are requested for VF identified by the function_id value A new one bit field in the EQE identifies that pages are requested for the ECPF. The notion of page_supplier can be introduced here and to support that, manage pages and query pages were modified so firmware can distinguish the following cases: 1. Function provides pages for itself 2. PF provides pages for its VF 3. ECPF provides pages to itself 4. ECPF provides pages for another function This distinction is possible through the introduction of the bit "embedded_cpu_function" in query_pages, manage_pages and page request EQE. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../net/ethernet/mellanox/mlx5/core/ecpf.c | 9 ++++ .../net/ethernet/mellanox/mlx5/core/ecpf.h | 25 +++++++++ .../net/ethernet/mellanox/mlx5/core/main.c | 2 + .../ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +- .../ethernet/mellanox/mlx5/core/pagealloc.c | 54 +++++++++++++------ .../net/ethernet/mellanox/mlx5/core/sriov.c | 2 +- include/linux/mlx5/device.h | 2 +- include/linux/mlx5/driver.h | 9 +++- include/linux/mlx5/mlx5_ifc.h | 12 +++-- 10 files changed, 94 insertions(+), 25 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/ecpf.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/ecpf.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 0257731e6d42..07965350b903 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -35,7 +35,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o # # Core extra # -mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o ecpf.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c new file mode 100644 index 000000000000..28b8c5c5c8c7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "ecpf.h" + +bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) +{ + return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h new file mode 100644 index 000000000000..8b684f0ab48f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_ECPF_H__ +#define __MLX5_ECPF_H__ + +#include +#include "mlx5_core.h" + +#ifdef CONFIG_MLX5_ESWITCH + +enum { + MLX5_ECPU_BIT_NUM = 23, +}; + +bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline bool +mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } + +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif /* __MLX5_ECPF_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 6d45518edbdc..08a3da2a8358 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -65,6 +65,7 @@ #include "lib/vxlan.h" #include "lib/devcom.h" #include "diag/fw_tracer.h" +#include "ecpf.h" MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); @@ -898,6 +899,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, struct pci_dev *pdev = dev->pdev; int err; + dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev); mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index c68dcea5985b..b127044293b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -121,7 +121,7 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 modify_bitmask); int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 element_id); -int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); +int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages); u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, struct ptp_system_timestamp *sts); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index a83b517b0714..41025387ff2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -48,6 +48,7 @@ enum { struct mlx5_pages_req { struct mlx5_core_dev *dev; u16 func_id; + u8 ec_function; s32 npages; struct work_struct work; }; @@ -143,6 +144,7 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, MLX5_SET(query_pages_in, in, op_mod, boot ? MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES : MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES); + MLX5_SET(query_pages_in, in, embedded_cpu_function, mlx5_core_is_ecpf(dev)); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) @@ -253,7 +255,8 @@ err_mapping: return err; } -static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) +static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id, + bool ec_function) { u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0}; @@ -262,6 +265,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE); MLX5_SET(manage_pages_in, in, function_id, func_id); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) @@ -270,7 +274,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) } static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, - int notify_fail) + int notify_fail, bool ec_function) { u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; int inlen = MLX5_ST_SZ_BYTES(manage_pages_in); @@ -305,6 +309,7 @@ retry: MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE); MLX5_SET(manage_pages_in, in, function_id, func_id); MLX5_SET(manage_pages_in, in, input_num_entries, npages); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (err) { @@ -316,8 +321,11 @@ retry: dev->priv.fw_pages += npages; if (func_id) dev->priv.vfs_pages += npages; + else if (mlx5_core_is_ecpf(dev) && !ec_function) + dev->priv.peer_pf_pages += npages; - mlx5_core_dbg(dev, "err %d\n", err); + mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x, err %d\n", + npages, ec_function, func_id, err); kvfree(in); return 0; @@ -328,7 +336,7 @@ out_4k: out_free: kvfree(in); if (notify_fail) - page_notify_fail(dev, func_id); + page_notify_fail(dev, func_id, ec_function); return err; } @@ -364,7 +372,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, } static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, - int *nclaimed) + int *nclaimed, bool ec_function) { int outlen = MLX5_ST_SZ_BYTES(manage_pages_out); u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0}; @@ -385,6 +393,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE); MLX5_SET(manage_pages_in, in, function_id, func_id); MLX5_SET(manage_pages_in, in, input_num_entries, npages); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen); err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen); @@ -410,6 +419,8 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, dev->priv.fw_pages -= num_claimed; if (func_id) dev->priv.vfs_pages -= num_claimed; + else if (mlx5_core_is_ecpf(dev) && !ec_function) + dev->priv.peer_pf_pages -= num_claimed; out_free: kvfree(out); @@ -423,9 +434,10 @@ static void pages_work_handler(struct work_struct *work) int err = 0; if (req->npages < 0) - err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL); + err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL, + req->ec_function); else if (req->npages > 0) - err = give_pages(dev, req->func_id, req->npages, 1); + err = give_pages(dev, req->func_id, req->npages, 1, req->ec_function); if (err) mlx5_core_warn(dev, "%s fail %d\n", @@ -434,6 +446,10 @@ static void pages_work_handler(struct work_struct *work) kfree(req); } +enum { + EC_FUNCTION_MASK = 0x8000, +}; + static int req_pages_handler(struct notifier_block *nb, unsigned long type, void *data) { @@ -441,6 +457,7 @@ static int req_pages_handler(struct notifier_block *nb, struct mlx5_core_dev *dev; struct mlx5_priv *priv; struct mlx5_eqe *eqe; + bool ec_function; u16 func_id; s32 npages; @@ -450,6 +467,7 @@ static int req_pages_handler(struct notifier_block *nb, func_id = be16_to_cpu(eqe->data.req_pages.func_id); npages = be32_to_cpu(eqe->data.req_pages.num_pages); + ec_function = be16_to_cpu(eqe->data.req_pages.ec_function) & EC_FUNCTION_MASK; mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n", func_id, npages); req = kzalloc(sizeof(*req), GFP_ATOMIC); @@ -461,6 +479,7 @@ static int req_pages_handler(struct notifier_block *nb, req->dev = dev; req->func_id = func_id; req->npages = npages; + req->ec_function = ec_function; INIT_WORK(&req->work, pages_work_handler); queue_work(dev->priv.pg_wq, &req->work); return NOTIFY_OK; @@ -479,7 +498,7 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n", npages, boot ? "boot" : "init", func_id); - return give_pages(dev, func_id, npages, 0); + return give_pages(dev, func_id, npages, 0, mlx5_core_is_ecpf(dev)); } enum { @@ -513,7 +532,7 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) fwp = rb_entry(p, struct fw_page, rb_node); err = reclaim_pages(dev, fwp->func_id, optimal_reclaimed_pages(), - &nclaimed); + &nclaimed, mlx5_core_is_ecpf(dev)); if (err) { mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", @@ -535,6 +554,9 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) WARN(dev->priv.vfs_pages, "VFs FW pages counter is %d after reclaiming all pages\n", dev->priv.vfs_pages); + WARN(dev->priv.peer_pf_pages, + "Peer PF FW pages counter is %d after reclaiming all pages\n", + dev->priv.peer_pf_pages); return 0; } @@ -567,10 +589,10 @@ void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) flush_workqueue(dev->priv.pg_wq); } -int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) +int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages) { unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); - int prev_vfs_pages = dev->priv.vfs_pages; + int prev_pages = *pages; /* In case of internal error we will free the pages manually later */ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { @@ -578,16 +600,16 @@ int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) return 0; } - mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages, + mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_pages, dev->priv.name); - while (dev->priv.vfs_pages) { + while (*pages) { if (time_after(jiffies, end)) { - mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages); + mlx5_core_warn(dev, "aborting while there are %d pending pages\n", *pages); return -ETIMEDOUT; } - if (dev->priv.vfs_pages < prev_vfs_pages) { + if (*pages < prev_pages) { end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); - prev_vfs_pages = dev->priv.vfs_pages; + prev_pages = *pages; } msleep(50); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 6e178030d8fb..7b23fa8d2d60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -147,7 +147,7 @@ out: if (MLX5_ESWITCH_MANAGER(dev)) mlx5_eswitch_disable_sriov(dev->priv.eswitch); - if (mlx5_wait_for_vf_pages(dev)) + if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages)) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); } diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 46223efa1877..f2070350f60a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -591,7 +591,7 @@ struct mlx5_eqe_cmd { }; struct mlx5_eqe_page_req { - u8 rsvd0[2]; + __be16 ec_function; __be16 func_id; __be32 num_pages; __be32 rsvd1[5]; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 039c9398614c..cce4e8293384 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -522,6 +522,7 @@ struct mlx5_priv { atomic_t reg_pages; struct list_head free_list; int vfs_pages; + int peer_pf_pages; struct mlx5_core_health health; @@ -652,6 +653,7 @@ struct mlx5_core_dev { u32 mcam[MLX5_ST_SZ_DW(mcam_reg)]; u32 fpga[MLX5_ST_SZ_DW(fpga_cap)]; u32 qcam[MLX5_ST_SZ_DW(qcam_reg)]; + u8 embedded_cpu; } caps; u64 sys_image_guid; phys_addr_t iseg_base; @@ -922,7 +924,7 @@ void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); void mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, - s32 npages); + s32 npages, bool ec_function); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); @@ -1076,6 +1078,11 @@ static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev) return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF); } +static inline bool mlx5_core_is_ecpf(struct mlx5_core_dev *dev) +{ + return dev->caps.embedded_cpu; +} + #define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev)) #define MLX5_VPORT_MANAGER(mdev) \ (MLX5_CAP_GEN(mdev, vport_group_manager) && \ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c5c679390fbd..46799b4c8859 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4441,7 +4441,8 @@ struct mlx5_ifc_query_pages_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 num_pages[0x20]; @@ -4460,7 +4461,8 @@ struct mlx5_ifc_query_pages_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; @@ -5880,7 +5882,8 @@ struct mlx5_ifc_manage_pages_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 input_num_entries[0x20]; @@ -8749,7 +8752,8 @@ struct mlx5_ifc_initial_seg_bits { u8 initializing[0x1]; u8 reserved_at_fe1[0x4]; u8 nic_interface_supported[0x3]; - u8 reserved_at_fe8[0x18]; + u8 embedded_cpu[0x1]; + u8 reserved_at_fe9[0x17]; struct mlx5_ifc_health_buffer_bits health_buffer; From 22e939a91dcb9bd4b773f2a0c0cb4eb016679b49 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:36 -0800 Subject: [PATCH 1209/1436] net/mlx5: Update enable HCA dependency With the introduction of ECPF, we require that the ECPF driver will aways call enable/disable HCA for that PF in the same way a PF does this for its VFs. The PF is still responsible for calling enable and disable HCA for its VFs. To distinguish between the ECPF executing enable/disable HCA for itself or for the PF, it sets the embedded CPU function bit in the input params struct of these commands. When the bit is cleared and function ID is zero, it refers to the peer PF. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/ecpf.c | 76 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/ecpf.h | 4 + .../net/ethernet/mellanox/mlx5/core/main.c | 14 ++++ include/linux/mlx5/mlx5_ifc.h | 6 +- 4 files changed, 98 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index 28b8c5c5c8c7..1bcf8b8f9713 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -7,3 +7,79 @@ bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1; } + +static int mlx5_peer_pf_enable_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {}; + + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, 0); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); +} + +static int mlx5_peer_pf_disable_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {}; + + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, 0); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_peer_pf_init(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_peer_pf_enable_hca(dev); + if (err) + mlx5_core_err(dev, "Failed to enable peer PF HCA err(%d)\n", + err); + + return err; +} + +static void mlx5_peer_pf_cleanup(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_peer_pf_disable_hca(dev); + if (err) { + mlx5_core_err(dev, "Failed to disable peer PF HCA err(%d)\n", + err); + return; + } + + err = mlx5_wait_for_pages(dev, &dev->priv.peer_pf_pages); + if (err) + mlx5_core_warn(dev, "Timeout reclaiming peer PF pages err(%d)\n", + err); +} + +int mlx5_ec_init(struct mlx5_core_dev *dev) +{ + int err = 0; + + if (!mlx5_core_is_ecpf(dev)) + return 0; + + /* ECPF shall enable HCA for peer PF in the same way a PF + * does this for its VFs. + */ + err = mlx5_peer_pf_init(dev); + if (err) + return err; + + return 0; +} + +void mlx5_ec_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_ecpf(dev)) + return; + + mlx5_peer_pf_cleanup(dev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h index 8b684f0ab48f..d3d7a00a02ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -14,11 +14,15 @@ enum { }; bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); +int mlx5_ec_init(struct mlx5_core_dev *dev); +void mlx5_ec_cleanup(struct mlx5_core_dev *dev); #else /* CONFIG_MLX5_ESWITCH */ static inline bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } +static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {} #endif /* CONFIG_MLX5_ESWITCH */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 08a3da2a8358..40d591c8e76c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -612,6 +612,8 @@ int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); MLX5_SET(enable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, + dev->caps.embedded_cpu); return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); } @@ -622,6 +624,8 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); MLX5_SET(disable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, + dev->caps.embedded_cpu); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -1071,6 +1075,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_sriov; } + err = mlx5_ec_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init embedded CPU\n"); + goto err_ec; + } + if (mlx5_device_registered(dev)) { mlx5_attach_device(dev); } else { @@ -1088,6 +1098,9 @@ out: return 0; err_reg_dev: + mlx5_ec_cleanup(dev); + +err_ec: mlx5_sriov_detach(dev); err_sriov: @@ -1162,6 +1175,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, if (mlx5_device_registered(dev)) mlx5_detach_device(dev); + mlx5_ec_cleanup(dev); mlx5_sriov_detach(dev); mlx5_cleanup_fs(dev); mlx5_accel_ipsec_cleanup(dev); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 46799b4c8859..1b6d5a563a3a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -6061,7 +6061,8 @@ struct mlx5_ifc_enable_hca_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; @@ -6105,7 +6106,8 @@ struct mlx5_ifc_disable_hca_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; From c3a4e9f10714911486d09e7729195cc8fbedecd3 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:37 -0800 Subject: [PATCH 1210/1436] net/mlx5: Add query host params command The QUERY_HOST_PARAMS command is used by an Embedded CPU Physical Function (ECPF) driver to identify and retrieve information about the PF on the host side. E.g, number of virtual functions and PCI BDF. The number of VFs can be changed on the fly, a function is added to query current number of VFs and will be used in downstream patches. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 2 + .../net/ethernet/mellanox/mlx5/core/ecpf.c | 27 ++++++++++++ .../net/ethernet/mellanox/mlx5/core/ecpf.h | 4 ++ include/linux/mlx5/mlx5_ifc.h | 41 +++++++++++++++++++ 4 files changed, 74 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index a25a8c6f938e..46d70eb2d2f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -316,6 +316,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: case MLX5_CMD_OP_DEALLOC_MEMIC: case MLX5_CMD_OP_PAGE_FAULT_RESUME: + case MLX5_CMD_OP_QUERY_HOST_PARAMS: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -627,6 +628,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); + MLX5_COMMAND_STR_CASE(QUERY_HOST_PARAMS); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index 1bcf8b8f9713..4746f2d28fb6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -83,3 +83,30 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev) mlx5_peer_pf_cleanup(dev); } + +static int mlx5_query_host_params_context(struct mlx5_core_dev *dev, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_host_params_in)] = {}; + + MLX5_SET(query_host_params_in, in, opcode, + MLX5_CMD_OP_QUERY_HOST_PARAMS); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) +{ + u32 out[MLX5_ST_SZ_DW(query_host_params_out)] = {}; + int err; + + err = mlx5_query_host_params_context(dev, out, sizeof(out)); + if (err) + return err; + + *num_vf = MLX5_GET(query_host_params_out, out, + host_params_context.host_num_of_vfs); + mlx5_core_dbg(dev, "host_num_of_vfs %d\n", *num_vf); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h index d3d7a00a02ac..346372df218f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -16,6 +16,7 @@ enum { bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); int mlx5_ec_init(struct mlx5_core_dev *dev); void mlx5_ec_cleanup(struct mlx5_core_dev *dev); +int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf); #else /* CONFIG_MLX5_ESWITCH */ @@ -23,6 +24,9 @@ static inline bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {} +static inline int +mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) +{ return -EOPNOTSUPP; } #endif /* CONFIG_MLX5_ESWITCH */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1b6d5a563a3a..565046830559 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -142,6 +142,7 @@ enum { MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY = 0x725, MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY = 0x726, MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS = 0x727, + MLX5_CMD_OP_QUERY_HOST_PARAMS = 0x740, MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750, MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751, MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752, @@ -9522,4 +9523,44 @@ struct mlx5_ifc_mtrc_ctrl_bits { u8 reserved_at_80[0x180]; }; +struct mlx5_ifc_host_params_context_bits { + u8 host_number[0x8]; + u8 reserved_at_8[0x8]; + u8 host_num_of_vfs[0x10]; + + u8 reserved_at_20[0x10]; + u8 host_pci_bus[0x10]; + + u8 reserved_at_40[0x10]; + u8 host_pci_device[0x10]; + + u8 reserved_at_60[0x10]; + u8 host_pci_function[0x10]; + + u8 reserved_at_80[0x180]; +}; + +struct mlx5_ifc_query_host_params_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_query_host_params_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_host_params_context_bits host_params_context; + + u8 reserved_at_280[0x180]; +}; + #endif /* MLX5_IFC_H */ From 7f0d11c7e0d08304de55b6a571a69166f3d54160 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:38 -0800 Subject: [PATCH 1211/1436] net/mlx5: Add host params change event In Embedded CPU (EC) configurations, the EC driver needs to know when the number of virtual functions change on the corresponding PF at the host side. This is required so the EC driver can create or destroy representor net devices that represent the VFs ports. Whenever a change in the number of VFs occurs, firmware will generate an event towards the EC which will trigger a work to complete the rest of the handling. The specifics of the handling will be introduced in a downstream patch. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/events.c | 2 ++ include/linux/mlx5/device.h | 2 ++ include/linux/mlx5/driver.h | 5 +++++ 4 files changed, 12 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 7092457705a2..5c02f9291799 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -530,6 +530,9 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); + if (mlx5_core_is_ecpf_esw_manager(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE); + return async_event_mask; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index fbc42b7252a9..4f7f776d6332 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -103,6 +103,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_STALL_EVENT"; case MLX5_EVENT_TYPE_CMD: return "MLX5_EVENT_TYPE_CMD"; + case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE: + return "MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE"; case MLX5_EVENT_TYPE_PAGE_REQUEST: return "MLX5_EVENT_TYPE_PAGE_REQUEST"; case MLX5_EVENT_TYPE_PAGE_FAULT: diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index f2070350f60a..f93a5598b942 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -342,6 +342,8 @@ enum mlx5_event { MLX5_EVENT_TYPE_PAGE_FAULT = 0xc, MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd, + MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE = 0xe, + MLX5_EVENT_TYPE_DCT_DRAINED = 0x1c, MLX5_EVENT_TYPE_FPGA_ERROR = 0x20, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index cce4e8293384..151563a12fc2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1083,6 +1083,11 @@ static inline bool mlx5_core_is_ecpf(struct mlx5_core_dev *dev) return dev->caps.embedded_cpu; } +static inline bool mlx5_core_is_ecpf_esw_manager(struct mlx5_core_dev *dev) +{ + return dev->caps.embedded_cpu && MLX5_CAP_GEN(dev, eswitch_manager); +} + #define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev)) #define MLX5_VPORT_MANAGER(mdev) \ (MLX5_CAP_GEN(mdev, vport_group_manager) && \ From feb393693316bd5de2c88a020f6ded51e3a4120b Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:39 -0800 Subject: [PATCH 1212/1436] net/mlx5: Provide an alternative VF upper bound for ECPF ECPF doesn't support SR-IOV, but an ECPF E-Switch manager shall know the max VFs supported by its peer host PF in order to control those VF vports. The current driver implementation uses the total vfs quantity as provided by the pci sub-system for an upper bound of the VF vports the e-switch code needs to deal with. This obviously can't work as is on ECPF e-switch manager. For now, we use a hard coded value of 128 on such systems. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 151563a12fc2..46e0aa52a58a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1088,7 +1088,16 @@ static inline bool mlx5_core_is_ecpf_esw_manager(struct mlx5_core_dev *dev) return dev->caps.embedded_cpu && MLX5_CAP_GEN(dev, eswitch_manager); } -#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev)) +#define MLX5_HOST_PF_MAX_VFS (127u) +static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev) +{ + if (mlx5_core_is_ecpf_esw_manager(dev)) + return MLX5_HOST_PF_MAX_VFS; + else + return pci_sriov_get_totalvfs(dev->pdev); +} + +#define MLX5_TOTAL_VPORTS(mdev) (1 + mlx5_core_max_vfs(mdev)) #define MLX5_VPORT_MANAGER(mdev) \ (MLX5_CAP_GEN(mdev, vport_group_manager) && \ (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \ From b05af6aacdb920dc3bfd27d53ade7f680d43265c Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:40 -0800 Subject: [PATCH 1213/1436] net/mlx5: E-Switch, Normalize the name of uplink vport number Driver used to name uplink vport as FDB_UPLINK_VPORT, it's hard to comply with the same naming convention along with the introduction of other vports. Use MLX5_VPORT as the prefix for such vports and relocate the uplink vport definition to public header file for the benefits of both net and IB drivers. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/ib_rep.c | 4 ++-- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 22 +++++++++---------- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++--- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 8 +++---- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 2 -- .../mellanox/mlx5/core/eswitch_offloads.c | 14 ++++++------ include/linux/mlx5/vport.h | 4 ++++ 7 files changed, 30 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 3fb22967c098..99cae9a10195 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -3,6 +3,7 @@ * Copyright (c) 2018 Mellanox Technologies. All rights reserved. */ +#include #include "ib_rep.h" #include "srq.h" @@ -48,11 +49,10 @@ static const struct mlx5_ib_profile vf_rep_profile = { static int mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { -#define FDB_UPLINK_VPORT 0xffff const struct mlx5_ib_profile *profile; struct mlx5_ib_dev *ibdev; - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) profile = &uplink_rep_profile; else profile = &vf_rep_profile; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 96cc0c6a4014..c78d21b2501e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -152,7 +152,7 @@ static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) mlx5e_uplink_rep_update_hw_counters(priv); else mlx5e_vf_rep_update_hw_counters(priv); @@ -1207,7 +1207,7 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) return false; rep = rpriv->rep; - return (rep->vport == FDB_UPLINK_VPORT); + return (rep->vport == MLX5_VPORT_UPLINK); } static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id) @@ -1343,7 +1343,7 @@ static void mlx5e_build_rep_params(struct net_device *netdev) params->sw_mtu = netdev->mtu; /* SQ */ - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) params->log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; else params->log_sq_size = MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE; @@ -1370,7 +1370,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_core_dev *mdev = priv->mdev; - if (rep->vport == FDB_UPLINK_VPORT) { + if (rep->vport == MLX5_VPORT_UPLINK) { SET_NETDEV_DEV(netdev, &priv->mdev->pdev->dev); netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep; /* we want a persistent mac for the uplink rep */ @@ -1402,7 +1402,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_TSO6; netdev->hw_features |= NETIF_F_RXCSUM; - if (rep->vport != FDB_UPLINK_VPORT) + if (rep->vport != MLX5_VPORT_UPLINK) netdev->features |= NETIF_F_VLAN_CHALLENGED; netdev->features |= netdev->hw_features; @@ -1555,7 +1555,7 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) return err; } - if (rpriv->rep->vport == FDB_UPLINK_VPORT) { + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { uplink_priv = &rpriv->uplink_priv; /* init shared tc flow table */ @@ -1591,7 +1591,7 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) for (tc = 0; tc < priv->profile->max_tc; tc++) mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); - if (rpriv->rep->vport == FDB_UPLINK_VPORT) { + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { /* clean indirect TC block notifications */ unregister_netdevice_notifier(&rpriv->uplink_priv.netdevice_nb); mlx5e_rep_indr_clean_block_privs(rpriv); @@ -1710,7 +1710,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) rpriv->rep = rep; nch = mlx5e_get_max_num_channels(dev); - profile = (rep->vport == FDB_UPLINK_VPORT) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile; + profile = (rep->vport == MLX5_VPORT_UPLINK) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile; netdev = mlx5e_create_netdev(dev, profile, nch, rpriv); if (!netdev) { pr_warn("Failed to create representor netdev for vport %d\n", @@ -1723,7 +1723,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) rep->rep_if[REP_ETH].priv = rpriv; INIT_LIST_HEAD(&rpriv->vport_sqs_list); - if (rep->vport == FDB_UPLINK_VPORT) { + if (rep->vport == MLX5_VPORT_UPLINK) { err = mlx5e_create_mdev_resources(dev); if (err) goto err_destroy_netdev; @@ -1759,7 +1759,7 @@ err_detach_netdev: mlx5e_detach_netdev(netdev_priv(netdev)); err_destroy_mdev_resources: - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) mlx5e_destroy_mdev_resources(dev); err_destroy_netdev: @@ -1779,7 +1779,7 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) unregister_netdev(netdev); mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) mlx5e_destroy_mdev_resources(priv->mdev); mlx5e_destroy_netdev(priv); kfree(ppriv); /* mlx5e_rep_priv */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cae6c6d48984..1a73e661056a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1834,7 +1834,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { rep = rpriv->rep; - if (rep->vport != FDB_UPLINK_VPORT && + if (rep->vport != MLX5_VPORT_UPLINK && (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && esw->offloads.inline_mode < match_level)) { NL_SET_ERR_MSG_MOD(extack, @@ -2724,7 +2724,7 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags) static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) { struct mlx5_esw_flow_attr *attr = flow->esw_attr; - bool is_rep_ingress = attr->in_rep->vport != FDB_UPLINK_VPORT && + bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK && flow->flags & MLX5E_TC_FLOW_INGRESS; bool act_is_encap = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); @@ -2849,7 +2849,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f, * original flow and packets redirected from uplink use the * peer mdev. */ - if (flow->esw_attr->in_rep->vport == FDB_UPLINK_VPORT) + if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK) in_mdev = peer_priv->mdev; else in_mdev = priv->mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d7382892e81c..0db56b4b7009 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -40,8 +40,6 @@ #include "eswitch.h" #include "fs_core.h" -#define UPLINK_VPORT 0xFFFF - enum { MLX5_ACTION_NONE = 0, MLX5_ACTION_ADD = 1, @@ -188,7 +186,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u16 vport, bool rx_rule, misc_parameters); mc_misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET(fte_match_set_misc, mv_misc, source_port, UPLINK_VPORT); + MLX5_SET(fte_match_set_misc, mv_misc, source_port, MLX5_VPORT_UPLINK); MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port); } @@ -499,7 +497,7 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) return -ENOMEM; esw_mc->uplink_rule = /* Forward MC MAC to Uplink */ - esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT); + esw_fdb_set_vport_rule(esw, mac, MLX5_VPORT_UPLINK); /* Add this multicast mac to all the mc promiscuous vports */ update_allmulti_vports(esw, vaddr, esw_mc); @@ -736,7 +734,7 @@ static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num, if (!allmulti_addr->uplink_rule) allmulti_addr->uplink_rule = esw_fdb_set_vport_allmulti_rule(esw, - UPLINK_VPORT); + MLX5_VPORT_UPLINK); allmulti_addr->refcnt++; } else if (vport->allmulti_rule) { mlx5_del_flow_rules(vport->allmulti_rule); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 9c89eea9b2c3..94da74b1e6ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -49,8 +49,6 @@ #define MLX5_MAX_MC_PER_VPORT(dev) \ (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) -#define FDB_UPLINK_VPORT 0xffff - #define MLX5_MIN_BW_SHARE 1 #define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 53065b6ae593..b0b1267eab07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -359,15 +359,15 @@ static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr, in_rep = attr->in_rep; out_rep = attr->dests[0].rep; - if (push && in_rep->vport == FDB_UPLINK_VPORT) + if (push && in_rep->vport == MLX5_VPORT_UPLINK) goto out_notsupp; - if (pop && out_rep->vport == FDB_UPLINK_VPORT) + if (pop && out_rep->vport == MLX5_VPORT_UPLINK) goto out_notsupp; /* vport has vlan push configured, can't offload VF --> wire rules w.o it */ if (!push && !pop && fwd) - if (in_rep->vlan && out_rep->vport == FDB_UPLINK_VPORT) + if (in_rep->vlan && out_rep->vport == MLX5_VPORT_UPLINK) goto out_notsupp; /* protects against (1) setting rules with different vlans to push and @@ -409,7 +409,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, if (!push && !pop && fwd) { /* tracks VF --> wire rules without vlan push action */ - if (attr->dests[0].rep->vport == FDB_UPLINK_VPORT) { + if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) { vport->vlan_refcount++; attr->vlan_handled = true; } @@ -469,7 +469,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, if (!push && !pop && fwd) { /* tracks VF --> wire rules without vlan push action */ - if (attr->dests[0].rep->vport == FDB_UPLINK_VPORT) + if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) vport->vlan_refcount--; return 0; @@ -1227,7 +1227,7 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) ether_addr_copy(rep->hw_id, hw_id); } - offloads->vport_reps[0].vport = FDB_UPLINK_VPORT; + offloads->vport_reps[0].vport = MLX5_VPORT_UPLINK; return 0; } @@ -1811,7 +1811,7 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; - if (vport == FDB_UPLINK_VPORT) + if (vport == MLX5_VPORT_UPLINK) vport = UPLINK_REP_INDEX; rep = &offloads->vport_reps[vport]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 1654b911cdb2..2e2928eacd97 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -42,6 +42,10 @@ enum { MLX5_CAP_INLINE_MODE_NOT_REQUIRED, }; +enum { + MLX5_VPORT_UPLINK = 0xffff +}; + u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport); int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport, u8 state); From bf3e4d387daed36aad2cfd4f493b07714ac0cd5e Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:41 -0800 Subject: [PATCH 1214/1436] net/mlx5: Relocate vport macros to the vport header file These are two macros in the driver general header which deal with the number of total vports and if a vport is vport manager. Such macros are vport entities, better to place them at the vport header file. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 1 + include/linux/mlx5/driver.h | 6 ------ include/linux/mlx5/vport.h | 7 +++++++ 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 5c02f9291799..bb6e5b5d9681 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_RFS_ACCEL diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 79f122b45def..b6a7bc8f667c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -32,6 +32,7 @@ #include #include +#include #include #include "mlx5_core.h" diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 46e0aa52a58a..c5454f985e1d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1097,12 +1097,6 @@ static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev) return pci_sriov_get_totalvfs(dev->pdev); } -#define MLX5_TOTAL_VPORTS(mdev) (1 + mlx5_core_max_vfs(mdev)) -#define MLX5_VPORT_MANAGER(mdev) \ - (MLX5_CAP_GEN(mdev, vport_group_manager) && \ - (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \ - mlx5_core_is_pf(mdev)) - static inline int mlx5_get_gid_table_len(u16 param) { if (param > 4) { diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 2e2928eacd97..28f47e868fbc 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -36,6 +36,13 @@ #include #include +#define MLX5_TOTAL_VPORTS(mdev) (1 + mlx5_core_max_vfs(mdev)) + +#define MLX5_VPORT_MANAGER(mdev) \ + (MLX5_CAP_GEN(mdev, vport_group_manager) && \ + (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \ + mlx5_core_is_pf(mdev)) + enum { MLX5_CAP_INLINE_MODE_L2, MLX5_CAP_INLINE_MODE_VPORT_CONTEXT, From cd7e4186af9d968559852b4eeb1039b3419cc590 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:42 -0800 Subject: [PATCH 1215/1436] net/mlx5: E-Switch, Avoid magic numbers when initializing offloads mode When dealing with the offloads mode initialization, driver refers to the number of VFs and add magic number one (1) to take account of the uplink. This is not clear and will make the code less readable after adding other vports (e.g. host PF). As these are special vports compared to VF vports, add a helper macro to denote such special vports and eliminate the use of magic number. Moreover, when creating offloads flow table and groups, the driver reserves two more slots for UC and MC miss rules. Replace this magic number with a helper macro as well. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- .../mellanox/mlx5/core/eswitch_offloads.c | 25 +++++++++++-------- include/linux/mlx5/vport.h | 4 ++- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 0db56b4b7009..49a9e3877d2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1638,7 +1638,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw, nvfs + 1); + err = esw_offloads_init(esw, nvfs + MLX5_SPECIAL_VPORTS); } if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b0b1267eab07..1496e82b5108 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -46,6 +46,11 @@ enum { FDB_SLOW_PATH }; +/* There are two match-all miss flows, one for unicast dst mac and + * one for multicast. + */ +#define MLX5_ESW_MISS_FLOWS (2) + #define fdb_prio_table(esw, chain, prio, level) \ (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)] @@ -904,8 +909,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) esw->fdb_table.offloads.fdb_left[i] = ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0; - table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2 + - esw->total_vports; + table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + + MLX5_ESW_MISS_FLOWS + esw->total_vports; /* create the slow path fdb with encap set, so further table instances * can be created at run time while VFs are probed if the FW allows that. @@ -999,7 +1004,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) dmac[0] = 0x01; MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ix + MLX5_ESW_MISS_FLOWS); g = mlx5_create_flow_group(fdb, flow_group_in); if (IS_ERR(g)) { @@ -1048,7 +1054,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) esw_destroy_offloads_fast_fdb_tables(esw); } -static int esw_create_offloads_table(struct mlx5_eswitch *esw) +static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports) { struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; @@ -1062,7 +1068,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw) return -EOPNOTSUPP; } - ft_attr.max_fte = dev->priv.sriov.num_vfs + 2; + ft_attr.max_fte = nvports + MLX5_ESW_MISS_FLOWS; ft_offloads = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft_offloads)) { @@ -1082,16 +1088,15 @@ static void esw_destroy_offloads_table(struct mlx5_eswitch *esw) mlx5_destroy_flow_table(offloads->ft_offloads); } -static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) +static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *g; - struct mlx5_priv *priv = &esw->dev->priv; u32 *flow_group_in; void *match_criteria, *misc; int err = 0; - int nvports = priv->sriov.num_vfs + 2; + nvports = nvports + MLX5_ESW_MISS_FLOWS; flow_group_in = kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) return -ENOMEM; @@ -1407,11 +1412,11 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) if (err) return err; - err = esw_create_offloads_table(esw); + err = esw_create_offloads_table(esw, nvports); if (err) goto create_ft_err; - err = esw_create_vport_rx_group(esw); + err = esw_create_vport_rx_group(esw, nvports); if (err) goto create_fg_err; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 28f47e868fbc..3bc05449ac39 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -36,7 +36,9 @@ #include #include -#define MLX5_TOTAL_VPORTS(mdev) (1 + mlx5_core_max_vfs(mdev)) +#define MLX5_VPORT_PF_PLACEHOLDER (1u) +#define MLX5_SPECIAL_VPORTS (MLX5_VPORT_PF_PLACEHOLDER) +#define MLX5_TOTAL_VPORTS(mdev) (MLX5_SPECIAL_VPORTS + mlx5_core_max_vfs(mdev)) #define MLX5_VPORT_MANAGER(mdev) \ (MLX5_CAP_GEN(mdev, vport_group_manager) && \ From bc4e12ffefdd886057eabe38135515690d0756a6 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 12 Feb 2019 22:55:43 -0800 Subject: [PATCH 1216/1436] net/mlx5: Refactor queries to speed fields in Port Type and Speed register This patch fascicles queries to speed related fields in Port Type and Speed register (PTYS) into a single API. I addition, this patch refactors functions which serves only Ethernet driver: remove the protocol type as an input parameter, move code from 'core' directory into 'en' directory and add 'eth' prefix to the function's name. The patch also encapsulates functions that are not used outside the Ethernet driver removes redundant include files. Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 6 +- .../net/ethernet/mellanox/mlx5/core/en/port.c | 75 +++++++++++-- .../net/ethernet/mellanox/mlx5/core/en/port.h | 12 ++ .../ethernet/mellanox/mlx5/core/en_ethtool.c | 23 ++-- .../net/ethernet/mellanox/mlx5/core/port.c | 106 ------------------ include/linux/mlx5/port.h | 11 -- 6 files changed, 91 insertions(+), 142 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 87ce62e44898..efd08b41126c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -393,6 +393,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, struct ib_port_attr *props) { struct mlx5_ib_dev *dev = to_mdev(device); + u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; struct mlx5_core_dev *mdev; struct net_device *ndev, *upper; enum ib_mtu ndev_ib_mtu; @@ -416,10 +417,11 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, /* Possible bad flows are checked before filling out props so in case * of an error it will still be zeroed out. */ - err = mlx5_query_port_eth_proto_oper(mdev, ð_prot_oper, - mdev_port_num); + err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, + mdev_port_num); if (err) goto out; + eth_prot_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); props->active_width = IB_WIDTH_4X; props->active_speed = IB_SPEED_QDR; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 4a37713023be..9a1c2b2f87d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -63,6 +63,67 @@ static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = { [MLX5E_50GBASE_KR2] = 50000, }; +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, + struct mlx5e_port_eth_proto *eproto) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + if (!eproto) + return -EINVAL; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port); + if (err) + return err; + + eproto->cap = MLX5_GET(ptys_reg, out, eth_proto_capability); + eproto->admin = MLX5_GET(ptys_reg, out, eth_proto_admin); + eproto->oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + return 0; +} + +void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + + *an_status = 0; + *an_disable_cap = 0; + *an_disable_admin = 0; + + if (mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, 1)) + return; + + *an_status = MLX5_GET(ptys_reg, out, an_status); + *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap); + *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); +} + +int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + u8 an_disable_admin; + u8 an_disable_cap; + u8 an_status; + + mlx5_port_query_eth_autoneg(dev, &an_status, &an_disable_cap, + &an_disable_admin); + if (!an_disable_cap && an_disable) + return -EPERM; + + memset(in, 0, sizeof(in)); + + MLX5_SET(ptys_reg, in, local_port, 1); + MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); + MLX5_SET(ptys_reg, in, proto_mask, MLX5_PTYS_EN); + MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PTYS, 0, 1); +} + u32 mlx5e_port_ptys2speed(u32 eth_proto_oper) { unsigned long temp = eth_proto_oper; @@ -78,16 +139,14 @@ u32 mlx5e_port_ptys2speed(u32 eth_proto_oper) int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { - u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {}; - u32 eth_proto_oper; + struct mlx5e_port_eth_proto eproto; int err; - err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); + err = mlx5_port_query_eth_proto(mdev, 1, &eproto); if (err) return err; - eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - *speed = mlx5e_port_ptys2speed(eth_proto_oper); + *speed = mlx5e_port_ptys2speed(eproto.oper); if (!(*speed)) err = -EINVAL; @@ -96,17 +155,17 @@ int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { + struct mlx5e_port_eth_proto eproto; u32 max_speed = 0; - u32 proto_cap; int err; int i; - err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN); + err = mlx5_port_query_eth_proto(mdev, 1, &eproto); if (err) return err; for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) - if (proto_cap & MLX5E_PROT_MASK(i)) + if (eproto.cap & MLX5E_PROT_MASK(i)) max_speed = max(max_speed, mlx5e_link_speed[i]); *speed = max_speed; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index cd2160b8c9bf..4bdab8be10af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -36,6 +36,18 @@ #include #include "en.h" +struct mlx5e_port_eth_proto { + u32 cap; + u32 admin; + u32 oper; +}; + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, + struct mlx5e_port_eth_proto *eproto); +void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin); +int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin); u32 mlx5e_port_ptys2speed(u32 eth_proto_oper); int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c9df08133718..c29e141d72fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -882,7 +882,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, const struct ethtool_link_ksettings *link_ksettings) { struct mlx5_core_dev *mdev = priv->mdev; - u32 eth_proto_cap, eth_proto_admin; + struct mlx5e_port_eth_proto eproto; bool an_changes = false; u8 an_disable_admin; u8 an_disable_cap; @@ -898,14 +898,14 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) : mlx5e_port_speed2linkmodes(speed); - err = mlx5_query_port_proto_cap(mdev, ð_proto_cap, MLX5_PTYS_EN); + err = mlx5_port_query_eth_proto(mdev, 1, &eproto); if (err) { - netdev_err(priv->netdev, "%s: query port eth proto cap failed: %d\n", + netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n", __func__, err); goto out; } - link_modes = link_modes & eth_proto_cap; + link_modes = link_modes & eproto.cap; if (!link_modes) { netdev_err(priv->netdev, "%s: Not supported link mode(s) requested", __func__); @@ -913,24 +913,17 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, goto out; } - err = mlx5_query_port_proto_admin(mdev, ð_proto_admin, MLX5_PTYS_EN); - if (err) { - netdev_err(priv->netdev, "%s: query port eth proto admin failed: %d\n", - __func__, err); - goto out; - } - - mlx5_query_port_autoneg(mdev, MLX5_PTYS_EN, &an_status, - &an_disable_cap, &an_disable_admin); + mlx5_port_query_eth_autoneg(mdev, &an_status, &an_disable_cap, + &an_disable_admin); an_disable = link_ksettings->base.autoneg == AUTONEG_DISABLE; an_changes = ((!an_disable && an_disable_admin) || (an_disable && !an_disable_admin)); - if (!an_changes && link_modes == eth_proto_admin) + if (!an_changes && link_modes == eproto.admin) goto out; - mlx5_set_port_ptys(mdev, an_disable, link_modes, MLX5_PTYS_EN); + mlx5_port_set_eth_ptys(mdev, an_disable, link_modes); mlx5_toggle_port_link(mdev); out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 2b82f35f4c35..b81542820528 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -30,10 +30,7 @@ * SOFTWARE. */ -#include -#include #include -#include #include "mlx5_core.h" int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, @@ -157,44 +154,6 @@ int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration) sizeof(out), MLX5_REG_MLCR, 0, 1); } -int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, - u32 *proto_cap, int proto_mask) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); - if (err) - return err; - - if (proto_mask == MLX5_PTYS_EN) - *proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); - else - *proto_cap = MLX5_GET(ptys_reg, out, ib_proto_capability); - - return 0; -} -EXPORT_SYMBOL_GPL(mlx5_query_port_proto_cap); - -int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, - u32 *proto_admin, int proto_mask) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); - if (err) - return err; - - if (proto_mask == MLX5_PTYS_EN) - *proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); - else - *proto_admin = MLX5_GET(ptys_reg, out, ib_proto_admin); - - return 0; -} -EXPORT_SYMBOL_GPL(mlx5_query_port_proto_admin); - int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, u8 *link_width_oper, u8 local_port) { @@ -211,23 +170,6 @@ int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper); -int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev, - u32 *proto_oper, u8 local_port) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, - local_port); - if (err) - return err; - - *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - - return 0; -} -EXPORT_SYMBOL(mlx5_query_port_eth_proto_oper); - int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, u8 *proto_oper, u8 local_port) { @@ -245,35 +187,6 @@ int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, } EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper); -int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, - u32 proto_admin, int proto_mask) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - u32 in[MLX5_ST_SZ_DW(ptys_reg)]; - u8 an_disable_admin; - u8 an_disable_cap; - u8 an_status; - - mlx5_query_port_autoneg(dev, proto_mask, &an_status, - &an_disable_cap, &an_disable_admin); - if (!an_disable_cap && an_disable) - return -EPERM; - - memset(in, 0, sizeof(in)); - - MLX5_SET(ptys_reg, in, local_port, 1); - MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); - MLX5_SET(ptys_reg, in, proto_mask, proto_mask); - if (proto_mask == MLX5_PTYS_EN) - MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); - else - MLX5_SET(ptys_reg, in, ib_proto_admin, proto_admin); - - return mlx5_core_access_reg(dev, in, sizeof(in), out, - sizeof(out), MLX5_REG_PTYS, 0, 1); -} -EXPORT_SYMBOL_GPL(mlx5_set_port_ptys); - /* This function should be used after setting a port register only */ void mlx5_toggle_port_link(struct mlx5_core_dev *dev) { @@ -606,25 +519,6 @@ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) } EXPORT_SYMBOL_GPL(mlx5_query_port_pfc); -void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask, - u8 *an_status, - u8 *an_disable_cap, u8 *an_disable_admin) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - - *an_status = 0; - *an_disable_cap = 0; - *an_disable_admin = 0; - - if (mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1)) - return; - - *an_status = MLX5_GET(ptys_reg, out, an_status); - *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap); - *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); -} -EXPORT_SYMBOL_GPL(mlx5_query_port_autoneg); - int mlx5_max_tc(struct mlx5_core_dev *mdev) { u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? : 8; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index bf4bc01ffb0c..5be7eefa6d75 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -110,27 +110,16 @@ enum mlx5e_connector_type { int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port); -int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, - u32 *proto_cap, int proto_mask); -int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, - u32 *proto_admin, int proto_mask); int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, u8 *link_width_oper, u8 local_port); int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, u8 *proto_oper, u8 local_port); -int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev, - u32 *proto_oper, u8 local_port); -int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, - u32 proto_admin, int proto_mask); void mlx5_toggle_port_link(struct mlx5_core_dev *dev); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status *status); int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration); -void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask, - u8 *an_status, - u8 *an_disable_cap, u8 *an_disable_admin); int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port); void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port); From a0a899895692d4227cc55b087f5f47e185af7f23 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 12 Feb 2019 22:55:44 -0800 Subject: [PATCH 1217/1436] net/mlx5: Add new fields to Port Type and Speed register Register Port Type and Speed (PTYS) introduces three new fields extending the speed/protocols the can be reported and configured. Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 565046830559..5decffe565fb 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -7826,21 +7826,23 @@ struct mlx5_ifc_ptys_reg_bits { u8 proto_mask[0x3]; u8 an_status[0x4]; - u8 reserved_at_24[0x3c]; + u8 reserved_at_24[0x1c]; + + u8 ext_eth_proto_capability[0x20]; u8 eth_proto_capability[0x20]; u8 ib_link_width_capability[0x10]; u8 ib_proto_capability[0x10]; - u8 reserved_at_a0[0x20]; + u8 ext_eth_proto_admin[0x20]; u8 eth_proto_admin[0x20]; u8 ib_link_width_admin[0x10]; u8 ib_proto_admin[0x10]; - u8 reserved_at_100[0x20]; + u8 ext_eth_proto_oper[0x20]; u8 eth_proto_oper[0x20]; @@ -8289,7 +8291,9 @@ struct mlx5_ifc_mpegc_reg_bits { struct mlx5_ifc_pcam_enhanced_features_bits { u8 reserved_at_0[0x6d]; u8 rx_icrc_encapsulated_counter[0x1]; - u8 reserved_at_6e[0x8]; + u8 reserved_at_6e[0x4]; + u8 ptys_extended_ethernet[0x1]; + u8 reserved_at_73[0x3]; u8 pfcc_mask[0x1]; u8 reserved_at_77[0x3]; u8 per_lane_error_counters[0x1]; From a08b4ed1373dc59e3e15029bc6f135ba0f53c9a7 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 12 Feb 2019 22:55:45 -0800 Subject: [PATCH 1218/1436] net/mlx5: Add support to ext_* fields introduced in Port Type and Speed register This patch exposes new link modes (including 50Gbps per lane), and ext_* fields which describes the new link modes in Port Type and Speed register (PTYS). Access functions, translation functions (speed <-> HW bits) and link max speed function were modified. Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 3 +- .../net/ethernet/mellanox/mlx5/core/en/port.c | 85 ++++++++++++++----- .../net/ethernet/mellanox/mlx5/core/en/port.h | 8 +- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 9 +- include/linux/mlx5/port.h | 19 +++++ 5 files changed, 94 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index efd08b41126c..3677c00fa3bb 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -421,7 +421,8 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, mdev_port_num); if (err) goto out; - eth_prot_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, false, + eth_proto_oper); props->active_width = IB_WIDTH_4X; props->active_speed = IB_SPEED_QDR; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 9a1c2b2f87d8..122927f3a600 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -63,7 +63,31 @@ static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = { [MLX5E_50GBASE_KR2] = 50000, }; -int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, +static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { + [MLX5E_SGMII_100M] = 100, + [MLX5E_1000BASE_X_SGMII] = 1000, + [MLX5E_5GBASE_R] = 5000, + [MLX5E_10GBASE_XFI_XAUI_1] = 10000, + [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000, + [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000, + [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000, + [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000, + [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000, + [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000, + [MLX5E_400GAUI_8] = 400000, +}; + +static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev, + const u32 **arr, u32 *size) +{ + bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + + *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) : + ARRAY_SIZE(mlx5e_link_speed); + *arr = ext ? mlx5e_ext_link_speed : mlx5e_link_speed; +} + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, struct mlx5e_port_eth_proto *eproto) { u32 out[MLX5_ST_SZ_DW(ptys_reg)]; @@ -72,13 +96,17 @@ int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, if (!eproto) return -EINVAL; + if (ext != MLX5_CAP_PCAM_FEATURE(dev, ptys_extended_ethernet)) + return -EOPNOTSUPP; + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port); if (err) return err; - eproto->cap = MLX5_GET(ptys_reg, out, eth_proto_capability); - eproto->admin = MLX5_GET(ptys_reg, out, eth_proto_admin); - eproto->oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + eproto->cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_capability); + eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin); + eproto->oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); return 0; } @@ -100,7 +128,7 @@ void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, } int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, - u32 proto_admin) + u32 proto_admin, bool ext) { u32 out[MLX5_ST_SZ_DW(ptys_reg)]; u32 in[MLX5_ST_SZ_DW(ptys_reg)]; @@ -118,38 +146,46 @@ int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, MLX5_SET(ptys_reg, in, local_port, 1); MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); MLX5_SET(ptys_reg, in, proto_mask, MLX5_PTYS_EN); - MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); + if (ext) + MLX5_SET(ptys_reg, in, ext_eth_proto_admin, proto_admin); + else + MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PTYS, 0, 1); } -u32 mlx5e_port_ptys2speed(u32 eth_proto_oper) +u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper) { unsigned long temp = eth_proto_oper; + const u32 *table; u32 speed = 0; + u32 max_size; int i; - i = find_first_bit(&temp, MLX5E_LINK_MODES_NUMBER); - if (i < MLX5E_LINK_MODES_NUMBER) - speed = mlx5e_link_speed[i]; - + mlx5e_port_get_speed_arr(mdev, &table, &max_size); + i = find_first_bit(&temp, max_size); + if (i < max_size) + speed = table[i]; return speed; } int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { struct mlx5e_port_eth_proto eproto; + bool ext; int err; - err = mlx5_port_query_eth_proto(mdev, 1, &eproto); + ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); if (err) - return err; + goto out; - *speed = mlx5e_port_ptys2speed(eproto.oper); + *speed = mlx5e_port_ptys2speed(mdev, eproto.oper); if (!(*speed)) err = -EINVAL; +out: return err; } @@ -157,31 +193,38 @@ int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { struct mlx5e_port_eth_proto eproto; u32 max_speed = 0; + const u32 *table; + u32 max_size; + bool ext; int err; int i; - err = mlx5_port_query_eth_proto(mdev, 1, &eproto); + ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); if (err) return err; - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) + mlx5e_port_get_speed_arr(mdev, &table, &max_size); + for (i = 0; i < max_size; ++i) if (eproto.cap & MLX5E_PROT_MASK(i)) - max_speed = max(max_speed, mlx5e_link_speed[i]); + max_speed = max(max_speed, table[i]); *speed = max_speed; return 0; } -u32 mlx5e_port_speed2linkmodes(u32 speed) +u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed) { u32 link_modes = 0; + const u32 *table; + u32 max_size; int i; - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { - if (mlx5e_link_speed[i] == speed) + mlx5e_port_get_speed_arr(mdev, &table, &max_size); + for (i = 0; i < max_size; ++i) { + if (table[i] == speed) link_modes |= MLX5E_PROT_MASK(i); } - return link_modes; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index 4bdab8be10af..70f536ec51c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -42,16 +42,16 @@ struct mlx5e_port_eth_proto { u32 oper; }; -int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, struct mlx5e_port_eth_proto *eproto); void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, u8 *an_disable_cap, u8 *an_disable_admin); int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, - u32 proto_admin); -u32 mlx5e_port_ptys2speed(u32 eth_proto_oper); + u32 proto_admin, bool ext); +u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper); int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); -u32 mlx5e_port_speed2linkmodes(u32 speed); +u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed); int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out); int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c29e141d72fb..8343cf7d292c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -695,13 +695,14 @@ static void get_speed_duplex(struct net_device *netdev, u32 eth_proto_oper, struct ethtool_link_ksettings *link_ksettings) { + struct mlx5e_priv *priv = netdev_priv(netdev); u32 speed = SPEED_UNKNOWN; u8 duplex = DUPLEX_UNKNOWN; if (!netif_carrier_ok(netdev)) goto out; - speed = mlx5e_port_ptys2speed(eth_proto_oper); + speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper); if (!speed) { speed = SPEED_UNKNOWN; goto out; @@ -896,9 +897,9 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ? mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) : - mlx5e_port_speed2linkmodes(speed); + mlx5e_port_speed2linkmodes(mdev, speed); - err = mlx5_port_query_eth_proto(mdev, 1, &eproto); + err = mlx5_port_query_eth_proto(mdev, 1, false, &eproto); if (err) { netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n", __func__, err); @@ -923,7 +924,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, if (!an_changes && link_modes == eproto.admin) goto out; - mlx5_port_set_eth_ptys(mdev, an_disable, link_modes); + mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, false); mlx5_toggle_port_link(mdev); out: diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 5be7eefa6d75..814fa194663b 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -92,6 +92,22 @@ enum mlx5e_link_mode { MLX5E_LINK_MODES_NUMBER, }; +enum mlx5e_ext_link_mode { + MLX5E_SGMII_100M = 0, + MLX5E_1000BASE_X_SGMII = 1, + MLX5E_5GBASE_R = 3, + MLX5E_10GBASE_XFI_XAUI_1 = 4, + MLX5E_40GBASE_XLAUI_4_XLPPI_4 = 5, + MLX5E_25GAUI_1_25GBASE_CR_KR = 6, + MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2 = 7, + MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR = 8, + MLX5E_CAUI_4_100GBASE_CR4_KR4 = 9, + MLX5E_100GAUI_2_100GBASE_CR2_KR2 = 10, + MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12, + MLX5E_400GAUI_8 = 15, + MLX5E_EXT_LINK_MODES_NUMBER, +}; + enum mlx5e_connector_type { MLX5E_PORT_UNKNOWN = 0, MLX5E_PORT_NONE = 1, @@ -106,6 +122,9 @@ enum mlx5e_connector_type { }; #define MLX5E_PROT_MASK(link_mode) (1 << link_mode) +#define MLX5_GET_ETH_PROTO(reg, out, ext, field) \ + (ext ? MLX5_GET(reg, out, ext_##field) : \ + MLX5_GET(reg, out, field)) int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, From 08e8676f1607925adf36e399f0faa8ce3b10bb86 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 12 Feb 2019 22:55:46 -0800 Subject: [PATCH 1219/1436] IB/mlx5: Add support for 50Gbps per lane link modes Driver now supports new link modes: 50Gbps per lane support for 50G/100G/200G. This patch reads the correct field (legacy vs. extended) based on a FW indication bit, and adds a translation function (link modes to IB width and speed) to the new link modes. Signed-off-by: Aya Levin Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 66 ++++++++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 3677c00fa3bb..581ae11e2fc9 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -331,8 +331,8 @@ out: spin_unlock(&port->mp.mpi_lock); } -static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, - u8 *active_width) +static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed, + u8 *active_width) { switch (eth_proto_oper) { case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII): @@ -389,6 +389,61 @@ static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, return 0; } +static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed, + u8 *active_width) +{ + switch (eth_proto_oper) { + case MLX5E_PROT_MASK(MLX5E_SGMII_100M): + case MLX5E_PROT_MASK(MLX5E_1000BASE_X_SGMII): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_SDR; + break; + case MLX5E_PROT_MASK(MLX5E_5GBASE_R): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_DDR; + break; + case MLX5E_PROT_MASK(MLX5E_10GBASE_XFI_XAUI_1): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_QDR; + break; + case MLX5E_PROT_MASK(MLX5E_40GBASE_XLAUI_4_XLPPI_4): + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_QDR; + break; + case MLX5E_PROT_MASK(MLX5E_25GAUI_1_25GBASE_CR_KR): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_EDR; + break; + case MLX5E_PROT_MASK(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2): + case MLX5E_PROT_MASK(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_HDR; + break; + case MLX5E_PROT_MASK(MLX5E_100GAUI_2_100GBASE_CR2_KR2): + *active_width = IB_WIDTH_2X; + *active_speed = IB_SPEED_HDR; + break; + case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4): + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_HDR; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, + u8 *active_width, bool ext) +{ + return ext ? + translate_eth_ext_proto_oper(eth_proto_oper, active_speed, + active_width) : + translate_eth_legacy_proto_oper(eth_proto_oper, active_speed, + active_width); +} + static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, struct ib_port_attr *props) { @@ -401,6 +456,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, u16 qkey_viol_cntr; u32 eth_prot_oper; u8 mdev_port_num; + bool ext; int err; mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num); @@ -421,14 +477,14 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, mdev_port_num); if (err) goto out; - eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, false, - eth_proto_oper); + ext = MLX5_CAP_PCAM_FEATURE(dev->mdev, ptys_extended_ethernet); + eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); props->active_width = IB_WIDTH_4X; props->active_speed = IB_SPEED_QDR; translate_eth_proto_oper(eth_prot_oper, &props->active_speed, - &props->active_width); + &props->active_width, ext); props->port_cap_flags |= IB_PORT_CM_SUP; props->ip_gids = true; From cb5b020a8d38f77209d0472a0fea755299a8ec78 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 14 Feb 2019 15:02:18 -0800 Subject: [PATCH 1220/1436] Revert "exec: load_script: don't blindly truncate shebang string" This reverts commit 8099b047ecc431518b9bb6bdbba3549bbecdc343. It turns out that people do actually depend on the shebang string being truncated, and on the fact that an interpreter (like perl) will often just re-interpret it entirely to get the full argument list. Reported-by: Samuel Dionne-Riel Acked-by: Kees Cook Cc: Oleg Nesterov Signed-off-by: Linus Torvalds --- fs/binfmt_script.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index d0078cbb718b..7cde3f46ad26 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -42,14 +42,10 @@ static int load_script(struct linux_binprm *bprm) fput(bprm->file); bprm->file = NULL; - for (cp = bprm->buf+2;; cp++) { - if (cp >= bprm->buf + BINPRM_BUF_SIZE) - return -ENOEXEC; - if (!*cp || (*cp == '\n')) - break; - } + bprm->buf[BINPRM_BUF_SIZE - 1] = '\0'; + if ((cp = strchr(bprm->buf, '\n')) == NULL) + cp = bprm->buf+BINPRM_BUF_SIZE-1; *cp = '\0'; - while (cp > bprm->buf) { cp--; if ((*cp == ' ') || (*cp == '\t')) From fb405883c189dd30f2fab2b3e2c954f34f000ac3 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Thu, 14 Feb 2019 10:39:31 -0800 Subject: [PATCH 1221/1436] bpf: fix memory leak in bpf_lwt_xmit_reroute On error the skb should be freed. Tested with diff/steps provided by David Ahern. v2: surface routing errors to the user instead of a generic EINVAL, as suggested by David Ahern. Reported-by: David Ahern Fixes: 3bd0b15281af ("bpf: add handling of BPF_LWT_REROUTE to lwt_bpf.c") Signed-off-by: Peter Oskolkov Reviewed-by: David Ahern Signed-off-by: Alexei Starovoitov --- net/core/lwt_bpf.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 32251f3fcda0..a5c8c79d468a 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -179,17 +179,17 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb) struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev); int oif = l3mdev ? l3mdev->ifindex : 0; struct dst_entry *dst = NULL; + int err = -EAFNOSUPPORT; struct sock *sk; struct net *net; bool ipv4; - int err; if (skb->protocol == htons(ETH_P_IP)) ipv4 = true; else if (skb->protocol == htons(ETH_P_IPV6)) ipv4 = false; else - return -EAFNOSUPPORT; + goto err; sk = sk_to_full_sk(skb->sk); if (sk) { @@ -215,8 +215,10 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb) fl4.saddr = iph->saddr; rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) - return -EINVAL; + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto err; + } dst = &rt->dst; } else { struct ipv6hdr *iph6 = ipv6_hdr(skb); @@ -231,12 +233,17 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb) fl6.saddr = iph6->saddr; err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6); - if (err || IS_ERR(dst)) - return -EINVAL; + if (unlikely(err)) + goto err; + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + goto err; + } } if (unlikely(dst->error)) { + err = dst->error; dst_release(dst); - return -EINVAL; + goto err; } /* Although skb header was reserved in bpf_lwt_push_ip_encap(), it @@ -246,17 +253,21 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb) */ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) - return err; + goto err; skb_dst_drop(skb); skb_dst_set(skb, dst); err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb); if (unlikely(err)) - return err; + goto err; /* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */ return LWTUNNEL_XMIT_DONE; + +err: + kfree_skb(skb); + return err; } static int bpf_xmit(struct sk_buff *skb) From 1ad9cbb890f059dd233868654bb9d9e4430b095c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Feb 2019 10:25:53 -0800 Subject: [PATCH 1222/1436] tools/bpf: replace bzero with memset bzero() call is deprecated and superseded by memset(). Signed-off-by: Andrii Nakryiko Reported-by: David Laight Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 48 +++++++++++++++++++++--------------------- tools/lib/bpf/btf.c | 5 ++--- tools/lib/bpf/libbpf.c | 5 ++--- 3 files changed, 28 insertions(+), 30 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index a5261f39e2bd..9cd015574e83 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -22,7 +22,7 @@ */ #include -#include +#include #include #include #include @@ -228,7 +228,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, name_len = load_attr->name ? strlen(load_attr->name) : 0; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.prog_type = load_attr->prog_type; attr.expected_attach_type = load_attr->expected_attach_type; attr.insn_cnt = (__u32)load_attr->insns_cnt; @@ -340,7 +340,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.prog_type = type; attr.insn_cnt = (__u32)insns_cnt; attr.insns = ptr_to_u64(insns); @@ -360,7 +360,7 @@ int bpf_map_update_elem(int fd, const void *key, const void *value, { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); @@ -373,7 +373,7 @@ int bpf_map_lookup_elem(int fd, const void *key, void *value) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); @@ -385,7 +385,7 @@ int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); @@ -398,7 +398,7 @@ int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); @@ -410,7 +410,7 @@ int bpf_map_delete_elem(int fd, const void *key) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); @@ -421,7 +421,7 @@ int bpf_map_get_next_key(int fd, const void *key, void *next_key) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.next_key = ptr_to_u64(next_key); @@ -433,7 +433,7 @@ int bpf_obj_pin(int fd, const char *pathname) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.pathname = ptr_to_u64((void *)pathname); attr.bpf_fd = fd; @@ -444,7 +444,7 @@ int bpf_obj_get(const char *pathname) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.pathname = ptr_to_u64((void *)pathname); return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); @@ -455,7 +455,7 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; @@ -468,7 +468,7 @@ int bpf_prog_detach(int target_fd, enum bpf_attach_type type) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.target_fd = target_fd; attr.attach_type = type; @@ -479,7 +479,7 @@ int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; @@ -493,7 +493,7 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, union bpf_attr attr; int ret; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.query.target_fd = target_fd; attr.query.attach_type = type; attr.query.query_flags = query_flags; @@ -514,7 +514,7 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, union bpf_attr attr; int ret; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.test.prog_fd = prog_fd; attr.test.data_in = ptr_to_u64(data); attr.test.data_out = ptr_to_u64(data_out); @@ -539,7 +539,7 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) if (!test_attr->data_out && test_attr->data_size_out > 0) return -EINVAL; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.test.prog_fd = test_attr->prog_fd; attr.test.data_in = ptr_to_u64(test_attr->data_in); attr.test.data_out = ptr_to_u64(test_attr->data_out); @@ -559,7 +559,7 @@ int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id) union bpf_attr attr; int err; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.start_id = start_id; err = sys_bpf(BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr)); @@ -574,7 +574,7 @@ int bpf_map_get_next_id(__u32 start_id, __u32 *next_id) union bpf_attr attr; int err; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.start_id = start_id; err = sys_bpf(BPF_MAP_GET_NEXT_ID, &attr, sizeof(attr)); @@ -588,7 +588,7 @@ int bpf_prog_get_fd_by_id(__u32 id) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.prog_id = id; return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); @@ -598,7 +598,7 @@ int bpf_map_get_fd_by_id(__u32 id) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_id = id; return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); @@ -608,7 +608,7 @@ int bpf_btf_get_fd_by_id(__u32 id) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.btf_id = id; return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); @@ -619,7 +619,7 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len) union bpf_attr attr; int err; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.info.bpf_fd = prog_fd; attr.info.info_len = *info_len; attr.info.info = ptr_to_u64(info); @@ -635,7 +635,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.raw_tracepoint.name = ptr_to_u64(name); attr.raw_tracepoint.prog_fd = prog_fd; diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 6953fedb88ff..ade1c32fb083 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -484,7 +483,7 @@ int btf__get_from_id(__u32 id, struct btf **btf) goto exit_free; } - bzero(ptr, last_size); + memset(ptr, 0, last_size); btf_info.btf = ptr_to_u64(ptr); err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); @@ -498,7 +497,7 @@ int btf__get_from_id(__u32 id, struct btf **btf) goto exit_free; } ptr = temp_ptr; - bzero(ptr, last_size); + memset(ptr, 0, last_size); btf_info.btf = ptr_to_u64(ptr); err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); } diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e3c39edfb9d3..6ef7e6e4cbd3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -308,7 +307,7 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, return -EINVAL; } - bzero(prog, sizeof(*prog)); + memset(prog, 0, sizeof(*prog)); prog->section_name = strdup(section_name); if (!prog->section_name) { @@ -1577,7 +1576,7 @@ bpf_program__load(struct bpf_program *prog, struct bpf_prog_prep_result result; bpf_program_prep_t preprocessor = prog->preprocessor; - bzero(&result, sizeof(result)); + memset(&result, 0, sizeof(result)); err = preprocessor(prog, i, prog->insns, prog->insns_cnt, &result); if (err) { From d931206476b83d4d516f9425cc9e6a1cf361fb2b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Feb 2019 10:25:54 -0800 Subject: [PATCH 1223/1436] tools: sync uapi/linux/if_link.h header Syncing if_link.h that got out of sync. Signed-off-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index d6533828123a..5b225ff63b48 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -925,6 +925,7 @@ enum { enum { LINK_XSTATS_TYPE_UNSPEC, LINK_XSTATS_TYPE_BRIDGE, + LINK_XSTATS_TYPE_BOND, __LINK_XSTATS_TYPE_MAX }; #define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1) From 4ae280b4ee3463fa57bbe6eede26b97daff8a0f1 Mon Sep 17 00:00:00 2001 From: Nikos Tsironis Date: Thu, 14 Feb 2019 20:38:47 +0200 Subject: [PATCH 1224/1436] dm thin: fix bug where bio that overwrites thin block ignores FUA When provisioning a new data block for a virtual block, either because the block was previously unallocated or because we are breaking sharing, if the whole block of data is being overwritten the bio that triggered the provisioning is issued immediately, skipping copying or zeroing of the data block. When this bio completes the new mapping is inserted in to the pool's metadata by process_prepared_mapping(), where the bio completion is signaled to the upper layers. This completion is signaled without first committing the metadata. If the bio in question has the REQ_FUA flag set and the system crashes right after its completion and before the next metadata commit, then the write is lost despite the REQ_FUA flag requiring that I/O completion for this request must only be signaled after the data has been committed to non-volatile storage. Fix this by deferring the completion of overwrite bios, with the REQ_FUA flag set, until after the metadata has been committed. Cc: stable@vger.kernel.org Signed-off-by: Nikos Tsironis Acked-by: Joe Thornber Acked-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-thin.c | 55 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index ca8af21bf644..e83b63608262 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -257,6 +257,7 @@ struct pool { spinlock_t lock; struct bio_list deferred_flush_bios; + struct bio_list deferred_flush_completions; struct list_head prepared_mappings; struct list_head prepared_discards; struct list_head prepared_discards_pt2; @@ -956,6 +957,39 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) mempool_free(m, &m->tc->pool->mapping_pool); } +static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio) +{ + struct pool *pool = tc->pool; + unsigned long flags; + + /* + * If the bio has the REQ_FUA flag set we must commit the metadata + * before signaling its completion. + */ + if (!bio_triggers_commit(tc, bio)) { + bio_endio(bio); + return; + } + + /* + * Complete bio with an error if earlier I/O caused changes to the + * metadata that can't be committed, e.g, due to I/O errors on the + * metadata device. + */ + if (dm_thin_aborted_changes(tc->td)) { + bio_io_error(bio); + return; + } + + /* + * Batch together any bios that trigger commits and then issue a + * single commit for them in process_deferred_bios(). + */ + spin_lock_irqsave(&pool->lock, flags); + bio_list_add(&pool->deferred_flush_completions, bio); + spin_unlock_irqrestore(&pool->lock, flags); +} + static void process_prepared_mapping(struct dm_thin_new_mapping *m) { struct thin_c *tc = m->tc; @@ -988,7 +1022,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) */ if (bio) { inc_remap_and_issue_cell(tc, m->cell, m->data_block); - bio_endio(bio); + complete_overwrite_bio(tc, bio); } else { inc_all_io_entry(tc->pool, m->cell->holder); remap_and_issue(tc, m->cell->holder, m->data_block); @@ -2317,7 +2351,7 @@ static void process_deferred_bios(struct pool *pool) { unsigned long flags; struct bio *bio; - struct bio_list bios; + struct bio_list bios, bio_completions; struct thin_c *tc; tc = get_first_thin(pool); @@ -2328,26 +2362,36 @@ static void process_deferred_bios(struct pool *pool) } /* - * If there are any deferred flush bios, we must commit - * the metadata before issuing them. + * If there are any deferred flush bios, we must commit the metadata + * before issuing them or signaling their completion. */ bio_list_init(&bios); + bio_list_init(&bio_completions); + spin_lock_irqsave(&pool->lock, flags); bio_list_merge(&bios, &pool->deferred_flush_bios); bio_list_init(&pool->deferred_flush_bios); + + bio_list_merge(&bio_completions, &pool->deferred_flush_completions); + bio_list_init(&pool->deferred_flush_completions); spin_unlock_irqrestore(&pool->lock, flags); - if (bio_list_empty(&bios) && + if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) && !(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool))) return; if (commit(pool)) { + bio_list_merge(&bios, &bio_completions); + while ((bio = bio_list_pop(&bios))) bio_io_error(bio); return; } pool->last_commit_jiffies = jiffies; + while ((bio = bio_list_pop(&bio_completions))) + bio_endio(bio); + while ((bio = bio_list_pop(&bios))) generic_make_request(bio); } @@ -2954,6 +2998,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout); spin_lock_init(&pool->lock); bio_list_init(&pool->deferred_flush_bios); + bio_list_init(&pool->deferred_flush_completions); INIT_LIST_HEAD(&pool->prepared_mappings); INIT_LIST_HEAD(&pool->prepared_discards); INIT_LIST_HEAD(&pool->prepared_discards_pt2); From 69ef943dbc14b21987c79f8399ffea08f9a1b446 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 14 Feb 2019 11:03:48 -0800 Subject: [PATCH 1225/1436] drm: Use array_size() when creating lease Passing an object_count of sufficient size will make object_count * 4 wrap around to be very small, then a later function will happily iterate off the end of the object_ids array. Using array_size() will saturate at SIZE_MAX, the kmalloc() will fail and we'll return an -ENOMEM to the norty userspace. Fixes: 62884cd386b8 ("drm: Add four ioctls for managing drm mode object leases [v7]") Signed-off-by: Matthew Wilcox Acked-by: Kees Cook Acked-by: Daniel Vetter Cc: # v4.15+ Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_lease.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index 99cba8ea5d82..5df1256618cc 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -528,7 +528,8 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev, object_count = cl->object_count; - object_ids = memdup_user(u64_to_user_ptr(cl->object_ids), object_count * sizeof(__u32)); + object_ids = memdup_user(u64_to_user_ptr(cl->object_ids), + array_size(object_count, sizeof(__u32))); if (IS_ERR(object_ids)) return PTR_ERR(object_ids); From d358def706880defa4c9e87381c5bf086a97d5f9 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Tue, 5 Feb 2019 16:42:53 +0530 Subject: [PATCH 1226/1436] i2c: cadence: Fix the hold bit setting In case the hold bit is not needed we are carrying the old values. Fix the same by resetting the bit when not needed. Fixes the sporadic i2c bus lockups on National Instruments Zynq-based devices. Fixes: df8eb5691c48 ("i2c: Add driver for Cadence I2C controller") Reported-by: Kyle Roeschley Acked-by: Michal Simek Signed-off-by: Shubhrajyoti Datta Tested-by: Kyle Roeschley Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-cadence.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index b13605718291..d917cefc5a19 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -382,8 +382,10 @@ static void cdns_i2c_mrecv(struct cdns_i2c *id) * Check for the message size against FIFO depth and set the * 'hold bus' bit if it is greater than FIFO depth. */ - if (id->recv_count > CDNS_I2C_FIFO_DEPTH) + if ((id->recv_count > CDNS_I2C_FIFO_DEPTH) || id->bus_hold_flag) ctrl_reg |= CDNS_I2C_CR_HOLD; + else + ctrl_reg = ctrl_reg & ~CDNS_I2C_CR_HOLD; cdns_i2c_writereg(ctrl_reg, CDNS_I2C_CR_OFFSET); @@ -440,8 +442,11 @@ static void cdns_i2c_msend(struct cdns_i2c *id) * Check for the message size against FIFO depth and set the * 'hold bus' bit if it is greater than FIFO depth. */ - if (id->send_count > CDNS_I2C_FIFO_DEPTH) + if ((id->send_count > CDNS_I2C_FIFO_DEPTH) || id->bus_hold_flag) ctrl_reg |= CDNS_I2C_CR_HOLD; + else + ctrl_reg = ctrl_reg & ~CDNS_I2C_CR_HOLD; + cdns_i2c_writereg(ctrl_reg, CDNS_I2C_CR_OFFSET); /* Clear the interrupts in interrupt status register. */ From f275a4659484716259cc46268d9043424e51cf0f Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Thu, 27 Dec 2018 16:42:25 +0100 Subject: [PATCH 1227/1436] i2c: bcm2835: Clear current buffer pointers and counts after a transfer The driver's interrupt handler checks whether a message is currently being handled with the curr_msg pointer. When it is NULL, the interrupt is considered to be unexpected. Similarly, the i2c_start_transfer routine checks for the remaining number of messages to handle in num_msgs. However, these values are never cleared and always keep the message and number relevant to the latest transfer (which might be done already and the underlying message memory might have been freed). When an unexpected interrupt hits with the DONE bit set, the isr will then try to access the flags field of the curr_msg structure, leading to a fatal page fault. The msg_buf and msg_buf_remaining fields are also never cleared at the end of the transfer, which can lead to similar pitfalls. Fix these issues by introducing a cleanup function and always calling it after a transfer is finished. Fixes: e2474541032d ("i2c: bcm2835: Fix hang for writing messages larger than 16 bytes") Signed-off-by: Paul Kocialkowski Acked-by: Stefan Wahren Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-bcm2835.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c index ec6e69aa3a8e..d2fbb4bb4a43 100644 --- a/drivers/i2c/busses/i2c-bcm2835.c +++ b/drivers/i2c/busses/i2c-bcm2835.c @@ -183,6 +183,15 @@ static void bcm2835_i2c_start_transfer(struct bcm2835_i2c_dev *i2c_dev) bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_C, c); } +static void bcm2835_i2c_finish_transfer(struct bcm2835_i2c_dev *i2c_dev) +{ + i2c_dev->curr_msg = NULL; + i2c_dev->num_msgs = 0; + + i2c_dev->msg_buf = NULL; + i2c_dev->msg_buf_remaining = 0; +} + /* * Note about I2C_C_CLEAR on error: * The I2C_C_CLEAR on errors will take some time to resolve -- if you were in @@ -283,6 +292,9 @@ static int bcm2835_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], time_left = wait_for_completion_timeout(&i2c_dev->completion, adap->timeout); + + bcm2835_i2c_finish_transfer(i2c_dev); + if (!time_left) { bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_C, BCM2835_I2C_C_CLEAR); From b4c3fbe6360178dc2181b7b43b7ae793a192b282 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 14 Feb 2019 22:03:24 +0800 Subject: [PATCH 1228/1436] mac80211: Use linked list instead of rhashtable walk for mesh tables The mesh table code walks over hash tables for two purposes. First of all it's used as part of a netlink dump process, but it is also used for looking up entries to delete using criteria other than the hash key. The second purpose is directly contrary to the design specification of rhashtable walks. It is only meant for use by netlink dumps. This is because rhashtable is resizable and you cannot obtain a stable walk over it during a resize process. In fact mesh's use of rhashtable for dumping is bogus too. Rather than using rhashtable walk's iterator to keep track of the current position, it always converts the current position to an integer which defeats the purpose of the iterator. Therefore this patch converts all uses of rhashtable walk into a simple linked list. This patch also adds a new spin lock to protect the hash table insertion/removal as well as the walk list modifications. In fact the previous code was buggy as the removals can race with each other, potentially resulting in a double-free. Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Johannes Berg --- net/mac80211/mesh.h | 6 ++ net/mac80211/mesh_pathtbl.c | 138 ++++++++++-------------------------- 2 files changed, 43 insertions(+), 101 deletions(-) diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index cad6592c52a1..2ec7011a4d07 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -70,6 +70,7 @@ enum mesh_deferred_task_flags { * @dst: mesh path destination mac address * @mpp: mesh proxy mac address * @rhash: rhashtable list pointer + * @walk_list: linked list containing all mesh_path objects. * @gate_list: list pointer for known gates list * @sdata: mesh subif * @next_hop: mesh neighbor to which frames for this destination will be @@ -105,6 +106,7 @@ struct mesh_path { u8 dst[ETH_ALEN]; u8 mpp[ETH_ALEN]; /* used for MPP or MAP */ struct rhash_head rhash; + struct hlist_node walk_list; struct hlist_node gate_list; struct ieee80211_sub_if_data *sdata; struct sta_info __rcu *next_hop; @@ -133,12 +135,16 @@ struct mesh_path { * gate's mpath may or may not be resolved and active. * @gates_lock: protects updates to known_gates * @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr + * @walk_head: linked list containging all mesh_path objects + * @walk_lock: lock protecting walk_head * @entries: number of entries in the table */ struct mesh_table { struct hlist_head known_gates; spinlock_t gates_lock; struct rhashtable rhead; + struct hlist_head walk_head; + spinlock_t walk_lock; atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */ }; diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index a5125624a76d..884a0d212e8b 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -59,8 +59,10 @@ static struct mesh_table *mesh_table_alloc(void) return NULL; INIT_HLIST_HEAD(&newtbl->known_gates); + INIT_HLIST_HEAD(&newtbl->walk_head); atomic_set(&newtbl->entries, 0); spin_lock_init(&newtbl->gates_lock); + spin_lock_init(&newtbl->walk_lock); return newtbl; } @@ -249,28 +251,15 @@ mpp_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) static struct mesh_path * __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx) { - int i = 0, ret; - struct mesh_path *mpath = NULL; - struct rhashtable_iter iter; + int i = 0; + struct mesh_path *mpath; - ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC); - if (ret) - return NULL; - - rhashtable_walk_start(&iter); - - while ((mpath = rhashtable_walk_next(&iter))) { - if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) - continue; - if (IS_ERR(mpath)) - break; + hlist_for_each_entry_rcu(mpath, &tbl->walk_head, walk_list) { if (i++ == idx) break; } - rhashtable_walk_stop(&iter); - rhashtable_walk_exit(&iter); - if (IS_ERR(mpath) || !mpath) + if (!mpath) return NULL; if (mpath_expired(mpath)) { @@ -432,6 +421,7 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata, return ERR_PTR(-ENOMEM); tbl = sdata->u.mesh.mesh_paths; + spin_lock_bh(&tbl->walk_lock); do { ret = rhashtable_lookup_insert_fast(&tbl->rhead, &new_mpath->rhash, @@ -441,8 +431,10 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata, mpath = rhashtable_lookup_fast(&tbl->rhead, dst, mesh_rht_params); - + else if (!ret) + hlist_add_head(&new_mpath->walk_list, &tbl->walk_head); } while (unlikely(ret == -EEXIST && !mpath)); + spin_unlock_bh(&tbl->walk_lock); if (ret && ret != -EEXIST) return ERR_PTR(ret); @@ -480,9 +472,14 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata, memcpy(new_mpath->mpp, mpp, ETH_ALEN); tbl = sdata->u.mesh.mpp_paths; + + spin_lock_bh(&tbl->walk_lock); ret = rhashtable_lookup_insert_fast(&tbl->rhead, &new_mpath->rhash, mesh_rht_params); + if (!ret) + hlist_add_head_rcu(&new_mpath->walk_list, &tbl->walk_head); + spin_unlock_bh(&tbl->walk_lock); sdata->u.mesh.mpp_paths_generation++; return ret; @@ -503,20 +500,9 @@ void mesh_plink_broken(struct sta_info *sta) struct mesh_table *tbl = sdata->u.mesh.mesh_paths; static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; struct mesh_path *mpath; - struct rhashtable_iter iter; - int ret; - ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC); - if (ret) - return; - - rhashtable_walk_start(&iter); - - while ((mpath = rhashtable_walk_next(&iter))) { - if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) - continue; - if (IS_ERR(mpath)) - break; + rcu_read_lock(); + hlist_for_each_entry_rcu(mpath, &tbl->walk_head, walk_list) { if (rcu_access_pointer(mpath->next_hop) == sta && mpath->flags & MESH_PATH_ACTIVE && !(mpath->flags & MESH_PATH_FIXED)) { @@ -530,8 +516,7 @@ void mesh_plink_broken(struct sta_info *sta) WLAN_REASON_MESH_PATH_DEST_UNREACHABLE, bcast); } } - rhashtable_walk_stop(&iter); - rhashtable_walk_exit(&iter); + rcu_read_unlock(); } static void mesh_path_free_rcu(struct mesh_table *tbl, @@ -551,6 +536,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl, static void __mesh_path_del(struct mesh_table *tbl, struct mesh_path *mpath) { + hlist_del_rcu(&mpath->walk_list); rhashtable_remove_fast(&tbl->rhead, &mpath->rhash, mesh_rht_params); mesh_path_free_rcu(tbl, mpath); } @@ -571,27 +557,14 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta) struct ieee80211_sub_if_data *sdata = sta->sdata; struct mesh_table *tbl = sdata->u.mesh.mesh_paths; struct mesh_path *mpath; - struct rhashtable_iter iter; - int ret; - - ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC); - if (ret) - return; - - rhashtable_walk_start(&iter); - - while ((mpath = rhashtable_walk_next(&iter))) { - if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) - continue; - if (IS_ERR(mpath)) - break; + struct hlist_node *n; + spin_lock_bh(&tbl->walk_lock); + hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) { if (rcu_access_pointer(mpath->next_hop) == sta) __mesh_path_del(tbl, mpath); } - - rhashtable_walk_stop(&iter); - rhashtable_walk_exit(&iter); + spin_unlock_bh(&tbl->walk_lock); } static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata, @@ -599,51 +572,26 @@ static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata, { struct mesh_table *tbl = sdata->u.mesh.mpp_paths; struct mesh_path *mpath; - struct rhashtable_iter iter; - int ret; - - ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC); - if (ret) - return; - - rhashtable_walk_start(&iter); - - while ((mpath = rhashtable_walk_next(&iter))) { - if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) - continue; - if (IS_ERR(mpath)) - break; + struct hlist_node *n; + spin_lock_bh(&tbl->walk_lock); + hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) { if (ether_addr_equal(mpath->mpp, proxy)) __mesh_path_del(tbl, mpath); } - - rhashtable_walk_stop(&iter); - rhashtable_walk_exit(&iter); + spin_unlock_bh(&tbl->walk_lock); } static void table_flush_by_iface(struct mesh_table *tbl) { struct mesh_path *mpath; - struct rhashtable_iter iter; - int ret; + struct hlist_node *n; - ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_ATOMIC); - if (ret) - return; - - rhashtable_walk_start(&iter); - - while ((mpath = rhashtable_walk_next(&iter))) { - if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) - continue; - if (IS_ERR(mpath)) - break; + spin_lock_bh(&tbl->walk_lock); + hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) { __mesh_path_del(tbl, mpath); } - - rhashtable_walk_stop(&iter); - rhashtable_walk_exit(&iter); + spin_unlock_bh(&tbl->walk_lock); } /** @@ -675,7 +623,7 @@ static int table_path_del(struct mesh_table *tbl, { struct mesh_path *mpath; - rcu_read_lock(); + spin_lock_bh(&tbl->walk_lock); mpath = rhashtable_lookup_fast(&tbl->rhead, addr, mesh_rht_params); if (!mpath) { rcu_read_unlock(); @@ -683,7 +631,7 @@ static int table_path_del(struct mesh_table *tbl, } __mesh_path_del(tbl, mpath); - rcu_read_unlock(); + spin_unlock_bh(&tbl->walk_lock); return 0; } @@ -854,28 +802,16 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl) { struct mesh_path *mpath; - struct rhashtable_iter iter; - int ret; + struct hlist_node *n; - ret = rhashtable_walk_init(&tbl->rhead, &iter, GFP_KERNEL); - if (ret) - return; - - rhashtable_walk_start(&iter); - - while ((mpath = rhashtable_walk_next(&iter))) { - if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN) - continue; - if (IS_ERR(mpath)) - break; + spin_lock_bh(&tbl->walk_lock); + hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) { if ((!(mpath->flags & MESH_PATH_RESOLVING)) && (!(mpath->flags & MESH_PATH_FIXED)) && time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE)) __mesh_path_del(tbl, mpath); } - - rhashtable_walk_stop(&iter); - rhashtable_walk_exit(&iter); + spin_unlock_bh(&tbl->walk_lock); } void mesh_path_expire(struct ieee80211_sub_if_data *sdata) From 4ff3a9d14c6c06eaa4e5976c61599ea2bd9e81b2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 14 Feb 2019 22:03:25 +0800 Subject: [PATCH 1229/1436] mac80211: Free mpath object when rhashtable insertion fails When rhashtable insertion fails the mesh table code doesn't free the now-orphan mesh path object. This patch fixes that. Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Johannes Berg --- net/mac80211/mesh_pathtbl.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 884a0d212e8b..c3a7396fb955 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -436,17 +436,15 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata, } while (unlikely(ret == -EEXIST && !mpath)); spin_unlock_bh(&tbl->walk_lock); - if (ret && ret != -EEXIST) - return ERR_PTR(ret); - - /* At this point either new_mpath was added, or we found a - * matching entry already in the table; in the latter case - * free the unnecessary new entry. - */ - if (ret == -EEXIST) { + if (ret) { kfree(new_mpath); + + if (ret != -EEXIST) + return ERR_PTR(ret); + new_mpath = mpath; } + sdata->u.mesh.mesh_paths_generation++; return new_mpath; } @@ -481,6 +479,9 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata, hlist_add_head_rcu(&new_mpath->walk_list, &tbl->walk_head); spin_unlock_bh(&tbl->walk_lock); + if (ret) + kfree(new_mpath); + sdata->u.mesh.mpp_paths_generation++; return ret; } From 83e37e0bdd1470bbe6612250b745ad39b1a7b130 Mon Sep 17 00:00:00 2001 From: Rakesh Pillai Date: Fri, 15 Feb 2019 14:16:02 +0530 Subject: [PATCH 1230/1436] mac80211: Restore vif beacon interval if start ap fails The starting of AP interface can fail due to invalid beacon interval, which does not match the minimum gcd requirement set by the wifi driver. In such case, the beacon interval of that interface gets updated with that invalid beacon interval. The next time that interface is brought up in AP mode, an interface combination check is performed and the beacon interval is taken from the previously set value. In a case where an invalid beacon interval, i.e. a beacon interval value which does not satisfy the minimum gcd criteria set by the driver, is set, all the subsequent trials to bring that interface in AP mode will fail, even if the subsequent trials have a valid beacon interval. To avoid this, in case of a failure in bringing up an interface in AP mode due to interface combination error, the interface beacon interval which is stored in bss conf, needs to be restored with the last working value of beacon interval. Tested on ath10k using WCN3990. Cc: stable@vger.kernel.org Fixes: 0c317a02ca98 ("cfg80211: support virtual interfaces with different beacon intervals") Signed-off-by: Rakesh Pillai Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 2493c74c2d37..96496b2c1670 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -941,6 +941,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, BSS_CHANGED_P2P_PS | BSS_CHANGED_TXPOWER; int err; + int prev_beacon_int; old = sdata_dereference(sdata->u.ap.beacon, sdata); if (old) @@ -963,6 +964,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->needed_rx_chains = sdata->local->rx_chains; + prev_beacon_int = sdata->vif.bss_conf.beacon_int; sdata->vif.bss_conf.beacon_int = params->beacon_interval; if (params->he_cap) @@ -974,8 +976,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, if (!err) ieee80211_vif_copy_chanctx_to_vlans(sdata, false); mutex_unlock(&local->mtx); - if (err) + if (err) { + sdata->vif.bss_conf.beacon_int = prev_beacon_int; return err; + } /* * Apply control port protocol, this allows us to From f8ebfaf6684b03084858d8c55f81867e5171af08 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Wed, 13 Feb 2019 18:07:29 +0100 Subject: [PATCH 1231/1436] net: bpf: remove XDP_QUERY_XSK_UMEM enumerator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c9b47cc1fabc ("xsk: fix bug when trying to use both copy and zero-copy on one queue id") moved the umem query code to the AF_XDP core, and therefore removed the need to query the netdevice for a umem. This patch removes XDP_QUERY_XSK_UMEM and all code that implement that behavior, which is just dead code. Signed-off-by: Jan Sokolowski Acked-by: Björn Töpel Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 -- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 28 ------------------- drivers/net/ethernet/intel/i40e/i40e_xsk.h | 2 -- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 -- .../ethernet/intel/ixgbe/ixgbe_txrx_common.h | 2 -- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 17 ----------- include/linux/netdevice.h | 7 ++--- 7 files changed, 3 insertions(+), 59 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 44856a84738d..5e74a5127849 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12128,9 +12128,6 @@ static int i40e_xdp(struct net_device *dev, case XDP_QUERY_PROG: xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0; return 0; - case XDP_QUERY_XSK_UMEM: - return i40e_xsk_umem_query(vsi, &xdp->xsk.umem, - xdp->xsk.queue_id); case XDP_SETUP_XSK_UMEM: return i40e_xsk_umem_setup(vsi, xdp->xsk.umem, xdp->xsk.queue_id); diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 96d849460d9b..e190a2c2b9ff 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -154,34 +154,6 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid) return 0; } -/** - * i40e_xsk_umem_query - Queries a certain ring/qid for its UMEM - * @vsi: Current VSI - * @umem: UMEM associated to the ring, if any - * @qid: Rx ring to associate UMEM to - * - * This function will store, if any, the UMEM associated to certain ring. - * - * Returns 0 on success, <0 on failure - **/ -int i40e_xsk_umem_query(struct i40e_vsi *vsi, struct xdp_umem **umem, - u16 qid) -{ - struct net_device *netdev = vsi->netdev; - struct xdp_umem *queried_umem; - - if (vsi->type != I40E_VSI_MAIN) - return -EINVAL; - - queried_umem = xdp_get_umem_from_qid(netdev, qid); - - if (!queried_umem) - return -EINVAL; - - *umem = queried_umem; - return 0; -} - /** * i40e_xsk_umem_setup - Enable/disassociate a UMEM to/from a ring/qid * @vsi: Current VSI diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h index 9038c5d5cf08..8cc0a2e7d9a2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h @@ -10,8 +10,6 @@ struct zero_copy_allocator; int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair); int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair); -int i40e_xsk_umem_query(struct i40e_vsi *vsi, struct xdp_umem **umem, - u16 qid); int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem, u16 qid); void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index b53087a980ef..38c430b94ae3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10280,9 +10280,6 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp) xdp->prog_id = adapter->xdp_prog ? adapter->xdp_prog->aux->id : 0; return 0; - case XDP_QUERY_XSK_UMEM: - return ixgbe_xsk_umem_query(adapter, &xdp->xsk.umem, - xdp->xsk.queue_id); case XDP_SETUP_XSK_UMEM: return ixgbe_xsk_umem_setup(adapter, xdp->xsk.umem, xdp->xsk.queue_id); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h index 53d4089f5644..d93a690aff74 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h @@ -30,8 +30,6 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring); struct xdp_umem *ixgbe_xsk_umem(struct ixgbe_adapter *adapter, struct ixgbe_ring *ring); -int ixgbe_xsk_umem_query(struct ixgbe_adapter *adapter, struct xdp_umem **umem, - u16 qid); int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem, u16 qid); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 65c3e2c979d4..98870707b51a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -174,23 +174,6 @@ static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid) return 0; } -int ixgbe_xsk_umem_query(struct ixgbe_adapter *adapter, struct xdp_umem **umem, - u16 qid) -{ - if (qid >= adapter->num_rx_queues) - return -EINVAL; - - if (adapter->xsk_umems) { - if (qid >= adapter->num_xsk_umems) - return -EINVAL; - *umem = adapter->xsk_umems[qid]; - return 0; - } - - *umem = NULL; - return 0; -} - int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem, u16 qid) { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1d95e634f3fe..6aedaf1e9a25 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -868,7 +868,6 @@ enum bpf_netdev_command { /* BPF program for offload callbacks, invoked at program load time. */ BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE, - XDP_QUERY_XSK_UMEM, XDP_SETUP_XSK_UMEM, }; @@ -895,10 +894,10 @@ struct netdev_bpf { struct { struct bpf_offloaded_map *offmap; }; - /* XDP_QUERY_XSK_UMEM, XDP_SETUP_XSK_UMEM */ + /* XDP_SETUP_XSK_UMEM */ struct { - struct xdp_umem *umem; /* out for query*/ - u16 queue_id; /* in for query */ + struct xdp_umem *umem; + u16 queue_id; } xsk; }; }; From f331e766c4be33f4338574f3c9f7f77e98ab4571 Mon Sep 17 00:00:00 2001 From: Hedi Berriche Date: Wed, 13 Feb 2019 19:34:13 +0000 Subject: [PATCH 1232/1436] x86/platform/UV: Use efi_runtime_lock to serialise BIOS calls Calls into UV firmware must be protected against concurrency, expose the efi_runtime_lock to the UV platform, and use it to serialise UV BIOS calls. Signed-off-by: Hedi Berriche Signed-off-by: Borislav Petkov Reviewed-by: Ard Biesheuvel Reviewed-by: Russ Anderson Reviewed-by: Dimitri Sivanich Reviewed-by: Mike Travis Cc: Andy Shevchenko Cc: Bhupesh Sharma Cc: Darren Hart Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: linux-efi Cc: platform-driver-x86@vger.kernel.org Cc: stable@vger.kernel.org # v4.9+ Cc: Steve Wahl Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190213193413.25560-5-hedi.berriche@hpe.com --- arch/x86/include/asm/uv/bios.h | 8 +++++++- arch/x86/platform/uv/bios_uv.c | 23 +++++++++++++++++++++-- drivers/firmware/efi/runtime-wrappers.c | 7 +++++++ 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index e652a7cc6186..3f697a9e3f59 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h @@ -48,7 +48,8 @@ enum { BIOS_STATUS_SUCCESS = 0, BIOS_STATUS_UNIMPLEMENTED = -ENOSYS, BIOS_STATUS_EINVAL = -EINVAL, - BIOS_STATUS_UNAVAIL = -EBUSY + BIOS_STATUS_UNAVAIL = -EBUSY, + BIOS_STATUS_ABORT = -EINTR, }; /* Address map parameters */ @@ -167,4 +168,9 @@ extern long system_serial_number; extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ +/* + * EFI runtime lock; cf. firmware/efi/runtime-wrappers.c for details + */ +extern struct semaphore __efi_uv_runtime_lock; + #endif /* _ASM_X86_UV_BIOS_H */ diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 4a6a5a26c582..eb33432f2f24 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -29,7 +29,8 @@ struct uv_systab *uv_systab; -s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) +static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, + u64 a4, u64 a5) { struct uv_systab *tab = uv_systab; s64 ret; @@ -51,6 +52,19 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) return ret; } + +s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) +{ + s64 ret; + + if (down_interruptible(&__efi_uv_runtime_lock)) + return BIOS_STATUS_ABORT; + + ret = __uv_bios_call(which, a1, a2, a3, a4, a5); + up(&__efi_uv_runtime_lock); + + return ret; +} EXPORT_SYMBOL_GPL(uv_bios_call); s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, @@ -59,10 +73,15 @@ s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, unsigned long bios_flags; s64 ret; + if (down_interruptible(&__efi_uv_runtime_lock)) + return BIOS_STATUS_ABORT; + local_irq_save(bios_flags); - ret = uv_bios_call(which, a1, a2, a3, a4, a5); + ret = __uv_bios_call(which, a1, a2, a3, a4, a5); local_irq_restore(bios_flags); + up(&__efi_uv_runtime_lock); + return ret; } diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index 8903b9ccfc2b..e2abfdb5cee6 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -146,6 +146,13 @@ void efi_call_virt_check_flags(unsigned long flags, const char *call) */ static DEFINE_SEMAPHORE(efi_runtime_lock); +/* + * Expose the EFI runtime lock to the UV platform + */ +#ifdef CONFIG_X86_UV +extern struct semaphore __efi_uv_runtime_lock __alias(efi_runtime_lock); +#endif + /* * Calls the appropriate efi_runtime_service() with the appropriate * arguments. From 1a11a4c74f73adb840d61371c3bb560ed4d7a87f Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Thu, 14 Feb 2019 15:01:42 -0800 Subject: [PATCH 1233/1436] libbpf: Introduce bpf_map__resize Add bpf_map__resize() to change max_entries for a map. Quite often necessary map size is unknown at compile time and can be calculated only at run time. Currently the following approach is used to do so: * bpf_object__open_buffer() to open Elf file from a buffer; * bpf_object__find_map_by_name() to find relevant map; * bpf_map__def() to get map attributes and create struct bpf_create_map_attr from them; * update max_entries in bpf_create_map_attr; * bpf_create_map_xattr() to create new map with updated max_entries; * bpf_map__reuse_fd() to replace the map in bpf_object with newly created one. And after all this bpf_object can finally be loaded. The map will have new size. It 1) is quite a lot of steps; 2) doesn't take BTF into account. For "2)" even more steps should be made and some of them require changes to libbpf (e.g. to get struct btf * from bpf_object). Instead the whole problem can be solved by introducing simple bpf_map__resize() API that checks the map and sets new max_entries if the map is not loaded yet. So the new steps are: * bpf_object__open_buffer() to open Elf file from a buffer; * bpf_object__find_map_by_name() to find relevant map; * bpf_map__resize() to update max_entries. That's much simpler and works with BTF. Signed-off-by: Andrey Ignatov Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 14 ++++++++++++++ tools/lib/bpf/libbpf.h | 1 + tools/lib/bpf/libbpf.map | 1 + 3 files changed, 16 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6ef7e6e4cbd3..9597d4dace34 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1113,6 +1113,20 @@ err_free_new_name: return -errno; } +int bpf_map__resize(struct bpf_map *map, __u32 max_entries) +{ + if (!map || !max_entries) + return -EINVAL; + + /* If map already created, its attributes can't be changed. */ + if (map->fd >= 0) + return -EBUSY; + + map->def.max_entries = max_entries; + + return 0; +} + static int bpf_object__probe_name(struct bpf_object *obj) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 69a7c25eaccc..987fd92661d6 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -294,6 +294,7 @@ LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, bpf_map_clear_priv_t clear_priv); LIBBPF_API void *bpf_map__priv(struct bpf_map *map); LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); +LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map); LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 5fc8222209f8..16f342c3d4bc 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -130,6 +130,7 @@ LIBBPF_0.0.2 { bpf_probe_helper; bpf_probe_map_type; bpf_probe_prog_type; + bpf_map__resize; bpf_map_lookup_elem_flags; bpf_object__find_map_fd_by_name; bpf_get_link_xdp_id; From 789f6bab849e04ea029c09b81dc8401dc0268cf9 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Thu, 14 Feb 2019 15:01:43 -0800 Subject: [PATCH 1234/1436] libbpf: Introduce bpf_object__btf Add new accessor for bpf_object to get opaque struct btf * from it. struct btf * is needed for all operations with BTF and it's present in bpf_object. The only thing missing is a way to get it. Example use-case is to get BTF key_type_id and value_type_id for a map in bpf_object. It can be done with btf__get_map_kv_tids() but that function requires struct btf *. Similar API can be added for struct btf_ext but no use-case for it yet. Signed-off-by: Andrey Ignatov Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 5 +++++ tools/lib/bpf/libbpf.h | 3 +++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 9 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 9597d4dace34..b38dcbe7460a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2331,6 +2331,11 @@ unsigned int bpf_object__kversion(struct bpf_object *obj) return obj ? obj->kern_version : 0; } +struct btf *bpf_object__btf(struct bpf_object *obj) +{ + return obj ? obj->btf : NULL; +} + int bpf_object__btf_fd(const struct bpf_object *obj) { return obj->btf ? btf__fd(obj->btf) : -1; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 987fd92661d6..6c0168f8bba5 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -89,6 +89,9 @@ LIBBPF_API int bpf_object__load(struct bpf_object *obj); LIBBPF_API int bpf_object__unload(struct bpf_object *obj); LIBBPF_API const char *bpf_object__name(struct bpf_object *obj); LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj); + +struct btf; +LIBBPF_API struct btf *bpf_object__btf(struct bpf_object *obj); LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); LIBBPF_API struct bpf_program * diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 16f342c3d4bc..99dfa710c818 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -132,6 +132,7 @@ LIBBPF_0.0.2 { bpf_probe_prog_type; bpf_map__resize; bpf_map_lookup_elem_flags; + bpf_object__btf; bpf_object__find_map_fd_by_name; bpf_get_link_xdp_id; btf__dedup; From 23b7ca4f745f21c2b9cfcb67fdd33733b3ae7e66 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 15 Feb 2019 12:50:24 +0100 Subject: [PATCH 1235/1436] netfilter: nf_tables: fix flush after rule deletion in the same batch Flush after rule deletion bogusly hits -ENOENT. Skip rules that have been already from nft_delrule_by_chain() which is always called from the flush path. Fixes: cf9dc09d0949 ("netfilter: nf_tables: fix missing rules flushing per table") Reported-by: Phil Sutter Acked-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5a92f23f179f..4893f248dfdc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -313,6 +313,9 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx) int err; list_for_each_entry(rule, &ctx->chain->rules, list) { + if (!nft_is_active_next(ctx->net, rule)) + continue; + err = nft_delrule(ctx, rule); if (err < 0) return err; From 2c4f1fcbef0bc324830bc2fb1a264c08ec93dec5 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 25 Jan 2019 23:10:50 +0800 Subject: [PATCH 1236/1436] kprobe: Do not use uaccess functions to access kernel memory that can fault The userspace can ask kprobe to intercept strings at any memory address, including invalid kernel address. In this case, fetch_store_strlen() would crash since it uses general usercopy function, and user access functions are no longer allowed to access kernel memory. For example, we can crash the kernel by doing something as below: $ sudo kprobe 'p:do_sys_open +0(+0(%si)):string' [ 103.620391] BUG: GPF in non-whitelisted uaccess (non-canonical address?) [ 103.622104] general protection fault: 0000 [#1] SMP PTI [ 103.623424] CPU: 10 PID: 1046 Comm: cat Not tainted 5.0.0-rc3-00130-gd73aba1-dirty #96 [ 103.625321] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-2-g628b2e6-dirty-20190104_103505-linux 04/01/2014 [ 103.628284] RIP: 0010:process_fetch_insn+0x1ab/0x4b0 [ 103.629518] Code: 10 83 80 28 2e 00 00 01 31 d2 31 ff 48 8b 74 24 28 eb 0c 81 fa ff 0f 00 00 7f 1c 85 c0 75 18 66 66 90 0f ae e8 48 63 ca 89 f8 <8a> 0c 31 66 66 90 83 c2 01 84 c9 75 dc 89 54 24 34 89 44 24 28 48 [ 103.634032] RSP: 0018:ffff88845eb37ce0 EFLAGS: 00010246 [ 103.635312] RAX: 0000000000000000 RBX: ffff888456c4e5a8 RCX: 0000000000000000 [ 103.637057] RDX: 0000000000000000 RSI: 2e646c2f6374652f RDI: 0000000000000000 [ 103.638795] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 [ 103.640556] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000 [ 103.642297] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 103.644040] FS: 0000000000000000(0000) GS:ffff88846f000000(0000) knlGS:0000000000000000 [ 103.646019] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 103.647436] CR2: 00007ffc79758038 CR3: 0000000463360006 CR4: 0000000000020ee0 [ 103.649147] Call Trace: [ 103.649781] ? sched_clock_cpu+0xc/0xa0 [ 103.650747] ? do_sys_open+0x5/0x220 [ 103.651635] kprobe_trace_func+0x303/0x380 [ 103.652645] ? do_sys_open+0x5/0x220 [ 103.653528] kprobe_dispatcher+0x45/0x50 [ 103.654682] ? do_sys_open+0x1/0x220 [ 103.655875] kprobe_ftrace_handler+0x90/0xf0 [ 103.657282] ftrace_ops_assist_func+0x54/0xf0 [ 103.658564] ? __call_rcu+0x1dc/0x280 [ 103.659482] 0xffffffffc00000bf [ 103.660384] ? __ia32_sys_open+0x20/0x20 [ 103.661682] ? do_sys_open+0x1/0x220 [ 103.662863] do_sys_open+0x5/0x220 [ 103.663988] do_syscall_64+0x60/0x210 [ 103.665201] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 103.666862] RIP: 0033:0x7fc22fadccdd [ 103.668034] Code: 48 89 54 24 e0 41 83 e2 40 75 32 89 f0 25 00 00 41 00 3d 00 00 41 00 74 24 89 f2 b8 01 01 00 00 48 89 fe bf 9c ff ff ff 0f 05 <48> 3d 00 f0 ff ff 77 33 f3 c3 66 0f 1f 84 00 00 00 00 00 48 8d 44 [ 103.674029] RSP: 002b:00007ffc7972c3a8 EFLAGS: 00000287 ORIG_RAX: 0000000000000101 [ 103.676512] RAX: ffffffffffffffda RBX: 0000562f86147a21 RCX: 00007fc22fadccdd [ 103.678853] RDX: 0000000000080000 RSI: 00007fc22fae1428 RDI: 00000000ffffff9c [ 103.681151] RBP: ffffffffffffffff R08: 0000000000000000 R09: 0000000000000000 [ 103.683489] R10: 0000000000000000 R11: 0000000000000287 R12: 00007fc22fce90a8 [ 103.685774] R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000 [ 103.688056] Modules linked in: [ 103.689131] ---[ end trace 43792035c28984a1 ]--- This can be fixed by using probe_mem_read() instead, as it can handle faulting kernel memory addresses, which kprobes can legitimately do. Link: http://lkml.kernel.org/r/20190125151051.7381-1-changbin.du@gmail.com Cc: stable@vger.kernel.org Fixes: 9da3f2b7405 ("x86/fault: BUG() when uaccess helpers fault on kernel addresses") Signed-off-by: Changbin Du Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d5fb09ebba8b..9eaf07f99212 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -861,22 +861,14 @@ static const struct file_operations kprobe_profile_ops = { static nokprobe_inline int fetch_store_strlen(unsigned long addr) { - mm_segment_t old_fs; int ret, len = 0; u8 c; - old_fs = get_fs(); - set_fs(KERNEL_DS); - pagefault_disable(); - do { - ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1); + ret = probe_mem_read(&c, (u8 *)addr + len, 1); len++; } while (c && ret == 0 && len < MAX_STRING_SIZE); - pagefault_enable(); - set_fs(old_fs); - return (ret < 0) ? ret : len; } From 9e7382153f80ba45a0bbcd540fb77d4b15f6e966 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 14 Feb 2019 15:29:50 +0000 Subject: [PATCH 1237/1436] tracing: Fix number of entries in trace header The following commit 441dae8f2f29 ("tracing: Add support for display of tgid in trace output") removed the call to print_event_info() from print_func_help_header_irq() which results in the ftrace header not reporting the number of entries written in the buffer. As this wasn't the original intent of the patch, re-introduce the call to print_event_info() to restore the orginal behaviour. Link: http://lkml.kernel.org/r/20190214152950.4179-1-quentin.perret@arm.com Acked-by: Joel Fernandes Cc: stable@vger.kernel.org Fixes: 441dae8f2f29 ("tracing: Add support for display of tgid in trace output") Signed-off-by: Quentin Perret Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c521b7347482..c4238b441624 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3384,6 +3384,8 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file const char tgid_space[] = " "; const char space[] = " "; + print_event_info(buf, m); + seq_printf(m, "# %s _-----=> irqs-off\n", tgid ? tgid_space : space); seq_printf(m, "# %s / _----=> need-resched\n", From 69ef9bc54715fb1cb7786ada15774e469e822209 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 9 Feb 2019 00:38:45 +0100 Subject: [PATCH 1238/1436] auxdisplay: ht16k33: fix potential user-after-free on module unload On module unload/remove, we need to ensure that work does not run after we have freed resources. Concretely, cancel_delayed_work() may return while the callback function is still running. From kernel/workqueue.c: The work callback function may still be running on return, unless it returns true and the work doesn't re-arm itself. Explicitly flush or use cancel_delayed_work_sync() to wait on it. Link: https://lore.kernel.org/lkml/20190204220952.30761-1-TheSven73@googlemail.com/ Reported-by: Sven Van Asbroeck Reviewed-by: Dmitry Torokhov Reviewed-by: Sven Van Asbroeck Acked-by: Robin van der Gracht Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/ht16k33.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c index a43276c76fc6..21393ec3b9a4 100644 --- a/drivers/auxdisplay/ht16k33.c +++ b/drivers/auxdisplay/ht16k33.c @@ -509,7 +509,7 @@ static int ht16k33_remove(struct i2c_client *client) struct ht16k33_priv *priv = i2c_get_clientdata(client); struct ht16k33_fbdev *fbdev = &priv->fbdev; - cancel_delayed_work(&fbdev->work); + cancel_delayed_work_sync(&fbdev->work); unregister_framebuffer(fbdev->info); framebuffer_release(fbdev->info); free_page((unsigned long) fbdev->buffer); From ff98e20ef2081b8620dada28fc2d4fb24ca0abf2 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Thu, 24 Jan 2019 15:59:11 +0100 Subject: [PATCH 1239/1436] lib/crc32.c: mark crc32_le_base/__crc32c_le_base aliases as __pure The upcoming GCC 9 release extends the -Wmissing-attributes warnings (enabled by -Wall) to C and aliases: it warns when particular function attributes are missing in the aliases but not in their target. In particular, it triggers here because crc32_le_base/__crc32c_le_base aren't __pure while their target crc32_le/__crc32c_le are. These aliases are used by architectures as a fallback in accelerated versions of CRC32. See commit 9784d82db3eb ("lib/crc32: make core crc32() routines weak so they can be overridden"). Therefore, being fallbacks, it is likely that even if the aliases were called from C, there wouldn't be any optimizations possible. Currently, the only user is arm64, which calls this from asm. Still, marking the aliases as __pure makes sense and is a good idea for documentation purposes and possible future optimizations, which also silences the warning. Acked-by: Ard Biesheuvel Tested-by: Laura Abbott Signed-off-by: Miguel Ojeda --- lib/crc32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/crc32.c b/lib/crc32.c index 45b1d67a1767..4a20455d1f61 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -206,8 +206,8 @@ u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) EXPORT_SYMBOL(crc32_le); EXPORT_SYMBOL(__crc32c_le); -u32 crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le); -u32 __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le); +u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le); +u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le); /* * This multiplies the polynomials x and y modulo the given modulus. From c0d9782f5b6d7157635ae2fd782a4b27d55a6013 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 8 Feb 2019 23:51:05 +0100 Subject: [PATCH 1240/1436] Compiler Attributes: add support for __copy (gcc >= 9) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From the GCC manual: copy copy(function) The copy attribute applies the set of attributes with which function has been declared to the declaration of the function to which the attribute is applied. The attribute is designed for libraries that define aliases or function resolvers that are expected to specify the same set of attributes as their targets. The copy attribute can be used with functions, variables, or types. However, the kind of symbol to which the attribute is applied (either function or variable) must match the kind of symbol to which the argument refers. The copy attribute copies only syntactic and semantic attributes but not attributes that affect a symbol’s linkage or visibility such as alias, visibility, or weak. The deprecated attribute is also not copied. https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html The upcoming GCC 9 release extends the -Wmissing-attributes warnings (enabled by -Wall) to C and aliases: it warns when particular function attributes are missing in the aliases but not in their target, e.g.: void __cold f(void) {} void __alias("f") g(void); diagnoses: warning: 'g' specifies less restrictive attribute than its target 'f': 'cold' [-Wmissing-attributes] Using __copy(f) we can copy the __cold attribute from f to g: void __cold f(void) {} void __copy(f) __alias("f") g(void); This attribute is most useful to deal with situations where an alias is declared but we don't know the exact attributes the target has. For instance, in the kernel, the widely used module_init/exit macros define the init/cleanup_module aliases, but those cannot be marked always as __init/__exit since some modules do not have their functions marked as such. Suggested-by: Martin Sebor Reviewed-by: Nick Desaulniers Signed-off-by: Miguel Ojeda --- include/linux/compiler_attributes.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 19f32b0c29af..6b318efd8a74 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -34,6 +34,7 @@ #ifndef __has_attribute # define __has_attribute(x) __GCC4_has_attribute_##x # define __GCC4_has_attribute___assume_aligned__ (__GNUC_MINOR__ >= 9) +# define __GCC4_has_attribute___copy__ 0 # define __GCC4_has_attribute___designated_init__ 0 # define __GCC4_has_attribute___externally_visible__ 1 # define __GCC4_has_attribute___noclone__ 1 @@ -100,6 +101,19 @@ */ #define __attribute_const__ __attribute__((__const__)) +/* + * Optional: only supported since gcc >= 9 + * Optional: not supported by clang + * Optional: not supported by icc + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-copy-function-attribute + */ +#if __has_attribute(__copy__) +# define __copy(symbol) __attribute__((__copy__(symbol))) +#else +# define __copy(symbol) +#endif + /* * Don't. Just don't. See commit 771c035372a0 ("deprecate the '__deprecated' * attribute warnings entirely and for good") for more information. From a6e60d84989fa0e91db7f236eda40453b0e44afa Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 19 Jan 2019 20:59:34 +0100 Subject: [PATCH 1241/1436] include/linux/module.h: copy __init/__exit attrs to init/cleanup_module The upcoming GCC 9 release extends the -Wmissing-attributes warnings (enabled by -Wall) to C and aliases: it warns when particular function attributes are missing in the aliases but not in their target. In particular, it triggers for all the init/cleanup_module aliases in the kernel (defined by the module_init/exit macros), ending up being very noisy. These aliases point to the __init/__exit functions of a module, which are defined as __cold (among other attributes). However, the aliases themselves do not have the __cold attribute. Since the compiler behaves differently when compiling a __cold function as well as when compiling paths leading to calls to __cold functions, the warning is trying to point out the possibly-forgotten attribute in the alias. In order to keep the warning enabled, we decided to silence this case. Ideally, we would mark the aliases directly as __init/__exit. However, there are currently around 132 modules in the kernel which are missing __init/__exit in their init/cleanup functions (either because they are missing, or for other reasons, e.g. the functions being called from somewhere else); and a section mismatch is a hard error. A conservative alternative was to mark the aliases as __cold only. However, since we would like to eventually enforce __init/__exit to be always marked, we chose to use the new __copy function attribute (introduced by GCC 9 as well to deal with this). With it, we copy the attributes used by the target functions into the aliases. This way, functions that were not marked as __init/__exit won't have their aliases marked either, and therefore there won't be a section mismatch. Note that the warning would go away marking either the extern declaration, the definition, or both. However, we only mark the definition of the alias, since we do not want callers (which only see the declaration) to be compiled as if the function was __cold (and therefore the paths leading to those calls would be assumed to be unlikely). Link: https://lore.kernel.org/lkml/20190123173707.GA16603@gmail.com/ Link: https://lore.kernel.org/lkml/20190206175627.GA20399@gmail.com/ Suggested-by: Martin Sebor Acked-by: Jessica Yu Signed-off-by: Miguel Ojeda --- include/linux/module.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 8fa38d3e7538..f5bc4c046461 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -129,13 +129,13 @@ extern void cleanup_module(void); #define module_init(initfn) \ static inline initcall_t __maybe_unused __inittest(void) \ { return initfn; } \ - int init_module(void) __attribute__((alias(#initfn))); + int init_module(void) __copy(initfn) __attribute__((alias(#initfn))); /* This is only required if you want to be unloadable. */ #define module_exit(exitfn) \ static inline exitcall_t __maybe_unused __exittest(void) \ { return exitfn; } \ - void cleanup_module(void) __attribute__((alias(#exitfn))); + void cleanup_module(void) __copy(exitfn) __attribute__((alias(#exitfn))); #endif From e7afe6c1d486b516ed586dcc10b3e7e3e85a9c2b Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 15 Feb 2019 13:42:02 -0500 Subject: [PATCH 1242/1436] sunrpc: fix 4 more call sites that were using stack memory with a scatterlist While trying to reproduce a reported kernel panic on arm64, I discovered that AUTH_GSS basically doesn't work at all with older enctypes on arm64 systems with CONFIG_VMAP_STACK enabled. It turns out there still a few places using stack memory with scatterlists, causing krb5_encrypt() and krb5_decrypt() to produce incorrect results (or a BUG if CONFIG_DEBUG_SG is enabled). Tested with cthon on v4.0/v4.1/v4.2 with krb5/krb5i/krb5p using des3-cbc-sha1 and arcfour-hmac-md5. Signed-off-by: Scott Mayhew Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/gss_krb5_seqnum.c | 49 +++++++++++++++++++++------ 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index fb6656295204..507105127095 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -44,7 +44,7 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum, unsigned char *cksum, unsigned char *buf) { struct crypto_sync_skcipher *cipher; - unsigned char plain[8]; + unsigned char *plain; s32 code; dprintk("RPC: %s:\n", __func__); @@ -52,6 +52,10 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum, if (IS_ERR(cipher)) return PTR_ERR(cipher); + plain = kmalloc(8, GFP_NOFS); + if (!plain) + return -ENOMEM; + plain[0] = (unsigned char) ((seqnum >> 24) & 0xff); plain[1] = (unsigned char) ((seqnum >> 16) & 0xff); plain[2] = (unsigned char) ((seqnum >> 8) & 0xff); @@ -67,6 +71,7 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum, code = krb5_encrypt(cipher, cksum, plain, buf, 8); out: + kfree(plain); crypto_free_sync_skcipher(cipher); return code; } @@ -77,12 +82,17 @@ krb5_make_seq_num(struct krb5_ctx *kctx, u32 seqnum, unsigned char *cksum, unsigned char *buf) { - unsigned char plain[8]; + unsigned char *plain; + s32 code; if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) return krb5_make_rc4_seq_num(kctx, direction, seqnum, cksum, buf); + plain = kmalloc(8, GFP_NOFS); + if (!plain) + return -ENOMEM; + plain[0] = (unsigned char) (seqnum & 0xff); plain[1] = (unsigned char) ((seqnum >> 8) & 0xff); plain[2] = (unsigned char) ((seqnum >> 16) & 0xff); @@ -93,7 +103,9 @@ krb5_make_seq_num(struct krb5_ctx *kctx, plain[6] = direction; plain[7] = direction; - return krb5_encrypt(key, cksum, plain, buf, 8); + code = krb5_encrypt(key, cksum, plain, buf, 8); + kfree(plain); + return code; } static s32 @@ -101,7 +113,7 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum, unsigned char *buf, int *direction, s32 *seqnum) { struct crypto_sync_skcipher *cipher; - unsigned char plain[8]; + unsigned char *plain; s32 code; dprintk("RPC: %s:\n", __func__); @@ -113,20 +125,28 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum, if (code) goto out; + plain = kmalloc(8, GFP_NOFS); + if (!plain) { + code = -ENOMEM; + goto out; + } + code = krb5_decrypt(cipher, cksum, buf, plain, 8); if (code) - goto out; + goto out_plain; if ((plain[4] != plain[5]) || (plain[4] != plain[6]) || (plain[4] != plain[7])) { code = (s32)KG_BAD_SEQ; - goto out; + goto out_plain; } *direction = plain[4]; *seqnum = ((plain[0] << 24) | (plain[1] << 16) | (plain[2] << 8) | (plain[3])); +out_plain: + kfree(plain); out: crypto_free_sync_skcipher(cipher); return code; @@ -139,7 +159,7 @@ krb5_get_seq_num(struct krb5_ctx *kctx, int *direction, u32 *seqnum) { s32 code; - unsigned char plain[8]; + unsigned char *plain; struct crypto_sync_skcipher *key = kctx->seq; dprintk("RPC: krb5_get_seq_num:\n"); @@ -147,18 +167,25 @@ krb5_get_seq_num(struct krb5_ctx *kctx, if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) return krb5_get_rc4_seq_num(kctx, cksum, buf, direction, seqnum); + plain = kmalloc(8, GFP_NOFS); + if (!plain) + return -ENOMEM; if ((code = krb5_decrypt(key, cksum, buf, plain, 8))) - return code; + goto out; if ((plain[4] != plain[5]) || (plain[4] != plain[6]) || - (plain[4] != plain[7])) - return (s32)KG_BAD_SEQ; + (plain[4] != plain[7])) { + code = (s32)KG_BAD_SEQ; + goto out; + } *direction = plain[4]; *seqnum = ((plain[0]) | (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24)); - return 0; +out: + kfree(plain); + return code; } From b251f9f63a3bea7864bf627960926d978e97814d Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Fri, 15 Feb 2019 09:51:35 -0800 Subject: [PATCH 1243/1436] bpf: make LWTUNNEL_BPF dependent on INET Lightweight tunnels are L3 constructs that are used with IP/IP6. For example, lwtunnel_xmit is called from ip_output.c and ip6_output.c only. Make the dependency explicit at least for LWT-BPF, as now they call into IP routing. V2: added "Reported-by" below. Reported-by: Randy Dunlap Signed-off-by: Peter Oskolkov Acked-by: Randy Dunlap # build-tested Signed-off-by: Daniel Borkmann --- net/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/Kconfig b/net/Kconfig index 5cb9de1aaf88..62da6148e9f8 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -403,7 +403,7 @@ config LWTUNNEL config LWTUNNEL_BPF bool "Execute BPF program as route nexthop action" - depends on LWTUNNEL + depends on LWTUNNEL && INET default y if LWTUNNEL=y ---help--- Allows to run BPF programs as a nexthop action following a route From 13443154f6cac61d148471ede6d7f1f6b5ea946a Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 15 Feb 2019 20:14:15 +0000 Subject: [PATCH 1244/1436] MIPS: eBPF: Always return sign extended 32b values The function prototype used to call JITed eBPF code (ie. the type of the struct bpf_prog bpf_func field) returns an unsigned int. The MIPS n64 ABI that MIPS64 kernels target defines that 32 bit integers should always be sign extended when passed in registers as either arguments or return values. This means that when returning any value which may not already be sign extended (ie. of type REG_64BIT or REG_32BIT_ZERO_EX) we need to perform that sign extension in order to comply with the n64 ABI. Without this we see strange looking test failures from test_bpf.ko, such as: test_bpf: #65 ALU64_MOV_X: dst = 4294967295 jited:1 ret -1 != -1 FAIL (1 times) Although the return value printed matches the expected value, this is only because printf is only examining the least significant 32 bits of the 64 bit register value we returned. The register holding the expected value is sign extended whilst the v0 register was set to a zero extended value by our JITed code, so when compared by a conditional branch instruction the values are not equal. We already handle this when the return value register is of type REG_32BIT_ZERO_EX, so simply extend this to also cover REG_64BIT. Signed-off-by: Paul Burton Fixes: b6bd53f9c4e8 ("MIPS: Add missing file for eBPF JIT.") Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Daniel Borkmann --- arch/mips/net/ebpf_jit.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index b16710a8a9e7..715415fa2345 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -343,12 +343,15 @@ static int build_int_epilogue(struct jit_ctx *ctx, int dest_reg) const struct bpf_prog *prog = ctx->skf; int stack_adjust = ctx->stack_size; int store_offset = stack_adjust - 8; + enum reg_val_type td; int r0 = MIPS_R_V0; - if (dest_reg == MIPS_R_RA && - get_reg_val_type(ctx, prog->len, BPF_REG_0) == REG_32BIT_ZERO_EX) + if (dest_reg == MIPS_R_RA) { /* Don't let zero extended value escape. */ - emit_instr(ctx, sll, r0, r0, 0); + td = get_reg_val_type(ctx, prog->len, BPF_REG_0); + if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) + emit_instr(ctx, sll, r0, r0, 0); + } if (ctx->flags & EBPF_SAVE_RA) { emit_instr(ctx, ld, MIPS_R_RA, store_offset, MIPS_R_SP); From 1910faebf61d85a5b7138c0c1c600672e41f82a3 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 15 Feb 2019 20:14:16 +0000 Subject: [PATCH 1245/1436] MIPS: eBPF: Remove REG_32BIT_ZERO_EX REG_32BIT_ZERO_EX and REG_64BIT are always handled in exactly the same way, and reg_val_propagate_range() never actually sets any register to type REG_32BIT_ZERO_EX. Remove the redundant & unused REG_32BIT_ZERO_EX. Signed-off-by: Paul Burton Signed-off-by: Daniel Borkmann --- arch/mips/net/ebpf_jit.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 715415fa2345..76e9bf88d3b9 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -79,8 +79,6 @@ enum reg_val_type { REG_64BIT_32BIT, /* 32-bit compatible, need truncation for 64-bit ops. */ REG_32BIT, - /* 32-bit zero extended. */ - REG_32BIT_ZERO_EX, /* 32-bit no sign/zero extension needed. */ REG_32BIT_POS }; @@ -349,7 +347,7 @@ static int build_int_epilogue(struct jit_ctx *ctx, int dest_reg) if (dest_reg == MIPS_R_RA) { /* Don't let zero extended value escape. */ td = get_reg_val_type(ctx, prog->len, BPF_REG_0); - if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) + if (td == REG_64BIT) emit_instr(ctx, sll, r0, r0, 0); } @@ -695,7 +693,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, if (dst < 0) return dst; td = get_reg_val_type(ctx, this_idx, insn->dst_reg); - if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) { + if (td == REG_64BIT) { /* sign extend */ emit_instr(ctx, sll, dst, dst, 0); } @@ -710,7 +708,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, if (dst < 0) return dst; td = get_reg_val_type(ctx, this_idx, insn->dst_reg); - if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) { + if (td == REG_64BIT) { /* sign extend */ emit_instr(ctx, sll, dst, dst, 0); } @@ -724,7 +722,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, if (dst < 0) return dst; td = get_reg_val_type(ctx, this_idx, insn->dst_reg); - if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) + if (td == REG_64BIT) /* sign extend */ emit_instr(ctx, sll, dst, dst, 0); if (insn->imm == 1) { @@ -863,13 +861,13 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, if (src < 0 || dst < 0) return -EINVAL; td = get_reg_val_type(ctx, this_idx, insn->dst_reg); - if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) { + if (td == REG_64BIT) { /* sign extend */ emit_instr(ctx, sll, dst, dst, 0); } did_move = false; ts = get_reg_val_type(ctx, this_idx, insn->src_reg); - if (ts == REG_64BIT || ts == REG_32BIT_ZERO_EX) { + if (ts == REG_64BIT) { int tmp_reg = MIPS_R_AT; if (bpf_op == BPF_MOV) { @@ -1257,8 +1255,7 @@ jeq_common: if (insn->imm == 64 && td == REG_32BIT) emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); - if (insn->imm != 64 && - (td == REG_64BIT || td == REG_32BIT_ZERO_EX)) { + if (insn->imm != 64 && td == REG_64BIT) { /* sign extend */ emit_instr(ctx, sll, dst, dst, 0); } From 86b39a66b75f2f360eef56bdb5ac37d5068a9b7d Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Wed, 13 Feb 2019 10:52:34 -0600 Subject: [PATCH 1246/1436] net/mlx5: Correctly set LAG mode for ECPF When bonding is added, driver assumes that it's RoCE LAG if no VF is enabled. This is not enough for ECPF as the VF is enabled in host PF side. LAG should only choose RoCE mode when both slave devices meet conditions below: 1. E-Switch offloads mode is NONE. 2. No VF is enabled. Signed-off-by: Bodong Wang Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 2d223385dc81..04c5aca7f8c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -343,6 +343,11 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) roce_lag = !mlx5_sriov_is_enabled(dev0) && !mlx5_sriov_is_enabled(dev1); +#ifdef CONFIG_MLX5_ESWITCH + roce_lag &= dev0->priv.eswitch->mode == SRIOV_NONE && + dev1->priv.eswitch->mode == SRIOV_NONE; +#endif + if (roce_lag) mlx5_lag_remove_ib_devices(ldev); From a1b3839ac4a4933c7c5167efd7b6b091130d11aa Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 8 Nov 2018 22:37:04 +0200 Subject: [PATCH 1247/1436] net/mlx5: E-Switch, Properly refer to the esw manager vport In SmartNIC mode, the eswitch manager is not necessarily the PF (vport 0). Use a helper function to get the correct eswitch manager vport number and cache on the eswitch instance for fast reference. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 35 ++++++++++++------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 10 ++++++ .../mellanox/mlx5/core/eswitch_offloads.c | 7 ++-- include/linux/mlx5/vport.h | 2 ++ 4 files changed, 39 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 05830696abd8..9c622749dbde 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -378,16 +378,16 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) u16 vport = vaddr->vport; int err; - /* Skip mlx5_mpfs_add_mac for PFs, - * it is already done by the PF netdev in mlx5e_execute_l2_action + /* Skip mlx5_mpfs_add_mac for eswitch_managers, + * it is already done by its netdev in mlx5e_execute_l2_action */ - if (!vport) + if (esw->manager_vport == vport) goto fdb_add; err = mlx5_mpfs_add_mac(esw->dev, mac); if (err) { esw_warn(esw->dev, - "Failed to add L2 table mac(%pM) for vport(%d), err(%d)\n", + "Failed to add L2 table mac(%pM) for vport(0x%x), err(%d)\n", mac, vport, err); return err; } @@ -410,10 +410,10 @@ static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) u16 vport = vaddr->vport; int err = 0; - /* Skip mlx5_mpfs_del_mac for PFs, - * it is already done by the PF netdev in mlx5e_execute_l2_action + /* Skip mlx5_mpfs_del_mac for eswitch managerss, + * it is already done by its netdev in mlx5e_execute_l2_action */ - if (!vport || !vaddr->mpfs) + if (!vaddr->mpfs || esw->manager_vport == vport) goto fdb_del; err = mlx5_mpfs_del_mac(esw->dev, mac); @@ -1457,15 +1457,22 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, { int vport_num = vport->vport; - if (!vport_num) + if (esw->manager_vport == vport_num) return; mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, vport_num, vport->info.link_state); - mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac); - mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid); + + /* Host PF has its own mac/guid. */ + if (vport_num) { + mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, + vport->info.mac); + mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, + vport->info.node_guid); + } + modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, (vport->info.vlan || vport->info.qos)); @@ -1537,8 +1544,11 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, vport->enabled_events = enable_events; vport->enabled = true; - /* only PF is trusted by default */ - if (!vport_num) + /* Esw manager is trusted by default. Host PF (vport 0) is trusted as well + * in smartNIC as it's a vport group manager. + */ + if (esw->manager_vport == vport_num || + (!vport_num && mlx5_core_is_ecpf(esw->dev))) vport->info.trusted = true; esw_vport_change_handle_locked(vport); @@ -1733,6 +1743,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) return -ENOMEM; esw->dev = dev; + esw->manager_vport = mlx5_eswitch_manager_vport(dev); esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 0a3eee8746c1..959a9e28d08f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include "lib/mpfs.h" @@ -204,6 +205,7 @@ struct mlx5_eswitch { struct mlx5_esw_offload offloads; int mode; int nvports; + u16 manager_vport; }; void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); @@ -363,6 +365,14 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, #define esw_debug(dev, format, ...) \ mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) + +/* The returned number is valid only when the dev is eswitch manager. */ +static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf_esw_manager(dev) ? + MLX5_VPORT_ECPF : MLX5_VPORT_PF; +} + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 9128b45f3f37..af2c44d31357 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -522,7 +522,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); - MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */ + /* source vport is the esw manager */ + MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); @@ -567,7 +568,7 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, source_eswitch_owner_vhca_id); dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest->vport.num = 0; + dest->vport.num = peer_dev->priv.eswitch->manager_vport; dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id); dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; } @@ -666,7 +667,7 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dmac_c[0] = 0x01; dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport.num = 0; + dest.vport.num = esw->manager_vport; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 3bc05449ac39..b67bcc95ab5d 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -52,6 +52,8 @@ enum { }; enum { + MLX5_VPORT_PF = 0x0, + MLX5_VPORT_ECPF = 0xfffe, MLX5_VPORT_UPLINK = 0xffff }; From cbc44e76bfcdcaccd079487367593ee3f94d006d Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Fri, 1 Feb 2019 17:34:55 -0600 Subject: [PATCH 1248/1436] net/mlx5: E-Switch, Properly refer to host PF vport as other vport Commands referring to vports use the following scheme: 1. When referring to my own vport, put 0 in vport and 0 in other_vport. 2. When referring to another vport, put the vport number of the referred vport and put 1 in other_vport. It was assumed that driver is accessing other vport when vport number is greater than 0. With the above scheme, the case that ECPF eswitch manager is trying to access host PF vport will fall over with scheme 1 as the vport number is 0. This is apparently wrong as driver is trying to refer other vport. As such usage can only happen in the eswitch context, change relevant functions to provide other vport input properly. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 6 ++++-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 13 ++++++------- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 10 ++++------ include/linux/mlx5/vport.h | 4 ++-- 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 685f1975be58..f84889bbe2a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1083,7 +1083,8 @@ static int mlx5e_vf_rep_open(struct net_device *dev) if (!mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - rep->vport, MLX5_VPORT_ADMIN_STATE_UP)) + rep->vport, 1, + MLX5_VPORT_ADMIN_STATE_UP)) netif_carrier_on(dev); unlock: @@ -1101,7 +1102,8 @@ static int mlx5e_vf_rep_close(struct net_device *dev) mutex_lock(&priv->state_lock); mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - rep->vport, MLX5_VPORT_ADMIN_STATE_DOWN); + rep->vport, 1, + MLX5_VPORT_ADMIN_STATE_DOWN); ret = mlx5e_close_locked(dev); mutex_unlock(&priv->state_lock); return ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 9c622749dbde..648c743cc947 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1462,7 +1462,7 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport_num, + vport_num, 1, vport->info.link_state); /* Host PF has its own mac/guid. */ @@ -1581,10 +1581,10 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) esw_vport_change_handle_locked(vport); vport->enabled_events = 0; esw_vport_disable_qos(esw, vport_num); - if (vport_num && esw->mode == SRIOV_LEGACY) { + if (esw->mode == SRIOV_LEGACY) { mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport_num, + vport_num, 1, MLX5_VPORT_ADMIN_STATE_DOWN); esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); @@ -1875,7 +1875,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, err = mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport, link_state); + vport, 1, link_state); if (err) { mlx5_core_warn(esw->dev, "Failed to set vport %d link state, err = %d", @@ -2137,7 +2137,7 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, !MLX5_CAP_GEN(dev, transmit_discard_vport_down)) return 0; - err = mlx5_query_vport_down_stats(dev, vport_idx, + err = mlx5_query_vport_down_stats(dev, vport_idx, 1, &rx_discard_vport_down, &tx_discard_vport_down); if (err) @@ -2174,8 +2174,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, MLX5_CMD_OP_QUERY_VPORT_COUNTER); MLX5_SET(query_vport_counter_in, in, op_mod, 0); MLX5_SET(query_vport_counter_in, in, vport_number, vport); - if (vport) - MLX5_SET(query_vport_counter_in, in, other_vport, 1); + MLX5_SET(query_vport_counter_in, in, other_vport, 1); memset(out, 0, outlen); err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 9a928eb48522..ef95feca9961 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -64,7 +64,7 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) } int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, - u16 vport, u8 state) + u16 vport, u8 other_vport, u8 state) { u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {0}; u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)] = {0}; @@ -73,8 +73,7 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, MLX5_CMD_OP_MODIFY_VPORT_STATE); MLX5_SET(modify_vport_state_in, in, op_mod, opmod); MLX5_SET(modify_vport_state_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_vport_state_in, in, other_vport, 1); + MLX5_SET(modify_vport_state_in, in, other_vport, other_vport); MLX5_SET(modify_vport_state_in, in, admin_state, state); return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); @@ -1057,7 +1056,7 @@ free: EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter); int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, - u64 *rx_discard_vport_down, + u8 other_vport, u64 *rx_discard_vport_down, u64 *tx_discard_vport_down) { u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {0}; @@ -1068,8 +1067,7 @@ int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, MLX5_CMD_OP_QUERY_VNIC_ENV); MLX5_SET(query_vnic_env_in, in, op_mod, 0); MLX5_SET(query_vnic_env_in, in, vport_number, vport); - if (vport) - MLX5_SET(query_vnic_env_in, in, other_vport, 1); + MLX5_SET(query_vnic_env_in, in, other_vport, other_vport); err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); if (err) diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index b67bcc95ab5d..b7edcb1dadd8 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -59,7 +59,7 @@ enum { u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport); int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, - u16 vport, u8 state); + u16 vport, u8 other_vport, u8 state); int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr); int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, @@ -121,7 +121,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev); int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev); int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, - u64 *rx_discard_vport_down, + u8 other_vport, u64 *rx_discard_vport_down, u64 *tx_discard_vport_down); int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, int vf, u8 port_num, void *out, From eca8cc3895358b4c35a3ed439537b8c08d7dabb3 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 7 Feb 2019 10:40:58 -0600 Subject: [PATCH 1249/1436] net/mlx5: E-Switch, Refactor offloads flow steering init/cleanup E-switch offloads mode initialize/cleanup multiple steering related entities (flow table/group). Refactor these operations to internal helper functions for better block design. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 43 +++++++++++++------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index af2c44d31357..19969d487a01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1404,7 +1404,7 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS); } -int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) +static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports) { int err; @@ -1422,6 +1422,34 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) if (err) goto create_fg_err; + return 0; + +create_fg_err: + esw_destroy_offloads_table(esw); + +create_ft_err: + esw_destroy_offloads_fdb_tables(esw); + + return err; +} + +static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) +{ + esw_destroy_vport_rx_group(esw); + esw_destroy_offloads_table(esw); + esw_destroy_offloads_fdb_tables(esw); +} + +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) +{ + int err; + + mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); + + err = esw_offloads_steering_init(esw, nvports); + if (err) + return err; + err = esw_offloads_load_reps(esw, nvports); if (err) goto err_reps; @@ -1430,14 +1458,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) return 0; err_reps: - esw_destroy_vport_rx_group(esw); - -create_fg_err: - esw_destroy_offloads_table(esw); - -create_ft_err: - esw_destroy_offloads_fdb_tables(esw); - + esw_offloads_steering_cleanup(esw); return err; } @@ -1464,9 +1485,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) { esw_offloads_devcom_cleanup(esw); esw_offloads_unload_reps(esw, nvports); - esw_destroy_vport_rx_group(esw); - esw_destroy_offloads_table(esw); - esw_destroy_offloads_fdb_tables(esw); + esw_offloads_steering_cleanup(esw); } static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) From c9b99abcf232f69ddff158b1f313fd7d2654414b Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 31 Jan 2019 14:40:53 -0600 Subject: [PATCH 1250/1436] net/mlx5: E-Switch, Split VF and special vports for offloads mode When driver is entering offloads mode, there are two major tasks to do: initialize flow steering and create representors. Flow steering should make sure enough flow table/group spaces are reserved for all reps. Representors will be created in a group, all or none. With the introduction of ECPF, flow steering should still reserve the same spaces. But, the representors are not always loaded/unloaded in a single piece. Once ECPF is in offloads mode, it will get the number of VF changing event from host PF. In such scenario, only the VF reps should be loaded/unloaded, not the reps for special vports (such as the uplink vport). Thus, when entering offloads mode, driver should specify the total number of reps, and the number of VF reps separately. When leaving offloads mode, the cleanup should use the information self-contained in eswitch such as number of VFs. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 7 +-- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 5 +- .../mellanox/mlx5/core/eswitch_offloads.c | 57 +++++++++++++------ include/linux/mlx5/vport.h | 1 + 4 files changed, 48 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 648c743cc947..be6c2931d2a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1641,7 +1641,8 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw, nvfs + MLX5_SPECIAL_VPORTS); + err = esw_offloads_init(esw, nvfs, + nvfs + MLX5_SPECIAL_VPORTS); } if (err) @@ -1683,7 +1684,6 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; int old_mode; - int nvports; int i; if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE) @@ -1693,7 +1693,6 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw->enabled_vports, esw->mode); mc_promisc = &esw->mc_promisc; - nvports = esw->enabled_vports; if (esw->mode == SRIOV_LEGACY) mlx5_eq_notifier_unregister(esw->dev, &esw->nb); @@ -1709,7 +1708,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) if (esw->mode == SRIOV_LEGACY) esw_destroy_legacy_fdb_table(esw); else if (esw->mode == SRIOV_OFFLOADS) - esw_offloads_cleanup(esw, nvports); + esw_offloads_cleanup(esw); old_mode = esw->mode; esw->mode = SRIOV_NONE; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 959a9e28d08f..fd845e6c44d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -208,8 +208,9 @@ struct mlx5_eswitch { u16 manager_vport; }; -void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); -int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); +void esw_offloads_cleanup(struct mlx5_eswitch *esw); +int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, + int total_nvports); void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 19969d487a01..14f7ad67cfe4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -54,6 +54,8 @@ enum { #define fdb_prio_table(esw, chain, prio, level) \ (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)] +#define UPLINK_REP_INDEX 0 + static struct mlx5_flow_table * esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); static void @@ -1239,19 +1241,28 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) return 0; } +static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, u8 rep_type) +{ + if (!rep->rep_if[rep_type].valid) + return; + + rep->rep_if[rep_type].unload(rep); +} + static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports, u8 rep_type) { struct mlx5_eswitch_rep *rep; int vport; - for (vport = nvports - 1; vport >= 0; vport--) { + for (vport = nvports; vport >= MLX5_VPORT_FIRST_VF; vport--) { rep = &esw->offloads.vport_reps[vport]; - if (!rep->rep_if[rep_type].valid) - continue; - - rep->rep_if[rep_type].unload(rep); + __esw_offloads_unload_rep(esw, rep, rep_type); } + + rep = &esw->offloads.vport_reps[UPLINK_REP_INDEX]; + __esw_offloads_unload_rep(esw, rep, rep_type); } static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports) @@ -1262,6 +1273,15 @@ static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports) esw_offloads_unload_reps_type(esw, nvports, rep_type); } +static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, u8 rep_type) +{ + if (!rep->rep_if[rep_type].valid) + return 0; + + return rep->rep_if[rep_type].load(esw->dev, rep); +} + static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, u8 rep_type) { @@ -1269,12 +1289,14 @@ static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, int vport; int err; - for (vport = 0; vport < nvports; vport++) { - rep = &esw->offloads.vport_reps[vport]; - if (!rep->rep_if[rep_type].valid) - continue; + rep = &esw->offloads.vport_reps[UPLINK_REP_INDEX]; + err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + goto out; - err = rep->rep_if[rep_type].load(esw->dev, rep); + for (vport = MLX5_VPORT_FIRST_VF; vport <= nvports; vport++) { + rep = &esw->offloads.vport_reps[vport]; + err = __esw_offloads_load_rep(esw, rep, rep_type); if (err) goto err_reps; } @@ -1283,6 +1305,7 @@ static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, err_reps: esw_offloads_unload_reps_type(esw, vport, rep_type); +out: return err; } @@ -1440,17 +1463,18 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_destroy_offloads_fdb_tables(esw); } -int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) +int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, + int total_nvports) { int err; mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); - err = esw_offloads_steering_init(esw, nvports); + err = esw_offloads_steering_init(esw, total_nvports); if (err) return err; - err = esw_offloads_load_reps(esw, nvports); + err = esw_offloads_load_reps(esw, vf_nvports); if (err) goto err_reps; @@ -1481,10 +1505,12 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, return err; } -void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) +void esw_offloads_cleanup(struct mlx5_eswitch *esw) { + u16 num_vfs = esw->dev->priv.sriov.num_vfs; + esw_offloads_devcom_cleanup(esw); - esw_offloads_unload_reps(esw, nvports); + esw_offloads_unload_reps(esw, num_vfs); esw_offloads_steering_cleanup(esw); } @@ -1822,7 +1848,6 @@ EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) { -#define UPLINK_REP_INDEX 0 struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index b7edcb1dadd8..755aeea19e1c 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -53,6 +53,7 @@ enum { enum { MLX5_VPORT_PF = 0x0, + MLX5_VPORT_FIRST_VF = 0x1, MLX5_VPORT_ECPF = 0xfffe, MLX5_VPORT_UPLINK = 0xffff }; From 879c8f84e3602961e441723aaaac372a44dfe588 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Mon, 28 Jan 2019 22:12:45 -0600 Subject: [PATCH 1251/1436] net/mlx5: E-Switch, Use getter and iterator to access vport/rep With only PF and VF, it is sufficient to have the vport/rep array index as the vport number. This is because PF and VF vports numbers are consecutive serial numbers. In downstream patches with introducing of ECPF and UPLINK vports, it's not consecutive any more. Use getter to get specific vport/rep, and use iterator to traversal a list of vport/rep. This hides the translation between array index and vport number, and provides flexibility of using different translation mechanism in the future. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Suggested-by: Saeed Mahameed Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 94 ++++++++++++------- .../mellanox/mlx5/core/eswitch_offloads.c | 75 ++++++++++----- 2 files changed, 113 insertions(+), 56 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index be6c2931d2a0..d1454f18c0a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -68,6 +68,26 @@ enum { MC_ADDR_CHANGE | \ PROMISC_CHANGE) +/* The vport getter/iterator are only valid after esw->total_vports + * and vport->vport are initialized in mlx5_eswitch_init. + */ +#define mlx5_esw_for_all_vports(esw, i, vport) \ + for ((i) = MLX5_VPORT_PF; \ + (vport) = &(esw)->vports[i], \ + (i) < (esw)->total_vports; (i)++) + +#define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) \ + for ((i) = MLX5_VPORT_FIRST_VF; \ + (vport) = &(esw)->vports[i], \ + (i) <= (nvfs); (i)++) + +static struct mlx5_vport *mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, + u16 vport_num) +{ + WARN_ON(vport_num > esw->total_vports - 1); + return &esw->vports[vport_num]; +} + static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) { @@ -436,17 +456,18 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw, struct esw_mc_addr *esw_mc) { u8 *mac = vaddr->node.addr; - u16 vport_idx = 0; + struct mlx5_vport *vport; + u16 i, vport_num; - for (vport_idx = 0; vport_idx < esw->total_vports; vport_idx++) { - struct mlx5_vport *vport = &esw->vports[vport_idx]; + mlx5_esw_for_all_vports(esw, i, vport) { struct hlist_head *vport_hash = vport->mc_list; struct vport_addr *iter_vaddr = l2addr_hash_find(vport_hash, mac, struct vport_addr); + vport_num = vport->vport; if (IS_ERR_OR_NULL(vport->allmulti_rule) || - vaddr->vport == vport_idx) + vaddr->vport == vport_num) continue; switch (vaddr->action) { case MLX5_ACTION_ADD: @@ -458,14 +479,14 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw, if (!iter_vaddr) { esw_warn(esw->dev, "ALL-MULTI: Failed to add MAC(%pM) to vport[%d] DB\n", - mac, vport_idx); + mac, vport_num); continue; } - iter_vaddr->vport = vport_idx; + iter_vaddr->vport = vport_num; iter_vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, - vport_idx); + vport_num); iter_vaddr->mc_promisc = true; break; case MLX5_ACTION_DEL: @@ -564,7 +585,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, u16 vport_num, int list_type) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; vport_addr_action vport_addr_add; vport_addr_action vport_addr_del; @@ -599,7 +620,7 @@ static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, u16 vport_num, int list_type) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; u8 (*mac_list)[ETH_ALEN]; struct l2addr_node *node; @@ -686,7 +707,7 @@ out: */ static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u16 vport_num) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); struct l2addr_node *node; struct vport_addr *addr; struct hlist_head *hash; @@ -722,8 +743,8 @@ static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u16 vport_num) static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num, bool promisc, bool mc_promisc) { + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); struct esw_mc_addr *allmulti_addr = &esw->mc_promisc; - struct mlx5_vport *vport = &esw->vports[vport_num]; if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc) goto promisc; @@ -764,7 +785,7 @@ promisc: /* Sync vport rx mode from vport context */ static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); int promisc_all = 0; int promisc_uc = 0; int promisc_mc = 0; @@ -1341,8 +1362,8 @@ static void esw_destroy_tsar(struct mlx5_eswitch *esw) static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, u32 initial_max_rate, u32 initial_bw_share) { + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; - struct mlx5_vport *vport = &esw->vports[vport_num]; struct mlx5_core_dev *dev = esw->dev; void *vport_elem; int err = 0; @@ -1381,7 +1402,7 @@ static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); int err = 0; if (!vport->qos.enabled) @@ -1400,8 +1421,8 @@ static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num) static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num, u32 max_rate, u32 bw_share) { + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; - struct mlx5_vport *vport = &esw->vports[vport_num]; struct mlx5_core_dev *dev = esw->dev; void *vport_elem; u32 bitmask = 0; @@ -1518,10 +1539,10 @@ static void esw_vport_destroy_drop_counters(struct mlx5_vport *vport) mlx5_fc_destroy(dev, vport->egress.drop_counter); } -static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, +static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int enable_events) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + u16 vport_num = vport->vport; mutex_lock(&esw->state_lock); WARN_ON(vport->enabled); @@ -1558,9 +1579,10 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, mutex_unlock(&esw->state_lock); } -static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) +static void esw_disable_vport(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + u16 vport_num = vport->vport; if (!vport->enabled) return; @@ -1603,7 +1625,7 @@ static int eswitch_vport_event(struct notifier_block *nb, u16 vport_num; vport_num = be16_to_cpu(eqe->data.vport_change.vport_num); - vport = &esw->vports[vport_num]; + vport = mlx5_eswitch_get_vport(esw, vport_num); if (vport->enabled) queue_work(esw->work_queue, &vport->vport_change_handler); @@ -1615,6 +1637,7 @@ static int eswitch_vport_event(struct notifier_block *nb, int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { + struct mlx5_vport *vport; int err; int i, enabled_events; @@ -1657,8 +1680,14 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) * 2. FDB/Eswitch is programmed by user space tools */ enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0; - for (i = 0; i <= nvfs; i++) - esw_enable_vport(esw, i, enabled_events); + + /* Enable PF vport */ + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); + esw_enable_vport(esw, vport, enabled_events); + + /* Enable VF vports */ + mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) + esw_enable_vport(esw, vport, enabled_events); if (mode == SRIOV_LEGACY) { MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); @@ -1683,6 +1712,7 @@ abort: void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; + struct mlx5_vport *vport; int old_mode; int i; @@ -1697,8 +1727,8 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) if (esw->mode == SRIOV_LEGACY) mlx5_eq_notifier_unregister(esw->dev, &esw->nb); - for (i = 0; i < esw->total_vports; i++) - esw_disable_vport(esw, i); + mlx5_esw_for_all_vports(esw, i, vport) + esw_disable_vport(esw, vport); if (mc_promisc && mc_promisc->uplink_rule) mlx5_del_flow_rules(mc_promisc->uplink_rule); @@ -1725,6 +1755,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) { int total_vports = MLX5_TOTAL_VPORTS(dev); struct mlx5_eswitch *esw; + struct mlx5_vport *vport; int vport_num; int err; @@ -1757,6 +1788,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto abort; } + esw->total_vports = total_vports; + err = esw_offloads_init_reps(esw); if (err) goto abort; @@ -1765,9 +1798,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) hash_init(esw->offloads.mod_hdr_tbl); mutex_init(&esw->state_lock); - for (vport_num = 0; vport_num < total_vports; vport_num++) { - struct mlx5_vport *vport = &esw->vports[vport_num]; - + mlx5_esw_for_all_vports(esw, vport_num, vport) { vport->vport = vport_num; vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; vport->dev = dev; @@ -1775,7 +1806,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw_vport_change_handler); } - esw->total_vports = total_vports; esw->enabled_vports = 0; esw->mode = SRIOV_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; @@ -2017,8 +2047,7 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) u32 max_guarantee = 0; int i; - for (i = 0; i < esw->total_vports; i++) { - evport = &esw->vports[i]; + mlx5_esw_for_all_vports(esw, i, evport) { if (!evport->enabled || evport->info.min_rate < max_guarantee) continue; max_guarantee = evport->info.min_rate; @@ -2037,8 +2066,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) int err; int i; - for (i = 0; i < esw->total_vports; i++) { - evport = &esw->vports[i]; + mlx5_esw_for_all_vports(esw, i, evport) { if (!evport->enabled) continue; vport_min_rate = evport->info.min_rate; @@ -2053,7 +2081,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) if (bw_share == evport->qos.bw_share) continue; - err = esw_vport_qos_config(esw, i, vport_max_rate, + err = esw_vport_qos_config(esw, evport->vport, vport_max_rate, bw_share); if (!err) evport->qos.bw_share = bw_share; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 14f7ad67cfe4..4979c7ee0ad7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -56,6 +56,44 @@ enum { #define UPLINK_REP_INDEX 0 +/* The rep getter/iterator are only valid after esw->total_vports + * and vport->vport are initialized in mlx5_eswitch_init. + */ +#define mlx5_esw_for_all_reps(esw, i, rep) \ + for ((i) = MLX5_VPORT_PF; \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) < (esw)->total_vports; (i)++) + +#define mlx5_esw_for_each_vf_rep(esw, i, rep, nvfs) \ + for ((i) = MLX5_VPORT_FIRST_VF; \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) <= (nvfs); (i)++) + +#define mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvfs) \ + for ((i) = (nvfs); \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) >= MLX5_VPORT_FIRST_VF; (i)--) + +#define mlx5_esw_for_each_vf_vport(esw, vport, nvfs) \ + for ((vport) = MLX5_VPORT_FIRST_VF; \ + (vport) <= (nvfs); (vport)++) + +#define mlx5_esw_for_each_vf_vport_reverse(esw, vport, nvfs) \ + for ((vport) = (nvfs); \ + (vport) >= MLX5_VPORT_FIRST_VF; (vport)--) + +static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, + u16 vport_num) +{ + u16 idx = vport_num; + + if (vport_num == MLX5_VPORT_UPLINK) + idx = UPLINK_REP_INDEX; + + WARN_ON(idx > esw->total_vports - 1); + return &esw->offloads.vport_reps[idx]; +} + static struct mlx5_flow_table * esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); static void @@ -604,7 +642,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - for (i = 1; i < nvports; i++) { + mlx5_esw_for_each_vf_vport(esw, i, mlx5_core_max_vfs(esw->dev)) { MLX5_SET(fte_match_set_misc, misc, source_port, i); flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); @@ -622,7 +660,8 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, return 0; add_flow_err: - for (i--; i > 0; i--) + nvports = --i; + mlx5_esw_for_each_vf_vport_reverse(esw, i, nvports) mlx5_del_flow_rules(flows[i]); kvfree(flows); alloc_flows_err: @@ -637,7 +676,7 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw) flows = esw->fdb_table.offloads.peer_miss_rules; - for (i = 1; i < esw->total_vports; i++) + mlx5_esw_for_each_vf_vport_reverse(esw, i, mlx5_core_max_vfs(esw->dev)) mlx5_del_flow_rules(flows[i]); kvfree(flows); @@ -1229,9 +1268,7 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) offloads = &esw->offloads; mlx5_query_nic_vport_mac_address(dev, 0, hw_id); - for (vport = 0; vport < total_vfs; vport++) { - rep = &offloads->vport_reps[vport]; - + mlx5_esw_for_all_reps(esw, vport, rep) { rep->vport = vport; ether_addr_copy(rep->hw_id, hw_id); } @@ -1256,12 +1293,10 @@ static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports, struct mlx5_eswitch_rep *rep; int vport; - for (vport = nvports; vport >= MLX5_VPORT_FIRST_VF; vport--) { - rep = &esw->offloads.vport_reps[vport]; + mlx5_esw_for_each_vf_rep_reverse(esw, vport, rep, nvports) __esw_offloads_unload_rep(esw, rep, rep_type); - } - rep = &esw->offloads.vport_reps[UPLINK_REP_INDEX]; + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); __esw_offloads_unload_rep(esw, rep, rep_type); } @@ -1289,13 +1324,12 @@ static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, int vport; int err; - rep = &esw->offloads.vport_reps[UPLINK_REP_INDEX]; + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); err = __esw_offloads_load_rep(esw, rep, rep_type); if (err) goto out; - for (vport = MLX5_VPORT_FIRST_VF; vport <= nvports; vport++) { - rep = &esw->offloads.vport_reps[vport]; + mlx5_esw_for_each_vf_rep(esw, vport, rep, nvports) { err = __esw_offloads_load_rep(esw, rep, rep_type); if (err) goto err_reps; @@ -1304,7 +1338,7 @@ static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, return 0; err_reps: - esw_offloads_unload_reps_type(esw, vport, rep_type); + esw_offloads_unload_reps_type(esw, --vport, rep_type); out: return err; } @@ -1848,10 +1882,9 @@ EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) { - struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; - rep = &offloads->vport_reps[UPLINK_REP_INDEX]; + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); return rep->rep_if[rep_type].priv; } @@ -1859,13 +1892,9 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, int vport, u8 rep_type) { - struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; - if (vport == MLX5_VPORT_UPLINK) - vport = UPLINK_REP_INDEX; - - rep = &offloads->vport_reps[vport]; + rep = mlx5_eswitch_get_rep(esw, vport); if (rep->rep_if[rep_type].valid && rep->rep_if[rep_type].get_proto_dev) @@ -1876,13 +1905,13 @@ EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev); void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type) { - return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type); + return mlx5_eswitch_get_proto_dev(esw, MLX5_VPORT_UPLINK, rep_type); } EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev); struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, int vport) { - return &esw->offloads.vport_reps[vport]; + return mlx5_eswitch_get_rep(esw, vport); } EXPORT_SYMBOL(mlx5_eswitch_vport_rep); From f121e0ea9586b2c937bf1ff9a0b682dc6424ce1d Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 29 Jan 2019 21:48:31 -0600 Subject: [PATCH 1252/1436] net/mlx5: E-Switch, Add state to eswitch vport representors Currently the eswitch vport reps have a valid indicator, which is set on register and unset on unregister. However, a rep can be loaded or not loaded when doing unregister, current driver checks if the vport of that rep is enabled as a flag to imply the rep is loaded. However, for ECPF, this is not valid as the host PF will enable the vports for its VFs instead. Add three states: {unregistered, registered, loaded}, with the following state changes across different operations: create: (none) -> unregistered reg: unregistered -> registered load: registered -> loaded unload: loaded -> registered unreg: registered -> unregistered Note that the state shall only be updated inside eswitch driver rather than individual drivers such as ETH or IB. Signed-off-by: Bodong Wang Signed-off-by: Or Gerlitz Suggested-by: Mark Bloch Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 31 +++++++++++++------ include/linux/mlx5/eswitch.h | 8 ++++- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 4979c7ee0ad7..c6c9dad69ba8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -364,7 +364,7 @@ static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none"); for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { rep = &esw->offloads.vport_reps[vf_vport]; - if (!rep->rep_if[REP_ETH].valid) + if (rep->rep_if[REP_ETH].state != REP_LOADED) continue; err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); @@ -1256,7 +1256,7 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) struct mlx5_core_dev *dev = esw->dev; struct mlx5_esw_offload *offloads; struct mlx5_eswitch_rep *rep; - u8 hw_id[ETH_ALEN]; + u8 hw_id[ETH_ALEN], rep_type; int vport; esw->offloads.vport_reps = kcalloc(total_vfs, @@ -1271,6 +1271,9 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) mlx5_esw_for_all_reps(esw, vport, rep) { rep->vport = vport; ether_addr_copy(rep->hw_id, hw_id); + + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) + rep->rep_if[rep_type].state = REP_UNREGISTERED; } offloads->vport_reps[0].vport = MLX5_VPORT_UPLINK; @@ -1281,10 +1284,11 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u8 rep_type) { - if (!rep->rep_if[rep_type].valid) + if (rep->rep_if[rep_type].state != REP_LOADED) return; rep->rep_if[rep_type].unload(rep); + rep->rep_if[rep_type].state = REP_REGISTERED; } static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports, @@ -1311,10 +1315,18 @@ static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports) static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u8 rep_type) { - if (!rep->rep_if[rep_type].valid) + int err = 0; + + if (rep->rep_if[rep_type].state != REP_REGISTERED) return 0; - return rep->rep_if[rep_type].load(esw->dev, rep); + err = rep->rep_if[rep_type].load(esw->dev, rep); + if (err) + return err; + + rep->rep_if[rep_type].state = REP_LOADED; + + return 0; } static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, @@ -1861,7 +1873,7 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, rep_if->get_proto_dev = __rep_if->get_proto_dev; rep_if->priv = __rep_if->priv; - rep_if->valid = true; + rep_if->state = REP_REGISTERED; } EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep); @@ -1873,10 +1885,11 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, rep = &offloads->vport_reps[vport_index]; - if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled) + if (esw->mode == SRIOV_OFFLOADS && + rep->rep_if[rep_type].state == REP_LOADED) rep->rep_if[rep_type].unload(rep); - rep->rep_if[rep_type].valid = false; + rep->rep_if[rep_type].state = REP_UNREGISTERED; } EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep); @@ -1896,7 +1909,7 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, rep = mlx5_eswitch_get_rep(esw, vport); - if (rep->rep_if[rep_type].valid && + if (rep->rep_if[rep_type].state == REP_LOADED && rep->rep_if[rep_type].get_proto_dev) return rep->rep_if[rep_type].get_proto_dev(rep); return NULL; diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index fab5121ffb8f..e3dbc1bc0917 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -22,6 +22,12 @@ enum { NUM_REP_TYPES, }; +enum { + REP_UNREGISTERED, + REP_REGISTERED, + REP_LOADED, +}; + struct mlx5_eswitch_rep; struct mlx5_eswitch_rep_if { int (*load)(struct mlx5_core_dev *dev, @@ -29,7 +35,7 @@ struct mlx5_eswitch_rep_if { void (*unload)(struct mlx5_eswitch_rep *rep); void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep); void *priv; - bool valid; + u8 state; }; struct mlx5_eswitch_rep { From 29d9fd7d5a667b120f8228da3d8a8d1b2382538d Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 29 Jan 2019 22:57:21 -0600 Subject: [PATCH 1253/1436] net/mlx5: E-Switch, Support load/unload reps of specific vport types Currently the driver loads and unloads all reps in an unbreakable group. However, with ECPF, the reps of special vports such as uplink and host PF should always be loaded in switchdev mode where the reps for VFs will be loaded on-demand and unloaded on no-demand. This is a pre-step for that change. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 111 ++++++++++++++---- 1 file changed, 88 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index c6c9dad69ba8..7131d41796fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1291,25 +1291,47 @@ static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, rep->rep_if[rep_type].state = REP_REGISTERED; } -static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; - int vport; - - mlx5_esw_for_each_vf_rep_reverse(esw, vport, rep, nvports) - __esw_offloads_unload_rep(esw, rep, rep_type); rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); __esw_offloads_unload_rep(esw, rep, rep_type); } -static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports) +static void __unload_reps_vf_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + int i; + + mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvports) + __esw_offloads_unload_rep(esw, rep, rep_type); +} + +static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports) { u8 rep_type = NUM_REP_TYPES; while (rep_type-- > 0) - esw_offloads_unload_reps_type(esw, nvports, rep_type); + __unload_reps_vf_vport(esw, nvports, rep_type); +} + +static void __unload_reps_all_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + __unload_reps_vf_vport(esw, nvports, rep_type); + + /* Special vports must be the last to unload. */ + __unload_reps_special_vport(esw, rep_type); +} + +static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw, int nvports) +{ + u8 rep_type = NUM_REP_TYPES; + + while (rep_type-- > 0) + __unload_reps_all_vport(esw, nvports, rep_type); } static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, @@ -1329,39 +1351,42 @@ static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, return 0; } -static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; - int vport; int err; rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); err = __esw_offloads_load_rep(esw, rep, rep_type); - if (err) - goto out; + return err; +} - mlx5_esw_for_each_vf_rep(esw, vport, rep, nvports) { +static int __load_reps_vf_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + int err, i; + + mlx5_esw_for_each_vf_rep(esw, i, rep, nvports) { err = __esw_offloads_load_rep(esw, rep, rep_type); if (err) - goto err_reps; + goto err_vf; } return 0; -err_reps: - esw_offloads_unload_reps_type(esw, --vport, rep_type); -out: +err_vf: + __unload_reps_vf_vport(esw, --i, rep_type); return err; } -static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports) +static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports) { u8 rep_type = 0; int err; for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { - err = esw_offloads_load_reps_type(esw, nvports, rep_type); + err = __load_reps_vf_vport(esw, nvports, rep_type); if (err) goto err_reps; } @@ -1370,7 +1395,47 @@ static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports) err_reps: while (rep_type-- > 0) - esw_offloads_unload_reps_type(esw, nvports, rep_type); + __unload_reps_vf_vport(esw, nvports, rep_type); + return err; +} + +static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + int err; + + /* Special vports must be loaded first. */ + err = __load_reps_special_vport(esw, rep_type); + if (err) + return err; + + err = __load_reps_vf_vport(esw, nvports, rep_type); + if (err) + goto err_vfs; + + return 0; + +err_vfs: + __unload_reps_special_vport(esw, rep_type); + return err; +} + +static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports) +{ + u8 rep_type = 0; + int err; + + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { + err = __load_reps_all_vport(esw, nvports, rep_type); + if (err) + goto err_reps; + } + + return err; + +err_reps: + while (rep_type-- > 0) + __unload_reps_all_vport(esw, nvports, rep_type); return err; } @@ -1520,7 +1585,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, if (err) return err; - err = esw_offloads_load_reps(esw, vf_nvports); + err = esw_offloads_load_all_reps(esw, vf_nvports); if (err) goto err_reps; @@ -1556,7 +1621,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw) u16 num_vfs = esw->dev->priv.sriov.num_vfs; esw_offloads_devcom_cleanup(esw); - esw_offloads_unload_reps(esw, num_vfs); + esw_offloads_unload_all_reps(esw, num_vfs); esw_offloads_steering_cleanup(esw); } From f8e8fa0262eaf544490a11746c524333158ef0b6 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 31 Jan 2019 17:42:57 -0600 Subject: [PATCH 1254/1436] net/mlx5: E-Switch, Centralize repersentor reg/unreg to eswitch driver Eswitch has two users: IB and ETH. They both register repersentors when mlx5 interface is added, and unregister the repersentors when mlx5 interface is removed. Ideally, each driver should only deal with the entities which are unique to itself. However, current IB and ETH drivers have to perform the following eswitch operations: 1. When registering, specify how many vports to register. This number is the same for both drivers which is the total available vport numbers. 2. When unregistering, specify the number of registered vports to do unregister. Also, unload the repersentors which are already loaded. It's unnecessary for eswitch driver to hands out the control of above operations to individual driver users, as they're not unique to each driver. Instead, such operations should be centralized to eswitch driver. This consolidates eswitch control flow, and simplified IB and ETH driver. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/ib_rep.c | 18 ++---- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 19 ++----- .../mellanox/mlx5/core/eswitch_offloads.c | 55 +++++++++---------- include/linux/mlx5/eswitch.h | 11 ++-- 4 files changed, 43 insertions(+), 60 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 99cae9a10195..4700cffb5a00 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -95,26 +95,20 @@ static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vports = MLX5_TOTAL_VPORTS(mdev); struct mlx5_eswitch_rep_if rep_if = {}; - int vport; - for (vport = 0; vport < total_vports; vport++) { - rep_if.load = mlx5_ib_vport_rep_load; - rep_if.unload = mlx5_ib_vport_rep_unload; - rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev; - mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB); - } + rep_if.load = mlx5_ib_vport_rep_load; + rep_if.unload = mlx5_ib_vport_rep_unload; + rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev; + + mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_IB); } void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vports = MLX5_TOTAL_VPORTS(mdev); - int vport; - for (vport = total_vports - 1; vport >= 0; vport--) - mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB); + mlx5_eswitch_unregister_vport_reps(esw, REP_IB); } u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index f84889bbe2a0..287d48e5b073 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1798,25 +1798,18 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; + struct mlx5_eswitch_rep_if rep_if = {}; - for (vport = 0; vport < total_vfs; vport++) { - struct mlx5_eswitch_rep_if rep_if = {}; + rep_if.load = mlx5e_vport_rep_load; + rep_if.unload = mlx5e_vport_rep_unload; + rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - rep_if.load = mlx5e_vport_rep_load; - rep_if.unload = mlx5e_vport_rep_unload; - rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH); - } + mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH); } void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; - for (vport = total_vfs - 1; vport >= 0; vport--) - mlx5_eswitch_unregister_vport_rep(esw, vport, REP_ETH); + mlx5_eswitch_unregister_vport_reps(esw, REP_ETH); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7131d41796fb..b702b56c457e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1923,40 +1923,39 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) return 0; } -void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, - int vport_index, - struct mlx5_eswitch_rep_if *__rep_if, - u8 rep_type) +void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep_if *__rep_if, + u8 rep_type) { - struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep_if *rep_if; - - rep_if = &offloads->vport_reps[vport_index].rep_if[rep_type]; - - rep_if->load = __rep_if->load; - rep_if->unload = __rep_if->unload; - rep_if->get_proto_dev = __rep_if->get_proto_dev; - rep_if->priv = __rep_if->priv; - - rep_if->state = REP_REGISTERED; -} -EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep); - -void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport_index, u8 rep_type) -{ - struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; + int i; - rep = &offloads->vport_reps[vport_index]; + mlx5_esw_for_all_reps(esw, i, rep) { + rep_if = &rep->rep_if[rep_type]; + rep_if->load = __rep_if->load; + rep_if->unload = __rep_if->unload; + rep_if->get_proto_dev = __rep_if->get_proto_dev; + rep_if->priv = __rep_if->priv; - if (esw->mode == SRIOV_OFFLOADS && - rep->rep_if[rep_type].state == REP_LOADED) - rep->rep_if[rep_type].unload(rep); - - rep->rep_if[rep_type].state = REP_UNREGISTERED; + rep_if->state = REP_REGISTERED; + } } -EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep); +EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); + +void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type) +{ + u16 max_vf = mlx5_core_max_vfs(esw->dev); + struct mlx5_eswitch_rep *rep; + int i; + + if (esw->mode == SRIOV_OFFLOADS) + __unload_reps_all_vport(esw, max_vf, rep_type); + + mlx5_esw_for_all_reps(esw, i, rep) + rep->rep_if[rep_type].state = REP_UNREGISTERED; +} +EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) { diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index e3dbc1bc0917..96d8435421de 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -46,13 +46,10 @@ struct mlx5_eswitch_rep { u32 vlan_refcount; }; -void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, - int vport_index, - struct mlx5_eswitch_rep_if *rep_if, - u8 rep_type); -void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport_index, - u8 rep_type); +void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep_if *rep_if, + u8 rep_type); +void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type); void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, int vport, u8 rep_type); From 5ae5162066d8e59e365678a9e76fc4d8f6b78d40 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Fri, 14 Dec 2018 09:33:22 -0600 Subject: [PATCH 1255/1436] net/mlx5: E-Switch, Assign a different position for uplink rep and vport In offloads mode, the current implementation puts the uplink representor at index zero of the vport reps array. It is not "natural" to place it at index 0 since we want to put the representor for vport 0 at index 0 with the introduction of SmartNIC. A separate patch will handle the case whether a rep is needed for vport 0 (PF vport). So, we want to have a different placeholder for uplink vport and representor. It was placed at the end of vport and rep array. Since vport number can no longer act as an index into the vport or representors arrays, use functions to map vport numbers to indices when accessing the vports or representors arrays, and vice versa. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 11 +++++---- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 24 +++++++++++++++++++ .../mellanox/mlx5/core/eswitch_offloads.c | 11 ++------- include/linux/mlx5/vport.h | 11 ++++++--- 4 files changed, 40 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d1454f18c0a7..bb7f72467df9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -84,8 +84,10 @@ enum { static struct mlx5_vport *mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num) { + u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); + WARN_ON(vport_num > esw->total_vports - 1); - return &esw->vports[vport_num]; + return &esw->vports[idx]; } static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, @@ -1756,8 +1758,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) int total_vports = MLX5_TOTAL_VPORTS(dev); struct mlx5_eswitch *esw; struct mlx5_vport *vport; - int vport_num; - int err; + int err, i; if (!MLX5_VPORT_MANAGER(dev)) return 0; @@ -1798,8 +1799,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) hash_init(esw->offloads.mod_hdr_tbl); mutex_init(&esw->state_lock); - mlx5_esw_for_all_vports(esw, vport_num, vport) { - vport->vport = vport_num; + mlx5_esw_for_all_vports(esw, i, vport) { + vport->vport = mlx5_eswitch_index_to_vport_num(esw, i); vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; vport->dev = dev; INIT_WORK(&vport->vport_change_handler, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index fd845e6c44d5..2951c1296c3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -374,6 +374,30 @@ static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) MLX5_VPORT_ECPF : MLX5_VPORT_PF; } +static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw) +{ + /* Uplink always locate at the last element of the array.*/ + return esw->total_vports - 1; +} + +static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw, + u16 vport_num) +{ + if (vport_num == MLX5_VPORT_UPLINK) + return mlx5_eswitch_uplink_idx(esw); + + return vport_num; +} + +static inline int mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw, + int index) +{ + if (index == mlx5_eswitch_uplink_idx(esw)) + return MLX5_VPORT_UPLINK; + + return index; +} + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b702b56c457e..e787e9212174 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -85,10 +85,7 @@ enum { static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, u16 vport_num) { - u16 idx = vport_num; - - if (vport_num == MLX5_VPORT_UPLINK) - idx = UPLINK_REP_INDEX; + u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); WARN_ON(idx > esw->total_vports - 1); return &esw->offloads.vport_reps[idx]; @@ -1254,7 +1251,6 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) { int total_vfs = MLX5_TOTAL_VPORTS(esw->dev); struct mlx5_core_dev *dev = esw->dev; - struct mlx5_esw_offload *offloads; struct mlx5_eswitch_rep *rep; u8 hw_id[ETH_ALEN], rep_type; int vport; @@ -1265,19 +1261,16 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) if (!esw->offloads.vport_reps) return -ENOMEM; - offloads = &esw->offloads; mlx5_query_nic_vport_mac_address(dev, 0, hw_id); mlx5_esw_for_all_reps(esw, vport, rep) { - rep->vport = vport; + rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport); ether_addr_copy(rep->hw_id, hw_id); for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) rep->rep_if[rep_type].state = REP_UNREGISTERED; } - offloads->vport_reps[0].vport = MLX5_VPORT_UPLINK; - return 0; } diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 755aeea19e1c..134248c02786 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -36,9 +36,14 @@ #include #include -#define MLX5_VPORT_PF_PLACEHOLDER (1u) -#define MLX5_SPECIAL_VPORTS (MLX5_VPORT_PF_PLACEHOLDER) -#define MLX5_TOTAL_VPORTS(mdev) (MLX5_SPECIAL_VPORTS + mlx5_core_max_vfs(mdev)) +#define MLX5_VPORT_PF_PLACEHOLDER (1u) +#define MLX5_VPORT_UPLINK_PLACEHOLDER (1u) + +#define MLX5_SPECIAL_VPORTS (MLX5_VPORT_PF_PLACEHOLDER + \ + MLX5_VPORT_UPLINK_PLACEHOLDER) + +#define MLX5_TOTAL_VPORTS(mdev) (MLX5_SPECIAL_VPORTS + \ + mlx5_core_max_vfs(mdev)) #define MLX5_VPORT_MANAGER(mdev) \ (MLX5_CAP_GEN(mdev, vport_group_manager) && \ From 81cd229c294e2e416e9161d9286d34f3aaf19348 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Mon, 10 Dec 2018 11:59:33 -0600 Subject: [PATCH 1256/1436] net/mlx5: E-Switch, Consider ECPF vport depends on eswitch ownership ECPF connects to the eswitch through vport 0xfffe. ECPF may or may not be the eswitch manager depending on firmware configuration. 1. If ECPF is eswitch manager: ECPF will take over the eswitch manager responsibility. A rep of the host PF shall be created at the ECPF side for the eswitch manager to control. 2. If ECPF is not eswitch manager: host PF will be the eswitch manager, ECPF acts similar as a VF to the host PF. Host PF will be aware of the ECPF vport presence and control it's rep. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 8 +- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 15 ++++ .../mellanox/mlx5/core/eswitch_offloads.c | 79 ++++++++++++++++++- include/linux/mlx5/driver.h | 5 ++ include/linux/mlx5/mlx5_ifc.h | 3 +- include/linux/mlx5/vport.h | 8 +- 6 files changed, 110 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index bb7f72467df9..d2ab1ee19b2a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1667,7 +1667,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); err = esw_offloads_init(esw, nvfs, - nvfs + MLX5_SPECIAL_VPORTS); + nvfs + MLX5_SPECIAL_VPORTS(esw->dev)); } if (err) @@ -1687,6 +1687,12 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); esw_enable_vport(esw, vport, enabled_events); + /* Enable ECPF vports */ + if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + esw_enable_vport(esw, vport, enabled_events); + } + /* Enable VF vports */ mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) esw_enable_vport(esw, vport, enabled_events); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 2951c1296c3e..2baa0d71380c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -380,9 +380,20 @@ static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw) return esw->total_vports - 1; } +static inline int mlx5_eswitch_ecpf_idx(struct mlx5_eswitch *esw) +{ + return esw->total_vports - 2; +} + static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw, u16 vport_num) { + if (vport_num == MLX5_VPORT_ECPF) { + if (!mlx5_ecpf_vport_exists(esw->dev)) + esw_warn(esw->dev, "ECPF vport doesn't exist!\n"); + return mlx5_eswitch_ecpf_idx(esw); + } + if (vport_num == MLX5_VPORT_UPLINK) return mlx5_eswitch_uplink_idx(esw); @@ -392,6 +403,10 @@ static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw, static inline int mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw, int index) { + if (index == mlx5_eswitch_ecpf_idx(esw) && + mlx5_ecpf_vport_exists(esw->dev)) + return MLX5_VPORT_ECPF; + if (index == mlx5_eswitch_uplink_idx(esw)) return MLX5_VPORT_UPLINK; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e787e9212174..84a33f8e3350 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -639,14 +639,35 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_pf_flow_err; + } + flows[MLX5_VPORT_PF] = flow; + } + + if (mlx5_ecpf_vport_exists(esw->dev)) { + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_ecpf_flow_err; + } + flows[mlx5_eswitch_ecpf_idx(esw)] = flow; + } + mlx5_esw_for_each_vf_vport(esw, i, mlx5_core_max_vfs(esw->dev)) { MLX5_SET(fte_match_set_misc, misc, source_port, i); flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { err = PTR_ERR(flow); - esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err); - goto add_flow_err; + goto add_vf_flow_err; } flows[i] = flow; } @@ -656,10 +677,18 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, kvfree(spec); return 0; -add_flow_err: +add_vf_flow_err: nvports = --i; mlx5_esw_for_each_vf_vport_reverse(esw, i, nvports) mlx5_del_flow_rules(flows[i]); + + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]); +add_ecpf_flow_err: + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + mlx5_del_flow_rules(flows[MLX5_VPORT_PF]); +add_pf_flow_err: + esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err); kvfree(flows); alloc_flows_err: kvfree(spec); @@ -676,6 +705,12 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw) mlx5_esw_for_each_vf_vport_reverse(esw, i, mlx5_core_max_vfs(esw->dev)) mlx5_del_flow_rules(flows[i]); + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + mlx5_del_flow_rules(flows[MLX5_VPORT_PF]); + kvfree(flows); } @@ -1288,6 +1323,16 @@ static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; + if (mlx5_ecpf_vport_exists(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); + __esw_offloads_unload_rep(esw, rep, rep_type); + } + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + __esw_offloads_unload_rep(esw, rep, rep_type); + } + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); __esw_offloads_unload_rep(esw, rep, rep_type); } @@ -1351,6 +1396,34 @@ static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + return err; + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + goto err_pf; + } + + if (mlx5_ecpf_vport_exists(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); + err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + goto err_ecpf; + } + + return 0; + +err_ecpf: + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + __esw_offloads_unload_rep(esw, rep, rep_type); + } + +err_pf: + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); + __esw_offloads_unload_rep(esw, rep, rep_type); return err; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c5454f985e1d..c2de50f02b33 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1088,6 +1088,11 @@ static inline bool mlx5_core_is_ecpf_esw_manager(struct mlx5_core_dev *dev) return dev->caps.embedded_cpu && MLX5_CAP_GEN(dev, eswitch_manager); } +static inline bool mlx5_ecpf_vport_exists(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_pf(dev) && MLX5_CAP_ESW(dev, ecpf_vport_exists); +} + #define MLX5_HOST_PF_MAX_VFS (127u) static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev) { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5decffe565fb..b7bb774b57b0 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -631,7 +631,8 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_at_5[0x17]; + u8 reserved_at_5[0x16]; + u8 ecpf_vport_exists[0x1]; u8 counter_eswitch_affinity[0x1]; u8 merged_eswitch[0x1]; u8 nic_vport_node_guid_modify[0x1]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 134248c02786..0eef548b9946 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -38,11 +38,13 @@ #define MLX5_VPORT_PF_PLACEHOLDER (1u) #define MLX5_VPORT_UPLINK_PLACEHOLDER (1u) +#define MLX5_VPORT_ECPF_PLACEHOLDER(mdev) (mlx5_ecpf_vport_exists(mdev)) -#define MLX5_SPECIAL_VPORTS (MLX5_VPORT_PF_PLACEHOLDER + \ - MLX5_VPORT_UPLINK_PLACEHOLDER) +#define MLX5_SPECIAL_VPORTS(mdev) (MLX5_VPORT_PF_PLACEHOLDER + \ + MLX5_VPORT_UPLINK_PLACEHOLDER + \ + MLX5_VPORT_ECPF_PLACEHOLDER(mdev)) -#define MLX5_TOTAL_VPORTS(mdev) (MLX5_SPECIAL_VPORTS + \ +#define MLX5_TOTAL_VPORTS(mdev) (MLX5_SPECIAL_VPORTS(mdev) + \ mlx5_core_max_vfs(mdev)) #define MLX5_VPORT_MANAGER(mdev) \ From a3888f33db9f55f401ad315481af251d168e57dd Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 29 Jan 2019 23:13:13 -0600 Subject: [PATCH 1257/1436] net/mlx5: E-Switch, Load/unload VF reps according to event from host PF When host PF changes the number of VFs, the ECPF esw driver will get a FW event. It should query the number of VFs enabled by host PF and update the VF reps accordingly. Note that host PF can't change the number of VFs dynamically, it has to reset the number of VFs to 0 before changing to a new positive number. The host event is registered when driver is moving to switchdev mode, and it's the last step to do in esw_offloads_init. It's unregistered and the work queue is flushed when driver quits from switchdev mode. In this way, the host event and devlink command are serialized. When driver is enabling switchdev mode, pay attention to the following two facts: 1. Host PF must not have VF initialized as the flow table in ECPF has ENCAP enabled as default. Such flow table can't be created with existing initialized VFs. 2. ECPF doesn't know how many VFs the host PF will enable, ECPF offloads flow steering shall create the flow table/groups based on the max number of VFs possibly supported by host PF. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 17 ++++- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 11 +++ .../mellanox/mlx5/core/eswitch_offloads.c | 71 ++++++++++++++++++- 3 files changed, 96 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d2ab1ee19b2a..e18af31336e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -39,6 +39,7 @@ #include "lib/eq.h" #include "eswitch.h" #include "fs_core.h" +#include "ecpf.h" enum { MLX5_ACTION_NONE = 0, @@ -1639,6 +1640,7 @@ static int eswitch_vport_event(struct notifier_block *nb, int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { + int vf_nvports = 0, total_nvports = 0; struct mlx5_vport *vport; int err; int i, enabled_events; @@ -1657,6 +1659,18 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); + if (mode == SRIOV_OFFLOADS) { + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + err = mlx5_query_host_params_num_vfs(esw->dev, &vf_nvports); + if (err) + return err; + total_nvports = esw->total_vports; + } else { + vf_nvports = nvfs; + total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev); + } + } + esw->mode = mode; mlx5_lag_update(esw->dev); @@ -1666,8 +1680,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw, nvfs, - nvfs + MLX5_SPECIAL_VPORTS(esw->dev)); + err = esw_offloads_init(esw, vf_nvports, total_nvports); } if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 2baa0d71380c..af5581a57e56 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -182,6 +182,16 @@ struct esw_mc_addr { /* SRIOV only */ u32 refcnt; }; +struct mlx5_host_work { + struct work_struct work; + struct mlx5_eswitch *esw; +}; + +struct mlx5_host_info { + struct mlx5_nb nb; + u16 num_vfs; +}; + struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_nb nb; @@ -206,6 +216,7 @@ struct mlx5_eswitch { int mode; int nvports; u16 manager_vport; + struct mlx5_host_info host_info; }; void esw_offloads_cleanup(struct mlx5_eswitch *esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 84a33f8e3350..91c4095ac79e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -40,6 +40,8 @@ #include "en.h" #include "fs_core.h" #include "lib/devcom.h" +#include "ecpf.h" +#include "lib/eq.h" enum { FDB_FAST_PATH = 0, @@ -1640,6 +1642,57 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_destroy_offloads_fdb_tables(esw); } +static void esw_host_params_event_handler(struct work_struct *work) +{ + struct mlx5_host_work *host_work; + struct mlx5_eswitch *esw; + int err, num_vf = 0; + + host_work = container_of(work, struct mlx5_host_work, work); + esw = host_work->esw; + + err = mlx5_query_host_params_num_vfs(esw->dev, &num_vf); + if (err || num_vf == esw->host_info.num_vfs) + goto out; + + /* Number of VFs can only change from "0 to x" or "x to 0". */ + if (esw->host_info.num_vfs > 0) { + esw_offloads_unload_vf_reps(esw, esw->host_info.num_vfs); + } else { + err = esw_offloads_load_vf_reps(esw, num_vf); + + if (err) + goto out; + } + + esw->host_info.num_vfs = num_vf; + +out: + kfree(host_work); +} + +static int esw_host_params_event(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_host_work *host_work; + struct mlx5_host_info *host_info; + struct mlx5_eswitch *esw; + + host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC); + if (!host_work) + return NOTIFY_DONE; + + host_info = mlx5_nb_cof(nb, struct mlx5_host_info, nb); + esw = container_of(host_info, struct mlx5_eswitch, host_info); + + host_work->esw = esw; + + INIT_WORK(&host_work->work, esw_host_params_event_handler); + queue_work(esw->work_queue, &host_work->work); + + return NOTIFY_OK; +} + int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, int total_nvports) { @@ -1656,6 +1709,14 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, goto err_reps; esw_offloads_devcom_init(esw); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + MLX5_NB_INIT(&esw->host_info.nb, esw_host_params_event, + HOST_PARAMS_CHANGE); + mlx5_eq_notifier_register(esw->dev, &esw->host_info.nb); + esw->host_info.num_vfs = vf_nvports; + } + return 0; err_reps: @@ -1684,7 +1745,15 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, void esw_offloads_cleanup(struct mlx5_eswitch *esw) { - u16 num_vfs = esw->dev->priv.sriov.num_vfs; + u16 num_vfs; + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + mlx5_eq_notifier_unregister(esw->dev, &esw->host_info.nb); + flush_workqueue(esw->work_queue); + num_vfs = esw->host_info.num_vfs; + } else { + num_vfs = esw->dev->priv.sriov.num_vfs; + } esw_offloads_devcom_cleanup(esw); esw_offloads_unload_all_reps(esw, num_vfs); From c96692fb8f3d0a161c6892ab9cc51a5e9992ccf2 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 20 Dec 2018 17:28:06 -0600 Subject: [PATCH 1258/1436] net/mlx5: E-Switch, Allow transition to offloads mode for ECPF Currently, the e-switch driver requires going to legacy mode before changing to the offloads mode. This makes sense for regular case as the legacy mode is done by creating VFs. However, it's problematic when ECPF is the eswitch manager. In such case, ECPF will control the vports on peer host including the peer PF and VFs. But ECPF doesn't need and shall not create VFs as the VFs are created in the peer PF host. Grant ECPF the ability to change from none to the offloads mode. Note that currently the only way to go back to none mode is by unloading the ECPF driver. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 91c4095ac79e..f2260391be5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1250,7 +1250,8 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, { int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; - if (esw->mode != SRIOV_LEGACY) { + if (esw->mode != SRIOV_LEGACY && + !mlx5_core_is_ecpf_esw_manager(esw->dev)) { NL_SET_ERR_MSG_MOD(extack, "Can't set offloads mode, SRIOV legacy not enabled"); return -EINVAL; @@ -1846,7 +1847,8 @@ static int mlx5_devlink_eswitch_check(struct devlink *devlink) if(!MLX5_ESWITCH_MANAGER(dev)) return -EPERM; - if (dev->priv.eswitch->mode == SRIOV_NONE) + if (dev->priv.eswitch->mode == SRIOV_NONE && + !mlx5_core_is_ecpf_esw_manager(dev)) return -EOPNOTSUPP; return 0; From 4974d5f678abb34401558559d47e2ea3d1c15cba Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 15 Feb 2019 15:10:32 +0100 Subject: [PATCH 1259/1436] net: ip6_gre: initialize erspan_ver just for erspan tunnels After commit c706863bc890 ("net: ip6_gre: always reports o_key to userspace"), ip6gre and ip6gretap tunnels started reporting TUNNEL_KEY output flag even if it is not configured. ip6gre_fill_info checks erspan_ver value to add TUNNEL_KEY for erspan tunnels, however in commit 84581bdae9587 ("erspan: set erspan_ver to 1 by default when adding an erspan dev") erspan_ver is initialized to 1 even for ip6gre or ip6gretap Fix the issue moving erspan_ver initialization in a dedicated routine Fixes: c706863bc890 ("net: ip6_gre: always reports o_key to userspace") Signed-off-by: Lorenzo Bianconi Reviewed-by: Greg Rose Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 801a9a0c217e..43890898b0b5 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1719,6 +1719,24 @@ static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[], return 0; } +static void ip6erspan_set_version(struct nlattr *data[], + struct __ip6_tnl_parm *parms) +{ + parms->erspan_ver = 1; + if (data[IFLA_GRE_ERSPAN_VER]) + parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); + + if (parms->erspan_ver == 1) { + if (data[IFLA_GRE_ERSPAN_INDEX]) + parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); + } else if (parms->erspan_ver == 2) { + if (data[IFLA_GRE_ERSPAN_DIR]) + parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]); + if (data[IFLA_GRE_ERSPAN_HWID]) + parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]); + } +} + static void ip6gre_netlink_parms(struct nlattr *data[], struct __ip6_tnl_parm *parms) { @@ -1767,20 +1785,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[], if (data[IFLA_GRE_COLLECT_METADATA]) parms->collect_md = true; - - parms->erspan_ver = 1; - if (data[IFLA_GRE_ERSPAN_VER]) - parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); - - if (parms->erspan_ver == 1) { - if (data[IFLA_GRE_ERSPAN_INDEX]) - parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); - } else if (parms->erspan_ver == 2) { - if (data[IFLA_GRE_ERSPAN_DIR]) - parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]); - if (data[IFLA_GRE_ERSPAN_HWID]) - parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]); - } } static int ip6gre_tap_init(struct net_device *dev) @@ -2203,6 +2207,7 @@ static int ip6erspan_newlink(struct net *src_net, struct net_device *dev, int err; ip6gre_netlink_parms(data, &nt->parms); + ip6erspan_set_version(data, &nt->parms); ign = net_generic(net, ip6gre_net_id); if (nt->parms.collect_md) { @@ -2248,6 +2253,7 @@ static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[], if (IS_ERR(t)) return PTR_ERR(t); + ip6erspan_set_version(data, &p); ip6gre_tunnel_unlink_md(ign, t); ip6gre_tunnel_unlink(ign, t); ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]); From 59e6158acafa2fc80160d8a8241c2a715fe89cff Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 15 Feb 2019 15:11:53 +0000 Subject: [PATCH 1260/1436] mlxsw: core: fix spelling mistake "temprature" -> "temperature" There is a spelling mistake in several dev_err messages, fix these. Signed-off-by: Colin Ian King Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index f1ada4cdbd6b..6956bbebe2f1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -208,7 +208,7 @@ static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev, 1); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl); if (err) { - dev_err(dev, "Failed to query module temprature sensor\n"); + dev_err(dev, "Failed to query module temperature sensor\n"); return err; } @@ -251,7 +251,7 @@ static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev, 1); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl); if (err) { - dev_err(dev, "Failed to query module temprature sensor\n"); + dev_err(dev, "Failed to query module temperature sensor\n"); return err; } @@ -291,7 +291,7 @@ mlxsw_hwmon_module_temp_critical_show(struct device *dev, err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module, SFP_TEMP_HIGH_WARN, &temp); if (err) { - dev_err(dev, "Failed to query module temprature thresholds\n"); + dev_err(dev, "Failed to query module temperature thresholds\n"); return err; } @@ -314,7 +314,7 @@ mlxsw_hwmon_module_temp_emergency_show(struct device *dev, err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module, SFP_TEMP_HIGH_ALARM, &temp); if (err) { - dev_err(dev, "Failed to query module temprature thresholds\n"); + dev_err(dev, "Failed to query module temperature thresholds\n"); return err; } From 197f9ab7f08ce4b9ece662f747c3991b2f0fbb57 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Fri, 15 Feb 2019 17:17:08 +0100 Subject: [PATCH 1261/1436] net: phy: xgmiitorgmii: Support generic PHY status read Some PHY drivers like the generic one do not provide a read_status callback on their own but rely on genphy_read_status being called directly. With the current code, this results in a NULL function pointer call. Call genphy_read_status instead when there is no specific callback. Signed-off-by: Paul Kocialkowski Signed-off-by: David S. Miller --- drivers/net/phy/xilinx_gmii2rgmii.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c index 74a8782313cf..bd6084e315de 100644 --- a/drivers/net/phy/xilinx_gmii2rgmii.c +++ b/drivers/net/phy/xilinx_gmii2rgmii.c @@ -44,7 +44,10 @@ static int xgmiitorgmii_read_status(struct phy_device *phydev) u16 val = 0; int err; - err = priv->phy_drv->read_status(phydev); + if (priv->phy_drv->read_status) + err = priv->phy_drv->read_status(phydev); + else + err = genphy_read_status(phydev); if (err < 0) return err; From 3b89ea9c5902acccdbbdec307c85edd1bf52515e Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Fri, 15 Feb 2019 17:58:54 +0100 Subject: [PATCH 1262/1436] net: Fix for_each_netdev_feature on Big endian The features attribute is of type u64 and stored in the native endianes on the system. The for_each_set_bit() macro takes a pointer to a 32 bit array and goes over the bits in this area. On little Endian systems this also works with an u64 as the most significant bit is on the highest address, but on big endian the words are swapped. When we expect bit 15 here we get bit 47 (15 + 32). This patch converts it more or less to its own for_each_set_bit() implementation which works on 64 bit integers directly. This is then completely in host endianness and should work like expected. Fixes: fd867d51f ("net/core: generic support for disabling netdev features down stack") Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 23 +++++++++++++++++++++-- net/core/dev.c | 4 ++-- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 2b2a6dce1630..fce28562bed2 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -11,6 +11,7 @@ #define _LINUX_NETDEV_FEATURES_H #include +#include typedef u64 netdev_features_t; @@ -154,8 +155,26 @@ enum { #define NETIF_F_HW_TLS_TX __NETIF_F(HW_TLS_TX) #define NETIF_F_HW_TLS_RX __NETIF_F(HW_TLS_RX) -#define for_each_netdev_feature(mask_addr, bit) \ - for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) +/* Finds the next feature with the highest number of the range of start till 0. + */ +static inline int find_next_netdev_feature(u64 feature, unsigned long start) +{ + /* like BITMAP_LAST_WORD_MASK() for u64 + * this sets the most significant 64 - start to 0. + */ + feature &= ~0ULL >> (-start & ((sizeof(feature) * 8) - 1)); + + return fls64(feature) - 1; +} + +/* This goes for the MSB to the LSB through the set feature bits, + * mask_addr should be a u64 and bit an int + */ +#define for_each_netdev_feature(mask_addr, bit) \ + for ((bit) = find_next_netdev_feature((mask_addr), \ + NETDEV_FEATURE_COUNT); \ + (bit) >= 0; \ + (bit) = find_next_netdev_feature((mask_addr), (bit) - 1)) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ diff --git a/net/core/dev.c b/net/core/dev.c index 8e276e0192a1..5d03889502eb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8152,7 +8152,7 @@ static netdev_features_t netdev_sync_upper_features(struct net_device *lower, netdev_features_t feature; int feature_bit; - for_each_netdev_feature(&upper_disables, feature_bit) { + for_each_netdev_feature(upper_disables, feature_bit) { feature = __NETIF_F_BIT(feature_bit); if (!(upper->wanted_features & feature) && (features & feature)) { @@ -8172,7 +8172,7 @@ static void netdev_sync_lower_features(struct net_device *upper, netdev_features_t feature; int feature_bit; - for_each_netdev_feature(&upper_disables, feature_bit) { + for_each_netdev_feature(upper_disables, feature_bit) { feature = __NETIF_F_BIT(feature_bit); if (!(features & feature) && (lower->features & feature)) { netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n", From 1490ed2abc4fa4d7fa6e8498b9fa4243a6ce7f8a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 15 Feb 2019 18:15:37 +0100 Subject: [PATCH 1263/1436] net/ipv6: prefer rcu_access_pointer() over rcu_dereference() rt6_cache_allowed_for_pmtu() checks for rt->from presence, but it does not access the RCU protected pointer. We can use rcu_access_pointer() and clean-up the code a bit. No functional changes intended. Signed-off-by: Paolo Abeni Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dc066fdf7e46..87a0561136dd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2277,14 +2277,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) { - bool from_set; - - rcu_read_lock(); - from_set = !!rcu_dereference(rt->from); - rcu_read_unlock(); - return !(rt->rt6i_flags & RTF_CACHE) && - (rt->rt6i_flags & RTF_PCPU || from_set); + (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from)); } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, From d5be7f632bad0f489879eed0ff4b99bd7fe0b74c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 15 Feb 2019 12:15:47 -0500 Subject: [PATCH 1264/1436] net: validate untrusted gso packets without csum offload Syzkaller again found a path to a kernel crash through bad gso input. By building an excessively large packet to cause an skb field to wrap. If VIRTIO_NET_HDR_F_NEEDS_CSUM was set this would have been dropped in skb_partial_csum_set. GSO packets that do not set checksum offload are suspicious and rare. Most callers of virtio_net_hdr_to_skb already pass them to skb_probe_transport_header. Move that test forward, change it to detect parse failure and drop packets on failure as those cleary are not one of the legitimate VIRTIO_NET_HDR_GSO types. Fixes: bfd5f4a3d605 ("packet: Add GSO/csum offload support.") Fixes: f43798c27684 ("tun: Allow GSO using virtio_net_hdr") Reported-by: syzbot Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- include/linux/virtio_net.h | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 95d25b010a25..4c1c82a5678c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2434,7 +2434,7 @@ static inline void skb_probe_transport_header(struct sk_buff *skb, if (skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0)) skb_set_transport_header(skb, keys.control.thoff); - else + else if (offset_hint >= 0) skb_set_transport_header(skb, offset_hint); } diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index cb462f9ab7dd..71f2394abbf7 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -57,6 +57,15 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!skb_partial_csum_set(skb, start, off)) return -EINVAL; + } else { + /* gso packets without NEEDS_CSUM do not set transport_offset. + * probe and drop if does not match one of the above types. + */ + if (gso_type) { + skb_probe_transport_header(skb, -1); + if (!skb_transport_header_was_set(skb)) + return -EINVAL; + } } if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { From ff326d3cdff92a8fce1189f435f0fcaad765b1d4 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 15 Feb 2019 10:14:52 -0800 Subject: [PATCH 1265/1436] selftests: forwarding: Add some missing configuration symbols For the forwarding selftests to work, we need network namespaces when using veth/vrf otherwise ping/ping6 commands like these: ip vrf exec vveth0 /bin/ping 192.0.2.2 -c 10 -i 0.1 -w 5 will fail because network namespaces may not be enabled. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index 5cd2aed97958..da96eff72a8e 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -10,3 +10,5 @@ CONFIG_NET_CLS_FLOWER=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_ACT_GACT=m CONFIG_VETH=m +CONFIG_NAMESPACES=y +CONFIG_NET_NS=y From 0aa35a368933873dbab2d2ab54e5d05d3b089245 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 15 Feb 2019 19:22:25 +0100 Subject: [PATCH 1266/1436] s390/qeth: allow manual recovery when device is SOFTSETUP Once a qeth ccwgroup device is set online, it's also armed for internal recovery. So allow for testing that code path via sysfs, regardless of whether the interface is up or down. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index 30f61608fa22..485a335669ba 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -316,7 +316,7 @@ static ssize_t qeth_dev_recover_store(struct device *dev, if (!card) return -EINVAL; - if (card->state != CARD_STATE_UP) + if (!qeth_card_hw_is_reachable(card)) return -EPERM; i = simple_strtoul(buf, &tmp, 16); From bb92d3f8667f674ae6b2150406f19b33d5ad5f85 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 15 Feb 2019 19:22:26 +0100 Subject: [PATCH 1267/1436] s390/qeth: use a static Output Queue array qeth dynamically allocates an array for storing pointers to its Output Queue structures. Switch this to a static array - we are currently limited to 4 Output Queues, so shrinking the qeth_qdio_info struct by just a few bytes doesn't justify the additional complexity. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 2 +- drivers/s390/net/qeth_core_main.c | 21 ++++++--------------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 18696ffb662d..2ea5d7c0b94c 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -528,7 +528,7 @@ struct qeth_qdio_info { /* output */ int no_out_queues; - struct qeth_qdio_out_q **out_qs; + struct qeth_qdio_out_q *out_qs[QETH_MAX_QUEUES]; struct qdio_outbuf_state *out_bufstates; /* priority queueing */ diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 5ca934775c42..d01a4aded4cc 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -2433,12 +2433,6 @@ static int qeth_alloc_qdio_buffers(struct qeth_card *card) goto out_freeinq; /* outbound */ - card->qdio.out_qs = - kcalloc(card->qdio.no_out_queues, - sizeof(struct qeth_qdio_out_q *), - GFP_KERNEL); - if (!card->qdio.out_qs) - goto out_freepool; for (i = 0; i < card->qdio.no_out_queues; ++i) { card->qdio.out_qs[i] = qeth_alloc_qdio_out_buf(); if (!card->qdio.out_qs[i]) @@ -2468,11 +2462,10 @@ out_freeoutqbufs: card->qdio.out_qs[i]->bufs[j] = NULL; } out_freeoutq: - while (i > 0) + while (i > 0) { qeth_free_output_queue(card->qdio.out_qs[--i]); - kfree(card->qdio.out_qs); - card->qdio.out_qs = NULL; -out_freepool: + card->qdio.out_qs[i] = NULL; + } qeth_free_buffer_pool(card); out_freeinq: qeth_free_qdio_queue(card->qdio.in_q); @@ -2501,11 +2494,9 @@ static void qeth_free_qdio_buffers(struct qeth_card *card) /* inbound buffer pool */ qeth_free_buffer_pool(card); /* free outbound qdio_qs */ - if (card->qdio.out_qs) { - for (i = 0; i < card->qdio.no_out_queues; i++) - qeth_free_output_queue(card->qdio.out_qs[i]); - kfree(card->qdio.out_qs); - card->qdio.out_qs = NULL; + for (i = 0; i < card->qdio.no_out_queues; i++) { + qeth_free_output_queue(card->qdio.out_qs[i]); + card->qdio.out_qs[i] = NULL; } } From 4326b5b4616bd6b4b1a1c09961a383b4fceed20c Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 15 Feb 2019 19:22:27 +0100 Subject: [PATCH 1268/1436] s390/qeth: reduce ethtool statistics Counting the number of function calls and the time spent in functions is best left to proper tracing facilities. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 29 ----------- drivers/s390/net/qeth_core_main.c | 85 +++---------------------------- drivers/s390/net/qeth_l2_main.c | 8 --- drivers/s390/net/qeth_l3_main.c | 8 --- 4 files changed, 7 insertions(+), 123 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 2ea5d7c0b94c..9928728649eb 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -126,30 +126,6 @@ struct qeth_perf_stats { unsigned int sc_dp_p; unsigned int sc_p_dp; - /* qdio_cq_handler: number of times called, time spent in */ - __u64 cq_start_time; - unsigned int cq_cnt; - unsigned int cq_time; - /* qdio_input_handler: number of times called, time spent in */ - __u64 inbound_start_time; - unsigned int inbound_cnt; - unsigned int inbound_time; - /* qeth_send_packet: number of times called, time spent in */ - __u64 outbound_start_time; - unsigned int outbound_cnt; - unsigned int outbound_time; - /* qdio_output_handler: number of times called, time spent in */ - __u64 outbound_handler_start_time; - unsigned int outbound_handler_cnt; - unsigned int outbound_handler_time; - /* number of calls to and time spent in do_QDIO for inbound queue */ - __u64 inbound_do_qdio_start_time; - unsigned int inbound_do_qdio_cnt; - unsigned int inbound_do_qdio_time; - /* number of calls to and time spent in do_QDIO for outbound queues */ - __u64 outbound_do_qdio_start_time; - unsigned int outbound_do_qdio_cnt; - unsigned int outbound_do_qdio_time; unsigned int large_send_bytes; unsigned int large_send_cnt; unsigned int sg_skbs_sent; @@ -859,11 +835,6 @@ static inline int qeth_get_elements_for_range(addr_t start, addr_t end) return PFN_UP(end) - PFN_DOWN(start); } -static inline int qeth_get_micros(void) -{ - return (int) (get_tod_clock() >> 12); -} - static inline int qeth_get_ip_version(struct sk_buff *skb) { struct vlan_ethhdr *veth = vlan_eth_hdr(skb); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index d01a4aded4cc..e3127e232fb2 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3244,17 +3244,8 @@ static void qeth_queue_input_buffer(struct qeth_card *card, int index) * 'index') un-requeued -> this buffer is the first buffer that * will be requeued the next time */ - if (card->options.performance_stats) { - card->perf_stats.inbound_do_qdio_cnt++; - card->perf_stats.inbound_do_qdio_start_time = - qeth_get_micros(); - } rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, queue->next_buf_to_init, count); - if (card->options.performance_stats) - card->perf_stats.inbound_do_qdio_time += - qeth_get_micros() - - card->perf_stats.inbound_do_qdio_start_time; if (rc) { QETH_CARD_TEXT(card, 2, "qinberr"); } @@ -3407,22 +3398,12 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index, } netif_trans_update(queue->card->dev); - if (queue->card->options.performance_stats) { - queue->card->perf_stats.outbound_do_qdio_cnt++; - queue->card->perf_stats.outbound_do_qdio_start_time = - qeth_get_micros(); - } qdio_flags = QDIO_FLAG_SYNC_OUTPUT; if (atomic_read(&queue->set_pci_flags_count)) qdio_flags |= QDIO_FLAG_PCI_OUT; atomic_add(count, &queue->used_buffers); - rc = do_QDIO(CARD_DDEV(queue->card), qdio_flags, queue->queue_no, index, count); - if (queue->card->options.performance_stats) - queue->card->perf_stats.outbound_do_qdio_time += - qeth_get_micros() - - queue->card->perf_stats.outbound_do_qdio_start_time; if (rc) { queue->card->stats.tx_errors += count; /* ignore temporary SIGA errors without busy condition */ @@ -3529,7 +3510,7 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, unsigned int qdio_err, int rc; if (!qeth_is_cq(card, queue)) - goto out; + return; QETH_CARD_TEXT_(card, 5, "qcqhe%d", first_element); QETH_CARD_TEXT_(card, 5, "qcqhc%d", count); @@ -3538,12 +3519,7 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, unsigned int qdio_err, if (qdio_err) { netif_stop_queue(card->dev); qeth_schedule_recovery(card); - goto out; - } - - if (card->options.performance_stats) { - card->perf_stats.cq_cnt++; - card->perf_stats.cq_start_time = qeth_get_micros(); + return; } for (i = first_element; i < first_element + count; ++i) { @@ -3571,14 +3547,6 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, unsigned int qdio_err, } card->qdio.c_q->next_buf_to_init = (card->qdio.c_q->next_buf_to_init + count) % QDIO_MAX_BUFFERS_PER_Q; - - if (card->options.performance_stats) { - int delta_t = qeth_get_micros(); - delta_t -= card->perf_stats.cq_start_time; - card->perf_stats.cq_time += delta_t; - } -out: - return; } static void qeth_qdio_input_handler(struct ccw_device *ccwdev, @@ -3614,11 +3582,7 @@ static void qeth_qdio_output_handler(struct ccw_device *ccwdev, qeth_schedule_recovery(card); return; } - if (card->options.performance_stats) { - card->perf_stats.outbound_handler_cnt++; - card->perf_stats.outbound_handler_start_time = - qeth_get_micros(); - } + for (i = first_element; i < (first_element + count); ++i) { int bidx = i % QDIO_MAX_BUFFERS_PER_Q; buffer = queue->bufs[bidx]; @@ -3664,9 +3628,6 @@ static void qeth_qdio_output_handler(struct ccw_device *ccwdev, qeth_check_outbound_queue(queue); netif_wake_queue(queue->card->dev); - if (card->options.performance_stats) - card->perf_stats.outbound_handler_time += qeth_get_micros() - - card->perf_stats.outbound_handler_start_time; } /* We cannot use outbound queue 3 for unicast packets on HiperSockets */ @@ -5319,11 +5280,6 @@ int qeth_poll(struct napi_struct *napi, int budget) int done; int new_budget = budget; - if (card->options.performance_stats) { - card->perf_stats.inbound_cnt++; - card->perf_stats.inbound_start_time = qeth_get_micros(); - } - while (1) { if (!card->rx.b_count) { card->rx.qdio_err = 0; @@ -5381,9 +5337,6 @@ int qeth_poll(struct napi_struct *napi, int budget) if (qdio_start_irq(card->data.ccwdev, 0)) napi_schedule(&card->napi); out: - if (card->options.performance_stats) - card->perf_stats.inbound_time += qeth_get_micros() - - card->perf_stats.inbound_start_time; return work_done; } EXPORT_SYMBOL_GPL(qeth_poll); @@ -5984,21 +5937,9 @@ static struct { /* 20 */{"queue 1 buffer usage"}, {"queue 2 buffer usage"}, {"queue 3 buffer usage"}, - {"rx poll time"}, - {"rx poll count"}, - {"rx do_QDIO time"}, - {"rx do_QDIO count"}, - {"tx handler time"}, - {"tx handler count"}, - {"tx time"}, -/* 30 */{"tx count"}, - {"tx do_QDIO time"}, - {"tx do_QDIO count"}, {"tx csum"}, {"tx lin"}, {"tx linfail"}, - {"cq handler count"}, - {"cq handler time"}, {"rx csum"} }; @@ -6046,22 +5987,10 @@ void qeth_core_get_ethtool_stats(struct net_device *dev, atomic_read(&card->qdio.out_qs[2]->used_buffers) : 0; data[22] = (card->qdio.no_out_queues > 3) ? atomic_read(&card->qdio.out_qs[3]->used_buffers) : 0; - data[23] = card->perf_stats.inbound_time; - data[24] = card->perf_stats.inbound_cnt; - data[25] = card->perf_stats.inbound_do_qdio_time; - data[26] = card->perf_stats.inbound_do_qdio_cnt; - data[27] = card->perf_stats.outbound_handler_time; - data[28] = card->perf_stats.outbound_handler_cnt; - data[29] = card->perf_stats.outbound_time; - data[30] = card->perf_stats.outbound_cnt; - data[31] = card->perf_stats.outbound_do_qdio_time; - data[32] = card->perf_stats.outbound_do_qdio_cnt; - data[33] = card->perf_stats.tx_csum; - data[34] = card->perf_stats.tx_lin; - data[35] = card->perf_stats.tx_linfail; - data[36] = card->perf_stats.cq_cnt; - data[37] = card->perf_stats.cq_time; - data[38] = card->perf_stats.rx_csum; + data[23] = card->perf_stats.tx_csum; + data[24] = card->perf_stats.tx_lin; + data[25] = card->perf_stats.tx_linfail; + data[26] = card->perf_stats.rx_csum; } EXPORT_SYMBOL_GPL(qeth_core_get_ethtool_stats); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index c566139da43d..8931dd6e2caa 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -632,11 +632,6 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb, } queue = qeth_get_tx_queue(card, skb, ipv, cast_type); - - if (card->options.performance_stats) { - card->perf_stats.outbound_cnt++; - card->perf_stats.outbound_start_time = qeth_get_micros(); - } netif_stop_queue(dev); if (IS_OSN(card)) @@ -648,9 +643,6 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb, if (!rc) { card->stats.tx_packets++; card->stats.tx_bytes += tx_bytes; - if (card->options.performance_stats) - card->perf_stats.outbound_time += qeth_get_micros() - - card->perf_stats.outbound_start_time; netif_wake_queue(dev); return NETDEV_TX_OK; } else if (rc == -EBUSY) { diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 8eb24c7f2750..012a290f1f4b 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2092,11 +2092,6 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb, goto tx_drop; queue = qeth_get_tx_queue(card, skb, ipv, cast_type); - - if (card->options.performance_stats) { - card->perf_stats.outbound_cnt++; - card->perf_stats.outbound_start_time = qeth_get_micros(); - } netif_stop_queue(dev); if (ipv == 4 || IS_IQD(card)) @@ -2108,9 +2103,6 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb, if (!rc) { card->stats.tx_packets++; card->stats.tx_bytes += tx_bytes; - if (card->options.performance_stats) - card->perf_stats.outbound_time += qeth_get_micros() - - card->perf_stats.outbound_start_time; netif_wake_queue(dev); return NETDEV_TX_OK; } else if (rc == -EBUSY) { From d896ac62d0160457913538f6e83fd386dc329a43 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 15 Feb 2019 19:22:28 +0100 Subject: [PATCH 1269/1436] s390/qeth: move ethtool code into its own file Most of this is self-contained code. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/Makefile | 2 +- drivers/s390/net/qeth_core.h | 12 +- drivers/s390/net/qeth_core_main.c | 317 +--------------------------- drivers/s390/net/qeth_ethtool.c | 333 ++++++++++++++++++++++++++++++ drivers/s390/net/qeth_l2_main.c | 23 +-- drivers/s390/net/qeth_l3_main.c | 10 - 6 files changed, 346 insertions(+), 351 deletions(-) create mode 100644 drivers/s390/net/qeth_ethtool.c diff --git a/drivers/s390/net/Makefile b/drivers/s390/net/Makefile index f2d6bbe57a6f..bc55ec316adb 100644 --- a/drivers/s390/net/Makefile +++ b/drivers/s390/net/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_NETIUCV) += netiucv.o fsm.o obj-$(CONFIG_SMSGIUCV) += smsgiucv.o obj-$(CONFIG_SMSGIUCV_EVENT) += smsgiucv_app.o obj-$(CONFIG_LCS) += lcs.o -qeth-y += qeth_core_sys.o qeth_core_main.o qeth_core_mpc.o +qeth-y += qeth_core_sys.o qeth_core_main.o qeth_core_mpc.o qeth_ethtool.o obj-$(CONFIG_QETH) += qeth.o qeth_l2-y += qeth_l2_main.o qeth_l2_sys.o obj-$(CONFIG_QETH_L2) += qeth_l2.o diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 9928728649eb..5e08f112db13 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -922,6 +921,8 @@ static inline struct qeth_qdio_out_q *qeth_get_tx_queue(struct qeth_card *card, extern struct qeth_discipline qeth_l2_discipline; extern struct qeth_discipline qeth_l3_discipline; +extern const struct ethtool_ops qeth_ethtool_ops; +extern const struct ethtool_ops qeth_osn_ethtool_ops; extern const struct attribute_group *qeth_generic_attr_groups[]; extern const struct attribute_group *qeth_osn_attr_groups[]; extern const struct attribute_group qeth_device_attr_group; @@ -976,20 +977,15 @@ void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, struct qeth_cmd_buffer *qeth_wait_for_buffer(struct qeth_channel *); int qeth_query_switch_attributes(struct qeth_card *card, struct qeth_switch_info *sw_info); +int qeth_query_card_info(struct qeth_card *card, + struct carrier_info *carrier_info); unsigned int qeth_count_elements(struct sk_buff *skb, unsigned int data_offset); int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, struct sk_buff *skb, struct qeth_hdr *hdr, unsigned int offset, unsigned int hd_len, int elements_needed); int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -int qeth_core_get_sset_count(struct net_device *, int); -void qeth_core_get_ethtool_stats(struct net_device *, - struct ethtool_stats *, u64 *); -void qeth_core_get_strings(struct net_device *, u32, u8 *); -void qeth_core_get_drvinfo(struct net_device *, struct ethtool_drvinfo *); void qeth_dbf_longtext(debug_info_t *id, int level, char *text, ...); -int qeth_core_ethtool_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *cmd); int qeth_set_access_ctrl_online(struct qeth_card *card, int fallback); int qeth_configure_cq(struct qeth_card *, enum qeth_cq); int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index e3127e232fb2..96e4876a4daa 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4698,8 +4698,8 @@ static int qeth_query_card_info_cb(struct qeth_card *card, return 0; } -static int qeth_query_card_info(struct qeth_card *card, - struct carrier_info *carrier_info) +int qeth_query_card_info(struct qeth_card *card, + struct carrier_info *carrier_info) { struct qeth_cmd_buffer *iob; @@ -5623,7 +5623,10 @@ static struct net_device *qeth_alloc_netdev(struct qeth_card *card) SET_NETDEV_DEV(dev, &card->gdev->dev); netif_carrier_off(dev); - if (!IS_OSN(card)) { + if (IS_OSN(card)) { + dev->ethtool_ops = &qeth_osn_ethtool_ops; + } else { + dev->ethtool_ops = &qeth_ethtool_ops; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->hw_features |= NETIF_F_SG; dev->vlan_features |= NETIF_F_SG; @@ -5911,314 +5914,6 @@ int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) } EXPORT_SYMBOL_GPL(qeth_do_ioctl); -static struct { - const char str[ETH_GSTRING_LEN]; -} qeth_ethtool_stats_keys[] = { -/* 0 */{"rx skbs"}, - {"rx buffers"}, - {"tx skbs"}, - {"tx buffers"}, - {"tx skbs no packing"}, - {"tx buffers no packing"}, - {"tx skbs packing"}, - {"tx buffers packing"}, - {"tx sg skbs"}, - {"tx buffer elements"}, -/* 10 */{"rx sg skbs"}, - {"rx sg frags"}, - {"rx sg page allocs"}, - {"tx large kbytes"}, - {"tx large count"}, - {"tx pk state ch n->p"}, - {"tx pk state ch p->n"}, - {"tx pk watermark low"}, - {"tx pk watermark high"}, - {"queue 0 buffer usage"}, -/* 20 */{"queue 1 buffer usage"}, - {"queue 2 buffer usage"}, - {"queue 3 buffer usage"}, - {"tx csum"}, - {"tx lin"}, - {"tx linfail"}, - {"rx csum"} -}; - -int qeth_core_get_sset_count(struct net_device *dev, int stringset) -{ - switch (stringset) { - case ETH_SS_STATS: - return (sizeof(qeth_ethtool_stats_keys) / ETH_GSTRING_LEN); - default: - return -EINVAL; - } -} -EXPORT_SYMBOL_GPL(qeth_core_get_sset_count); - -void qeth_core_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, u64 *data) -{ - struct qeth_card *card = dev->ml_priv; - data[0] = card->stats.rx_packets - - card->perf_stats.initial_rx_packets; - data[1] = card->perf_stats.bufs_rec; - data[2] = card->stats.tx_packets - - card->perf_stats.initial_tx_packets; - data[3] = card->perf_stats.bufs_sent; - data[4] = card->stats.tx_packets - card->perf_stats.initial_tx_packets - - card->perf_stats.skbs_sent_pack; - data[5] = card->perf_stats.bufs_sent - card->perf_stats.bufs_sent_pack; - data[6] = card->perf_stats.skbs_sent_pack; - data[7] = card->perf_stats.bufs_sent_pack; - data[8] = card->perf_stats.sg_skbs_sent; - data[9] = card->perf_stats.buf_elements_sent; - data[10] = card->perf_stats.sg_skbs_rx; - data[11] = card->perf_stats.sg_frags_rx; - data[12] = card->perf_stats.sg_alloc_page_rx; - data[13] = (card->perf_stats.large_send_bytes >> 10); - data[14] = card->perf_stats.large_send_cnt; - data[15] = card->perf_stats.sc_dp_p; - data[16] = card->perf_stats.sc_p_dp; - data[17] = QETH_LOW_WATERMARK_PACK; - data[18] = QETH_HIGH_WATERMARK_PACK; - data[19] = atomic_read(&card->qdio.out_qs[0]->used_buffers); - data[20] = (card->qdio.no_out_queues > 1) ? - atomic_read(&card->qdio.out_qs[1]->used_buffers) : 0; - data[21] = (card->qdio.no_out_queues > 2) ? - atomic_read(&card->qdio.out_qs[2]->used_buffers) : 0; - data[22] = (card->qdio.no_out_queues > 3) ? - atomic_read(&card->qdio.out_qs[3]->used_buffers) : 0; - data[23] = card->perf_stats.tx_csum; - data[24] = card->perf_stats.tx_lin; - data[25] = card->perf_stats.tx_linfail; - data[26] = card->perf_stats.rx_csum; -} -EXPORT_SYMBOL_GPL(qeth_core_get_ethtool_stats); - -void qeth_core_get_strings(struct net_device *dev, u32 stringset, u8 *data) -{ - switch (stringset) { - case ETH_SS_STATS: - memcpy(data, &qeth_ethtool_stats_keys, - sizeof(qeth_ethtool_stats_keys)); - break; - default: - WARN_ON(1); - break; - } -} -EXPORT_SYMBOL_GPL(qeth_core_get_strings); - -void qeth_core_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *info) -{ - struct qeth_card *card = dev->ml_priv; - - strlcpy(info->driver, IS_LAYER2(card) ? "qeth_l2" : "qeth_l3", - sizeof(info->driver)); - strlcpy(info->version, "1.0", sizeof(info->version)); - strlcpy(info->fw_version, card->info.mcl_level, - sizeof(info->fw_version)); - snprintf(info->bus_info, sizeof(info->bus_info), "%s/%s/%s", - CARD_RDEV_ID(card), CARD_WDEV_ID(card), CARD_DDEV_ID(card)); -} -EXPORT_SYMBOL_GPL(qeth_core_get_drvinfo); - -/* Helper function to fill 'advertising' and 'supported' which are the same. */ -/* Autoneg and full-duplex are supported and advertised unconditionally. */ -/* Always advertise and support all speeds up to specified, and only one */ -/* specified port type. */ -static void qeth_set_cmd_adv_sup(struct ethtool_link_ksettings *cmd, - int maxspeed, int porttype) -{ - ethtool_link_ksettings_zero_link_mode(cmd, supported); - ethtool_link_ksettings_zero_link_mode(cmd, advertising); - ethtool_link_ksettings_zero_link_mode(cmd, lp_advertising); - - ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg); - - switch (porttype) { - case PORT_TP: - ethtool_link_ksettings_add_link_mode(cmd, supported, TP); - ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); - break; - case PORT_FIBRE: - ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); - ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE); - break; - default: - ethtool_link_ksettings_add_link_mode(cmd, supported, TP); - ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); - WARN_ON_ONCE(1); - } - - /* partially does fall through, to also select lower speeds */ - switch (maxspeed) { - case SPEED_25000: - ethtool_link_ksettings_add_link_mode(cmd, supported, - 25000baseSR_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 25000baseSR_Full); - break; - case SPEED_10000: - ethtool_link_ksettings_add_link_mode(cmd, supported, - 10000baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 10000baseT_Full); - case SPEED_1000: - ethtool_link_ksettings_add_link_mode(cmd, supported, - 1000baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 1000baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, supported, - 1000baseT_Half); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 1000baseT_Half); - case SPEED_100: - ethtool_link_ksettings_add_link_mode(cmd, supported, - 100baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 100baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, supported, - 100baseT_Half); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 100baseT_Half); - case SPEED_10: - ethtool_link_ksettings_add_link_mode(cmd, supported, - 10baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 10baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, supported, - 10baseT_Half); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 10baseT_Half); - /* end fallthrough */ - break; - default: - ethtool_link_ksettings_add_link_mode(cmd, supported, - 10baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 10baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, supported, - 10baseT_Half); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 10baseT_Half); - WARN_ON_ONCE(1); - } -} - -int qeth_core_ethtool_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *cmd) -{ - struct qeth_card *card = netdev->ml_priv; - enum qeth_link_types link_type; - struct carrier_info carrier_info; - int rc; - - if ((card->info.type == QETH_CARD_TYPE_IQD) || (card->info.guestlan)) - link_type = QETH_LINK_TYPE_10GBIT_ETH; - else - link_type = card->info.link_type; - - cmd->base.duplex = DUPLEX_FULL; - cmd->base.autoneg = AUTONEG_ENABLE; - cmd->base.phy_address = 0; - cmd->base.mdio_support = 0; - cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; - cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; - - switch (link_type) { - case QETH_LINK_TYPE_FAST_ETH: - case QETH_LINK_TYPE_LANE_ETH100: - cmd->base.speed = SPEED_100; - cmd->base.port = PORT_TP; - break; - case QETH_LINK_TYPE_GBIT_ETH: - case QETH_LINK_TYPE_LANE_ETH1000: - cmd->base.speed = SPEED_1000; - cmd->base.port = PORT_FIBRE; - break; - case QETH_LINK_TYPE_10GBIT_ETH: - cmd->base.speed = SPEED_10000; - cmd->base.port = PORT_FIBRE; - break; - case QETH_LINK_TYPE_25GBIT_ETH: - cmd->base.speed = SPEED_25000; - cmd->base.port = PORT_FIBRE; - break; - default: - cmd->base.speed = SPEED_10; - cmd->base.port = PORT_TP; - } - qeth_set_cmd_adv_sup(cmd, cmd->base.speed, cmd->base.port); - - /* Check if we can obtain more accurate information. */ - /* If QUERY_CARD_INFO command is not supported or fails, */ - /* just return the heuristics that was filled above. */ - rc = qeth_query_card_info(card, &carrier_info); - if (rc == -EOPNOTSUPP) /* for old hardware, return heuristic */ - return 0; - if (rc) /* report error from the hardware operation */ - return rc; - /* on success, fill in the information got from the hardware */ - - netdev_dbg(netdev, - "card info: card_type=0x%02x, port_mode=0x%04x, port_speed=0x%08x\n", - carrier_info.card_type, - carrier_info.port_mode, - carrier_info.port_speed); - - /* Update attributes for which we've obtained more authoritative */ - /* information, leave the rest the way they where filled above. */ - switch (carrier_info.card_type) { - case CARD_INFO_TYPE_1G_COPPER_A: - case CARD_INFO_TYPE_1G_COPPER_B: - cmd->base.port = PORT_TP; - qeth_set_cmd_adv_sup(cmd, SPEED_1000, cmd->base.port); - break; - case CARD_INFO_TYPE_1G_FIBRE_A: - case CARD_INFO_TYPE_1G_FIBRE_B: - cmd->base.port = PORT_FIBRE; - qeth_set_cmd_adv_sup(cmd, SPEED_1000, cmd->base.port); - break; - case CARD_INFO_TYPE_10G_FIBRE_A: - case CARD_INFO_TYPE_10G_FIBRE_B: - cmd->base.port = PORT_FIBRE; - qeth_set_cmd_adv_sup(cmd, SPEED_10000, cmd->base.port); - break; - } - - switch (carrier_info.port_mode) { - case CARD_INFO_PORTM_FULLDUPLEX: - cmd->base.duplex = DUPLEX_FULL; - break; - case CARD_INFO_PORTM_HALFDUPLEX: - cmd->base.duplex = DUPLEX_HALF; - break; - } - - switch (carrier_info.port_speed) { - case CARD_INFO_PORTS_10M: - cmd->base.speed = SPEED_10; - break; - case CARD_INFO_PORTS_100M: - cmd->base.speed = SPEED_100; - break; - case CARD_INFO_PORTS_1G: - cmd->base.speed = SPEED_1000; - break; - case CARD_INFO_PORTS_10G: - cmd->base.speed = SPEED_10000; - break; - case CARD_INFO_PORTS_25G: - cmd->base.speed = SPEED_25000; - break; - } - - return 0; -} -EXPORT_SYMBOL_GPL(qeth_core_ethtool_get_link_ksettings); - static int qeth_start_csum_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { diff --git a/drivers/s390/net/qeth_ethtool.c b/drivers/s390/net/qeth_ethtool.c new file mode 100644 index 000000000000..a275f31150b2 --- /dev/null +++ b/drivers/s390/net/qeth_ethtool.c @@ -0,0 +1,333 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2018 + */ + +#define KMSG_COMPONENT "qeth" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include "qeth_core.h" + +static struct { + const char str[ETH_GSTRING_LEN]; +} qeth_ethtool_stats_keys[] = { +/* 0 */{"rx skbs"}, + {"rx buffers"}, + {"tx skbs"}, + {"tx buffers"}, + {"tx skbs no packing"}, + {"tx buffers no packing"}, + {"tx skbs packing"}, + {"tx buffers packing"}, + {"tx sg skbs"}, + {"tx buffer elements"}, +/* 10 */{"rx sg skbs"}, + {"rx sg frags"}, + {"rx sg page allocs"}, + {"tx large kbytes"}, + {"tx large count"}, + {"tx pk state ch n->p"}, + {"tx pk state ch p->n"}, + {"tx pk watermark low"}, + {"tx pk watermark high"}, + {"queue 0 buffer usage"}, +/* 20 */{"queue 1 buffer usage"}, + {"queue 2 buffer usage"}, + {"queue 3 buffer usage"}, + {"tx csum"}, + {"tx lin"}, + {"tx linfail"}, + {"rx csum"} +}; + +static int qeth_get_sset_count(struct net_device *dev, int stringset) +{ + switch (stringset) { + case ETH_SS_STATS: + return (sizeof(qeth_ethtool_stats_keys) / ETH_GSTRING_LEN); + default: + return -EINVAL; + } +} + +static void qeth_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct qeth_card *card = dev->ml_priv; + + data[0] = card->stats.rx_packets - + card->perf_stats.initial_rx_packets; + data[1] = card->perf_stats.bufs_rec; + data[2] = card->stats.tx_packets - + card->perf_stats.initial_tx_packets; + data[3] = card->perf_stats.bufs_sent; + data[4] = card->stats.tx_packets - card->perf_stats.initial_tx_packets + - card->perf_stats.skbs_sent_pack; + data[5] = card->perf_stats.bufs_sent - card->perf_stats.bufs_sent_pack; + data[6] = card->perf_stats.skbs_sent_pack; + data[7] = card->perf_stats.bufs_sent_pack; + data[8] = card->perf_stats.sg_skbs_sent; + data[9] = card->perf_stats.buf_elements_sent; + data[10] = card->perf_stats.sg_skbs_rx; + data[11] = card->perf_stats.sg_frags_rx; + data[12] = card->perf_stats.sg_alloc_page_rx; + data[13] = (card->perf_stats.large_send_bytes >> 10); + data[14] = card->perf_stats.large_send_cnt; + data[15] = card->perf_stats.sc_dp_p; + data[16] = card->perf_stats.sc_p_dp; + data[17] = QETH_LOW_WATERMARK_PACK; + data[18] = QETH_HIGH_WATERMARK_PACK; + data[19] = atomic_read(&card->qdio.out_qs[0]->used_buffers); + data[20] = (card->qdio.no_out_queues > 1) ? + atomic_read(&card->qdio.out_qs[1]->used_buffers) : 0; + data[21] = (card->qdio.no_out_queues > 2) ? + atomic_read(&card->qdio.out_qs[2]->used_buffers) : 0; + data[22] = (card->qdio.no_out_queues > 3) ? + atomic_read(&card->qdio.out_qs[3]->used_buffers) : 0; + data[23] = card->perf_stats.tx_csum; + data[24] = card->perf_stats.tx_lin; + data[25] = card->perf_stats.tx_linfail; + data[26] = card->perf_stats.rx_csum; +} + +static void qeth_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + switch (stringset) { + case ETH_SS_STATS: + memcpy(data, &qeth_ethtool_stats_keys, + sizeof(qeth_ethtool_stats_keys)); + break; + default: + WARN_ON(1); + break; + } +} + +static void qeth_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + struct qeth_card *card = dev->ml_priv; + + strlcpy(info->driver, IS_LAYER2(card) ? "qeth_l2" : "qeth_l3", + sizeof(info->driver)); + strlcpy(info->version, "1.0", sizeof(info->version)); + strlcpy(info->fw_version, card->info.mcl_level, + sizeof(info->fw_version)); + snprintf(info->bus_info, sizeof(info->bus_info), "%s/%s/%s", + CARD_RDEV_ID(card), CARD_WDEV_ID(card), CARD_DDEV_ID(card)); +} + +/* Helper function to fill 'advertising' and 'supported' which are the same. */ +/* Autoneg and full-duplex are supported and advertised unconditionally. */ +/* Always advertise and support all speeds up to specified, and only one */ +/* specified port type. */ +static void qeth_set_cmd_adv_sup(struct ethtool_link_ksettings *cmd, + int maxspeed, int porttype) +{ + ethtool_link_ksettings_zero_link_mode(cmd, supported); + ethtool_link_ksettings_zero_link_mode(cmd, advertising); + ethtool_link_ksettings_zero_link_mode(cmd, lp_advertising); + + ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg); + + switch (porttype) { + case PORT_TP: + ethtool_link_ksettings_add_link_mode(cmd, supported, TP); + ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); + break; + case PORT_FIBRE: + ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); + ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE); + break; + default: + ethtool_link_ksettings_add_link_mode(cmd, supported, TP); + ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); + WARN_ON_ONCE(1); + } + + /* partially does fall through, to also select lower speeds */ + switch (maxspeed) { + case SPEED_25000: + ethtool_link_ksettings_add_link_mode(cmd, supported, + 25000baseSR_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 25000baseSR_Full); + break; + case SPEED_10000: + ethtool_link_ksettings_add_link_mode(cmd, supported, + 10000baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 10000baseT_Full); + /* fall through */ + case SPEED_1000: + ethtool_link_ksettings_add_link_mode(cmd, supported, + 1000baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 1000baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, supported, + 1000baseT_Half); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 1000baseT_Half); + /* fall through */ + case SPEED_100: + ethtool_link_ksettings_add_link_mode(cmd, supported, + 100baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 100baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, supported, + 100baseT_Half); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 100baseT_Half); + /* fall through */ + case SPEED_10: + ethtool_link_ksettings_add_link_mode(cmd, supported, + 10baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 10baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, supported, + 10baseT_Half); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 10baseT_Half); + break; + default: + ethtool_link_ksettings_add_link_mode(cmd, supported, + 10baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 10baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, supported, + 10baseT_Half); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 10baseT_Half); + WARN_ON_ONCE(1); + } +} + + +static int qeth_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct qeth_card *card = netdev->ml_priv; + enum qeth_link_types link_type; + struct carrier_info carrier_info; + int rc; + + if (IS_IQD(card) || IS_VM_NIC(card)) + link_type = QETH_LINK_TYPE_10GBIT_ETH; + else + link_type = card->info.link_type; + + cmd->base.duplex = DUPLEX_FULL; + cmd->base.autoneg = AUTONEG_ENABLE; + cmd->base.phy_address = 0; + cmd->base.mdio_support = 0; + cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; + + switch (link_type) { + case QETH_LINK_TYPE_FAST_ETH: + case QETH_LINK_TYPE_LANE_ETH100: + cmd->base.speed = SPEED_100; + cmd->base.port = PORT_TP; + break; + case QETH_LINK_TYPE_GBIT_ETH: + case QETH_LINK_TYPE_LANE_ETH1000: + cmd->base.speed = SPEED_1000; + cmd->base.port = PORT_FIBRE; + break; + case QETH_LINK_TYPE_10GBIT_ETH: + cmd->base.speed = SPEED_10000; + cmd->base.port = PORT_FIBRE; + break; + case QETH_LINK_TYPE_25GBIT_ETH: + cmd->base.speed = SPEED_25000; + cmd->base.port = PORT_FIBRE; + break; + default: + cmd->base.speed = SPEED_10; + cmd->base.port = PORT_TP; + } + qeth_set_cmd_adv_sup(cmd, cmd->base.speed, cmd->base.port); + + /* Check if we can obtain more accurate information. */ + /* If QUERY_CARD_INFO command is not supported or fails, */ + /* just return the heuristics that was filled above. */ + rc = qeth_query_card_info(card, &carrier_info); + if (rc == -EOPNOTSUPP) /* for old hardware, return heuristic */ + return 0; + if (rc) /* report error from the hardware operation */ + return rc; + /* on success, fill in the information got from the hardware */ + + netdev_dbg(netdev, + "card info: card_type=0x%02x, port_mode=0x%04x, port_speed=0x%08x\n", + carrier_info.card_type, + carrier_info.port_mode, + carrier_info.port_speed); + + /* Update attributes for which we've obtained more authoritative */ + /* information, leave the rest the way they where filled above. */ + switch (carrier_info.card_type) { + case CARD_INFO_TYPE_1G_COPPER_A: + case CARD_INFO_TYPE_1G_COPPER_B: + cmd->base.port = PORT_TP; + qeth_set_cmd_adv_sup(cmd, SPEED_1000, cmd->base.port); + break; + case CARD_INFO_TYPE_1G_FIBRE_A: + case CARD_INFO_TYPE_1G_FIBRE_B: + cmd->base.port = PORT_FIBRE; + qeth_set_cmd_adv_sup(cmd, SPEED_1000, cmd->base.port); + break; + case CARD_INFO_TYPE_10G_FIBRE_A: + case CARD_INFO_TYPE_10G_FIBRE_B: + cmd->base.port = PORT_FIBRE; + qeth_set_cmd_adv_sup(cmd, SPEED_10000, cmd->base.port); + break; + } + + switch (carrier_info.port_mode) { + case CARD_INFO_PORTM_FULLDUPLEX: + cmd->base.duplex = DUPLEX_FULL; + break; + case CARD_INFO_PORTM_HALFDUPLEX: + cmd->base.duplex = DUPLEX_HALF; + break; + } + + switch (carrier_info.port_speed) { + case CARD_INFO_PORTS_10M: + cmd->base.speed = SPEED_10; + break; + case CARD_INFO_PORTS_100M: + cmd->base.speed = SPEED_100; + break; + case CARD_INFO_PORTS_1G: + cmd->base.speed = SPEED_1000; + break; + case CARD_INFO_PORTS_10G: + cmd->base.speed = SPEED_10000; + break; + case CARD_INFO_PORTS_25G: + cmd->base.speed = SPEED_25000; + break; + } + + return 0; +} + +const struct ethtool_ops qeth_ethtool_ops = { + .get_link = ethtool_op_get_link, + .get_strings = qeth_get_strings, + .get_ethtool_stats = qeth_get_ethtool_stats, + .get_sset_count = qeth_get_sset_count, + .get_drvinfo = qeth_get_drvinfo, + .get_link_ksettings = qeth_get_link_ksettings, +}; + +const struct ethtool_ops qeth_osn_ethtool_ops = { + .get_strings = qeth_get_strings, + .get_ethtool_stats = qeth_get_ethtool_stats, + .get_sset_count = qeth_get_sset_count, + .get_drvinfo = qeth_get_drvinfo, +}; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 8931dd6e2caa..dbcba77fd94c 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -696,22 +696,6 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev) unregister_netdev(card->dev); } -static const struct ethtool_ops qeth_l2_ethtool_ops = { - .get_link = ethtool_op_get_link, - .get_strings = qeth_core_get_strings, - .get_ethtool_stats = qeth_core_get_ethtool_stats, - .get_sset_count = qeth_core_get_sset_count, - .get_drvinfo = qeth_core_get_drvinfo, - .get_link_ksettings = qeth_core_ethtool_get_link_ksettings, -}; - -static const struct ethtool_ops qeth_l2_osn_ops = { - .get_strings = qeth_core_get_strings, - .get_ethtool_stats = qeth_core_get_ethtool_stats, - .get_sset_count = qeth_core_get_sset_count, - .get_drvinfo = qeth_core_get_drvinfo, -}; - static const struct net_device_ops qeth_l2_netdev_ops = { .ndo_open = qeth_open, .ndo_stop = qeth_stop, @@ -735,13 +719,10 @@ static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok) card->dev->priv_flags |= IFF_UNICAST_FLT; card->dev->netdev_ops = &qeth_l2_netdev_ops; - if (card->info.type == QETH_CARD_TYPE_OSN) { - card->dev->ethtool_ops = &qeth_l2_osn_ops; + if (IS_OSN(card)) card->dev->flags |= IFF_NOARP; - } else { - card->dev->ethtool_ops = &qeth_l2_ethtool_ops; + else card->dev->needed_headroom = sizeof(struct qeth_hdr); - } if (IS_OSM(card)) { card->dev->features |= NETIF_F_VLAN_CHALLENGED; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 012a290f1f4b..40c7d53f5512 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2117,15 +2117,6 @@ tx_drop: return NETDEV_TX_OK; } -static const struct ethtool_ops qeth_l3_ethtool_ops = { - .get_link = ethtool_op_get_link, - .get_strings = qeth_core_get_strings, - .get_ethtool_stats = qeth_core_get_ethtool_stats, - .get_sset_count = qeth_core_get_sset_count, - .get_drvinfo = qeth_core_get_drvinfo, - .get_link_ksettings = qeth_core_ethtool_get_link_ksettings, -}; - /* * we need NOARP for IPv4 but we want neighbor solicitation for IPv6. Setting * NOARP on the netdevice is no option because it also turns off neighbor @@ -2247,7 +2238,6 @@ static int qeth_l3_setup_netdev(struct qeth_card *card, bool carrier_ok) return -ENODEV; card->dev->needed_headroom = headroom; - card->dev->ethtool_ops = &qeth_l3_ethtool_ops; card->dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; From b0abc4f5df76ceed497ffd959fd7839f9cd38a07 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 15 Feb 2019 19:22:29 +0100 Subject: [PATCH 1270/1436] s390/qeth: overhaul ethtool statistics Accumulate per-TX queue statistics, and increase their size to 64 bit. Don't bother with enabling/disabling the statistics, the overhead is negligible. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 90 ++++++++++-------- drivers/s390/net/qeth_core_main.c | 127 +++++++++++++------------ drivers/s390/net/qeth_core_sys.c | 39 ++++---- drivers/s390/net/qeth_ethtool.c | 152 +++++++++++++++++------------- drivers/s390/net/qeth_l2_main.c | 28 +++--- drivers/s390/net/qeth_l3_main.c | 36 +++---- 6 files changed, 256 insertions(+), 216 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 5e08f112db13..c0c46be0b251 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -34,6 +34,8 @@ #include #include +#include + #include "qeth_core_mpc.h" /** @@ -112,35 +114,6 @@ static inline u32 qeth_get_device_id(struct ccw_device *cdev) #define CCW_DEVID(cdev) (qeth_get_device_id(cdev)) #define CARD_DEVID(card) (CCW_DEVID(CARD_RDEV(card))) -/** - * card stuff - */ -struct qeth_perf_stats { - unsigned int bufs_rec; - unsigned int bufs_sent; - unsigned int buf_elements_sent; - - unsigned int skbs_sent_pack; - unsigned int bufs_sent_pack; - - unsigned int sc_dp_p; - unsigned int sc_p_dp; - unsigned int large_send_bytes; - unsigned int large_send_cnt; - unsigned int sg_skbs_sent; - /* initial values when measuring starts */ - unsigned long initial_rx_packets; - unsigned long initial_tx_packets; - /* inbound scatter gather data */ - unsigned int sg_skbs_rx; - unsigned int sg_frags_rx; - unsigned int sg_alloc_page_rx; - unsigned int tx_csum; - unsigned int tx_lin; - unsigned int tx_linfail; - unsigned int rx_csum; -}; - /* Routing stuff */ struct qeth_routing_info { enum qeth_routing_types type; @@ -470,10 +443,54 @@ enum qeth_out_q_states { QETH_OUT_Q_LOCKED_FLUSH, }; +#define QETH_CARD_STAT_ADD(_c, _stat, _val) ((_c)->stats._stat += (_val)) +#define QETH_CARD_STAT_INC(_c, _stat) QETH_CARD_STAT_ADD(_c, _stat, 1) + +#define QETH_TXQ_STAT_ADD(_q, _stat, _val) ((_q)->stats._stat += (_val)) +#define QETH_TXQ_STAT_INC(_q, _stat) QETH_TXQ_STAT_ADD(_q, _stat, 1) + +struct qeth_card_stats { + u64 rx_bufs; + u64 rx_skb_csum; + u64 rx_sg_skbs; + u64 rx_sg_frags; + u64 rx_sg_alloc_page; + + /* rtnl_link_stats64 */ + u64 rx_packets; + u64 rx_bytes; + u64 rx_errors; + u64 rx_dropped; + u64 rx_multicast; + u64 tx_errors; +}; + +struct qeth_out_q_stats { + u64 bufs; + u64 bufs_pack; + u64 buf_elements; + u64 skbs_pack; + u64 skbs_sg; + u64 skbs_csum; + u64 skbs_tso; + u64 skbs_linearized; + u64 skbs_linearized_fail; + u64 tso_bytes; + u64 packing_mode_switch; + + /* rtnl_link_stats64 */ + u64 tx_packets; + u64 tx_bytes; + u64 tx_errors; + u64 tx_dropped; + u64 tx_carrier_errors; +}; + struct qeth_qdio_out_q { struct qdio_buffer *qdio_bufs[QDIO_MAX_BUFFERS_PER_Q]; struct qeth_qdio_out_buffer *bufs[QDIO_MAX_BUFFERS_PER_Q]; struct qdio_outbuf_state *bufstates; /* convenience pointer */ + struct qeth_out_q_stats stats; int queue_no; struct qeth_card *card; atomic_t state; @@ -677,7 +694,6 @@ struct qeth_card_options { struct qeth_vnicc_info vnicc; /* VNICC options */ int fake_broadcast; enum qeth_discipline_id layer; - int performance_stats; int rx_sg_cb; enum qeth_ipa_isolation_modes isolation; enum qeth_ipa_isolation_modes prev_isolation; @@ -753,8 +769,7 @@ struct qeth_card { struct qeth_channel data; struct net_device *dev; - struct net_device_stats stats; - + struct qeth_card_stats stats; struct qeth_card_info info; struct qeth_token token; struct qeth_seqno seqno; @@ -777,7 +792,6 @@ struct qeth_card { struct list_head cmd_waiter_list; /* QDIO buffer handling */ struct qeth_qdio_info qdio; - struct qeth_perf_stats perf_stats; int read_or_write_problem; struct qeth_osn_info osn_info; struct qeth_discipline *discipline; @@ -858,8 +872,7 @@ static inline void qeth_rx_csum(struct qeth_card *card, struct sk_buff *skb, if ((card->dev->features & NETIF_F_RXCSUM) && (flags & QETH_HDR_EXT_CSUM_TRANSP_REQ)) { skb->ip_summed = CHECKSUM_UNNECESSARY; - if (card->options.performance_stats) - card->perf_stats.rx_csum++; + QETH_CARD_STAT_INC(card, rx_skb_csum); } else { skb->ip_summed = CHECKSUM_NONE; } @@ -966,7 +979,6 @@ void qeth_clear_working_pool_list(struct qeth_card *); void qeth_clear_cmd_buffers(struct qeth_channel *); void qeth_clear_qdio_buffers(struct qeth_card *); void qeth_setadp_promisc_mode(struct qeth_card *); -struct net_device_stats *qeth_get_stats(struct net_device *); int qeth_setadpparms_change_macaddr(struct qeth_card *); void qeth_tx_timeout(struct net_device *); void qeth_prepare_control_data(struct qeth_card *, int, @@ -1002,14 +1014,16 @@ netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t); netdev_features_t qeth_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); +void qeth_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats); int qeth_open(struct net_device *dev); int qeth_stop(struct net_device *dev); int qeth_vm_request_mac(struct qeth_card *card); int qeth_xmit(struct qeth_card *card, struct sk_buff *skb, struct qeth_qdio_out_q *queue, int ipv, int cast_type, - void (*fill_header)(struct qeth_card *card, struct qeth_hdr *hdr, - struct sk_buff *skb, int ipv, int cast_type, + void (*fill_header)(struct qeth_qdio_out_q *queue, + struct qeth_hdr *hdr, struct sk_buff *skb, + int ipv, int cast_type, unsigned int data_len)); /* exports for OSN */ diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 96e4876a4daa..6d93bb48b1d9 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -2703,8 +2703,7 @@ static struct qeth_buffer_pool_entry *qeth_find_free_buffer_pool_entry( } else { free_page((unsigned long)entry->elements[i]); entry->elements[i] = page_address(page); - if (card->options.performance_stats) - card->perf_stats.sg_alloc_page_rx++; + QETH_CARD_STAT_INC(card, rx_sg_alloc_page); } } } @@ -3180,7 +3179,7 @@ static int qeth_check_qdio_errors(struct qeth_card *card, buf->element[14].sflags); QETH_CARD_TEXT_(card, 2, " qerr=%X", qdio_error); if ((buf->element[15].sflags) == 0x12) { - card->stats.rx_dropped++; + QETH_CARD_STAT_INC(card, rx_dropped); return 0; } else return 1; @@ -3322,8 +3321,7 @@ static void qeth_switch_to_packing_if_needed(struct qeth_qdio_out_q *queue) >= QETH_HIGH_WATERMARK_PACK){ /* switch non-PACKING -> PACKING */ QETH_CARD_TEXT(queue->card, 6, "np->pack"); - if (queue->card->options.performance_stats) - queue->card->perf_stats.sc_dp_p++; + QETH_TXQ_STAT_INC(queue, packing_mode_switch); queue->do_pack = 1; } } @@ -3342,8 +3340,7 @@ static int qeth_switch_to_nonpacking_if_needed(struct qeth_qdio_out_q *queue) <= QETH_LOW_WATERMARK_PACK) { /* switch PACKING -> non-PACKING */ QETH_CARD_TEXT(queue->card, 6, "pack->np"); - if (queue->card->options.performance_stats) - queue->card->perf_stats.sc_p_dp++; + QETH_TXQ_STAT_INC(queue, packing_mode_switch); queue->do_pack = 0; return qeth_prep_flush_pack_buffer(queue); } @@ -3397,6 +3394,7 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index, } } + QETH_TXQ_STAT_ADD(queue, bufs, count); netif_trans_update(queue->card->dev); qdio_flags = QDIO_FLAG_SYNC_OUTPUT; if (atomic_read(&queue->set_pci_flags_count)) @@ -3405,7 +3403,7 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index, rc = do_QDIO(CARD_DDEV(queue->card), qdio_flags, queue->queue_no, index, count); if (rc) { - queue->card->stats.tx_errors += count; + QETH_TXQ_STAT_ADD(queue, tx_errors, count); /* ignore temporary SIGA errors without busy condition */ if (rc == -ENOBUFS) return; @@ -3420,8 +3418,6 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index, qeth_schedule_recovery(queue->card); return; } - if (queue->card->options.performance_stats) - queue->card->perf_stats.bufs_sent += count; } static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue) @@ -3452,10 +3448,8 @@ static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue) if (!flush_cnt && !atomic_read(&queue->set_pci_flags_count)) flush_cnt += qeth_prep_flush_pack_buffer(queue); - if (queue->card->options.performance_stats && - q_was_packing) - queue->card->perf_stats.bufs_sent_pack += - flush_cnt; + if (q_was_packing) + QETH_TXQ_STAT_ADD(queue, bufs_pack, flush_cnt); if (flush_cnt) qeth_flush_buffers(queue, index, flush_cnt); atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED); @@ -3748,11 +3742,12 @@ EXPORT_SYMBOL_GPL(qeth_count_elements); * The number of needed buffer elements is returned in @elements. * Error to create the hdr is indicated by returning with < 0. */ -static int qeth_add_hw_header(struct qeth_card *card, struct sk_buff *skb, - struct qeth_hdr **hdr, unsigned int hdr_len, - unsigned int proto_len, unsigned int *elements) +static int qeth_add_hw_header(struct qeth_qdio_out_q *queue, + struct sk_buff *skb, struct qeth_hdr **hdr, + unsigned int hdr_len, unsigned int proto_len, + unsigned int *elements) { - const unsigned int max_elements = QETH_MAX_BUFFER_ELEMENTS(card); + const unsigned int max_elements = QETH_MAX_BUFFER_ELEMENTS(queue->card); const unsigned int contiguous = proto_len ? proto_len : 1; unsigned int __elements; addr_t start, end; @@ -3791,15 +3786,12 @@ check_layout: } rc = skb_linearize(skb); - if (card->options.performance_stats) { - if (rc) - card->perf_stats.tx_linfail++; - else - card->perf_stats.tx_lin++; - } - if (rc) + if (rc) { + QETH_TXQ_STAT_INC(queue, skbs_linearized_fail); return rc; + } + QETH_TXQ_STAT_INC(queue, skbs_linearized); /* Linearization changed the layout, re-evaluate: */ goto check_layout; } @@ -3923,9 +3915,8 @@ static int qeth_fill_buffer(struct qeth_qdio_out_q *queue, QETH_CARD_TEXT(queue->card, 6, "fillbfnp"); } else { QETH_CARD_TEXT(queue->card, 6, "fillbfpa"); - if (queue->card->options.performance_stats) - queue->card->perf_stats.skbs_sent_pack++; + QETH_TXQ_STAT_INC(queue, skbs_pack); /* If the buffer still has free elements, keep using it. */ if (buf->next_element_to_fill < QETH_MAX_BUFFER_ELEMENTS(queue->card)) @@ -4039,8 +4030,8 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, } out: /* at this point the queue is UNLOCKED again */ - if (queue->card->options.performance_stats && do_pack) - queue->card->perf_stats.bufs_sent_pack += flush_count; + if (do_pack) + QETH_TXQ_STAT_ADD(queue, bufs_pack, flush_count); return rc; } @@ -4064,8 +4055,9 @@ static void qeth_fill_tso_ext(struct qeth_hdr_tso *hdr, int qeth_xmit(struct qeth_card *card, struct sk_buff *skb, struct qeth_qdio_out_q *queue, int ipv, int cast_type, - void (*fill_header)(struct qeth_card *card, struct qeth_hdr *hdr, - struct sk_buff *skb, int ipv, int cast_type, + void (*fill_header)(struct qeth_qdio_out_q *queue, + struct qeth_hdr *hdr, struct sk_buff *skb, + int ipv, int cast_type, unsigned int data_len)) { unsigned int proto_len, hw_hdr_len; @@ -4090,7 +4082,7 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb, if (rc) return rc; - push_len = qeth_add_hw_header(card, skb, &hdr, hw_hdr_len, proto_len, + push_len = qeth_add_hw_header(queue, skb, &hdr, hw_hdr_len, proto_len, &elements); if (push_len < 0) return push_len; @@ -4100,7 +4092,7 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb, data_offset = push_len + proto_len; } memset(hdr, 0, hw_hdr_len); - fill_header(card, hdr, skb, ipv, cast_type, frame_len); + fill_header(queue, hdr, skb, ipv, cast_type, frame_len); if (is_tso) qeth_fill_tso_ext((struct qeth_hdr_tso *) hdr, frame_len - proto_len, skb, proto_len); @@ -4117,14 +4109,12 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb, } if (!rc) { - if (card->options.performance_stats) { - card->perf_stats.buf_elements_sent += elements; - if (is_sg) - card->perf_stats.sg_skbs_sent++; - if (is_tso) { - card->perf_stats.large_send_bytes += frame_len; - card->perf_stats.large_send_cnt++; - } + QETH_TXQ_STAT_ADD(queue, buf_elements, elements); + if (is_sg) + QETH_TXQ_STAT_INC(queue, skbs_sg); + if (is_tso) { + QETH_TXQ_STAT_INC(queue, skbs_tso); + QETH_TXQ_STAT_ADD(queue, tso_bytes, frame_len); } } else { if (!push_len) @@ -4183,18 +4173,6 @@ void qeth_setadp_promisc_mode(struct qeth_card *card) } EXPORT_SYMBOL_GPL(qeth_setadp_promisc_mode); -struct net_device_stats *qeth_get_stats(struct net_device *dev) -{ - struct qeth_card *card; - - card = dev->ml_priv; - - QETH_CARD_TEXT(card, 5, "getstat"); - - return &card->stats; -} -EXPORT_SYMBOL_GPL(qeth_get_stats); - static int qeth_setadpparms_change_macaddr_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { @@ -4388,7 +4366,7 @@ void qeth_tx_timeout(struct net_device *dev) card = dev->ml_priv; QETH_CARD_TEXT(card, 4, "txtimeo"); - card->stats.tx_errors++; + QETH_CARD_STAT_INC(card, tx_errors); qeth_schedule_recovery(card); } EXPORT_SYMBOL_GPL(qeth_tx_timeout); @@ -5246,7 +5224,7 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card, QETH_CARD_TEXT(card, 4, "unexeob"); QETH_CARD_HEX(card, 2, buffer, sizeof(void *)); dev_kfree_skb_any(skb); - card->stats.rx_errors++; + QETH_CARD_STAT_INC(card, rx_errors); return NULL; } element++; @@ -5258,16 +5236,17 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card, } *__element = element; *__offset = offset; - if (use_rx_sg && card->options.performance_stats) { - card->perf_stats.sg_skbs_rx++; - card->perf_stats.sg_frags_rx += skb_shinfo(skb)->nr_frags; + if (use_rx_sg) { + QETH_CARD_STAT_INC(card, rx_sg_skbs); + QETH_CARD_STAT_ADD(card, rx_sg_frags, + skb_shinfo(skb)->nr_frags); } return skb; no_mem: if (net_ratelimit()) { QETH_CARD_TEXT(card, 2, "noskbmem"); } - card->stats.rx_dropped++; + QETH_CARD_STAT_INC(card, rx_dropped); return NULL; } EXPORT_SYMBOL_GPL(qeth_core_get_next_skb); @@ -5308,8 +5287,7 @@ int qeth_poll(struct napi_struct *napi, int budget) done = 1; if (done) { - if (card->options.performance_stats) - card->perf_stats.bufs_rec++; + QETH_CARD_STAT_INC(card, rx_bufs); qeth_put_buffer_pool_entry(card, buffer->pool_entry); qeth_queue_input_buffer(card, card->rx.b_index); @@ -6223,6 +6201,33 @@ netdev_features_t qeth_features_check(struct sk_buff *skb, } EXPORT_SYMBOL_GPL(qeth_features_check); +void qeth_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + struct qeth_card *card = dev->ml_priv; + struct qeth_qdio_out_q *queue; + unsigned int i; + + QETH_CARD_TEXT(card, 5, "getstat"); + + stats->rx_packets = card->stats.rx_packets; + stats->rx_bytes = card->stats.rx_bytes; + stats->rx_errors = card->stats.rx_errors; + stats->rx_dropped = card->stats.rx_dropped; + stats->multicast = card->stats.rx_multicast; + stats->tx_errors = card->stats.tx_errors; + + for (i = 0; i < card->qdio.no_out_queues; i++) { + queue = card->qdio.out_qs[i]; + + stats->tx_packets += queue->stats.tx_packets; + stats->tx_bytes += queue->stats.tx_bytes; + stats->tx_errors += queue->stats.tx_errors; + stats->tx_dropped += queue->stats.tx_dropped; + stats->tx_carrier_errors += queue->stats.tx_carrier_errors; + } +} +EXPORT_SYMBOL_GPL(qeth_get_stats64); + int qeth_open(struct net_device *dev) { struct qeth_card *card = dev->ml_priv; diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index 485a335669ba..8b223cc2c19b 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -336,35 +336,36 @@ static ssize_t qeth_dev_performance_stats_show(struct device *dev, if (!card) return -EINVAL; - return sprintf(buf, "%i\n", card->options.performance_stats ? 1:0); + return sprintf(buf, "1\n"); } static ssize_t qeth_dev_performance_stats_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct qeth_card *card = dev_get_drvdata(dev); - char *tmp; - int i, rc = 0; + struct qeth_qdio_out_q *queue; + unsigned int i; + bool reset; + int rc; if (!card) return -EINVAL; - mutex_lock(&card->conf_mutex); - i = simple_strtoul(buf, &tmp, 16); - if ((i == 0) || (i == 1)) { - if (i == card->options.performance_stats) - goto out; - card->options.performance_stats = i; - if (i == 0) - memset(&card->perf_stats, 0, - sizeof(struct qeth_perf_stats)); - card->perf_stats.initial_rx_packets = card->stats.rx_packets; - card->perf_stats.initial_tx_packets = card->stats.tx_packets; - } else - rc = -EINVAL; -out: - mutex_unlock(&card->conf_mutex); - return rc ? rc : count; + rc = kstrtobool(buf, &reset); + if (rc) + return rc; + + if (reset) { + memset(&card->stats, 0, sizeof(card->stats)); + for (i = 0; i < card->qdio.no_out_queues; i++) { + queue = card->qdio.out_qs[i]; + if (!queue) + break; + memset(&queue->stats, 0, sizeof(queue->stats)); + } + } + + return count; } static DEVICE_ATTR(performance_stats, 0644, qeth_dev_performance_stats_show, diff --git a/drivers/s390/net/qeth_ethtool.c b/drivers/s390/net/qeth_ethtool.c index a275f31150b2..9ce5ef5fcf7a 100644 --- a/drivers/s390/net/qeth_ethtool.c +++ b/drivers/s390/net/qeth_ethtool.c @@ -9,43 +9,82 @@ #include #include "qeth_core.h" -static struct { - const char str[ETH_GSTRING_LEN]; -} qeth_ethtool_stats_keys[] = { -/* 0 */{"rx skbs"}, - {"rx buffers"}, - {"tx skbs"}, - {"tx buffers"}, - {"tx skbs no packing"}, - {"tx buffers no packing"}, - {"tx skbs packing"}, - {"tx buffers packing"}, - {"tx sg skbs"}, - {"tx buffer elements"}, -/* 10 */{"rx sg skbs"}, - {"rx sg frags"}, - {"rx sg page allocs"}, - {"tx large kbytes"}, - {"tx large count"}, - {"tx pk state ch n->p"}, - {"tx pk state ch p->n"}, - {"tx pk watermark low"}, - {"tx pk watermark high"}, - {"queue 0 buffer usage"}, -/* 20 */{"queue 1 buffer usage"}, - {"queue 2 buffer usage"}, - {"queue 3 buffer usage"}, - {"tx csum"}, - {"tx lin"}, - {"tx linfail"}, - {"rx csum"} + +#define QETH_TXQ_STAT(_name, _stat) { \ + .name = _name, \ + .offset = offsetof(struct qeth_out_q_stats, _stat) \ +} + +#define QETH_CARD_STAT(_name, _stat) { \ + .name = _name, \ + .offset = offsetof(struct qeth_card_stats, _stat) \ +} + +struct qeth_stats { + char name[ETH_GSTRING_LEN]; + unsigned int offset; }; +static const struct qeth_stats txq_stats[] = { + QETH_TXQ_STAT("IO buffers", bufs), + QETH_TXQ_STAT("IO buffer elements", buf_elements), + QETH_TXQ_STAT("packed IO buffers", bufs_pack), + QETH_TXQ_STAT("skbs", tx_packets), + QETH_TXQ_STAT("packed skbs", skbs_pack), + QETH_TXQ_STAT("SG skbs", skbs_sg), + QETH_TXQ_STAT("HW csum skbs", skbs_csum), + QETH_TXQ_STAT("TSO skbs", skbs_tso), + QETH_TXQ_STAT("linearized skbs", skbs_linearized), + QETH_TXQ_STAT("linearized+error skbs", skbs_linearized_fail), + QETH_TXQ_STAT("TSO bytes", tso_bytes), + QETH_TXQ_STAT("Packing mode switches", packing_mode_switch), +}; + +static const struct qeth_stats card_stats[] = { + QETH_CARD_STAT("rx0 IO buffers", rx_bufs), + QETH_CARD_STAT("rx0 HW csum skbs", rx_skb_csum), + QETH_CARD_STAT("rx0 SG skbs", rx_sg_skbs), + QETH_CARD_STAT("rx0 SG page frags", rx_sg_frags), + QETH_CARD_STAT("rx0 SG page allocs", rx_sg_alloc_page), +}; + +#define TXQ_STATS_LEN ARRAY_SIZE(txq_stats) +#define CARD_STATS_LEN ARRAY_SIZE(card_stats) + +static void qeth_add_stat_data(u64 **dst, void *src, + const struct qeth_stats stats[], + unsigned int size) +{ + unsigned int i; + char *stat; + + for (i = 0; i < size; i++) { + stat = (char *)src + stats[i].offset; + **dst = *(u64 *)stat; + (*dst)++; + } +} + +static void qeth_add_stat_strings(u8 **data, const char *prefix, + const struct qeth_stats stats[], + unsigned int size) +{ + unsigned int i; + + for (i = 0; i < size; i++) { + snprintf(*data, ETH_GSTRING_LEN, "%s%s", prefix, stats[i].name); + *data += ETH_GSTRING_LEN; + } +} + static int qeth_get_sset_count(struct net_device *dev, int stringset) { + struct qeth_card *card = dev->ml_priv; + switch (stringset) { case ETH_SS_STATS: - return (sizeof(qeth_ethtool_stats_keys) / ETH_GSTRING_LEN); + return CARD_STATS_LEN + + card->qdio.no_out_queues * TXQ_STATS_LEN; default: return -EINVAL; } @@ -55,48 +94,29 @@ static void qeth_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct qeth_card *card = dev->ml_priv; + unsigned int i; - data[0] = card->stats.rx_packets - - card->perf_stats.initial_rx_packets; - data[1] = card->perf_stats.bufs_rec; - data[2] = card->stats.tx_packets - - card->perf_stats.initial_tx_packets; - data[3] = card->perf_stats.bufs_sent; - data[4] = card->stats.tx_packets - card->perf_stats.initial_tx_packets - - card->perf_stats.skbs_sent_pack; - data[5] = card->perf_stats.bufs_sent - card->perf_stats.bufs_sent_pack; - data[6] = card->perf_stats.skbs_sent_pack; - data[7] = card->perf_stats.bufs_sent_pack; - data[8] = card->perf_stats.sg_skbs_sent; - data[9] = card->perf_stats.buf_elements_sent; - data[10] = card->perf_stats.sg_skbs_rx; - data[11] = card->perf_stats.sg_frags_rx; - data[12] = card->perf_stats.sg_alloc_page_rx; - data[13] = (card->perf_stats.large_send_bytes >> 10); - data[14] = card->perf_stats.large_send_cnt; - data[15] = card->perf_stats.sc_dp_p; - data[16] = card->perf_stats.sc_p_dp; - data[17] = QETH_LOW_WATERMARK_PACK; - data[18] = QETH_HIGH_WATERMARK_PACK; - data[19] = atomic_read(&card->qdio.out_qs[0]->used_buffers); - data[20] = (card->qdio.no_out_queues > 1) ? - atomic_read(&card->qdio.out_qs[1]->used_buffers) : 0; - data[21] = (card->qdio.no_out_queues > 2) ? - atomic_read(&card->qdio.out_qs[2]->used_buffers) : 0; - data[22] = (card->qdio.no_out_queues > 3) ? - atomic_read(&card->qdio.out_qs[3]->used_buffers) : 0; - data[23] = card->perf_stats.tx_csum; - data[24] = card->perf_stats.tx_lin; - data[25] = card->perf_stats.tx_linfail; - data[26] = card->perf_stats.rx_csum; + qeth_add_stat_data(&data, &card->stats, card_stats, CARD_STATS_LEN); + for (i = 0; i < card->qdio.no_out_queues; i++) + qeth_add_stat_data(&data, &card->qdio.out_qs[i]->stats, + txq_stats, TXQ_STATS_LEN); } static void qeth_get_strings(struct net_device *dev, u32 stringset, u8 *data) { + struct qeth_card *card = dev->ml_priv; + char prefix[ETH_GSTRING_LEN] = ""; + unsigned int i; + switch (stringset) { case ETH_SS_STATS: - memcpy(data, &qeth_ethtool_stats_keys, - sizeof(qeth_ethtool_stats_keys)); + qeth_add_stat_strings(&data, prefix, card_stats, + CARD_STATS_LEN); + for (i = 0; i < card->qdio.no_out_queues; i++) { + snprintf(prefix, ETH_GSTRING_LEN, "tx%u ", i); + qeth_add_stat_strings(&data, prefix, txq_stats, + TXQ_STATS_LEN); + } break; default: WARN_ON(1); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index dbcba77fd94c..b1280a155953 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -174,9 +174,9 @@ static int qeth_l2_get_cast_type(struct qeth_card *card, struct sk_buff *skb) return RTN_UNICAST; } -static void qeth_l2_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, - struct sk_buff *skb, int ipv, int cast_type, - unsigned int data_len) +static void qeth_l2_fill_header(struct qeth_qdio_out_q *queue, + struct qeth_hdr *hdr, struct sk_buff *skb, + int ipv, int cast_type, unsigned int data_len) { struct vlan_ethhdr *veth = vlan_eth_hdr(skb); @@ -188,8 +188,7 @@ static void qeth_l2_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, hdr->hdr.l2.id = QETH_HEADER_TYPE_LAYER2; if (skb->ip_summed == CHECKSUM_PARTIAL) { qeth_tx_csum(skb, &hdr->hdr.l2.flags[1], ipv); - if (card->options.performance_stats) - card->perf_stats.tx_csum++; + QETH_TXQ_STAT_INC(queue, skbs_csum); } } @@ -369,8 +368,8 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card, } work_done++; budget--; - card->stats.rx_packets++; - card->stats.rx_bytes += len; + QETH_CARD_STAT_INC(card, rx_packets); + QETH_CARD_STAT_ADD(card, rx_bytes, len); } return work_done; } @@ -626,12 +625,13 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb, int tx_bytes = skb->len; int rc; + queue = qeth_get_tx_queue(card, skb, ipv, cast_type); + if (card->state != CARD_STATE_UP) { - card->stats.tx_carrier_errors++; + QETH_TXQ_STAT_INC(queue, tx_carrier_errors); goto tx_drop; } - queue = qeth_get_tx_queue(card, skb, ipv, cast_type); netif_stop_queue(dev); if (IS_OSN(card)) @@ -641,8 +641,8 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb, qeth_l2_fill_header); if (!rc) { - card->stats.tx_packets++; - card->stats.tx_bytes += tx_bytes; + QETH_TXQ_STAT_INC(queue, tx_packets); + QETH_TXQ_STAT_ADD(queue, tx_bytes, tx_bytes); netif_wake_queue(dev); return NETDEV_TX_OK; } else if (rc == -EBUSY) { @@ -650,8 +650,8 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb, } /* else fall through */ tx_drop: - card->stats.tx_dropped++; - card->stats.tx_errors++; + QETH_TXQ_STAT_INC(queue, tx_dropped); + QETH_TXQ_STAT_INC(queue, tx_errors); dev_kfree_skb_any(skb); netif_wake_queue(dev); return NETDEV_TX_OK; @@ -699,7 +699,7 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev) static const struct net_device_ops qeth_l2_netdev_ops = { .ndo_open = qeth_open, .ndo_stop = qeth_stop, - .ndo_get_stats = qeth_get_stats, + .ndo_get_stats64 = qeth_get_stats64, .ndo_start_xmit = qeth_l2_hard_start_xmit, .ndo_features_check = qeth_features_check, .ndo_validate_addr = qeth_l2_validate_addr, diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 40c7d53f5512..07c3149e228c 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1315,12 +1315,11 @@ static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb, ip_eth_mc_map(ip_hdr(skb)->daddr, tg_addr); else ipv6_eth_mc_map(&ipv6_hdr(skb)->daddr, tg_addr); - - card->stats.multicast++; + QETH_CARD_STAT_INC(card, rx_multicast); break; case QETH_CAST_BROADCAST: ether_addr_copy(tg_addr, card->dev->broadcast); - card->stats.multicast++; + QETH_CARD_STAT_INC(card, rx_multicast); break; default: if (card->options.sniffer) @@ -1401,8 +1400,8 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card, } work_done++; budget--; - card->stats.rx_packets++; - card->stats.rx_bytes += len; + QETH_CARD_STAT_INC(card, rx_packets); + QETH_CARD_STAT_ADD(card, rx_bytes, len); } return work_done; } @@ -1945,12 +1944,13 @@ static u8 qeth_l3_cast_type_to_flag(int cast_type) return QETH_CAST_UNICAST; } -static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, - struct sk_buff *skb, int ipv, int cast_type, - unsigned int data_len) +static void qeth_l3_fill_header(struct qeth_qdio_out_q *queue, + struct qeth_hdr *hdr, struct sk_buff *skb, + int ipv, int cast_type, unsigned int data_len) { struct qeth_hdr_layer3 *l3_hdr = &hdr->hdr.l3; struct vlan_ethhdr *veth = vlan_eth_hdr(skb); + struct qeth_card *card = queue->card; hdr->hdr.l3.length = data_len; @@ -1972,8 +1972,7 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, /* some HW requires combined L3+L4 csum offload: */ if (ipv == 4) hdr->hdr.l3.ext_flags |= QETH_HDR_EXT_CSUM_HDR_REQ; - if (card->options.performance_stats) - card->perf_stats.tx_csum++; + QETH_TXQ_STAT_INC(queue, skbs_csum); } } @@ -2074,6 +2073,8 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb, int tx_bytes = skb->len; int rc; + queue = qeth_get_tx_queue(card, skb, ipv, cast_type); + if (IS_IQD(card)) { if (card->options.sniffer) goto tx_drop; @@ -2084,14 +2085,13 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb, } if (card->state != CARD_STATE_UP) { - card->stats.tx_carrier_errors++; + QETH_TXQ_STAT_INC(queue, tx_carrier_errors); goto tx_drop; } if (cast_type == RTN_BROADCAST && !card->info.broadcast_capable) goto tx_drop; - queue = qeth_get_tx_queue(card, skb, ipv, cast_type); netif_stop_queue(dev); if (ipv == 4 || IS_IQD(card)) @@ -2101,8 +2101,8 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb, qeth_l3_fill_header); if (!rc) { - card->stats.tx_packets++; - card->stats.tx_bytes += tx_bytes; + QETH_TXQ_STAT_INC(queue, tx_packets); + QETH_TXQ_STAT_ADD(queue, tx_bytes, tx_bytes); netif_wake_queue(dev); return NETDEV_TX_OK; } else if (rc == -EBUSY) { @@ -2110,8 +2110,8 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb, } /* else fall through */ tx_drop: - card->stats.tx_dropped++; - card->stats.tx_errors++; + QETH_TXQ_STAT_INC(queue, tx_dropped); + QETH_TXQ_STAT_INC(queue, tx_errors); dev_kfree_skb_any(skb); netif_wake_queue(dev); return NETDEV_TX_OK; @@ -2153,7 +2153,7 @@ static netdev_features_t qeth_l3_osa_features_check(struct sk_buff *skb, static const struct net_device_ops qeth_l3_netdev_ops = { .ndo_open = qeth_open, .ndo_stop = qeth_stop, - .ndo_get_stats = qeth_get_stats, + .ndo_get_stats64 = qeth_get_stats64, .ndo_start_xmit = qeth_l3_hard_start_xmit, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = qeth_l3_set_rx_mode, @@ -2168,7 +2168,7 @@ static const struct net_device_ops qeth_l3_netdev_ops = { static const struct net_device_ops qeth_l3_osa_netdev_ops = { .ndo_open = qeth_open, .ndo_stop = qeth_stop, - .ndo_get_stats = qeth_get_stats, + .ndo_get_stats64 = qeth_get_stats64, .ndo_start_xmit = qeth_l3_hard_start_xmit, .ndo_features_check = qeth_l3_osa_features_check, .ndo_validate_addr = eth_validate_addr, From 1b4d5e1c617e7c11a0461470122ab7723f99906c Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 15 Feb 2019 19:22:30 +0100 Subject: [PATCH 1271/1436] s390/qeth: add support for ETHTOOL_GRINGPARAM Implement a trivial callback that exposes the queue sizes. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_ethtool.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/s390/net/qeth_ethtool.c b/drivers/s390/net/qeth_ethtool.c index 9ce5ef5fcf7a..93a53fed4cf8 100644 --- a/drivers/s390/net/qeth_ethtool.c +++ b/drivers/s390/net/qeth_ethtool.c @@ -102,6 +102,22 @@ static void qeth_get_ethtool_stats(struct net_device *dev, txq_stats, TXQ_STATS_LEN); } +static void qeth_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) +{ + struct qeth_card *card = dev->ml_priv; + + param->rx_max_pending = QDIO_MAX_BUFFERS_PER_Q; + param->rx_mini_max_pending = 0; + param->rx_jumbo_max_pending = 0; + param->tx_max_pending = QDIO_MAX_BUFFERS_PER_Q; + + param->rx_pending = card->qdio.in_buf_pool.buf_count; + param->rx_mini_pending = 0; + param->rx_jumbo_pending = 0; + param->tx_pending = QDIO_MAX_BUFFERS_PER_Q; +} + static void qeth_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct qeth_card *card = dev->ml_priv; @@ -338,6 +354,7 @@ static int qeth_get_link_ksettings(struct net_device *netdev, const struct ethtool_ops qeth_ethtool_ops = { .get_link = ethtool_op_get_link, + .get_ringparam = qeth_get_ringparam, .get_strings = qeth_get_strings, .get_ethtool_stats = qeth_get_ethtool_stats, .get_sset_count = qeth_get_sset_count, From 8024cc9e854a8e3c6ced6731905caeb1e0037f5a Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 15 Feb 2019 19:22:31 +0100 Subject: [PATCH 1272/1436] s390/qeth: split out OSN netdev ops Rather than special-casing OSN in a number of places, just give this device type its own netdev_ops structure. When setting up the OSN net_device, also skip the handling of the various HW offloads (eg TSO). The device shouldn't be advertising any of them, and the OSN code paths in qeth don't have support for them. In particular RX VLAN filtering is not supported, so don't hook up those callbacks in the netdev_ops. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 3 --- drivers/s390/net/qeth_core_mpc.h | 1 + drivers/s390/net/qeth_l2_main.c | 32 +++++++++++++++++++------------ 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 6d93bb48b1d9..4708df39f129 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5850,9 +5850,6 @@ int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!card) return -ENODEV; - if (card->info.type == QETH_CARD_TYPE_OSN) - return -EPERM; - switch (cmd) { case SIOC_QETH_ADP_SET_SNMP_CONTROL: rc = qeth_snmp_command(card, rq->ifr_ifru.ifru_data); diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index 4b1690380ebe..f8c5d4a9be13 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -81,6 +81,7 @@ enum qeth_card_types { #define IS_OSD(card) ((card)->info.type == QETH_CARD_TYPE_OSD) #define IS_OSM(card) ((card)->info.type == QETH_CARD_TYPE_OSM) #define IS_OSN(card) ((card)->info.type == QETH_CARD_TYPE_OSN) +#define IS_OSX(card) ((card)->info.type == QETH_CARD_TYPE_OSX) #define IS_VM_NIC(card) ((card)->info.guestlan) #define QETH_MPC_DIFINFO_LEN_INDICATES_LINK_TYPE 0x18 diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index b1280a155953..2c9714215775 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -425,7 +425,7 @@ static int qeth_l2_validate_addr(struct net_device *dev) { struct qeth_card *card = dev->ml_priv; - if (IS_OSN(card) || (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)) + if (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED) return eth_validate_addr(dev); QETH_CARD_TEXT(card, 4, "nomacadr"); @@ -441,9 +441,7 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p) QETH_CARD_TEXT(card, 3, "setmac"); - if (card->info.type == QETH_CARD_TYPE_OSN || - card->info.type == QETH_CARD_TYPE_OSM || - card->info.type == QETH_CARD_TYPE_OSX) { + if (IS_OSM(card) || IS_OSX(card)) { QETH_CARD_TEXT(card, 3, "setmcTYP"); return -EOPNOTSUPP; } @@ -537,9 +535,6 @@ static void qeth_l2_set_rx_mode(struct net_device *dev) int i; int rc; - if (card->info.type == QETH_CARD_TYPE_OSN) - return; - QETH_CARD_TEXT(card, 3, "setmulti"); spin_lock_bh(&card->mclock); @@ -713,16 +708,28 @@ static const struct net_device_ops qeth_l2_netdev_ops = { .ndo_set_features = qeth_set_features }; +static const struct net_device_ops qeth_osn_netdev_ops = { + .ndo_open = qeth_open, + .ndo_stop = qeth_stop, + .ndo_get_stats64 = qeth_get_stats64, + .ndo_start_xmit = qeth_l2_hard_start_xmit, + .ndo_validate_addr = eth_validate_addr, + .ndo_tx_timeout = qeth_tx_timeout, +}; + static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok) { int rc; - card->dev->priv_flags |= IFF_UNICAST_FLT; - card->dev->netdev_ops = &qeth_l2_netdev_ops; - if (IS_OSN(card)) + if (IS_OSN(card)) { + card->dev->netdev_ops = &qeth_osn_netdev_ops; card->dev->flags |= IFF_NOARP; - else - card->dev->needed_headroom = sizeof(struct qeth_hdr); + goto add_napi; + } + + card->dev->needed_headroom = sizeof(struct qeth_hdr); + card->dev->netdev_ops = &qeth_l2_netdev_ops; + card->dev->priv_flags |= IFF_UNICAST_FLT; if (IS_OSM(card)) { card->dev->features |= NETIF_F_VLAN_CHALLENGED; @@ -764,6 +771,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok) PAGE_SIZE * (QDIO_MAX_ELEMENTS_PER_BUFFER - 1)); } +add_napi: netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT); rc = register_netdev(card->dev); if (!rc && carrier_ok) From fea83353177a55540c71c140887737c282137aa2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 15 Feb 2019 12:16:49 -0800 Subject: [PATCH 1273/1436] net: dsa: b53: Fix default VLAN ID We were not consistent in how the default VID of a given port was defined, b53_br_leave() would make sure the VLAN ID would be either 0/1 depending on the switch generation, but b53_configure_vlan(), which is the default configuration would unconditionally set it to 1. The correct value is 1 for 5325/5365 series and 0 otherwise. To avoid repeating that mistake ever again, introduce a helper function: b53_default_pvid() to factor that out. Fixes: 967dd82ffc52 ("net: dsa: b53: Add support for Broadcom RoboSwitch") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 0e4bbdcc614f..964a9ec4652a 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -632,15 +632,25 @@ static void b53_enable_mib(struct b53_device *dev) b53_write8(dev, B53_MGMT_PAGE, B53_GLOBAL_CONFIG, gc); } +static u16 b53_default_pvid(struct b53_device *dev) +{ + if (is5325(dev) || is5365(dev)) + return 1; + else + return 0; +} + int b53_configure_vlan(struct dsa_switch *ds) { struct b53_device *dev = ds->priv; struct b53_vlan vl = { 0 }; - int i; + int i, def_vid; + + def_vid = b53_default_pvid(dev); /* clear all vlan entries */ if (is5325(dev) || is5365(dev)) { - for (i = 1; i < dev->num_vlans; i++) + for (i = def_vid; i < dev->num_vlans; i++) b53_set_vlan_entry(dev, i, &vl); } else { b53_do_vlan_op(dev, VTA_CMD_CLEAR); @@ -650,7 +660,7 @@ int b53_configure_vlan(struct dsa_switch *ds) b53_for_each_port(dev, i) b53_write16(dev, B53_VLAN_PAGE, - B53_VLAN_PORT_DEF_TAG(i), 1); + B53_VLAN_PORT_DEF_TAG(i), def_vid); if (!is5325(dev) && !is5365(dev)) b53_set_jumbo(dev, dev->enable_jumbo, false); @@ -1326,12 +1336,8 @@ int b53_vlan_del(struct dsa_switch *ds, int port, vl->members &= ~BIT(port); - if (pvid == vid) { - if (is5325(dev) || is5365(dev)) - pvid = 1; - else - pvid = 0; - } + if (pvid == vid) + pvid = b53_default_pvid(dev); if (untagged && !dsa_is_cpu_port(ds, port)) vl->untag &= ~(BIT(port)); @@ -1644,10 +1650,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *br) b53_write16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), pvlan); dev->ports[port].vlan_ctl_mask = pvlan; - if (is5325(dev) || is5365(dev)) - pvid = 1; - else - pvid = 0; + pvid = b53_default_pvid(dev); /* Make this port join all VLANs without VLAN entries */ if (is58xx(dev)) { From dad8d7c6452b5b9f9828c9e2c7ca143205fd40c7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 15 Feb 2019 12:16:50 -0800 Subject: [PATCH 1274/1436] net: dsa: b53: Properly account for VLAN filtering VLAN filtering can be built into the kernel, and also dynamically turned on/off through the bridge master device. Allow re-configuring the switch appropriately to account for that by deciding whether VLAN table (v_table) misses should lead to a drop or forward. Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 59 +++++++++++++++++++++++++++++--- drivers/net/dsa/b53/b53_priv.h | 3 ++ 2 files changed, 57 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 964a9ec4652a..2fef4c564420 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -344,7 +344,8 @@ static void b53_set_forwarding(struct b53_device *dev, int enable) b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, mgmt); } -static void b53_enable_vlan(struct b53_device *dev, bool enable) +static void b53_enable_vlan(struct b53_device *dev, bool enable, + bool enable_filtering) { u8 mgmt, vc0, vc1, vc4 = 0, vc5; @@ -369,8 +370,13 @@ static void b53_enable_vlan(struct b53_device *dev, bool enable) vc0 |= VC0_VLAN_EN | VC0_VID_CHK_EN | VC0_VID_HASH_VID; vc1 |= VC1_RX_MCST_UNTAG_EN | VC1_RX_MCST_FWD_EN; vc4 &= ~VC4_ING_VID_CHECK_MASK; - vc4 |= VC4_ING_VID_VIO_DROP << VC4_ING_VID_CHECK_S; - vc5 |= VC5_DROP_VTABLE_MISS; + if (enable_filtering) { + vc4 |= VC4_ING_VID_VIO_DROP << VC4_ING_VID_CHECK_S; + vc5 |= VC5_DROP_VTABLE_MISS; + } else { + vc4 |= VC4_ING_VID_VIO_FWD << VC4_ING_VID_CHECK_S; + vc5 &= ~VC5_DROP_VTABLE_MISS; + } if (is5325(dev)) vc0 &= ~VC0_RESERVED_1; @@ -420,6 +426,9 @@ static void b53_enable_vlan(struct b53_device *dev, bool enable) } b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, mgmt); + + dev->vlan_enabled = enable; + dev->vlan_filtering_enabled = enable_filtering; } static int b53_set_jumbo(struct b53_device *dev, bool enable, bool allow_10_100) @@ -656,7 +665,7 @@ int b53_configure_vlan(struct dsa_switch *ds) b53_do_vlan_op(dev, VTA_CMD_CLEAR); } - b53_enable_vlan(dev, false); + b53_enable_vlan(dev, false, dev->vlan_filtering_enabled); b53_for_each_port(dev, i) b53_write16(dev, B53_VLAN_PAGE, @@ -1265,6 +1274,46 @@ EXPORT_SYMBOL(b53_phylink_mac_link_up); int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering) { + struct b53_device *dev = ds->priv; + struct net_device *bridge_dev; + unsigned int i; + u16 pvid, new_pvid; + + /* Handle the case were multiple bridges span the same switch device + * and one of them has a different setting than what is being requested + * which would be breaking filtering semantics for any of the other + * bridge devices. + */ + b53_for_each_port(dev, i) { + bridge_dev = dsa_to_port(ds, i)->bridge_dev; + if (bridge_dev && + bridge_dev != dsa_to_port(ds, port)->bridge_dev && + br_vlan_enabled(bridge_dev) != vlan_filtering) { + netdev_err(bridge_dev, + "VLAN filtering is global to the switch!\n"); + return -EINVAL; + } + } + + b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &pvid); + new_pvid = pvid; + if (dev->vlan_filtering_enabled && !vlan_filtering) { + /* Filtering is currently enabled, use the default PVID since + * the bridge does not expect tagging anymore + */ + dev->ports[port].pvid = pvid; + new_pvid = b53_default_pvid(dev); + } else if (!dev->vlan_filtering_enabled && vlan_filtering) { + /* Filtering is currently disabled, restore the previous PVID */ + new_pvid = dev->ports[port].pvid; + } + + if (pvid != new_pvid) + b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), + new_pvid); + + b53_enable_vlan(dev, dev->vlan_enabled, vlan_filtering); + return 0; } EXPORT_SYMBOL(b53_vlan_filtering); @@ -1280,7 +1329,7 @@ int b53_vlan_prepare(struct dsa_switch *ds, int port, if (vlan->vid_end > dev->num_vlans) return -ERANGE; - b53_enable_vlan(dev, true); + b53_enable_vlan(dev, true, dev->vlan_filtering_enabled); return 0; } diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index ec796482792d..4dc7ee38b258 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -91,6 +91,7 @@ enum { struct b53_port { u16 vlan_ctl_mask; struct ethtool_eee eee; + u16 pvid; }; struct b53_vlan { @@ -137,6 +138,8 @@ struct b53_device { unsigned int num_vlans; struct b53_vlan *vlans; + bool vlan_enabled; + bool vlan_filtering_enabled; unsigned int num_ports; struct b53_port *ports; }; From a40061ea2e39494104602b3048751341bda374a1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 15 Feb 2019 12:16:51 -0800 Subject: [PATCH 1275/1436] net: systemport: Fix reception of BPDUs SYSTEMPORT has its RXCHK parser block that attempts to validate the packet structures, unfortunately setting the L2 header check bit will cause Bridge PDUs (BPDUs) to be incorrectly rejected because they look like LLC/SNAP packets with a non-IPv4 or non-IPv6 Ethernet Type. Fixes: 4e8aedfe78c7 ("net: systemport: Turn on offloads by default") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 28c9b0bdf2f6..bc3ac369cbe3 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -134,6 +134,10 @@ static void bcm_sysport_set_rx_csum(struct net_device *dev, priv->rx_chk_en = !!(wanted & NETIF_F_RXCSUM); reg = rxchk_readl(priv, RXCHK_CONTROL); + /* Clear L2 header checks, which would prevent BPDUs + * from being received. + */ + reg &= ~RXCHK_L2_HDR_DIS; if (priv->rx_chk_en) reg |= RXCHK_EN; else From c3152ec4c0691e351f35a2f63347a464b5f35151 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 15 Feb 2019 12:16:52 -0800 Subject: [PATCH 1276/1436] net: dsa: bcm_sf2: Do not assume DSA master supports WoL We assume in the bcm_sf2 driver that the DSA master network device supports ethtool_ops::{get,set}_wol operations, which is not a given. Avoid de-referencing potentially non-existent function pointers and check them as we should. Fixes: 96e65d7f3f88 ("net: dsa: bcm_sf2: add support for Wake-on-LAN") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 17ec32b0a1cc..14138d423cf1 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -726,10 +726,11 @@ static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port, { struct net_device *p = ds->ports[port].cpu_dp->master; struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - struct ethtool_wolinfo pwol; + struct ethtool_wolinfo pwol = { }; /* Get the parent device WoL settings */ - p->ethtool_ops->get_wol(p, &pwol); + if (p->ethtool_ops->get_wol) + p->ethtool_ops->get_wol(p, &pwol); /* Advertise the parent device supported settings */ wol->supported = pwol.supported; @@ -750,9 +751,10 @@ static int bcm_sf2_sw_set_wol(struct dsa_switch *ds, int port, struct net_device *p = ds->ports[port].cpu_dp->master; struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); s8 cpu_port = ds->ports[port].cpu_dp->index; - struct ethtool_wolinfo pwol; + struct ethtool_wolinfo pwol = { }; - p->ethtool_ops->get_wol(p, &pwol); + if (p->ethtool_ops->get_wol) + p->ethtool_ops->get_wol(p, &pwol); if (wol->wolopts & ~pwol.supported) return -EINVAL; From 10163aaee9671b01b2f4737922e1a4f43581047a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 15 Feb 2019 12:16:53 -0800 Subject: [PATCH 1277/1436] net: dsa: b53: Do not program CPU port's PVID The CPU port is special and does not need to obey VLAN restrictions as far as untagged traffic goes, also, having the CPU port be part of a particular PVID is against the idea of keeping it tagged in all VLANs. Fixes: ca8931948344 ("net: dsa: b53: Keep CPU port as tagged in all VLANs") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 2fef4c564420..c76892ac4e69 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1359,7 +1359,7 @@ void b53_vlan_add(struct dsa_switch *ds, int port, b53_fast_age_vlan(dev, vid); } - if (pvid) { + if (pvid && !dsa_is_cpu_port(ds, port)) { b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), vlan->vid_end); b53_fast_age_vlan(dev, vid); From c93a49b9769e435990c82297aa0baa31e1538790 Mon Sep 17 00:00:00 2001 From: Andrea Claudi Date: Fri, 15 Feb 2019 17:51:48 +0100 Subject: [PATCH 1278/1436] ipvs: fix warning on unused variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_IP_VS_IPV6 is not defined, build produced this warning: net/netfilter/ipvs/ip_vs_ctl.c:899:6: warning: unused variable ‘ret’ [-Wunused-variable] int ret = 0; ^~~ Fix this by moving the declaration of 'ret' in the CONFIG_IP_VS_IPV6 section in the same function. While at it, drop its unneeded initialisation. Fixes: 098e13f5b21d ("ipvs: fix dependency on nf_defrag_ipv6") Reported-by: Stefano Brivio Signed-off-by: Andrea Claudi Reviewed-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 86afacb07e5f..ac8d848d7624 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -896,12 +896,13 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, { struct ip_vs_dest *dest; unsigned int atype, i; - int ret = 0; EnterFunction(2); #ifdef CONFIG_IP_VS_IPV6 if (udest->af == AF_INET6) { + int ret; + atype = ipv6_addr_type(&udest->addr.in6); if ((!(atype & IPV6_ADDR_UNICAST) || atype & IPV6_ADDR_LINKLOCAL) && From dddaf89e2fbc69b15e82a96ccd174c70f65bf3d5 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 16 Feb 2019 08:16:06 +0000 Subject: [PATCH 1279/1436] netfilter: ipt_CLUSTERIP: make symbol 'cip_netdev_notifier' static Fixes the following sparse warnings: net/ipv4/netfilter/ipt_CLUSTERIP.c:867:23: warning: symbol 'cip_netdev_notifier' was not declared. Should it be static? Fixes: 5a86d68bcf02 ("netfilter: ipt_CLUSTERIP: fix deadlock in netns exit routine") Signed-off-by: Wei Yongjun Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index b61977db9b7f..91b369bc44f9 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -864,7 +864,7 @@ static struct pernet_operations clusterip_net_ops = { .size = sizeof(struct clusterip_net), }; -struct notifier_block cip_netdev_notifier = { +static struct notifier_block cip_netdev_notifier = { .notifier_call = clusterip_netdev_event }; From a3419ce3356cf1fdc69a0524eced84cef730b3bf Mon Sep 17 00:00:00 2001 From: Alin Nastac Date: Sat, 16 Feb 2019 10:49:12 +0100 Subject: [PATCH 1280/1436] netfilter: nf_conntrack_sip: add sip_external_media logic When enabled, the sip_external_media logic will leave SDP payload untouched when it detects that interface towards INVITEd party is the same with the one towards media endpoint. The typical scenario for this logic is when a LAN SIP agent has more than one IP address (uses a different address for media streams than the one used on signalling stream) and it also forwards calls to a voice mailbox located on the WAN side. In such case sip_direct_media must be disabled (so normal calls could be handled by the SIP helper), but media streams that are not traversing this router must also be excluded from address translation (e.g. call forwards). Signed-off-by: Alin Nastac Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 42 ++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index c8d2b6688a2a..f067c6b50857 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -21,6 +21,8 @@ #include #include +#include +#include #include #include #include @@ -54,6 +56,11 @@ module_param(sip_direct_media, int, 0600); MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling " "endpoints only (default 1)"); +static int sip_external_media __read_mostly = 0; +module_param(sip_external_media, int, 0600); +MODULE_PARM_DESC(sip_external_media, "Expect Media streams between external " + "endpoints (default 0)"); + const struct nf_nat_sip_hooks *nf_nat_sip_hooks; EXPORT_SYMBOL_GPL(nf_nat_sip_hooks); @@ -861,6 +868,41 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, if (!nf_inet_addr_cmp(daddr, &ct->tuplehash[dir].tuple.src.u3)) return NF_ACCEPT; saddr = &ct->tuplehash[!dir].tuple.src.u3; + } else if (sip_external_media) { + struct net_device *dev = skb_dst(skb)->dev; + struct net *net = dev_net(dev); + struct rtable *rt; + struct flowi4 fl4 = {}; +#if IS_ENABLED(CONFIG_IPV6) + struct flowi6 fl6 = {}; +#endif + struct dst_entry *dst = NULL; + + switch (nf_ct_l3num(ct)) { + case NFPROTO_IPV4: + fl4.daddr = daddr->ip; + rt = ip_route_output_key(net, &fl4); + if (!IS_ERR(rt)) + dst = &rt->dst; + break; + +#if IS_ENABLED(CONFIG_IPV6) + case NFPROTO_IPV6: + fl6.daddr = daddr->in6; + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + dst_release(dst); + dst = NULL; + } + break; +#endif + } + + /* Don't predict any conntracks when media endpoint is reachable + * through the same interface as the signalling peer. + */ + if (dst && dst->dev == dev) + return NF_ACCEPT; } /* We need to check whether the registration exists before attempting From 8a5b403d71affa098009cc3dff1b2c45113021ad Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 15 Feb 2019 13:33:32 +0100 Subject: [PATCH 1281/1436] arm64, mm, efi: Account for GICv3 LPI tables in static memblock reserve table In the irqchip and EFI code, we have what basically amounts to a quirk to work around a peculiarity in the GICv3 architecture, which permits the system memory address of LPI tables to be programmable only once after a CPU reset. This means kexec kernels must use the same memory as the first kernel, and thus ensure that this memory has not been given out for other purposes by the time the ITS init code runs, which is not very early for secondary CPUs. On systems with many CPUs, these reservations could overflow the memblock reservation table, and this was addressed in commit: eff896288872 ("efi/arm: Defer persistent reservations until after paging_init()") However, this turns out to have made things worse, since the allocation of page tables and heap space for the resized memblock reservation table itself may overwrite the regions we are attempting to reserve, which may cause all kinds of corruption, also considering that the ITS will still be poking bits into that memory in response to incoming MSIs. So instead, let's grow the static memblock reservation table on such systems so it can accommodate these reservations at an earlier time. This will permit us to revert the above commit in a subsequent patch. [ mingo: Minor cleanups. ] Signed-off-by: Ard Biesheuvel Acked-by: Mike Rapoport Acked-by: Will Deacon Acked-by: Marc Zyngier Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20190215123333.21209-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/arm64/include/asm/memory.h | 11 +++++++++++ include/linux/memblock.h | 3 --- mm/memblock.c | 11 +++++++++-- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index e1ec947e7c0c..0c656850eeea 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -332,6 +332,17 @@ static inline void *phys_to_virt(phys_addr_t x) #define virt_addr_valid(kaddr) \ (_virt_addr_is_linear(kaddr) && _virt_addr_valid(kaddr)) +/* + * Given that the GIC architecture permits ITS implementations that can only be + * configured with a LPI table address once, GICv3 systems with many CPUs may + * end up reserving a lot of different regions after a kexec for their LPI + * tables (one per CPU), as we are forced to reuse the same memory after kexec + * (and thus reserve it persistently with EFI beforehand) + */ +#if defined(CONFIG_EFI) && defined(CONFIG_ARM_GIC_V3_ITS) +# define INIT_MEMBLOCK_RESERVED_REGIONS (INIT_MEMBLOCK_REGIONS + NR_CPUS + 1) +#endif + #include #endif diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 64c41cf45590..859b55b66db2 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -29,9 +29,6 @@ extern unsigned long max_pfn; */ extern unsigned long long max_possible_pfn; -#define INIT_MEMBLOCK_REGIONS 128 -#define INIT_PHYSMEM_REGIONS 4 - /** * enum memblock_flags - definition of memory region attributes * @MEMBLOCK_NONE: no special request diff --git a/mm/memblock.c b/mm/memblock.c index 022d4cbb3618..ea31045ba704 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -26,6 +26,13 @@ #include "internal.h" +#define INIT_MEMBLOCK_REGIONS 128 +#define INIT_PHYSMEM_REGIONS 4 + +#ifndef INIT_MEMBLOCK_RESERVED_REGIONS +# define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS +#endif + /** * DOC: memblock overview * @@ -92,7 +99,7 @@ unsigned long max_pfn; unsigned long long max_possible_pfn; static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; -static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; +static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock; #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock; #endif @@ -105,7 +112,7 @@ struct memblock memblock __initdata_memblock = { .reserved.regions = memblock_reserved_init_regions, .reserved.cnt = 1, /* empty dummy entry */ - .reserved.max = INIT_MEMBLOCK_REGIONS, + .reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS, .reserved.name = "reserved", #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP From 582a32e708823e5957fd73ccd78dc4a9e49d21ea Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 15 Feb 2019 13:33:33 +0100 Subject: [PATCH 1282/1436] efi/arm: Revert "Defer persistent reservations until after paging_init()" This reverts commit eff896288872d687d9662000ec9ae11b6d61766f, which deferred the processing of persistent memory reservations to a point where the memory may have already been allocated and overwritten, defeating the purpose. Signed-off-by: Ard Biesheuvel Acked-by: Will Deacon Cc: Linus Torvalds Cc: Marc Zyngier Cc: Mike Rapoport Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20190215123333.21209-3-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/arm64/kernel/setup.c | 1 - drivers/firmware/efi/efi.c | 4 ---- drivers/firmware/efi/libstub/arm-stub.c | 3 --- include/linux/efi.h | 7 ------- 4 files changed, 15 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 4b0e1231625c..d09ec76f08cf 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -313,7 +313,6 @@ void __init setup_arch(char **cmdline_p) arm64_memblock_init(); paging_init(); - efi_apply_persistent_mem_reservations(); acpi_table_upgrade(); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 4c46ff6f2242..55b77c576c42 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -592,11 +592,7 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz, early_memunmap(tbl, sizeof(*tbl)); } - return 0; -} -int __init efi_apply_persistent_mem_reservations(void) -{ if (efi.mem_reserve != EFI_INVALID_TABLE_ADDR) { unsigned long prsv = efi.mem_reserve; diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index eee42d5e25ee..c037c6c5d0b7 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -75,9 +75,6 @@ void install_memreserve_table(efi_system_table_t *sys_table_arg) efi_guid_t memreserve_table_guid = LINUX_EFI_MEMRESERVE_TABLE_GUID; efi_status_t status; - if (IS_ENABLED(CONFIG_ARM)) - return; - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, sizeof(*rsv), (void **)&rsv); if (status != EFI_SUCCESS) { diff --git a/include/linux/efi.h b/include/linux/efi.h index 45ff763fba76..28604a8d0aa9 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1198,8 +1198,6 @@ static inline bool efi_enabled(int feature) extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused); extern bool efi_is_table_address(unsigned long phys_addr); - -extern int efi_apply_persistent_mem_reservations(void); #else static inline bool efi_enabled(int feature) { @@ -1218,11 +1216,6 @@ static inline bool efi_is_table_address(unsigned long phys_addr) { return false; } - -static inline int efi_apply_persistent_mem_reservations(void) -{ - return 0; -} #endif extern int efi_status_to_err(efi_status_t status); From 8681ef1f3d295bd3600315325f3b3396d76d02f6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 16 Feb 2019 13:44:39 -0800 Subject: [PATCH 1283/1436] net: Add header for usage of fls64() Fixes: 3b89ea9c5902 ("net: Fix for_each_netdev_feature on Big endian") Suggested-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index fce28562bed2..4c76fe2c8488 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -11,6 +11,7 @@ #define _LINUX_NETDEV_FEATURES_H #include +#include #include typedef u64 netdev_features_t; From 4057765f2dee79cb92f9067909477303360be8d3 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 13 Feb 2019 04:30:34 +0100 Subject: [PATCH 1284/1436] sock: consistent handling of extreme SO_SNDBUF/SO_RCVBUF values SO_SNDBUF and SO_RCVBUF (and their *BUFFORCE version) may overflow or underflow their input value. This patch aims at providing explicit handling of these extreme cases, to get a clear behaviour even with values bigger than INT_MAX / 2 or lower than INT_MIN / 2. For simplicity, only SO_SNDBUF and SO_SNDBUFFORCE are described here, but the same explanation and fix apply to SO_RCVBUF and SO_RCVBUFFORCE (with 'SNDBUF' replaced by 'RCVBUF' and 'wmem_max' by 'rmem_max'). Overflow of positive values =========================== When handling SO_SNDBUF or SO_SNDBUFFORCE, if 'val' exceeds INT_MAX / 2, the buffer size is set to its minimum value because 'val * 2' overflows, and max_t() considers that it's smaller than SOCK_MIN_SNDBUF. For SO_SNDBUF, this can only happen with net.core.wmem_max > INT_MAX / 2. SO_SNDBUF and SO_SNDBUFFORCE are actually designed to let users probe for the maximum buffer size by setting an arbitrary large number that gets capped to the maximum allowed/possible size. Having the upper half of the positive integer space to potentially reduce the buffer size to its minimum value defeats this purpose. This patch caps the base value to INT_MAX / 2, so that bigger values don't overflow and keep setting the buffer size to its maximum. Underflow of negative values ============================ For negative numbers, SO_SNDBUF always considers them bigger than net.core.wmem_max, which is bounded by [SOCK_MIN_SNDBUF, INT_MAX]. Therefore such values are set to net.core.wmem_max and we're back to the behaviour of positive integers described above (return maximum buffer size if wmem_max <= INT_MAX / 2, return SOCK_MIN_SNDBUF otherwise). However, SO_SNDBUFFORCE behaves differently. The user value is directly multiplied by two and compared with SOCK_MIN_SNDBUF. If 'val * 2' doesn't underflow or if it underflows to a value smaller than SOCK_MIN_SNDBUF then buffer size is set to its minimum value. Otherwise the buffer size is set to the underflowed value. This patch treats negative values passed to SO_SNDBUFFORCE as null, to prevent underflows. Therefore negative values now always set the buffer size to its minimum value. Even though SO_SNDBUF behaves inconsistently by setting buffer size to the maximum value when passed a negative number, no attempt is made to modify this behaviour. There may exist some programs that rely on using negative numbers to set the maximum buffer size. Avoiding overflows because of extreme net.core.wmem_max values is the most we can do here. Summary of altered behaviours ============================= val : user-space value passed to setsockopt() val_uf : the underflowed value resulting from doubling val when val < INT_MIN / 2 wmem_max : short for net.core.wmem_max val_cap : min(val, wmem_max) min_len : minimal buffer length (that is, SOCK_MIN_SNDBUF) max_len : maximal possible buffer length, regardless of wmem_max (that is, INT_MAX - 1) ^^^^ : altered behaviour SO_SNDBUF: +-------------------------+-------------+------------+----------------+ | CONDITION | OLD RESULT | NEW RESULT | COMMENT | +-------------------------+-------------+------------+----------------+ | val < 0 && | | | No overflow, | | wmem_max <= INT_MAX/2 | wmem_max*2 | wmem_max*2 | keep original | | | | | behaviour | +-------------------------+-------------+------------+----------------+ | val < 0 && | | | Cap wmem_max | | INT_MAX/2 < wmem_max | min_len | max_len | to prevent | | | | ^^^^^^^ | overflow | +-------------------------+-------------+------------+----------------+ | 0 <= val <= min_len/2 | min_len | min_len | Ordinary case | +-------------------------+-------------+------------+----------------+ | min_len/2 < val && | val_cap*2 | val_cap*2 | Ordinary case | | val_cap <= INT_MAX/2 | | | | +-------------------------+-------------+------------+----------------+ | min_len < val && | | | Cap val_cap | | INT_MAX/2 < val_cap | min_len | max_len | again to | | (implies that | | ^^^^^^^ | prevent | | INT_MAX/2 < wmem_max) | | | overflow | +-------------------------+-------------+------------+----------------+ SO_SNDBUFFORCE: +------------------------------+---------+---------+------------------+ | CONDITION | BEFORE | AFTER | COMMENT | | | PATCH | PATCH | | +------------------------------+---------+---------+------------------+ | val < INT_MIN/2 && | min_len | min_len | Underflow with | | val_uf <= min_len | | | no consequence | +------------------------------+---------+---------+------------------+ | val < INT_MIN/2 && | val_uf | min_len | Set val to 0 to | | val_uf > min_len | | ^^^^^^^ | avoid underflow | +------------------------------+---------+---------+------------------+ | INT_MIN/2 <= val < 0 | min_len | min_len | No underflow | +------------------------------+---------+---------+------------------+ | 0 <= val <= min_len/2 | min_len | min_len | Ordinary case | +------------------------------+---------+---------+------------------+ | min_len/2 < val <= INT_MAX/2 | val*2 | val*2 | Ordinary case | +------------------------------+---------+---------+------------------+ | INT_MAX/2 < val | min_len | max_len | Cap val to | | | | ^^^^^^^ | prevent overflow | +------------------------------+---------+---------+------------------+ Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/core/sock.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/net/core/sock.c b/net/core/sock.c index d9f0a817dca8..f4b8b78535f8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -785,6 +785,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname, */ val = min_t(u32, val, sysctl_wmem_max); set_sndbuf: + /* Ensure val * 2 fits into an int, to prevent max_t() + * from treating it as a negative value. + */ + val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); /* Wake up sending tasks if we upped the value. */ @@ -796,6 +800,12 @@ set_sndbuf: ret = -EPERM; break; } + + /* No negative values (to prevent underflow, as val will be + * multiplied by 2). + */ + if (val < 0) + val = 0; goto set_sndbuf; case SO_RCVBUF: @@ -806,6 +816,10 @@ set_sndbuf: */ val = min_t(u32, val, sysctl_rmem_max); set_rcvbuf: + /* Ensure val * 2 fits into an int, to prevent max_t() + * from treating it as a negative value. + */ + val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; /* * We double it on the way in to account for @@ -830,6 +844,12 @@ set_rcvbuf: ret = -EPERM; break; } + + /* No negative values (to prevent underflow, as val will be + * multiplied by 2). + */ + if (val < 0) + val = 0; goto set_rcvbuf; case SO_KEEPALIVE: From d0edde8d29887831c38c419728d5e0cab2419b88 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 14 Feb 2019 09:56:35 +0300 Subject: [PATCH 1285/1436] atm: clean up vcc_seq_next() It's confusing to call PTR_ERR(v). The PTR_ERR() function is basically a fancy cast to long so it makes you wonder, was IS_ERR() intended? But that doesn't make sense because vcc_walk() doesn't return error pointers. This patch doesn't affect runtime, it's just a cleanup. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/atm/proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/atm/proc.c b/net/atm/proc.c index 0b0495a41bbe..d79221fd4dae 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -134,7 +134,8 @@ static void vcc_seq_stop(struct seq_file *seq, void *v) static void *vcc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { v = vcc_walk(seq, 1); - *pos += !!PTR_ERR(v); + if (v) + (*pos)++; return v; } From f186a82b10dc229f9cd1e9f27f90cb0ce86e879d Mon Sep 17 00:00:00 2001 From: Alexandre Torgue Date: Thu, 14 Feb 2019 17:03:44 +0100 Subject: [PATCH 1286/1436] net: stmmac: use correct define to get rx timestamp on GMAC4 In dwmac4_wrback_get_rx_timestamp_status we looking for a RX timestamp. For that receive descriptors are handled and so we should use defines related to receive descriptors. It'll no change the functional behavior as RDES3_RDES1_VALID=TDES3_RS1V=BIT(26) but it makes code easier to read. Signed-off-by: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 20299f6f65fc..90045fffd393 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -268,7 +268,7 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, void *next_desc, int ret = -EINVAL; /* Get the status from normal w/b descriptor */ - if (likely(p->des3 & TDES3_RS1V)) { + if (likely(le32_to_cpu(p->des3) & RDES3_RDES1_VALID)) { if (likely(le32_to_cpu(p->des1) & RDES1_TIMESTAMP_AVAILABLE)) { int i = 0; From 9d6b3584a7a9d7fa872cb17ceda7a3b208f441eb Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Fri, 15 Feb 2019 15:49:33 -0800 Subject: [PATCH 1287/1436] selftests: bpf: test_lwt_ip_encap: add negative tests. As requested by David Ahern: - add negative tests (no routes, explicitly unreachable destinations) to exercize error handling code paths; - do not exit on test failures, but instead print a summary of passed/failed tests at the end. Future patches will add TSO and VRF tests. Signed-off-by: Peter Oskolkov Reviewed-by: David Ahern Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/test_lwt_ip_encap.sh | 111 ++++++++++++++---- 1 file changed, 88 insertions(+), 23 deletions(-) diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh index 4ca714e23ab0..612632c1425f 100755 --- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh +++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh @@ -38,8 +38,6 @@ # ping: SRC->[encap at veth2:ingress]->GRE:decap->DST # ping replies go DST->SRC directly -set -e # exit on error - if [[ $EUID -ne 0 ]]; then echo "This script must be run as root" echo "FAIL" @@ -76,8 +74,37 @@ readonly IPv6_GRE="fb10::1" readonly IPv6_SRC=$IPv6_1 readonly IPv6_DST=$IPv6_4 -setup() { -set -e # exit on error +TEST_STATUS=0 +TESTS_SUCCEEDED=0 +TESTS_FAILED=0 + +process_test_results() +{ + if [[ "${TEST_STATUS}" -eq 0 ]] ; then + echo "PASS" + TESTS_SUCCEEDED=$((TESTS_SUCCEEDED+1)) + else + echo "FAIL" + TESTS_FAILED=$((TESTS_FAILED+1)) + fi +} + +print_test_summary_and_exit() +{ + echo "passed tests: ${TESTS_SUCCEEDED}" + echo "failed tests: ${TESTS_FAILED}" + if [ "${TESTS_FAILED}" -eq "0" ] ; then + exit 0 + else + exit 1 + fi +} + +setup() +{ + set -e # exit on error + TEST_STATUS=0 + # create devices and namespaces ip netns add "${NS1}" ip netns add "${NS2}" @@ -178,7 +205,7 @@ set -e # exit on error # configure IPv4 GRE device in NS3, and a route to it via the "bottom" route ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local ${IPv4_GRE} ttl 255 ip -netns ${NS3} link set gre_dev up - ip -netns ${NS3} addr add ${IPv4_GRE} dev gre_dev + ip -netns ${NS3} addr add ${IPv4_GRE} nodad dev gre_dev ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6} ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8} @@ -194,9 +221,13 @@ set -e # exit on error ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0 ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0 ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0 + + sleep 1 # reduce flakiness + set +e } -cleanup() { +cleanup() +{ ip netns del ${NS1} 2> /dev/null ip netns del ${NS2} 2> /dev/null ip netns del ${NS3} 2> /dev/null @@ -204,12 +235,28 @@ cleanup() { trap cleanup EXIT -test_ping() { +remove_routes_to_gredev() +{ + ip -netns ${NS1} route del ${IPv4_GRE} dev veth5 + ip -netns ${NS2} route del ${IPv4_GRE} dev veth7 + ip -netns ${NS1} -6 route del ${IPv6_GRE}/128 dev veth5 + ip -netns ${NS2} -6 route del ${IPv6_GRE}/128 dev veth7 +} + +add_unreachable_routes_to_gredev() +{ + ip -netns ${NS1} route add unreachable ${IPv4_GRE}/32 + ip -netns ${NS2} route add unreachable ${IPv4_GRE}/32 + ip -netns ${NS1} -6 route add unreachable ${IPv6_GRE}/128 + ip -netns ${NS2} -6 route add unreachable ${IPv6_GRE}/128 +} + +test_ping() +{ local readonly PROTO=$1 local readonly EXPECTED=$2 local RET=0 - set +e if [ "${PROTO}" == "IPv4" ] ; then ip netns exec ${NS1} ping -c 1 -W 1 -I ${IPv4_SRC} ${IPv4_DST} 2>&1 > /dev/null RET=$? @@ -217,29 +264,26 @@ test_ping() { ip netns exec ${NS1} ping6 -c 1 -W 6 -I ${IPv6_SRC} ${IPv6_DST} 2>&1 > /dev/null RET=$? else - echo "test_ping: unknown PROTO: ${PROTO}" - exit 1 + echo " test_ping: unknown PROTO: ${PROTO}" + TEST_STATUS=1 fi - set -e if [ "0" != "${RET}" ]; then RET=1 fi if [ "${EXPECTED}" != "${RET}" ] ; then - echo "FAIL: test_ping: ${RET}" - exit 1 + echo " test_ping failed: expected: ${EXPECTED}; got ${RET}" + TEST_STATUS=1 fi } -test_egress() { +test_egress() +{ local readonly ENCAP=$1 echo "starting egress ${ENCAP} encap test" setup - # need to wait a bit for IPv6 to autoconf, otherwise - # ping6 sometimes fails with "unable to bind to address" - # by default, pings work test_ping IPv4 0 test_ping IPv6 0 @@ -258,16 +302,28 @@ test_egress() { ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1 ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1 else - echo "FAIL: unknown encap ${ENCAP}" + echo " unknown encap ${ENCAP}" + TEST_STATUS=1 fi test_ping IPv4 0 test_ping IPv6 0 + # a negative test: remove routes to GRE devices: ping fails + remove_routes_to_gredev + test_ping IPv4 1 + test_ping IPv6 1 + + # another negative test + add_unreachable_routes_to_gredev + test_ping IPv4 1 + test_ping IPv6 1 + cleanup - echo "PASS" + process_test_results } -test_ingress() { +test_ingress() +{ local readonly ENCAP=$1 echo "starting ingress ${ENCAP} encap test" setup @@ -298,14 +354,23 @@ test_ingress() { test_ping IPv4 0 test_ping IPv6 0 + # a negative test: remove routes to GRE devices: ping fails + remove_routes_to_gredev + test_ping IPv4 1 + test_ping IPv6 1 + + # another negative test + add_unreachable_routes_to_gredev + test_ping IPv4 1 + test_ping IPv6 1 + cleanup - echo "PASS" + process_test_results } test_egress IPv4 test_egress IPv6 - test_ingress IPv4 test_ingress IPv6 -echo "all tests passed" +print_test_summary_and_exit From 5aab392c55c96f9bb26d9294f965f156a87ee81c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 15 Feb 2019 19:52:18 -0800 Subject: [PATCH 1288/1436] tools/libbpf: support bigger BTF data sizes While it's understandable why kernel limits number of BTF types to 65535 and size of string section to 64KB, in libbpf as user-space library it's too restrictive. E.g., pahole converting DWARF to BTF type information for Linux kernel generates more than 3 million BTF types and more than 3MB of strings, before deduplication. So to allow btf__dedup() to do its work, we need to be able to load bigger BTF sections using btf__new(). Singed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index ade1c32fb083..68b50e9bbde1 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -16,7 +16,8 @@ #define max(a, b) ((a) > (b) ? (a) : (b)) #define min(a, b) ((a) < (b) ? (a) : (b)) -#define BTF_MAX_NR_TYPES 65535 +#define BTF_MAX_NR_TYPES 0x7fffffff +#define BTF_MAX_STR_OFFSET 0x7fffffff #define IS_MODIFIER(k) (((k) == BTF_KIND_TYPEDEF) || \ ((k) == BTF_KIND_VOLATILE) || \ @@ -175,7 +176,7 @@ static int btf_parse_str_sec(struct btf *btf) const char *start = btf->nohdr_data + hdr->str_off; const char *end = start + btf->hdr->str_len; - if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || + if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || start[0] || end[-1]) { pr_debug("Invalid BTF string section\n"); return -EINVAL; @@ -1882,7 +1883,7 @@ static int btf_dedup_prim_types(struct btf_dedup *d) */ static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id) { - return d->map[type_id] <= BTF_MAX_TYPE; + return d->map[type_id] <= BTF_MAX_NR_TYPES; } /* @@ -2033,7 +2034,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, canon_id = resolve_fwd_id(d, canon_id); hypot_type_id = d->hypot_map[canon_id]; - if (hypot_type_id <= BTF_MAX_TYPE) + if (hypot_type_id <= BTF_MAX_NR_TYPES) return hypot_type_id == cand_id; if (btf_dedup_hypot_map_add(d, canon_id, cand_id)) @@ -2252,7 +2253,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) __u32 h; /* already deduped or is in process of deduping (loop detected) */ - if (d->map[type_id] <= BTF_MAX_TYPE) + if (d->map[type_id] <= BTF_MAX_NR_TYPES) return 0; t = d->btf->types[type_id]; @@ -2329,7 +2330,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) if (d->map[type_id] == BTF_IN_PROGRESS_ID) return -ELOOP; - if (d->map[type_id] <= BTF_MAX_TYPE) + if (d->map[type_id] <= BTF_MAX_NR_TYPES) return resolve_type_id(d, type_id); t = d->btf->types[type_id]; @@ -2509,7 +2510,7 @@ static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id) resolved_type_id = resolve_type_id(d, type_id); new_type_id = d->hypot_map[resolved_type_id]; - if (new_type_id > BTF_MAX_TYPE) + if (new_type_id > BTF_MAX_NR_TYPES) return -EINVAL; return new_type_id; } From a58007621be33e9f7c7bed5d5ff8ecb914e1044a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 14 Feb 2019 15:00:36 +1100 Subject: [PATCH 1289/1436] powerpc/64s: Fix possible corruption on big endian due to pgd/pud_present() In v4.20 we changed our pgd/pud_present() to check for _PAGE_PRESENT rather than just checking that the value is non-zero, e.g.: static inline int pgd_present(pgd_t pgd) { - return !pgd_none(pgd); + return (pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT)); } Unfortunately this is broken on big endian, as the result of the bitwise & is truncated to int, which is always zero because _PAGE_PRESENT is 0x8000000000000000ul. This means pgd_present() and pud_present() are always false at compile time, and the compiler elides the subsequent code. Remarkably with that bug present we are still able to boot and run with few noticeable effects. However under some work loads we are able to trigger a warning in the ext4 code: WARNING: CPU: 11 PID: 29593 at fs/ext4/inode.c:3927 .ext4_set_page_dirty+0x70/0xb0 CPU: 11 PID: 29593 Comm: debugedit Not tainted 4.20.0-rc1 #1 ... NIP .ext4_set_page_dirty+0x70/0xb0 LR .set_page_dirty+0xa0/0x150 Call Trace: .set_page_dirty+0xa0/0x150 .unmap_page_range+0xbf0/0xe10 .unmap_vmas+0x84/0x130 .unmap_region+0xe8/0x190 .__do_munmap+0x2f0/0x510 .__vm_munmap+0x80/0x110 .__se_sys_munmap+0x14/0x30 system_call+0x5c/0x70 The fix is simple, we need to convert the result of the bitwise & to an int before returning it. Thanks to Erhard, Jan Kara and Aneesh for help with debugging. Fixes: da7ad366b497 ("powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit") Cc: stable@vger.kernel.org # v4.20+ Reported-by: Erhard F. Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index c9bfe526ca9d..d8c8d7c9df15 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -904,7 +904,7 @@ static inline int pud_none(pud_t pud) static inline int pud_present(pud_t pud) { - return (pud_raw(pud) & cpu_to_be64(_PAGE_PRESENT)); + return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PRESENT)); } extern struct page *pud_page(pud_t pud); @@ -951,7 +951,7 @@ static inline int pgd_none(pgd_t pgd) static inline int pgd_present(pgd_t pgd) { - return (pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT)); + return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT)); } static inline pte_t pgd_pte(pgd_t pgd) From 1cd48dc51857899e8fb28dd45d4b936c94ea1dab Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 6 Feb 2019 10:32:46 -0800 Subject: [PATCH 1290/1436] Input: apanel - switch to using brightness_set_blocking() Now that LEDs core allows "blocking" flavor of "set brightness" method we can use it and get rid of private work item. As a bonus, we are no longer forgetting to cancel it when we unbind the driver. Reviewed-by: Sven Van Asbroeck Signed-off-by: Dmitry Torokhov --- drivers/input/misc/apanel.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/drivers/input/misc/apanel.c b/drivers/input/misc/apanel.c index 094bddf56755..c1e66f45d552 100644 --- a/drivers/input/misc/apanel.c +++ b/drivers/input/misc/apanel.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #define APANEL_NAME "Fujitsu Application Panel" @@ -59,8 +58,6 @@ struct apanel { struct i2c_client *client; unsigned short keymap[MAX_PANEL_KEYS]; u16 nkeys; - u16 led_bits; - struct work_struct led_work; struct led_classdev mail_led; }; @@ -109,25 +106,13 @@ static void apanel_poll(struct input_polled_dev *ipdev) report_key(idev, ap->keymap[i]); } -/* Track state changes of LED */ -static void led_update(struct work_struct *work) -{ - struct apanel *ap = container_of(work, struct apanel, led_work); - - i2c_smbus_write_word_data(ap->client, 0x10, ap->led_bits); -} - -static void mail_led_set(struct led_classdev *led, +static int mail_led_set(struct led_classdev *led, enum led_brightness value) { struct apanel *ap = container_of(led, struct apanel, mail_led); + u16 led_bits = value != LED_OFF ? 0x8000 : 0x0000; - if (value != LED_OFF) - ap->led_bits |= 0x8000; - else - ap->led_bits &= ~0x8000; - - schedule_work(&ap->led_work); + return i2c_smbus_write_word_data(ap->client, 0x10, led_bits); } static int apanel_remove(struct i2c_client *client) @@ -179,7 +164,7 @@ static struct apanel apanel = { }, .mail_led = { .name = "mail:blue", - .brightness_set = mail_led_set, + .brightness_set_blocking = mail_led_set, }, }; @@ -235,7 +220,6 @@ static int apanel_probe(struct i2c_client *client, if (err) goto out3; - INIT_WORK(&ap->led_work, led_update); if (device_chip[APANEL_DEV_LED] != CHIP_NONE) { err = led_classdev_register(&client->dev, &ap->mail_led); if (err) From 2439d37e1bf8a34d437573c086572abe0f3f1b15 Mon Sep 17 00:00:00 2001 From: Gabriel Fernandez Date: Sat, 16 Feb 2019 21:10:16 -0800 Subject: [PATCH 1291/1436] Input: st-keyscan - fix potential zalloc NULL dereference This patch fixes the following static checker warning: drivers/input/keyboard/st-keyscan.c:156 keyscan_probe() error: potential zalloc NULL dereference: 'keypad_data->input_dev' Reported-by: Dan Carpenter Signed-off-by: Gabriel Fernandez Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/st-keyscan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/keyboard/st-keyscan.c b/drivers/input/keyboard/st-keyscan.c index babcfb165e4f..3b85631fde91 100644 --- a/drivers/input/keyboard/st-keyscan.c +++ b/drivers/input/keyboard/st-keyscan.c @@ -153,6 +153,8 @@ static int keyscan_probe(struct platform_device *pdev) input_dev->id.bustype = BUS_HOST; + keypad_data->input_dev = input_dev; + error = keypad_matrix_key_parse_dt(keypad_data); if (error) return error; @@ -168,8 +170,6 @@ static int keyscan_probe(struct platform_device *pdev) input_set_drvdata(input_dev, keypad_data); - keypad_data->input_dev = input_dev; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); keypad_data->base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(keypad_data->base)) From 7ad222b3aed350adfc27ee7eec4587ffe55dfdce Mon Sep 17 00:00:00 2001 From: Mauro Ciancio Date: Mon, 14 Jan 2019 10:24:53 -0300 Subject: [PATCH 1292/1436] Input: elan_i2c - add ACPI ID for touchpad in Lenovo V330-15ISK This adds ELAN0617 to the ACPI table to support Elan touchpad found in Lenovo V330-15ISK. Signed-off-by: Mauro Ciancio Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index a94b6494e71a..225ae6980182 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1345,6 +1345,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN060C", 0 }, { "ELAN0611", 0 }, { "ELAN0612", 0 }, + { "ELAN0617", 0 }, { "ELAN0618", 0 }, { "ELAN061C", 0 }, { "ELAN061D", 0 }, From 289460404f6947ef1c38e67d680be9a84161250b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 17 Feb 2019 07:18:41 +0000 Subject: [PATCH 1293/1436] mlxsw: __mlxsw_sp_port_headroom_set(): Fix a use of local variable The function-local variable "delay" enters the loop interpreted as delay in bits. However, inside the loop it gets overwritten by the result of mlxsw_sp_pg_buf_delay_get(), and thus leaves the loop as quantity in cells. Thus on second and further loop iterations, the headroom for a given priority is configured with a wrong size. Fix by introducing a loop-local variable, delay_cells. Rename thres to thres_cells for consistency. Fixes: f417f04da589 ("mlxsw: spectrum: Refactor port buffer configuration") Signed-off-by: Petr Machata Acked-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 32519c93df17..b65e274b02e9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -862,8 +862,9 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { bool configure = false; bool pfc = false; + u16 thres_cells; + u16 delay_cells; bool lossy; - u16 thres; for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++) { if (prio_tc[j] == i) { @@ -877,10 +878,11 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, continue; lossy = !(pfc || pause_en); - thres = mlxsw_sp_pg_buf_threshold_get(mlxsw_sp, mtu); - delay = mlxsw_sp_pg_buf_delay_get(mlxsw_sp, mtu, delay, pfc, - pause_en); - mlxsw_sp_pg_buf_pack(pbmc_pl, i, thres + delay, thres, lossy); + thres_cells = mlxsw_sp_pg_buf_threshold_get(mlxsw_sp, mtu); + delay_cells = mlxsw_sp_pg_buf_delay_get(mlxsw_sp, mtu, delay, + pfc, pause_en); + mlxsw_sp_pg_buf_pack(pbmc_pl, i, thres_cells + delay_cells, + thres_cells, lossy); } return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); From 744e458aebf8dc7f33eee9af61aeb0145de921a6 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 17 Feb 2019 10:28:33 +0100 Subject: [PATCH 1294/1436] net: phy: add helper linkmode_adv_to_mii_10gbt_adv_t Add a helper linkmode_adv_to_mii_10gbt_adv_t(), similar to linkmode_adv_to_mii_adv_t. Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/mdio.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/include/linux/mdio.h b/include/linux/mdio.h index bfa7114167d7..dd46828b4c47 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -261,6 +261,31 @@ static inline u16 ethtool_adv_to_mmd_eee_adv_t(u32 adv) return reg; } +/** + * linkmode_adv_to_mii_10gbt_adv_t + * @advertising: the linkmode advertisement settings + * + * A small helper function that translates linkmode advertisement + * settings to phy autonegotiation advertisements for the C45 + * 10GBASE-T AN CONTROL (7.32) register. + */ +static inline u32 linkmode_adv_to_mii_10gbt_adv_t(unsigned long *advertising) +{ + u32 result = 0; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + advertising)) + result |= MDIO_AN_10GBT_CTRL_ADV2_5G; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + advertising)) + result |= MDIO_AN_10GBT_CTRL_ADV5G; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + advertising)) + result |= MDIO_AN_10GBT_CTRL_ADV10G; + + return result; +} + int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); From 9a5dc8af441668a3db7fdcd927cb288be62c0a2e Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 17 Feb 2019 10:29:19 +0100 Subject: [PATCH 1295/1436] net: phy: add genphy_c45_an_config_aneg C45 configuration of 10/100 and multi-giga bit auto negotiation advertisement is standardized. Configuration of 1000Base-T however appears to be vendor specific. Move the generic code out of the Marvell driver into the common phy-c45.c file. v2: - change function name to genphy_c45_an_config_aneg Signed-off-by: Andrew Lunn [hkallweit1@gmail.com: use new helper linkmode_adv_to_mii_10gbt_adv_t and split patch] Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy-c45.c | 44 +++++++++++++++++++++++++++++++++++++++ include/linux/phy.h | 1 + 2 files changed, 45 insertions(+) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 7af5fa81daf6..e17672bd180e 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -74,6 +74,50 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev) } EXPORT_SYMBOL_GPL(genphy_c45_pma_setup_forced); +/** + * genphy_c45_an_config_aneg - configure advertisement registers + * @phydev: target phy_device struct + * + * Configure advertisement registers based on modes set in phydev->advertising + * + * Returns negative errno code on failure, 0 if advertisement didn't change, + * or 1 if advertised modes changed. + */ +int genphy_c45_an_config_aneg(struct phy_device *phydev) +{ + int changed = 0, ret; + u32 adv; + + linkmode_and(phydev->advertising, phydev->advertising, + phydev->supported); + + adv = linkmode_adv_to_mii_adv_t(phydev->advertising); + + ret = phy_modify_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE, + ADVERTISE_ALL | ADVERTISE_100BASE4 | + ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM, + adv); + if (ret < 0) + return ret; + if (ret > 0) + changed = 1; + + adv = linkmode_adv_to_mii_10gbt_adv_t(phydev->advertising); + + ret = phy_modify_mmd(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL, + MDIO_AN_10GBT_CTRL_ADV10G | + MDIO_AN_10GBT_CTRL_ADV5G | + MDIO_AN_10GBT_CTRL_ADV2_5G, + adv); + if (ret < 0) + return ret; + if (ret > 0) + changed = 1; + + return changed; +} +EXPORT_SYMBOL_GPL(genphy_c45_an_config_aneg); + /** * genphy_c45_an_disable_aneg - disable auto-negotiation * @phydev: target phy_device struct diff --git a/include/linux/phy.h b/include/linux/phy.h index bf1070c2a53b..3db507e68191 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1101,6 +1101,7 @@ int genphy_c45_read_link(struct phy_device *phydev); int genphy_c45_read_lpa(struct phy_device *phydev); int genphy_c45_read_pma(struct phy_device *phydev); int genphy_c45_pma_setup_forced(struct phy_device *phydev); +int genphy_c45_an_config_aneg(struct phy_device *phydev); int genphy_c45_an_disable_aneg(struct phy_device *phydev); int genphy_c45_read_mdix(struct phy_device *phydev); int genphy_c45_pma_read_abilities(struct phy_device *phydev); From 3de97f3c630850caf0fff67555bbe3f7ba61c308 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 17 Feb 2019 10:30:45 +0100 Subject: [PATCH 1296/1436] net: phy: marvell10g: use genphy_c45_an_config_aneg Use new function genphy_c45_config_aneg() in mv3310_config_aneg(). v2: - add a comment regarding 1000BaseT vendor registers v3: - rebased Signed-off-by: Andrew Lunn [hkallweit1@gmail.com: patch splitted] Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 496805c0ddfe..8955740832cc 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -279,18 +279,15 @@ static int mv3310_config_aneg(struct phy_device *phydev) return genphy_c45_an_disable_aneg(phydev); } - linkmode_and(phydev->advertising, phydev->advertising, - phydev->supported); - - ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE, - ADVERTISE_ALL | ADVERTISE_100BASE4 | - ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM, - linkmode_adv_to_mii_adv_t(phydev->advertising)); + ret = genphy_c45_an_config_aneg(phydev); if (ret < 0) return ret; if (ret > 0) changed = true; + /* Clause 45 has no standardized support for 1000BaseT, therefore + * use vendor registers for this mode. + */ reg = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising); ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MV_AN_CTRL1000, ADVERTISE_1000FULL | ADVERTISE_1000HALF, reg); @@ -299,20 +296,6 @@ static int mv3310_config_aneg(struct phy_device *phydev) if (ret > 0) changed = true; - /* 10G control register */ - if (linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, - phydev->advertising)) - reg = MDIO_AN_10GBT_CTRL_ADV10G; - else - reg = 0; - - ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL, - MDIO_AN_10GBT_CTRL_ADV10G, reg); - if (ret < 0) - return ret; - if (ret > 0) - changed = true; - if (changed) ret = genphy_c45_restart_aneg(phydev); From 3ce2a027ae4e3ede876da6913641ad37c33b698b Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 17 Feb 2019 10:32:29 +0100 Subject: [PATCH 1297/1436] net: phy: marvell10g: check for newly set aneg Even if the advertisement registers content didn't change, we may have just switched to aneg, and therefore have to trigger an aneg restart. This matches the behavior of genphy_config_aneg(). Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 8955740832cc..b83eb19cf8bb 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -296,6 +296,16 @@ static int mv3310_config_aneg(struct phy_device *phydev) if (ret > 0) changed = true; + if (!changed) { + /* Configure and restart aneg if it wasn't set before */ + ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1); + if (ret < 0) + return ret; + + if (!(ret & MDIO_AN_CTRL1_ENABLE)) + changed = 1; + } + if (changed) ret = genphy_c45_restart_aneg(phydev); From 9c03b282badb7600058321c80e367f93b4013bd0 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 14 Feb 2019 09:15:10 -0800 Subject: [PATCH 1298/1436] trace: events: add a few neigh tracepoints The goal here is to trace neigh state changes covering all possible neigh update paths. Plus have a specific trace point in neigh_update to cover flags sent to neigh_update. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/trace/events/neigh.h | 204 +++++++++++++++++++++++++++++++++++ net/core/net-traces.c | 8 ++ 2 files changed, 212 insertions(+) create mode 100644 include/trace/events/neigh.h diff --git a/include/trace/events/neigh.h b/include/trace/events/neigh.h new file mode 100644 index 000000000000..ed10353eee73 --- /dev/null +++ b/include/trace/events/neigh.h @@ -0,0 +1,204 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM neigh + +#if !defined(_TRACE_NEIGH_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NEIGH_H + +#include +#include +#include +#include + +#define neigh_state_str(state) \ + __print_symbolic(state, \ + { NUD_INCOMPLETE, "incomplete" }, \ + { NUD_REACHABLE, "reachable" }, \ + { NUD_STALE, "stale" }, \ + { NUD_DELAY, "delay" }, \ + { NUD_PROBE, "probe" }, \ + { NUD_FAILED, "failed" }) + +TRACE_EVENT(neigh_update, + + TP_PROTO(struct neighbour *n, const u8 *lladdr, u8 new, + u32 flags, u32 nlmsg_pid), + + TP_ARGS(n, lladdr, new, flags, nlmsg_pid), + + TP_STRUCT__entry( + __field(u32, family) + __string(dev, (n->dev ? n->dev->name : "NULL")) + __array(u8, lladdr, MAX_ADDR_LEN) + __field(u8, lladdr_len) + __field(u8, flags) + __field(u8, nud_state) + __field(u8, type) + __field(u8, dead) + __field(int, refcnt) + __array(__u8, primary_key4, 4) + __array(__u8, primary_key6, 16) + __field(unsigned long, confirmed) + __field(unsigned long, updated) + __field(unsigned long, used) + __array(u8, new_lladdr, MAX_ADDR_LEN) + __field(u8, new_state) + __field(u32, update_flags) + __field(u32, pid) + ), + + TP_fast_assign( + int lladdr_len = (n->dev ? n->dev->addr_len : MAX_ADDR_LEN); + struct in6_addr *pin6; + __be32 *p32; + + __entry->family = n->tbl->family; + __assign_str(dev, (n->dev ? n->dev->name : "NULL")); + __entry->lladdr_len = lladdr_len; + memcpy(__entry->lladdr, n->ha, lladdr_len); + __entry->flags = n->flags; + __entry->nud_state = n->nud_state; + __entry->type = n->type; + __entry->dead = n->dead; + __entry->refcnt = refcount_read(&n->refcnt); + pin6 = (struct in6_addr *)__entry->primary_key6; + p32 = (__be32 *)__entry->primary_key4; + + if (n->tbl->family == AF_INET) + *p32 = *(__be32 *)n->primary_key; + else + *p32 = 0; + +#if IS_ENABLED(CONFIG_IPV6) + if (n->tbl->family == AF_INET6) { + pin6 = (struct in6_addr *)__entry->primary_key6; + *pin6 = *(struct in6_addr *)n->primary_key; + } else +#endif + { + ipv6_addr_set_v4mapped(*p32, pin6); + } + __entry->confirmed = n->confirmed; + __entry->updated = n->updated; + __entry->used = n->used; + if (lladdr) + memcpy(__entry->new_lladdr, lladdr, lladdr_len); + __entry->new_state = new; + __entry->update_flags = flags; + __entry->pid = nlmsg_pid; + ), + + TP_printk("family %d dev %s lladdr %s flags %02x nud_state %s type %02x " + "dead %d refcnt %d primary_key4 %pI4 primary_key6 %pI6c " + "confirmed %lu updated %lu used %lu new_lladdr %s " + "new_state %02x update_flags %02x pid %d", + __entry->family, __get_str(dev), + __print_hex_str(__entry->lladdr, __entry->lladdr_len), + __entry->flags, neigh_state_str(__entry->nud_state), + __entry->type, __entry->dead, __entry->refcnt, + __entry->primary_key4, __entry->primary_key6, + __entry->confirmed, __entry->updated, __entry->used, + __print_hex_str(__entry->new_lladdr, __entry->lladdr_len), + __entry->new_state, + __entry->update_flags, __entry->pid) +); + +DECLARE_EVENT_CLASS(neigh__update, + TP_PROTO(struct neighbour *n, int err), + TP_ARGS(n, err), + TP_STRUCT__entry( + __field(u32, family) + __string(dev, (n->dev ? n->dev->name : "NULL")) + __array(u8, lladdr, MAX_ADDR_LEN) + __field(u8, lladdr_len) + __field(u8, flags) + __field(u8, nud_state) + __field(u8, type) + __field(u8, dead) + __field(int, refcnt) + __array(__u8, primary_key4, 4) + __array(__u8, primary_key6, 16) + __field(unsigned long, confirmed) + __field(unsigned long, updated) + __field(unsigned long, used) + __field(u32, err) + ), + + TP_fast_assign( + int lladdr_len = (n->dev ? n->dev->addr_len : MAX_ADDR_LEN); + struct in6_addr *pin6; + __be32 *p32; + + __entry->family = n->tbl->family; + __assign_str(dev, (n->dev ? n->dev->name : "NULL")); + __entry->lladdr_len = lladdr_len; + memcpy(__entry->lladdr, n->ha, lladdr_len); + __entry->flags = n->flags; + __entry->nud_state = n->nud_state; + __entry->type = n->type; + __entry->dead = n->dead; + __entry->refcnt = refcount_read(&n->refcnt); + pin6 = (struct in6_addr *)__entry->primary_key6; + p32 = (__be32 *)__entry->primary_key4; + + if (n->tbl->family == AF_INET) + *p32 = *(__be32 *)n->primary_key; + else + *p32 = 0; + +#if IS_ENABLED(CONFIG_IPV6) + if (n->tbl->family == AF_INET6) { + pin6 = (struct in6_addr *)__entry->primary_key6; + *pin6 = *(struct in6_addr *)n->primary_key; + } else +#endif + { + ipv6_addr_set_v4mapped(*p32, pin6); + } + + __entry->confirmed = n->confirmed; + __entry->updated = n->updated; + __entry->used = n->used; + __entry->err = err; + ), + + TP_printk("family %d dev %s lladdr %s flags %02x nud_state %s type %02x " + "dead %d refcnt %d primary_key4 %pI4 primary_key6 %pI6c " + "confirmed %lu updated %lu used %lu err %d", + __entry->family, __get_str(dev), + __print_hex_str(__entry->lladdr, __entry->lladdr_len), + __entry->flags, neigh_state_str(__entry->nud_state), + __entry->type, __entry->dead, __entry->refcnt, + __entry->primary_key4, __entry->primary_key6, + __entry->confirmed, __entry->updated, __entry->used, + __entry->err) +); + +DEFINE_EVENT(neigh__update, neigh_update_done, + TP_PROTO(struct neighbour *neigh, int err), + TP_ARGS(neigh, err) +); + +DEFINE_EVENT(neigh__update, neigh_timer_handler, + TP_PROTO(struct neighbour *neigh, int err), + TP_ARGS(neigh, err) +); + +DEFINE_EVENT(neigh__update, neigh_event_send_done, + TP_PROTO(struct neighbour *neigh, int err), + TP_ARGS(neigh, err) +); + +DEFINE_EVENT(neigh__update, neigh_event_send_dead, + TP_PROTO(struct neighbour *neigh, int err), + TP_ARGS(neigh, err) +); + +DEFINE_EVENT(neigh__update, neigh_cleanup_and_release, + TP_PROTO(struct neighbour *neigh, int rc), + TP_ARGS(neigh, rc) +); + +#endif /* _TRACE_NEIGH_H */ + +/* This part must be outside protection */ +#include diff --git a/net/core/net-traces.c b/net/core/net-traces.c index 419af6dfe29f..470b179d599e 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -43,6 +43,14 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete); EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update); #endif +#include +EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update); +EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update_done); +EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_timer_handler); +EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_event_send_done); +EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_event_send_dead); +EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_cleanup_and_release); + EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll); From 56dd18a49f6ee62ce06eb6cd0aec050e23d4e603 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 14 Feb 2019 09:15:11 -0800 Subject: [PATCH 1299/1436] neigh: hook tracepoints in neigh update code hook tracepoints at the end of functions that update a neigh entry. neigh_update gets an additional tracepoint to trace the update flags and old and new neigh states. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/core/neighbour.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 4230400b9a30..30f6fd8f68e0 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -42,6 +42,8 @@ #include #include +#include + #define DEBUG #define NEIGH_DEBUG 1 #define neigh_dbg(level, fmt, ...) \ @@ -102,6 +104,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh) if (neigh->parms->neigh_cleanup) neigh->parms->neigh_cleanup(neigh); + trace_neigh_cleanup_and_release(neigh, 0); __neigh_notify(neigh, RTM_DELNEIGH, 0, 0); call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); neigh_release(neigh); @@ -1095,6 +1098,8 @@ out: if (notify) neigh_update_notify(neigh, 0); + trace_neigh_timer_handler(neigh, 0); + neigh_release(neigh); } @@ -1165,6 +1170,7 @@ out_unlock_bh: else write_unlock(&neigh->lock); local_bh_enable(); + trace_neigh_event_send_done(neigh, rc); return rc; out_dead: @@ -1172,6 +1178,7 @@ out_dead: goto out_unlock_bh; write_unlock_bh(&neigh->lock); kfree_skb(skb); + trace_neigh_event_send_dead(neigh, 1); return 1; } EXPORT_SYMBOL(__neigh_event_send); @@ -1227,6 +1234,8 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, struct net_device *dev; int update_isrouter = 0; + trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid); + write_lock_bh(&neigh->lock); dev = neigh->dev; @@ -1393,6 +1402,8 @@ out: if (notify) neigh_update_notify(neigh, nlmsg_pid); + trace_neigh_update_done(neigh, err); + return err; } From 6a79507cfe94c7729207659501ff88914b3eb198 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Thu, 14 Feb 2019 20:22:55 +0000 Subject: [PATCH 1300/1436] mlxsw: core: Extend thermal module with per QSFP module thermal zones Add a dedicated thermal zone for each QSFP/SFP module. The current temperature is obtained from the module's temperature sensor and the trip points are set based on the warning and critical thresholds read from the module. A cooling device (fan) is bound to all the thermal zones. The thermal zone governor is set to user space in order to avoid collisions between thermal zones. For example, one thermal zone might want to increase the speed of the fan, whereas another one would like to decrease it. Deferring this decision to user space allows the user to the take the most suitable decision. Signed-off-by: Vadim Pasternak Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/core_thermal.c | 400 ++++++++++++++++++ 1 file changed, 400 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 821fef2e2230..0b85c7252f9e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -9,8 +9,10 @@ #include #include #include +#include #include "core.h" +#include "core_env.h" #define MLXSW_THERMAL_POLL_INT 1000 /* ms */ #define MLXSW_THERMAL_SLOW_POLL_INT 20000 /* ms */ @@ -19,6 +21,8 @@ #define MLXSW_THERMAL_ASIC_TEMP_HOT 105000 /* 105C */ #define MLXSW_THERMAL_ASIC_TEMP_CRIT 110000 /* 110C */ #define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ +#define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2) +#define MLXSW_THERMAL_ZONE_MAX_NAME 16 #define MLXSW_THERMAL_MAX_STATE 10 #define MLXSW_THERMAL_MAX_DUTY 255 /* Minimum and maximum fan allowed speed in percent: from 20% to 100%. Values @@ -36,6 +40,13 @@ static char * const mlxsw_thermal_external_allowed_cdev[] = { "mlxreg_fan", }; +enum mlxsw_thermal_trips { + MLXSW_THERMAL_TEMP_TRIP_NORM, + MLXSW_THERMAL_TEMP_TRIP_HIGH, + MLXSW_THERMAL_TEMP_TRIP_HOT, + MLXSW_THERMAL_TEMP_TRIP_CRIT, +}; + struct mlxsw_thermal_trip { int type; int temp; @@ -80,6 +91,16 @@ static const struct mlxsw_thermal_trip default_thermal_trips[] = { /* Make sure all trips are writable */ #define MLXSW_THERMAL_TRIP_MASK (BIT(MLXSW_THERMAL_NUM_TRIPS) - 1) +struct mlxsw_thermal; + +struct mlxsw_thermal_module { + struct mlxsw_thermal *parent; + struct thermal_zone_device *tzdev; + struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; + enum thermal_device_mode mode; + int module; +}; + struct mlxsw_thermal { struct mlxsw_core *core; const struct mlxsw_bus_info *bus_info; @@ -89,6 +110,8 @@ struct mlxsw_thermal { u8 cooling_levels[MLXSW_THERMAL_MAX_STATE + 1]; struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; enum thermal_device_mode mode; + struct mlxsw_thermal_module *tz_module_arr; + unsigned int tz_module_num; }; static inline u8 mlxsw_state_to_duty(int state) @@ -122,6 +145,57 @@ static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal, return -ENODEV; } +static void +mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz) +{ + tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = 0; + tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = 0; + tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = 0; + tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = 0; +} + +static int +mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, + struct mlxsw_thermal_module *tz) +{ + int crit_temp, emerg_temp; + int err; + + err = mlxsw_env_module_temp_thresholds_get(core, tz->module, + SFP_TEMP_HIGH_WARN, + &crit_temp); + if (err) + return err; + + err = mlxsw_env_module_temp_thresholds_get(core, tz->module, + SFP_TEMP_HIGH_ALARM, + &emerg_temp); + if (err) + return err; + + /* According to the system thermal requirements, the thermal zones are + * defined with four trip points. The critical and emergency + * temperature thresholds, provided by QSFP module are set as "active" + * and "hot" trip points, "normal" and "critical" trip points are + * derived from "active" and "hot" by subtracting or adding double + * hysteresis value. + */ + if (crit_temp >= MLXSW_THERMAL_MODULE_TEMP_SHIFT) + tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp - + MLXSW_THERMAL_MODULE_TEMP_SHIFT; + else + tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp; + tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp; + tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp; + if (emerg_temp > crit_temp) + tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp + + MLXSW_THERMAL_MODULE_TEMP_SHIFT; + else + tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp; + + return 0; +} + static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev, struct thermal_cooling_device *cdev) { @@ -291,6 +365,204 @@ static struct thermal_zone_device_ops mlxsw_thermal_ops = { .set_trip_hyst = mlxsw_thermal_set_trip_hyst, }; +static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev, + struct thermal_cooling_device *cdev) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + int i, j, err; + + /* If the cooling device is one of ours bind it */ + if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0) + return 0; + + for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) { + const struct mlxsw_thermal_trip *trip = &tz->trips[i]; + + err = thermal_zone_bind_cooling_device(tzdev, i, cdev, + trip->max_state, + trip->min_state, + THERMAL_WEIGHT_DEFAULT); + if (err < 0) + goto err_bind_cooling_device; + } + return 0; + +err_bind_cooling_device: + for (j = i - 1; j >= 0; j--) + thermal_zone_unbind_cooling_device(tzdev, j, cdev); + return err; +} + +static int mlxsw_thermal_module_unbind(struct thermal_zone_device *tzdev, + struct thermal_cooling_device *cdev) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + int i; + int err; + + /* If the cooling device is one of ours unbind it */ + if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0) + return 0; + + for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) { + err = thermal_zone_unbind_cooling_device(tzdev, i, cdev); + WARN_ON(err); + } + return err; +} + +static int mlxsw_thermal_module_mode_get(struct thermal_zone_device *tzdev, + enum thermal_device_mode *mode) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + + *mode = tz->mode; + + return 0; +} + +static int mlxsw_thermal_module_mode_set(struct thermal_zone_device *tzdev, + enum thermal_device_mode mode) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + + mutex_lock(&tzdev->lock); + + if (mode == THERMAL_DEVICE_ENABLED) + tzdev->polling_delay = thermal->polling_delay; + else + tzdev->polling_delay = 0; + + mutex_unlock(&tzdev->lock); + + tz->mode = mode; + thermal_zone_device_update(tzdev, THERMAL_EVENT_UNSPECIFIED); + + return 0; +} + +static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, + int *p_temp) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + struct device *dev = thermal->bus_info->dev; + char mtbr_pl[MLXSW_REG_MTBR_LEN]; + u16 temp; + int err; + + /* Read module temperature. */ + mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + + tz->module, 1); + err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtbr), mtbr_pl); + if (err) + return err; + + mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL); + /* Update temperature. */ + switch (temp) { + case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ + case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ + case MLXSW_REG_MTBR_INDEX_NA: /* fall-through */ + case MLXSW_REG_MTBR_BAD_SENS_INFO: + temp = 0; + break; + default: + temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + /* Reset all trip point. */ + mlxsw_thermal_module_trips_reset(tz); + /* Update trip points. */ + err = mlxsw_thermal_module_trips_update(dev, thermal->core, + tz); + if (err) + return err; + break; + } + + *p_temp = (int) temp; + return 0; +} + +static int +mlxsw_thermal_module_trip_type_get(struct thermal_zone_device *tzdev, int trip, + enum thermal_trip_type *p_type) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) + return -EINVAL; + + *p_type = tz->trips[trip].type; + return 0; +} + +static int +mlxsw_thermal_module_trip_temp_get(struct thermal_zone_device *tzdev, + int trip, int *p_temp) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) + return -EINVAL; + + *p_temp = tz->trips[trip].temp; + return 0; +} + +static int +mlxsw_thermal_module_trip_temp_set(struct thermal_zone_device *tzdev, + int trip, int temp) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS || + temp > tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp) + return -EINVAL; + + tz->trips[trip].temp = temp; + return 0; +} + +static int +mlxsw_thermal_module_trip_hyst_get(struct thermal_zone_device *tzdev, int trip, + int *p_hyst) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + + *p_hyst = tz->trips[trip].hyst; + return 0; +} + +static int +mlxsw_thermal_module_trip_hyst_set(struct thermal_zone_device *tzdev, int trip, + int hyst) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + + tz->trips[trip].hyst = hyst; + return 0; +} + +static struct thermal_zone_params mlxsw_thermal_module_params = { + .governor_name = "user_space", +}; + +static struct thermal_zone_device_ops mlxsw_thermal_module_ops = { + .bind = mlxsw_thermal_module_bind, + .unbind = mlxsw_thermal_module_unbind, + .get_mode = mlxsw_thermal_module_mode_get, + .set_mode = mlxsw_thermal_module_mode_set, + .get_temp = mlxsw_thermal_module_temp_get, + .get_trip_type = mlxsw_thermal_module_trip_type_get, + .get_trip_temp = mlxsw_thermal_module_trip_temp_get, + .set_trip_temp = mlxsw_thermal_module_trip_temp_set, + .get_trip_hyst = mlxsw_thermal_module_trip_hyst_get, + .set_trip_hyst = mlxsw_thermal_module_trip_hyst_set, +}; + static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev, unsigned long *p_state) { @@ -391,6 +663,123 @@ static const struct thermal_cooling_device_ops mlxsw_cooling_ops = { .set_cur_state = mlxsw_thermal_set_cur_state, }; +static int +mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz) +{ + char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME]; + int err; + + snprintf(tz_name, sizeof(tz_name), "mlxsw-module%d", + module_tz->module + 1); + module_tz->tzdev = thermal_zone_device_register(tz_name, + MLXSW_THERMAL_NUM_TRIPS, + MLXSW_THERMAL_TRIP_MASK, + module_tz, + &mlxsw_thermal_module_ops, + &mlxsw_thermal_module_params, + 0, 0); + if (IS_ERR(module_tz->tzdev)) { + err = PTR_ERR(module_tz->tzdev); + return err; + } + + return 0; +} + +static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev) +{ + thermal_zone_device_unregister(tzdev); +} + +static int +mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core, + struct mlxsw_thermal *thermal, u8 local_port) +{ + struct mlxsw_thermal_module *module_tz; + char pmlp_pl[MLXSW_REG_PMLP_LEN]; + u8 width, module; + int err; + + mlxsw_reg_pmlp_pack(pmlp_pl, local_port); + err = mlxsw_reg_query(core, MLXSW_REG(pmlp), pmlp_pl); + if (err) + return err; + + width = mlxsw_reg_pmlp_width_get(pmlp_pl); + if (!width) + return 0; + + module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0); + module_tz = &thermal->tz_module_arr[module]; + module_tz->module = module; + module_tz->parent = thermal; + memcpy(module_tz->trips, default_thermal_trips, + sizeof(thermal->trips)); + /* Initialize all trip point. */ + mlxsw_thermal_module_trips_reset(module_tz); + /* Update trip point according to the module data. */ + err = mlxsw_thermal_module_trips_update(dev, core, module_tz); + if (err) + return err; + + thermal->tz_module_num++; + + return 0; +} + +static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz) +{ + if (module_tz && module_tz->tzdev) { + mlxsw_thermal_module_tz_fini(module_tz->tzdev); + module_tz->tzdev = NULL; + } +} + +static int +mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, + struct mlxsw_thermal *thermal) +{ + unsigned int module_count = mlxsw_core_max_ports(core); + int i, err; + + thermal->tz_module_arr = kcalloc(module_count, + sizeof(*thermal->tz_module_arr), + GFP_KERNEL); + if (!thermal->tz_module_arr) + return -ENOMEM; + + for (i = 1; i < module_count; i++) { + err = mlxsw_thermal_module_init(dev, core, thermal, i); + if (err) + goto err_unreg_tz_module_arr; + } + + for (i = 0; i < thermal->tz_module_num; i++) { + err = mlxsw_thermal_module_tz_init(&thermal->tz_module_arr[i]); + if (err) + goto err_unreg_tz_module_arr; + } + + return 0; + +err_unreg_tz_module_arr: + for (i = module_count - 1; i >= 0; i--) + mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]); + kfree(thermal->tz_module_arr); + return err; +} + +static void +mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal) +{ + unsigned int module_count = mlxsw_core_max_ports(thermal->core); + int i; + + for (i = module_count - 1; i >= 0; i--) + mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]); + kfree(thermal->tz_module_arr); +} + int mlxsw_thermal_init(struct mlxsw_core *core, const struct mlxsw_bus_info *bus_info, struct mlxsw_thermal **p_thermal) @@ -477,9 +866,19 @@ int mlxsw_thermal_init(struct mlxsw_core *core, goto err_unreg_cdevs; } + err = mlxsw_thermal_modules_init(dev, core, thermal); + if (err) + goto err_unreg_tzdev; + thermal->mode = THERMAL_DEVICE_ENABLED; *p_thermal = thermal; return 0; + +err_unreg_tzdev: + if (thermal->tzdev) { + thermal_zone_device_unregister(thermal->tzdev); + thermal->tzdev = NULL; + } err_unreg_cdevs: for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) if (thermal->cdevs[i]) @@ -493,6 +892,7 @@ void mlxsw_thermal_fini(struct mlxsw_thermal *thermal) { int i; + mlxsw_thermal_modules_fini(thermal); if (thermal->tzdev) { thermal_zone_device_unregister(thermal->tzdev); thermal->tzdev = NULL; From 1eb00162f86abda9535de5cbaf6e168f7747ee90 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 14 Feb 2019 22:35:47 +0100 Subject: [PATCH 1301/1436] net: caif: use skb helpers instead of open-coding them Use existing skb_put_data() and skb_trim() instead of open-coding them, with the skb_put_data() first so that logically, `skb` still contains the data to be copied in its data..tail area when skb_put_data() reads it. This change on its own is a cleanup, and it is also necessary for potential future integration of skbuffs with things like KASAN. Signed-off-by: Jann Horn Signed-off-by: David S. Miller --- net/caif/cfpkt_skbuff.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index 38c2b7a890dd..37ac5ca0ffdf 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -319,16 +319,12 @@ struct cfpkt *cfpkt_append(struct cfpkt *dstpkt, if (tmppkt == NULL) return NULL; tmp = pkt_to_skb(tmppkt); - skb_set_tail_pointer(tmp, dstlen); - tmp->len = dstlen; - memcpy(tmp->data, dst->data, dstlen); + skb_put_data(tmp, dst->data, dstlen); cfpkt_destroy(dstpkt); dst = tmp; } - memcpy(skb_tail_pointer(dst), add->data, skb_headlen(add)); + skb_put_data(dst, add->data, skb_headlen(add)); cfpkt_destroy(addpkt); - dst->tail += addlen; - dst->len += addlen; return skb_to_pkt(dst); } @@ -359,13 +355,11 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos) if (skb2 == NULL) return NULL; - /* Reduce the length of the original packet */ - skb_set_tail_pointer(skb, pos); - skb->len = pos; + skb_put_data(skb2, split, len2nd); + + /* Reduce the length of the original packet */ + skb_trim(skb, pos); - memcpy(skb2->data, split, len2nd); - skb2->tail += len2nd; - skb2->len += len2nd; skb2->priority = skb->priority; return skb_to_pkt(skb2); } From 31a998487641bc35bc7465781486ea8f8ddc0b82 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 15 Feb 2019 17:20:07 +0200 Subject: [PATCH 1302/1436] net: sched: fw: don't set arg->stop in fw_walk() when empty Some classifiers set arg->stop in their implementation of tp->walk() API when empty. Most of classifiers do not adhere to that convention. Do not set arg->stop in fw_walk() to unify tp->walk() behavior among classifier implementations. Fixes: ed76f5edccc9 ("net: sched: protect filter_chain list with filter_chain_lock mutex") Signed-off-by: Vlad Buslov Signed-off-by: David S. Miller --- net/sched/cls_fw.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 317151bae73b..4e34966f2ae2 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -362,10 +362,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg, struct fw_head *head = rtnl_dereference(tp->root); int h; - if (head == NULL) - arg->stop = 1; - - if (arg->stop) + if (head == NULL || arg->stop) return; for (h = 0; h < HTSIZE; h++) { From 3027ff41f67cbef1ad58af68601455db81b0fae5 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 15 Feb 2019 17:21:00 +0200 Subject: [PATCH 1303/1436] net: sched: route: don't set arg->stop in route4_walk() when empty Some classifiers set arg->stop in their implementation of tp->walk() API when empty. Most of classifiers do not adhere to that convention. Do not set arg->stop in route4_walk() to unify tp->walk() behavior among classifier implementations. Fixes: ed76f5edccc9 ("net: sched: protect filter_chain list with filter_chain_lock mutex") Signed-off-by: Vlad Buslov Signed-off-by: David S. Miller --- net/sched/cls_route.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index e590c3a2999d..444d15a75d98 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -567,10 +567,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg, struct route4_head *head = rtnl_dereference(tp->root); unsigned int h, h1; - if (head == NULL) - arg->stop = 1; - - if (arg->stop) + if (head == NULL || arg->stop) return; for (h = 0; h <= 256; h++) { From d66022cd1623edfd7b43e13c9e973403a811af02 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 15 Feb 2019 17:17:56 +0200 Subject: [PATCH 1304/1436] net: sched: matchall: verify that filter is not NULL in mall_walk() Check that filter is not NULL before passing it to tcf_walker->fn() callback. This can happen when mall_change() failed to offload filter to hardware. Fixes: ed76f5edccc9 ("net: sched: protect filter_chain list with filter_chain_lock mutex") Reported-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: Vlad Buslov Signed-off-by: David S. Miller --- net/sched/cls_matchall.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index a37137430e61..1f9d481b0fbb 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -247,6 +247,9 @@ static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg, if (arg->count < arg->skip) goto skip; + + if (!head) + return; if (arg->fn(tp, head, arg) < 0) arg->stop = 1; skip: From 8b58d12f4ae1a10037b824b1fb409cf424d6aaac Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 15 Feb 2019 17:18:44 +0200 Subject: [PATCH 1305/1436] net: sched: cgroup: verify that filter is not NULL during walk Check that filter is not NULL before passing it to tcf_walker->fn() callback in cls_cgroup_walk(). This can happen when cls_cgroup_change() failed to set first filter. Fixes: ed76f5edccc9 ("net: sched: protect filter_chain list with filter_chain_lock mutex") Signed-off-by: Vlad Buslov Signed-off-by: David S. Miller --- net/sched/cls_cgroup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 1cef3b416094..02b05066b635 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -158,6 +158,8 @@ static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg, if (arg->count < arg->skip) goto skip; + if (!head) + return; if (arg->fn(tp, head, arg) < 0) { arg->stop = 1; return; From a2703de7094211a77f458638289381b2955e0a5d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 14 Feb 2019 22:15:31 +0100 Subject: [PATCH 1306/1436] net: phy: improve phy_resolve_aneg_linkmode We have the settings array of modes which is sorted based on aneg priority. Instead of checking each mode manually let's simply iterate over the sorted settings. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy-core.c | 43 +++++++------------------------------- 1 file changed, 7 insertions(+), 36 deletions(-) diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index de58a59815d5..5016cd5fd7c7 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -421,45 +421,16 @@ void of_set_phy_eee_broken(struct phy_device *phydev) void phy_resolve_aneg_linkmode(struct phy_device *phydev) { __ETHTOOL_DECLARE_LINK_MODE_MASK(common); + int i; linkmode_and(common, phydev->lp_advertising, phydev->advertising); - if (linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, common)) { - phydev->speed = SPEED_10000; - phydev->duplex = DUPLEX_FULL; - } else if (linkmode_test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, - common)) { - phydev->speed = SPEED_5000; - phydev->duplex = DUPLEX_FULL; - } else if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, - common)) { - phydev->speed = SPEED_2500; - phydev->duplex = DUPLEX_FULL; - } else if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, - common)) { - phydev->speed = SPEED_1000; - phydev->duplex = DUPLEX_FULL; - } else if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, - common)) { - phydev->speed = SPEED_1000; - phydev->duplex = DUPLEX_HALF; - } else if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, - common)) { - phydev->speed = SPEED_100; - phydev->duplex = DUPLEX_FULL; - } else if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, - common)) { - phydev->speed = SPEED_100; - phydev->duplex = DUPLEX_HALF; - } else if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, - common)) { - phydev->speed = SPEED_10; - phydev->duplex = DUPLEX_FULL; - } else if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, - common)) { - phydev->speed = SPEED_10; - phydev->duplex = DUPLEX_HALF; - } + for (i = 0; i < ARRAY_SIZE(settings); i++) + if (test_bit(settings[i].bit, common)) { + phydev->speed = settings[i].speed; + phydev->duplex = settings[i].duplex; + break; + } if (phydev->duplex == DUPLEX_FULL) { phydev->pause = linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, From 5502b218e00161efd20ff1a789f51a332f13f8cd Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 14 Feb 2019 22:16:27 +0100 Subject: [PATCH 1307/1436] net: phy: use phy_resolve_aneg_linkmode in genphy_read_status Now that we have phy_resolve_aneg_linkmode() we can make genphy_read_status() much simpler. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index a752de2fff5e..7e71124bb20e 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1746,8 +1746,6 @@ int genphy_read_status(struct phy_device *phydev) int err; int lpa; int lpagb = 0; - int common_adv; - int common_adv_gb = 0; /* Update the link, but return if there was an error */ err = genphy_update_link(phydev); @@ -1779,7 +1777,6 @@ int genphy_read_status(struct phy_device *phydev) mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, lpagb); - common_adv_gb = lpagb & adv << 2; } lpa = phy_read(phydev, MII_LPA); @@ -1792,31 +1789,12 @@ int genphy_read_status(struct phy_device *phydev) if (adv < 0) return adv; - common_adv = lpa & adv; - phydev->speed = SPEED_10; phydev->duplex = DUPLEX_HALF; phydev->pause = 0; phydev->asym_pause = 0; - if (common_adv_gb & (LPA_1000FULL | LPA_1000HALF)) { - phydev->speed = SPEED_1000; - - if (common_adv_gb & LPA_1000FULL) - phydev->duplex = DUPLEX_FULL; - } else if (common_adv & (LPA_100FULL | LPA_100HALF)) { - phydev->speed = SPEED_100; - - if (common_adv & LPA_100FULL) - phydev->duplex = DUPLEX_FULL; - } else - if (common_adv & LPA_10FULL) - phydev->duplex = DUPLEX_FULL; - - if (phydev->duplex == DUPLEX_FULL) { - phydev->pause = lpa & LPA_PAUSE_CAP ? 1 : 0; - phydev->asym_pause = lpa & LPA_PAUSE_ASYM ? 1 : 0; - } + phy_resolve_aneg_linkmode(phydev); } else { int bmcr = phy_read(phydev, MII_BMCR); From 76726ccb7f461c83040e7082cf95fe1dea2afd1f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 14 Feb 2019 13:40:44 -0800 Subject: [PATCH 1308/1436] devlink: add flash update command Add devlink flash update command. Advanced NICs have firmware stored in flash and often cryptographically secured. Updating that flash is handled by management firmware. Ethtool has a flash update command which served us well, however, it has two shortcomings: - it takes rtnl_lock unnecessarily - really flash update has nothing to do with networking, so using a networking device as a handle is suboptimal, which leads us to the second one: - it requires a functioning netdev - in case device enters an error state and can't spawn a netdev (e.g. communication with the device fails) there is no netdev to use as a handle for flashing. Devlink already has the ability to report the firmware versions, now with the ability to update the firmware/flash we will be able to recover devices in bad state. To enable updates of sub-components of the FW allow passing component name. This name should correspond to one of the versions reported in devlink info. v1: - replace target id with component name (Jiri). Signed-off-by: Jakub Kicinski Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 3 +++ include/uapi/linux/devlink.h | 6 ++++++ net/core/devlink.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/include/net/devlink.h b/include/net/devlink.h index c6d88759b7d5..18d7a051f412 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -521,6 +521,9 @@ struct devlink_ops { struct netlink_ext_ack *extack); int (*info_get)(struct devlink *devlink, struct devlink_info_req *req, struct netlink_ext_ack *extack); + int (*flash_update)(struct devlink *devlink, const char *file_name, + const char *component, + struct netlink_ext_ack *extack); }; static inline void *devlink_priv(struct devlink *devlink) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 72d9f7c89190..53de8802a000 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -103,6 +103,8 @@ enum devlink_command { DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, + DEVLINK_CMD_FLASH_UPDATE, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -326,6 +328,10 @@ enum devlink_attr { DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, /* u64 */ DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, /* u64 */ DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, /* u8 */ + + DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME, /* string */ + DEVLINK_ATTR_FLASH_UPDATE_COMPONENT, /* string */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 1d7502a5a651..4a1ad0b13e52 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2662,6 +2662,27 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) return devlink->ops->reload(devlink, info->extack); } +static int devlink_nl_cmd_flash_update(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const char *file_name, *component; + struct nlattr *nla_component; + + if (!devlink->ops->flash_update) + return -EOPNOTSUPP; + + if (!info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME]) + return -EINVAL; + file_name = nla_data(info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME]); + + nla_component = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT]; + component = nla_component ? nla_data(nla_component) : NULL; + + return devlink->ops->flash_update(devlink, file_name, component, + info->extack); +} + static const struct devlink_param devlink_param_generic[] = { { .id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET, @@ -4883,6 +4904,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 }, [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 }, + [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -5171,6 +5194,13 @@ static const struct genl_ops devlink_nl_ops[] = { .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NO_LOCK, }, + { + .cmd = DEVLINK_CMD_FLASH_UPDATE, + .doit = devlink_nl_cmd_flash_update, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, }; static struct genl_family devlink_nl_family __ro_after_init = { From 4eceba17200c03a2678edbdcff5d800aded607be Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 14 Feb 2019 13:40:45 -0800 Subject: [PATCH 1309/1436] ethtool: add compat for flash update If driver does not support ethtool flash update operation call into devlink. Signed-off-by: Jakub Kicinski Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 7 +++++++ net/core/devlink.c | 30 ++++++++++++++++++++++++++++++ net/core/ethtool.c | 14 ++++++++++---- 3 files changed, 47 insertions(+), 4 deletions(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index 18d7a051f412..a2da49dd9147 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1195,11 +1195,18 @@ devlink_health_report(struct devlink_health_reporter *reporter, #if IS_REACHABLE(CONFIG_NET_DEVLINK) void devlink_compat_running_version(struct net_device *dev, char *buf, size_t len); +int devlink_compat_flash_update(struct net_device *dev, const char *file_name); #else static inline void devlink_compat_running_version(struct net_device *dev, char *buf, size_t len) { } + +static inline int +devlink_compat_flash_update(struct net_device *dev, const char *file_name) +{ + return -EOPNOTSUPP; +} #endif #endif /* _NET_DEVLINK_H_ */ diff --git a/net/core/devlink.c b/net/core/devlink.c index 4a1ad0b13e52..04d98550c78c 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -6450,6 +6450,36 @@ out: mutex_unlock(&devlink_mutex); } +int devlink_compat_flash_update(struct net_device *dev, const char *file_name) +{ + struct devlink_port *devlink_port; + struct devlink *devlink; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + mutex_lock(&devlink->lock); + list_for_each_entry(devlink_port, &devlink->port_list, list) { + int ret = -EOPNOTSUPP; + + if (devlink_port->type != DEVLINK_PORT_TYPE_ETH || + devlink_port->type_dev != dev) + continue; + + mutex_unlock(&devlink_mutex); + if (devlink->ops->flash_update) + ret = devlink->ops->flash_update(devlink, + file_name, + NULL, NULL); + mutex_unlock(&devlink->lock); + return ret; + } + mutex_unlock(&devlink->lock); + } + mutex_unlock(&devlink_mutex); + + return -EOPNOTSUPP; +} + static int __init devlink_module_init(void) { return genl_register_family(&devlink_nl_family); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d2c47cdf25da..1320e8dce559 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -2038,12 +2038,18 @@ static noinline_for_stack int ethtool_flash_device(struct net_device *dev, if (copy_from_user(&efl, useraddr, sizeof(efl))) return -EFAULT; - - if (!dev->ethtool_ops->flash_device) - return -EOPNOTSUPP; - efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0; + if (!dev->ethtool_ops->flash_device) { + int ret; + + rtnl_unlock(); + ret = devlink_compat_flash_update(dev, efl.data); + rtnl_lock(); + + return ret; + } + return dev->ethtool_ops->flash_device(dev, &efl); } From 5c5696f3df2acbe738526c7ba76ab7dd38e9ba49 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 14 Feb 2019 13:40:46 -0800 Subject: [PATCH 1310/1436] nfp: devlink: allow flashing the device via devlink Devlink now allows updating device flash. Implement this callback. Compared to ethtool update we no longer have to release the networking locks - devlink doesn't take them. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- .../net/ethernet/netronome/nfp/nfp_devlink.c | 10 +++++ drivers/net/ethernet/netronome/nfp/nfp_main.c | 41 +++++++++++++++++++ drivers/net/ethernet/netronome/nfp/nfp_main.h | 2 + .../ethernet/netronome/nfp/nfp_net_ethtool.c | 35 ++-------------- 4 files changed, 56 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index 080a301f379e..db2da99f6aa7 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -330,6 +330,15 @@ err_close_nsp: return err; } +static int +nfp_devlink_flash_update(struct devlink *devlink, const char *path, + const char *component, struct netlink_ext_ack *extack) +{ + if (component) + return -EOPNOTSUPP; + return nfp_flash_update_common(devlink_priv(devlink), path, extack); +} + const struct devlink_ops nfp_devlink_ops = { .port_split = nfp_devlink_port_split, .port_unsplit = nfp_devlink_port_unsplit, @@ -338,6 +347,7 @@ const struct devlink_ops nfp_devlink_ops = { .eswitch_mode_get = nfp_devlink_eswitch_mode_get, .eswitch_mode_set = nfp_devlink_eswitch_mode_set, .info_get = nfp_devlink_info_get, + .flash_update = nfp_devlink_flash_update, }; int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index 6c10e8d119e4..f4c8776e42b6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -300,6 +300,47 @@ static int nfp_pcie_sriov_configure(struct pci_dev *pdev, int num_vfs) return nfp_pcie_sriov_enable(pdev, num_vfs); } +int nfp_flash_update_common(struct nfp_pf *pf, const char *path, + struct netlink_ext_ack *extack) +{ + struct device *dev = &pf->pdev->dev; + const struct firmware *fw; + struct nfp_nsp *nsp; + int err; + + nsp = nfp_nsp_open(pf->cpp); + if (IS_ERR(nsp)) { + err = PTR_ERR(nsp); + if (extack) + NL_SET_ERR_MSG_MOD(extack, "can't access NSP"); + else + dev_err(dev, "Failed to access the NSP: %d\n", err); + return err; + } + + err = request_firmware_direct(&fw, path, dev); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "unable to read flash file from disk"); + goto exit_close_nsp; + } + + dev_info(dev, "Please be patient while writing flash image: %s\n", + path); + + err = nfp_nsp_write_flash(nsp, fw); + if (err < 0) + goto exit_release_fw; + dev_info(dev, "Finished writing flash image\n"); + err = 0; + +exit_release_fw: + release_firmware(fw); +exit_close_nsp: + nfp_nsp_close(nsp); + return err; +} + static const struct firmware * nfp_net_fw_request(struct pci_dev *pdev, struct nfp_pf *pf, const char *name) { diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h index a3613a2e0aa5..b7211f200d22 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h @@ -164,6 +164,8 @@ nfp_pf_map_rtsym(struct nfp_pf *pf, const char *name, const char *sym_fmt, unsigned int min_size, struct nfp_cpp_area **area); int nfp_mbox_cmd(struct nfp_pf *pf, u32 cmd, void *in_data, u64 in_length, void *out_data, u64 out_length); +int nfp_flash_update_common(struct nfp_pf *pf, const char *path, + struct netlink_ext_ack *extack); enum nfp_dump_diag { NFP_DUMP_NSP_DIAG = 0, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index cb9c512abc76..8f189149efc5 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -1237,11 +1237,8 @@ static int nfp_net_set_channels(struct net_device *netdev, static int nfp_net_flash_device(struct net_device *netdev, struct ethtool_flash *flash) { - const struct firmware *fw; struct nfp_app *app; - struct nfp_nsp *nsp; - struct device *dev; - int err; + int ret; if (flash->region != ETHTOOL_FLASH_ALL_REGIONS) return -EOPNOTSUPP; @@ -1250,39 +1247,13 @@ nfp_net_flash_device(struct net_device *netdev, struct ethtool_flash *flash) if (!app) return -EOPNOTSUPP; - dev = &app->pdev->dev; - - nsp = nfp_nsp_open(app->cpp); - if (IS_ERR(nsp)) { - err = PTR_ERR(nsp); - dev_err(dev, "Failed to access the NSP: %d\n", err); - return err; - } - - err = request_firmware_direct(&fw, flash->data, dev); - if (err) - goto exit_close_nsp; - - dev_info(dev, "Please be patient while writing flash image: %s\n", - flash->data); dev_hold(netdev); rtnl_unlock(); - - err = nfp_nsp_write_flash(nsp, fw); - if (err < 0) { - dev_err(dev, "Flash write failed: %d\n", err); - goto exit_rtnl_lock; - } - dev_info(dev, "Finished writing flash image\n"); - -exit_rtnl_lock: + ret = nfp_flash_update_common(app->pf, flash->data, NULL); rtnl_lock(); dev_put(netdev); - release_firmware(fw); -exit_close_nsp: - nfp_nsp_close(nsp); - return err; + return ret; } static const struct ethtool_ops nfp_net_ethtool_ops = { From 0496743b202ac0f2710ae243a493118ca5096c04 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Thu, 14 Feb 2019 14:37:16 -0800 Subject: [PATCH 1311/1436] nfp: flower: fix masks for tcp and ip flags fields Check mask fields of tcp and ip flags when setting the corresponding mask flag used in hardware. Fixes: 8f2566225ae2 ("flow_offload: add flow_rule and flow_match") Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: John Hurley Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- .../net/ethernet/netronome/nfp/flower/match.c | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index 1279fa5da9e1..e03c8ef2c28c 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -172,46 +172,51 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { + u16 tcp_flags, tcp_flags_mask; struct flow_match_tcp match; - u16 tcp_flags; flow_rule_match_tcp(rule, &match); tcp_flags = be16_to_cpu(match.key->flags); + tcp_flags_mask = be16_to_cpu(match.mask->flags); - if (tcp_flags & TCPHDR_FIN) { + if (tcp_flags & TCPHDR_FIN) ext->flags |= NFP_FL_TCP_FLAG_FIN; + if (tcp_flags_mask & TCPHDR_FIN) msk->flags |= NFP_FL_TCP_FLAG_FIN; - } - if (tcp_flags & TCPHDR_SYN) { + + if (tcp_flags & TCPHDR_SYN) ext->flags |= NFP_FL_TCP_FLAG_SYN; + if (tcp_flags_mask & TCPHDR_SYN) msk->flags |= NFP_FL_TCP_FLAG_SYN; - } - if (tcp_flags & TCPHDR_RST) { + + if (tcp_flags & TCPHDR_RST) ext->flags |= NFP_FL_TCP_FLAG_RST; + if (tcp_flags_mask & TCPHDR_RST) msk->flags |= NFP_FL_TCP_FLAG_RST; - } - if (tcp_flags & TCPHDR_PSH) { + + if (tcp_flags & TCPHDR_PSH) ext->flags |= NFP_FL_TCP_FLAG_PSH; + if (tcp_flags_mask & TCPHDR_PSH) msk->flags |= NFP_FL_TCP_FLAG_PSH; - } - if (tcp_flags & TCPHDR_URG) { + + if (tcp_flags & TCPHDR_URG) ext->flags |= NFP_FL_TCP_FLAG_URG; + if (tcp_flags_mask & TCPHDR_URG) msk->flags |= NFP_FL_TCP_FLAG_URG; - } } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { struct flow_match_control match; flow_rule_match_control(rule, &match); - if (match.key->flags & FLOW_DIS_IS_FRAGMENT) { + if (match.key->flags & FLOW_DIS_IS_FRAGMENT) ext->flags |= NFP_FL_IP_FRAGMENTED; + if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) msk->flags |= NFP_FL_IP_FRAGMENTED; - } - if (match.key->flags & FLOW_DIS_FIRST_FRAG) { + if (match.key->flags & FLOW_DIS_FIRST_FRAG) ext->flags |= NFP_FL_IP_FRAG_FIRST; + if (match.mask->flags & FLOW_DIS_FIRST_FRAG) msk->flags |= NFP_FL_IP_FRAG_FIRST; - } } } From 31a1b8d528fa4aedaa207b38d7fafc4e9b0a0d6c Mon Sep 17 00:00:00 2001 From: Petr Vorel Date: Fri, 15 Feb 2019 00:43:27 +0100 Subject: [PATCH 1312/1436] doc: Mention MSG_ZEROCOPY implementation for UDP MSG_ZEROCOPY implementation for UDP was merged in v5.0, 6e360f733113 ("Merge branch 'udp-msg_zerocopy'"). Signed-off-by: Petr Vorel Signed-off-by: David S. Miller --- Documentation/networking/msg_zerocopy.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/msg_zerocopy.rst b/Documentation/networking/msg_zerocopy.rst index fe46d4867e2d..18c1415e7bfa 100644 --- a/Documentation/networking/msg_zerocopy.rst +++ b/Documentation/networking/msg_zerocopy.rst @@ -7,7 +7,7 @@ Intro ===== The MSG_ZEROCOPY flag enables copy avoidance for socket send calls. -The feature is currently implemented for TCP sockets. +The feature is currently implemented for TCP and UDP sockets. Opportunity and Caveats From a0bc653b1d48671caa9d22a44ff98050f8efd3ea Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 15 Feb 2019 02:36:47 +0000 Subject: [PATCH 1313/1436] net: dsa: bcm_sf2: Remove set but not used variables 'v6_spec, v6_m_spec' Fixes gcc '-Wunused-but-set-variable' warning: drivers/net/dsa/bcm_sf2_cfp.c: In function 'bcm_sf2_cfp_ipv6_rule_set': drivers/net/dsa/bcm_sf2_cfp.c:606:40: warning: variable 'v6_m_spec' set but not used [-Wunused-but-set-variable] drivers/net/dsa/bcm_sf2_cfp.c:606:30: warning: variable 'v6_spec' set but not used [-Wunused-but-set-variable] It not used any more after commit e4f7ef54cbd8 ("dsa: bcm_sf2: use flow_rule infrastructure") Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2_cfp.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index 0b9ca4bdf47e..e6234d209787 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -603,7 +603,6 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, unsigned int queue_num, struct ethtool_rx_flow_spec *fs) { - struct ethtool_tcpip6_spec *v6_spec, *v6_m_spec; struct ethtool_rx_flow_spec_input input = {}; unsigned int slice_num, rule_index[2]; const struct cfp_udf_layout *layout; @@ -618,13 +617,9 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, switch (fs->flow_type & ~FLOW_EXT) { case TCP_V6_FLOW: ip_proto = IPPROTO_TCP; - v6_spec = &fs->h_u.tcp_ip6_spec; - v6_m_spec = &fs->m_u.tcp_ip6_spec; break; case UDP_V6_FLOW: ip_proto = IPPROTO_UDP; - v6_spec = &fs->h_u.udp_ip6_spec; - v6_m_spec = &fs->m_u.udp_ip6_spec; break; default: return -EINVAL; From 56425638839cfb038adeba3fb5ff6a4466ec37f9 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 15 Feb 2019 09:33:47 +0100 Subject: [PATCH 1314/1436] net: phy: marvell10g: Don't explicitly set Pause and Asym_Pause The PHY core expects PHY drivers not to set Pause and Asym_Pause bits, unless the driver only wants to specify one of them due to HW limitation. In the case of the Marvell10g driver, we don't need to set them. Signed-off-by: Maxime Chevallier Suggested-by: Andrew Lunn Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index b83eb19cf8bb..f9e0a2fc0277 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -242,9 +242,6 @@ static int mv3310_config_init(struct phy_device *phydev) phydev->interface != PHY_INTERFACE_MODE_10GKR) return -ENODEV; - __set_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported); - __set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->supported); - if (phydev->c45_ids.devices_in_package & MDIO_DEVS_AN) { val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1); if (val < 0) From 4012e7d09d99b62d80046790657c0b0e32310d50 Mon Sep 17 00:00:00 2001 From: Alexandre Torgue Date: Fri, 15 Feb 2019 10:49:09 +0100 Subject: [PATCH 1315/1436] net: stmmac: handle endianness in dwmac4_get_timestamp GMAC IP is little-endian and used on several kind of CPU (big or little endian). Main callbacks functions of the stmmac drivers take care about it. It was not the case for dwmac4_get_timestamp function. Fixes: ba1ffd74df74 ("stmmac: fix PTP support for GMAC4") Signed-off-by: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 20299f6f65fc..736e29635b77 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -241,15 +241,18 @@ static inline void dwmac4_get_timestamp(void *desc, u32 ats, u64 *ts) static int dwmac4_rx_check_timestamp(void *desc) { struct dma_desc *p = (struct dma_desc *)desc; + unsigned int rdes0 = le32_to_cpu(p->des0); + unsigned int rdes1 = le32_to_cpu(p->des1); + unsigned int rdes3 = le32_to_cpu(p->des3); u32 own, ctxt; int ret = 1; - own = p->des3 & RDES3_OWN; - ctxt = ((p->des3 & RDES3_CONTEXT_DESCRIPTOR) + own = rdes3 & RDES3_OWN; + ctxt = ((rdes3 & RDES3_CONTEXT_DESCRIPTOR) >> RDES3_CONTEXT_DESCRIPTOR_SHIFT); if (likely(!own && ctxt)) { - if ((p->des0 == 0xffffffff) && (p->des1 == 0xffffffff)) + if ((rdes0 == 0xffffffff) && (rdes1 == 0xffffffff)) /* Corrupted value */ ret = -EINVAL; else From aaeb1dea514a47e25b4dc05deb54fc4dc538d1d5 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 15 Feb 2019 11:23:25 +0100 Subject: [PATCH 1316/1436] net: sched: sch_api: set an error msg when qdisc_alloc_handle() fails This patch sets an error message in extack when the number of qdisc handles exceeds the maximum. Also the error-code ENOSPC is more appropriate than ENOMEM in this situation. Signed-off-by: Ivan Vecera Reported-by: Li Shuang Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller --- net/sched/sch_api.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2283924fb56d..b8a388e4bcc4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1201,9 +1201,11 @@ static struct Qdisc *qdisc_create(struct net_device *dev, } else { if (handle == 0) { handle = qdisc_alloc_handle(dev); - err = -ENOMEM; - if (handle == 0) + if (handle == 0) { + NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded"); + err = -ENOSPC; goto err_out3; + } } if (!netif_is_multiqueue(dev)) sch->flags |= TCQ_F_ONETXQUEUE; From 97dc47a1308a3af46a09b1546cfb869f2e382a81 Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Fri, 15 Feb 2019 13:20:42 +0100 Subject: [PATCH 1317/1436] qmi_wwan: apply SET_DTR quirk to Sierra WP7607 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 1199:68C0 USB ID is reused by Sierra WP7607 which requires the DTR quirk to be detected. Apply QMI_QUIRK_SET_DTR unconditionally as already done for other IDs shared between different devices. Signed-off-by: Beniamino Galvani Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 735ad838e2ba..18af2f8eee96 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1201,8 +1201,8 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */ {QMI_FIXED_INTF(0x1199, 0x68a2, 8)}, /* Sierra Wireless MC7710 in QMI mode */ {QMI_FIXED_INTF(0x1199, 0x68a2, 19)}, /* Sierra Wireless MC7710 in QMI mode */ - {QMI_FIXED_INTF(0x1199, 0x68c0, 8)}, /* Sierra Wireless MC7304/MC7354 */ - {QMI_FIXED_INTF(0x1199, 0x68c0, 10)}, /* Sierra Wireless MC7304/MC7354 */ + {QMI_QUIRK_SET_DTR(0x1199, 0x68c0, 8)}, /* Sierra Wireless MC7304/MC7354, WP76xx */ + {QMI_QUIRK_SET_DTR(0x1199, 0x68c0, 10)},/* Sierra Wireless MC7304/MC7354 */ {QMI_FIXED_INTF(0x1199, 0x901c, 8)}, /* Sierra Wireless EM7700 */ {QMI_FIXED_INTF(0x1199, 0x901f, 8)}, /* Sierra Wireless EM7355 */ {QMI_FIXED_INTF(0x1199, 0x9041, 8)}, /* Sierra Wireless MC7305/MC7355 */ From e928b5d6b75e239feb9c6d5488974b6646a0ebc8 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 16 Feb 2019 00:20:54 +0300 Subject: [PATCH 1318/1436] net: mv643xx_eth: disable clk on error path in mv643xx_eth_shared_probe() If mv643xx_eth_shared_of_probe() fails, mv643xx_eth_shared_probe() leaves clk enabled. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mv643xx_eth.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 2f427271a793..292a668ce88e 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2879,7 +2879,7 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev) ret = mv643xx_eth_shared_of_probe(pdev); if (ret) - return ret; + goto err_put_clk; pd = dev_get_platdata(&pdev->dev); msp->tx_csum_limit = (pd != NULL && pd->tx_csum_limit) ? @@ -2887,6 +2887,11 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev) infer_hw_params(msp); return 0; + +err_put_clk: + if (!IS_ERR(msp->clk)) + clk_disable_unprepare(msp->clk); + return ret; } static int mv643xx_eth_shared_remove(struct platform_device *pdev) From 04c03114be82194d4a4858d41dba8e286ad1787c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Feb 2019 13:36:20 -0800 Subject: [PATCH 1319/1436] tcp: clear icsk_backoff in tcp_write_queue_purge() soukjin bae reported a crash in tcp_v4_err() handling ICMP_DEST_UNREACH after tcp_write_queue_head(sk) returned a NULL pointer. Current logic should have prevented this : if (seq != tp->snd_una || !icsk->icsk_retransmits || !icsk->icsk_backoff || fastopen) break; Problem is the write queue might have been purged and icsk_backoff has not been cleared. Signed-off-by: Eric Dumazet Reported-by: soukjin bae Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2079145a3b7c..cf3c5095c10e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2528,6 +2528,7 @@ void tcp_write_queue_purge(struct sock *sk) sk_mem_reclaim(sk); tcp_clear_all_retrans_hints(tcp_sk(sk)); tcp_sk(sk)->packets_out = 0; + inet_csk(sk)->icsk_backoff = 0; } int tcp_disconnect(struct sock *sk, int flags) @@ -2576,7 +2577,6 @@ int tcp_disconnect(struct sock *sk, int flags) tp->write_seq += tp->max_window + 2; if (tp->write_seq == 0) tp->write_seq = 1; - icsk->icsk_backoff = 0; tp->snd_cwnd = 2; icsk->icsk_probes_out = 0; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; From 2c4cc9712364c051b1de2d175d5fbea6be948ebf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Feb 2019 13:36:21 -0800 Subject: [PATCH 1320/1436] tcp: tcp_v4_err() should be more careful ICMP handlers are not very often stressed, we should make them more resilient to bugs that might surface in the future. If there is no packet in retransmit queue, we should avoid a NULL deref. Signed-off-by: Eric Dumazet Reported-by: soukjin bae Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index efc6fef692ff..ec3cea9d6828 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -536,12 +536,15 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (sock_owned_by_user(sk)) break; + skb = tcp_rtx_queue_head(sk); + if (WARN_ON_ONCE(!skb)) + break; + icsk->icsk_backoff--; icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) : TCP_TIMEOUT_INIT; icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); - skb = tcp_rtx_queue_head(sk); tcp_mstamp_refresh(tp); delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb)); From 8644772637deb121f7ac2df690cbf83fa63d3b70 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 15 Feb 2019 14:44:12 -0800 Subject: [PATCH 1321/1436] mm: Use fixed constant in page_frag_alloc instead of size + 1 This patch replaces the size + 1 value introduced with the recent fix for 1 byte allocs with a constant value. The idea here is to reduce code overhead as the previous logic would have to read size into a register, then increment it, and write it back to whatever field was being used. By using a constant we can avoid those memory reads and arithmetic operations in favor of just encoding the maximum value into the operation itself. Fixes: 2c2ade81741c ("mm: page_alloc: fix ref bias in page_frag_alloc() for 1-byte allocs") Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- mm/page_alloc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 46285d28e43b..7f79b78bc829 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4675,11 +4675,11 @@ refill: /* Even if we own the page, we do not use atomic_set(). * This would break get_page_unless_zero() users. */ - page_ref_add(page, size); + page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE); /* reset page count bias and offset to start of new frag */ nc->pfmemalloc = page_is_pfmemalloc(page); - nc->pagecnt_bias = size + 1; + nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; nc->offset = size; } @@ -4695,10 +4695,10 @@ refill: size = nc->size; #endif /* OK, page count is 0, we can safely set it */ - set_page_count(page, size + 1); + set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1); /* reset page count bias and offset to start of new frag */ - nc->pagecnt_bias = size + 1; + nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; offset = size - fragsz; } From 3bed3cc4156eedf652b4df72bdb35d4f1a2a739d Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 15 Feb 2019 14:44:18 -0800 Subject: [PATCH 1322/1436] net: Do not allocate page fragments that are not skb aligned This patch addresses the fact that there are drivers, specifically tun, that will call into the network page fragment allocators with buffer sizes that are not cache aligned. Doing this could result in data alignment and DMA performance issues as these fragment pools are also shared with the skb allocator and any other devices that will use napi_alloc_frags or netdev_alloc_frags. Fixes: ffde7328a36d ("net: Split netdev_alloc_frag into __alloc_page_frag and add __napi_alloc_frag") Reported-by: Jann Horn Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 26d848484912..2415d9cb9b89 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -356,6 +356,8 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) */ void *netdev_alloc_frag(unsigned int fragsz) { + fragsz = SKB_DATA_ALIGN(fragsz); + return __netdev_alloc_frag(fragsz, GFP_ATOMIC); } EXPORT_SYMBOL(netdev_alloc_frag); @@ -369,6 +371,8 @@ static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) void *napi_alloc_frag(unsigned int fragsz) { + fragsz = SKB_DATA_ALIGN(fragsz); + return __napi_alloc_frag(fragsz, GFP_ATOMIC); } EXPORT_SYMBOL(napi_alloc_frag); From 3edaded89649d042ef4060cecc42f5599f3054b6 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 16 Feb 2019 01:48:09 +0000 Subject: [PATCH 1323/1436] net: sgi: use GFP_ATOMIC under spin lock The function meth_init_tx_ring() is called from meth_tx_timeout(), in which spin_lock is held, so we should use GFP_ATOMIC instead. Fixes: 8d4c28fbc284 ("meth: pass struct device to DMA API functions") Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/sgi/meth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c index f425ab528224..f1271402ca21 100644 --- a/drivers/net/ethernet/sgi/meth.c +++ b/drivers/net/ethernet/sgi/meth.c @@ -214,7 +214,7 @@ static int meth_init_tx_ring(struct meth_private *priv) { /* Init TX ring */ priv->tx_ring = dma_alloc_coherent(&priv->pdev->dev, - TX_RING_BUFFER_SIZE, &priv->tx_ring_dma, GFP_KERNEL); + TX_RING_BUFFER_SIZE, &priv->tx_ring_dma, GFP_ATOMIC); if (!priv->tx_ring) return -ENOMEM; From 58ecf2688cc9b44d2e8f830c16212edbeaef4dce Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 16 Feb 2019 10:37:56 +0800 Subject: [PATCH 1324/1436] ptr_ring: remove duplicated include from ptr_ring.h Remove duplicated include. Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 186cd8e970c7..8da46ac44a2e 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #endif From e511f17b1fb40248e63677a6ab81a29b9b32080d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 16 Feb 2019 08:15:52 +0000 Subject: [PATCH 1325/1436] net: hns3: make function hclge_set_all_vf_rst() static Fixes the following sparse warning: drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c:2431:5: warning: symbol 'hclge_set_all_vf_rst' was not declared. Should it be static? Fixes: aa5c4f175be6 ("net: hns3: add reset handling for VF when doing PF reset") Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 0a64c7f2dc6c..75ec3094bbf9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2599,7 +2599,7 @@ static int hclge_set_vf_rst(struct hclge_dev *hdev, int func_id, bool reset) return hclge_cmd_send(&hdev->hw, &desc, 1); } -int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset) +static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset) { int i; From a3b22b9f11d9fbc48b0291ea92259a5a810e9438 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 17 Feb 2019 18:46:40 -0800 Subject: [PATCH 1326/1436] Linux 5.0-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 86cf35d1d79d..96c5335e7ee4 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Shy Crocodile # *DOCUMENTATION* From 4f0557795e76d049f0a1687f1f050addf4df2dac Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 4 Feb 2019 15:07:06 +0100 Subject: [PATCH 1327/1436] mailbox: Export mbox_flush() The mbox_flush() function can be used by drivers that are built as modules, so the function needs to be exported. Reported-by: Mark Brown Signed-off-by: Thierry Reding Signed-off-by: Jassi Brar --- drivers/mailbox/mailbox.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index c6a7d4582dc6..38d9df3fb199 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -310,6 +310,7 @@ int mbox_flush(struct mbox_chan *chan, unsigned long timeout) return ret; } +EXPORT_SYMBOL_GPL(mbox_flush); /** * mbox_request_channel - Request a mailbox channel. From d7bf31a0f85faaf63c63c39d55154825a1eaaea9 Mon Sep 17 00:00:00 2001 From: Rayagonda Kokatanur Date: Mon, 4 Feb 2019 11:21:29 -0800 Subject: [PATCH 1328/1436] mailbox: bcm-flexrm-mailbox: Fix FlexRM ring flush timeout issue RING_CONTROL reg was not written due to wrong address, hence all the subsequent ring flush was timing out. Fixes: a371c10ea4b3 ("mailbox: bcm-flexrm-mailbox: Fix FlexRM ring flush sequence") Signed-off-by: Rayagonda Kokatanur Signed-off-by: Ray Jui Reviewed-by: Scott Branden Signed-off-by: Jassi Brar --- drivers/mailbox/bcm-flexrm-mailbox.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mailbox/bcm-flexrm-mailbox.c b/drivers/mailbox/bcm-flexrm-mailbox.c index d713271ebf7c..a64116586b4c 100644 --- a/drivers/mailbox/bcm-flexrm-mailbox.c +++ b/drivers/mailbox/bcm-flexrm-mailbox.c @@ -1396,9 +1396,9 @@ static void flexrm_shutdown(struct mbox_chan *chan) /* Clear ring flush state */ timeout = 1000; /* timeout of 1s */ - writel_relaxed(0x0, ring + RING_CONTROL); + writel_relaxed(0x0, ring->regs + RING_CONTROL); do { - if (!(readl_relaxed(ring + RING_FLUSH_DONE) & + if (!(readl_relaxed(ring->regs + RING_FLUSH_DONE) & FLUSH_DONE_MASK)) break; mdelay(1); From 9060cb719e61b685ec0102574e10337fa5f445ea Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Mon, 18 Feb 2019 10:44:44 +0800 Subject: [PATCH 1329/1436] net: crypto set sk to NULL when af_alg_release. KASAN has found use-after-free in sockfs_setattr. The existed commit 6d8c50dcb029 ("socket: close race condition between sock_close() and sockfs_setattr()") is to fix this simillar issue, but it seems to ignore that crypto module forgets to set the sk to NULL after af_alg_release. KASAN report details as below: BUG: KASAN: use-after-free in sockfs_setattr+0x120/0x150 Write of size 4 at addr ffff88837b956128 by task syz-executor0/4186 CPU: 2 PID: 4186 Comm: syz-executor0 Not tainted xxx + #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: dump_stack+0xca/0x13e print_address_description+0x79/0x330 ? vprintk_func+0x5e/0xf0 kasan_report+0x18a/0x2e0 ? sockfs_setattr+0x120/0x150 sockfs_setattr+0x120/0x150 ? sock_register+0x2d0/0x2d0 notify_change+0x90c/0xd40 ? chown_common+0x2ef/0x510 chown_common+0x2ef/0x510 ? chmod_common+0x3b0/0x3b0 ? __lock_is_held+0xbc/0x160 ? __sb_start_write+0x13d/0x2b0 ? __mnt_want_write+0x19a/0x250 do_fchownat+0x15c/0x190 ? __ia32_sys_chmod+0x80/0x80 ? trace_hardirqs_on_thunk+0x1a/0x1c __x64_sys_fchownat+0xbf/0x160 ? lockdep_hardirqs_on+0x39a/0x5e0 do_syscall_64+0xc8/0x580 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462589 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fb4b2c83c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000104 RAX: ffffffffffffffda RBX: 000000000072bfa0 RCX: 0000000000462589 RDX: 0000000000000000 RSI: 00000000200000c0 RDI: 0000000000000007 RBP: 0000000000000005 R08: 0000000000001000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fb4b2c846bc R13: 00000000004bc733 R14: 00000000006f5138 R15: 00000000ffffffff Allocated by task 4185: kasan_kmalloc+0xa0/0xd0 __kmalloc+0x14a/0x350 sk_prot_alloc+0xf6/0x290 sk_alloc+0x3d/0xc00 af_alg_accept+0x9e/0x670 hash_accept+0x4a3/0x650 __sys_accept4+0x306/0x5c0 __x64_sys_accept4+0x98/0x100 do_syscall_64+0xc8/0x580 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 4184: __kasan_slab_free+0x12e/0x180 kfree+0xeb/0x2f0 __sk_destruct+0x4e6/0x6a0 sk_destruct+0x48/0x70 __sk_free+0xa9/0x270 sk_free+0x2a/0x30 af_alg_release+0x5c/0x70 __sock_release+0xd3/0x280 sock_close+0x1a/0x20 __fput+0x27f/0x7f0 task_work_run+0x136/0x1b0 exit_to_usermode_loop+0x1a7/0x1d0 do_syscall_64+0x461/0x580 entry_SYSCALL_64_after_hwframe+0x49/0xbe Syzkaller reproducer: r0 = perf_event_open(&(0x7f0000000000)={0x0, 0x70, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, @perf_config_ext}, 0x0, 0x0, 0xffffffffffffffff, 0x0) r1 = socket$alg(0x26, 0x5, 0x0) getrusage(0x0, 0x0) bind(r1, &(0x7f00000001c0)=@alg={0x26, 'hash\x00', 0x0, 0x0, 'sha256-ssse3\x00'}, 0x80) r2 = accept(r1, 0x0, 0x0) r3 = accept4$unix(r2, 0x0, 0x0, 0x0) r4 = dup3(r3, r0, 0x0) fchownat(r4, &(0x7f00000000c0)='\x00', 0x0, 0x0, 0x1000) Fixes: 6d8c50dcb029 ("socket: close race condition between sock_close() and sockfs_setattr()") Signed-off-by: Mao Wenan Signed-off-by: David S. Miller --- crypto/af_alg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 17eb09d222ff..ec78a04eb136 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -122,8 +122,10 @@ static void alg_do_release(const struct af_alg_type *type, void *private) int af_alg_release(struct socket *sock) { - if (sock->sk) + if (sock->sk) { sock_put(sock->sk); + sock->sk = NULL; + } return 0; } EXPORT_SYMBOL_GPL(af_alg_release); From 21d2cb491b9e10bfdf10424673b43cd9eddc2da1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 17 Feb 2019 23:03:31 +0000 Subject: [PATCH 1330/1436] net/mlx4_en: fix spelling mistake: "quiting" -> "quitting" There is a spelling mistake in a en_err error message. Fix it. Signed-off-by: Colin Ian King Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 6b88881b8e35..c1438ae52a11 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -3360,7 +3360,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, dev->addr_len = ETH_ALEN; mlx4_en_u64_to_mac(dev->dev_addr, mdev->dev->caps.def_mac[priv->port]); if (!is_valid_ether_addr(dev->dev_addr)) { - en_err(priv, "Port: %d, invalid mac burned: %pM, quiting\n", + en_err(priv, "Port: %d, invalid mac burned: %pM, quitting\n", priv->port, dev->dev_addr); err = -EINVAL; goto out; From ed95799bd47480cc0571749ed1d2d85813163107 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 18 Feb 2019 15:35:25 +0900 Subject: [PATCH 1331/1436] net: hamradio: remove unused hweight*() defines This file does not use hweight*() at all, and the definition is surrounded by #if 0 ... #endif. Signed-off-by: Masahiro Yamada Signed-off-by: David S. Miller --- drivers/net/hamradio/baycom_ser_fdx.c | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c index 190f66c88479..ed0841630990 100644 --- a/drivers/net/hamradio/baycom_ser_fdx.c +++ b/drivers/net/hamradio/baycom_ser_fdx.c @@ -203,32 +203,6 @@ static inline void ser12_set_divisor(struct net_device *dev, */ } -/* --------------------------------------------------------------------- */ - -#if 0 -static inline unsigned int hweight16(unsigned int w) - __attribute__ ((unused)); -static inline unsigned int hweight8(unsigned int w) - __attribute__ ((unused)); - -static inline unsigned int hweight16(unsigned int w) -{ - unsigned short res = (w & 0x5555) + ((w >> 1) & 0x5555); - res = (res & 0x3333) + ((res >> 2) & 0x3333); - res = (res & 0x0F0F) + ((res >> 4) & 0x0F0F); - return (res & 0x00FF) + ((res >> 8) & 0x00FF); -} - -static inline unsigned int hweight8(unsigned int w) -{ - unsigned short res = (w & 0x55) + ((w >> 1) & 0x55); - res = (res & 0x33) + ((res >> 2) & 0x33); - return (res & 0x0F) + ((res >> 4) & 0x0F); -} -#endif - -/* --------------------------------------------------------------------- */ - static __inline__ void ser12_rx(struct net_device *dev, struct baycom_state *bc, struct timespec64 *ts, unsigned char curs) { int timediff; From 31ef5b0eef2afb2df23728f2b88f0c245617f258 Mon Sep 17 00:00:00 2001 From: Shalom Toledo Date: Mon, 18 Feb 2019 07:19:44 +0000 Subject: [PATCH 1332/1436] mlxsw: spectrum: Change IP2ME CPU policer rate and burst size values The IP2ME packet trap is triggered by packets hitting local routes. After evaluating current defaults used by the driver it was decided to reduce the amount of traffic generated by this trap to 1Kpps and increase the burst size. This is inline with similarly deployed systems. Signed-off-by: Shalom Toledo Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index abac923a8d04..bc349d8ca08a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3739,8 +3739,8 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) burst_size = 7; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME: - rate = 4 * 1024; - burst_size = 4; + rate = 1024; + burst_size = 7; break; default: continue; From f2ffff085d287eec499f1fccd682796ad8010303 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 18 Feb 2019 11:29:29 +0100 Subject: [PATCH 1333/1436] mac80211: mesh: fix missing unlock on error in table_path_del() spin_lock_bh() is used in table_path_del() but rcu_read_unlock() is used for unlocking. Fix it by using spin_unlock_bh() instead of rcu_read_unlock() in the error handling case. Fixes: b4c3fbe63601 ("mac80211: Use linked list instead of rhashtable walk for mesh tables") Acked-by: Herbert Xu Signed-off-by: Wei Yongjun Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/mac80211/mesh_pathtbl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index c3a7396fb955..88a6d5e18ccc 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -627,7 +627,7 @@ static int table_path_del(struct mesh_table *tbl, spin_lock_bh(&tbl->walk_lock); mpath = rhashtable_lookup_fast(&tbl->rhead, addr, mesh_rht_params); if (!mpath) { - rcu_read_unlock(); + spin_unlock_bh(&tbl->walk_lock); return -ENXIO; } From 780feae7eb69388c8d8b661cda6706b0dc0f642b Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 16 Feb 2019 10:59:35 +0800 Subject: [PATCH 1334/1436] mdio_bus: Fix PTR_ERR() usage after initialization to constant Fix coccinelle warning: ./drivers/net/phy/mdio_bus.c:51:5-12: ERROR: PTR_ERR applied after initialization to constant on line 44 ./drivers/net/phy/mdio_bus.c:52:5-12: ERROR: PTR_ERR applied after initialization to constant on line 44 fix this by using IS_ERR before PTR_ERR Fixes: bafbdd527d56 ("phylib: Add device reset GPIO support") Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 3d313585c9c1..debc74c90307 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -48,11 +48,12 @@ static int mdiobus_register_gpiod(struct mdio_device *mdiodev) gpiod = fwnode_get_named_gpiod(&mdiodev->dev.of_node->fwnode, "reset-gpios", 0, GPIOD_OUT_LOW, "PHY reset"); - if (PTR_ERR(gpiod) == -ENOENT || - PTR_ERR(gpiod) == -ENOSYS) - gpiod = NULL; - else if (IS_ERR(gpiod)) - return PTR_ERR(gpiod); + if (IS_ERR(gpiod)) { + if (PTR_ERR(gpiod) == -ENOENT || PTR_ERR(gpiod) == -ENOSYS) + gpiod = NULL; + else + return PTR_ERR(gpiod); + } mdiodev->reset = gpiod; From 6e07902f56fae81036ebc5a0b61d5ffdd739e4bc Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 16 Feb 2019 08:19:55 +0000 Subject: [PATCH 1335/1436] net: sched: using kfree_rcu() to simplify the code The callback function of call_rcu() just calls a kfree(), so we can use kfree_rcu() instead of call_rcu() + callback function. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/sched/sch_api.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b8a388e4bcc4..352b46f98440 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -526,11 +526,6 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt, return stab; } -static void stab_kfree_rcu(struct rcu_head *head) -{ - kfree(container_of(head, struct qdisc_size_table, rcu)); -} - void qdisc_put_stab(struct qdisc_size_table *tab) { if (!tab) @@ -538,7 +533,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab) if (--tab->refcnt == 0) { list_del(&tab->list); - call_rcu(&tab->rcu, stab_kfree_rcu); + kfree_rcu(tab, rcu); } } EXPORT_SYMBOL(qdisc_put_stab); From 57fd967838c665544f425c4a66ead1259a9ad0dc Mon Sep 17 00:00:00 2001 From: Rundong Ge Date: Sat, 16 Feb 2019 08:35:24 +0000 Subject: [PATCH 1336/1436] net: dsa: Implement flow_dissect callback for tag_dsa. RPS not work for DSA devices since the 'skb_get_hash' will always get the invalid hash for dsa tagged packets. "[PATCH] tag_mtk: add flow_dissect callback to the ops struct" introduced the flow_dissect callback to get the right hash for MTK tagged packet. Tag_dsa and tag_edsa also need to implement the callback. Signed-off-by: Rundong Ge Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/tag_dsa.c | 9 +++++++++ net/dsa/tag_edsa.c | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 8b2f92e3f3a2..67ff3fae18d8 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -146,8 +146,17 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, return skb; } +static int dsa_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto, + int *offset) +{ + *offset = 4; + *proto = ((__be16 *)skb->data)[1]; + return 0; +} + const struct dsa_device_ops dsa_netdev_ops = { .xmit = dsa_xmit, .rcv = dsa_rcv, + .flow_dissect = dsa_tag_flow_dissect, .overhead = DSA_HLEN, }; diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index f5b87ee5c94e..234585ec116e 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -165,8 +165,17 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, return skb; } +static int edsa_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto, + int *offset) +{ + *offset = 8; + *proto = ((__be16 *)skb->data)[3]; + return 0; +} + const struct dsa_device_ops edsa_netdev_ops = { .xmit = edsa_xmit, .rcv = edsa_rcv, + .flow_dissect = edsa_tag_flow_dissect, .overhead = EDSA_HLEN, }; From 8e29d23e28ee7fb995a00c1ca7e1a4caf5070b12 Mon Sep 17 00:00:00 2001 From: David Chen Date: Sat, 16 Feb 2019 17:16:42 +0800 Subject: [PATCH 1337/1436] r8152: Add support for MAC address pass through on RTL8153-BD RTL8153-BD is used in Dell DA300 type-C dongle. It should be added to the whitelist of devices to activate MAC address pass through. Per confirming with Realtek all devices containing RTL8153-BD should activate MAC pass through and there won't use pass through bit on efuse like in RTL8153-AD. Signed-off-by: David Chen Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 60dd1ec1665f..ada6baf8847a 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -557,6 +557,7 @@ enum spd_duplex { /* MAC PASSTHRU */ #define AD_MASK 0xfee0 #define BND_MASK 0x0004 +#define BD_MASK 0x0001 #define EFUSE 0xcfdb #define PASS_THRU_MASK 0x1 @@ -1176,9 +1177,9 @@ static int vendor_mac_passthru_addr_read(struct r8152 *tp, struct sockaddr *sa) return -ENODEV; } } else { - /* test for RTL8153-BND */ + /* test for RTL8153-BND and RTL8153-BD */ ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_1); - if ((ocp_data & BND_MASK) == 0) { + if ((ocp_data & BND_MASK) == 0 && (ocp_data & BD_MASK)) { netif_dbg(tp, probe, tp->netdev, "Invalid variant for MAC pass through\n"); return -ENODEV; From eb160971af7a2cd807a76adea44e924ebfa05708 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 16 Feb 2019 10:20:15 +0100 Subject: [PATCH 1338/1436] r8169: remove unneeded mmiowb barriers writex() has implicit barriers, that's what makes it different from writex_relaxed(). Therefore these calls to mmiowb() can be removed. This patch was recently reverted due to a dependency with another problematic patch. But because it didn't contribute to the problem it was rebased and can be resubmitted. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 10bab1f7150a..c29dde064078 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -1288,13 +1288,11 @@ static u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr) static void rtl_ack_events(struct rtl8169_private *tp, u16 bits) { RTL_W16(tp, IntrStatus, bits); - mmiowb(); } static void rtl_irq_disable(struct rtl8169_private *tp) { RTL_W16(tp, IntrMask, 0); - mmiowb(); } #define RTL_EVENT_NAPI_RX (RxOK | RxErr) @@ -6251,8 +6249,6 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, RTL_W8(tp, TxPoll, NPQ); - mmiowb(); - if (!rtl_tx_slots_avail(tp, MAX_SKB_FRAGS)) { /* Avoid wrongly optimistic queue wake-up: rtl_tx thread must * not miss a ring update when it notices a stopped queue. @@ -6597,9 +6593,7 @@ static int rtl8169_poll(struct napi_struct *napi, int budget) if (work_done < budget) { napi_complete_done(napi, work_done); - rtl_irq_enable(tp); - mmiowb(); } return work_done; From bf9d787ba7ea87c5c676a73a0ba191edd5a0341a Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 16 Feb 2019 17:53:10 +0800 Subject: [PATCH 1339/1436] liquidio: using NULL instead of plain integer Fix following warning: drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c:1453:35: warning: Using plain integer as NULL pointer drivers/net/ethernet/cavium/liquidio/lio_main.c:2910:23: warning: Using plain integer as NULL pointer Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c | 2 +- drivers/net/ethernet/cavium/liquidio/lio_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index 9f4f3c1d5043..43d11c38b38a 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -1450,7 +1450,7 @@ void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx, mbox_cmd.recv_len = 0; mbox_cmd.recv_status = 0; mbox_cmd.fn = NULL; - mbox_cmd.fn_arg = 0; + mbox_cmd.fn_arg = NULL; ether_addr_copy(mbox_cmd.msg.s.params, mac); mbox_cmd.q_no = vfidx * oct->sriov_info.rings_per_vf; octeon_mbox_write(oct, &mbox_cmd); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index e97e6754ee09..9b7819fdc9de 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -2907,7 +2907,7 @@ static int liquidio_set_vf_spoofchk(struct net_device *netdev, int vfidx, nctrl.ncmd.s.param2 = enable; nctrl.ncmd.s.more = 0; nctrl.iq_no = lio->linfo.txpciq[0].s.q_no; - nctrl.cb_fn = 0; + nctrl.cb_fn = NULL; retval = octnet_send_nic_ctrl_pkt(oct, &nctrl); From 9004a14cb688c69194002aa2fadda4433c3b79fb Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 16 Feb 2019 17:26:05 +0100 Subject: [PATCH 1340/1436] net: phy: add helper mii_10gbt_stat_mod_linkmode_lpa_t Similar to the existing helpers for the Clause 22 registers add helper mii_10gbt_stat_mod_linkmode_lpa_t. Note that this helper is defined in linux/mdio.h, not like the Clause 22 helpers in linux/mii.h. Reason is that the Clause 45 register constants are defined in uapi/linux/mdio.h. And uapi/linux/mdio.h includes linux/mii.h before defining the C45 register constants. v2: - remove helpers that don't have users in this series Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/mdio.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/include/linux/mdio.h b/include/linux/mdio.h index dd46828b4c47..3e99ae3ed87f 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -286,6 +286,25 @@ static inline u32 linkmode_adv_to_mii_10gbt_adv_t(unsigned long *advertising) return result; } +/** + * mii_10gbt_stat_mod_linkmode_lpa_t + * @advertising: target the linkmode advertisement settings + * @adv: value of the C45 10GBASE-T AN STATUS register + * + * A small helper function that translates C45 10GBASE-T AN STATUS register bits + * to linkmode advertisement settings. Other bits in advertising aren't changed. + */ +static inline void mii_10gbt_stat_mod_linkmode_lpa_t(unsigned long *advertising, + u32 lpa) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + advertising, lpa & MDIO_AN_10GBT_STAT_LP2_5G); + linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + advertising, lpa & MDIO_AN_10GBT_STAT_LP5G); + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + advertising, lpa & MDIO_AN_10GBT_STAT_LP10G); +} + int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); From 96c2be34e6ceb8141015daac1a749aafce95e1bd Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 16 Feb 2019 17:26:50 +0100 Subject: [PATCH 1341/1436] net: phy: use mii_10gbt_stat_mod_linkmode_lpa_t in genphy_c45_read_lpa Use mii_10gbt_stat_mod_linkmode_lpa_t() in genphy_c45_read_lpa() to simplify the code. Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy-c45.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index e17672bd180e..1cf5e8ae46de 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -248,15 +248,7 @@ int genphy_c45_read_lpa(struct phy_device *phydev) if (val < 0) return val; - if (val & MDIO_AN_10GBT_STAT_LP2_5G) - linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, - phydev->lp_advertising); - if (val & MDIO_AN_10GBT_STAT_LP5G) - linkmode_set_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, - phydev->lp_advertising); - if (val & MDIO_AN_10GBT_STAT_LP10G) - linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, - phydev->lp_advertising); + mii_10gbt_stat_mod_linkmode_lpa_t(phydev->lp_advertising, val); return 0; } From c9b747dbc2036c917b1067fbb78dc38b105c4454 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 18 Feb 2019 12:19:54 +0000 Subject: [PATCH 1342/1436] bnx2x: Remove set but not used variable 'mfw_vn' Fixes gcc '-Wunused-but-set-variable' warning: drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c: In function 'bnx2x_get_hwinfo': drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c:11940:10: warning: variable 'mfw_vn' set but not used [-Wunused-but-set-variable] It's never used since introduction. Signed-off-by: YueHaibing Acked-by: Sudarsana Reddy Kalluru Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 0cec82450e19..626b491f7674 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -11998,7 +11998,7 @@ static void validate_set_si_mode(struct bnx2x *bp) static int bnx2x_get_hwinfo(struct bnx2x *bp) { int /*abs*/func = BP_ABS_FUNC(bp); - int vn, mfw_vn; + int vn; u32 val = 0, val2 = 0; int rc = 0; @@ -12083,12 +12083,10 @@ static int bnx2x_get_hwinfo(struct bnx2x *bp) /* * Initialize MF configuration */ - bp->mf_ov = 0; bp->mf_mode = 0; bp->mf_sub_mode = 0; vn = BP_VN(bp); - mfw_vn = BP_FW_MB_IDX(bp); if (!CHIP_IS_E1(bp) && !BP_NOMCP(bp)) { BNX2X_DEV_INFO("shmem2base 0x%x, size %d, mfcfg offset %d\n", From b5372fe5dc84235dbe04998efdede3c4daa866a9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 18 Feb 2019 16:36:48 -0800 Subject: [PATCH 1343/1436] exec: load_script: Do not exec truncated interpreter path Commit 8099b047ecc4 ("exec: load_script: don't blindly truncate shebang string") was trying to protect against a confused exec of a truncated interpreter path. However, it was overeager and also refused to truncate arguments as well, which broke userspace, and it was reverted. This attempts the protection again, but allows arguments to remain truncated. In an effort to improve readability, helper functions and comments have been added. Co-developed-by: Linus Torvalds Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Oleg Nesterov Cc: Samuel Dionne-Riel Cc: Richard Weinberger Cc: Graham Christensen Cc: Michal Hocko Signed-off-by: Linus Torvalds --- fs/binfmt_script.c | 57 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 7cde3f46ad26..e996174cbfc0 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -14,13 +14,30 @@ #include #include +static inline bool spacetab(char c) { return c == ' ' || c == '\t'; } +static inline char *next_non_spacetab(char *first, const char *last) +{ + for (; first <= last; first++) + if (!spacetab(*first)) + return first; + return NULL; +} +static inline char *next_terminator(char *first, const char *last) +{ + for (; first <= last; first++) + if (spacetab(*first) || !*first) + return first; + return NULL; +} + static int load_script(struct linux_binprm *bprm) { const char *i_arg, *i_name; - char *cp; + char *cp, *buf_end; struct file *file; int retval; + /* Not ours to exec if we don't start with "#!". */ if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!')) return -ENOEXEC; @@ -33,18 +50,40 @@ static int load_script(struct linux_binprm *bprm) if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) return -ENOENT; - /* - * This section does the #! interpretation. - * Sorta complicated, but hopefully it will work. -TYT - */ - + /* Release since we are not mapping a binary into memory. */ allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; - bprm->buf[BINPRM_BUF_SIZE - 1] = '\0'; - if ((cp = strchr(bprm->buf, '\n')) == NULL) - cp = bprm->buf+BINPRM_BUF_SIZE-1; + /* + * This section handles parsing the #! line into separate + * interpreter path and argument strings. We must be careful + * because bprm->buf is not yet guaranteed to be NUL-terminated + * (though the buffer will have trailing NUL padding when the + * file size was smaller than the buffer size). + * + * We do not want to exec a truncated interpreter path, so either + * we find a newline (which indicates nothing is truncated), or + * we find a space/tab/NUL after the interpreter path (which + * itself may be preceded by spaces/tabs). Truncating the + * arguments is fine: the interpreter can re-read the script to + * parse them on its own. + */ + buf_end = bprm->buf + sizeof(bprm->buf) - 1; + cp = strnchr(bprm->buf, sizeof(bprm->buf), '\n'); + if (!cp) { + cp = next_non_spacetab(bprm->buf + 2, buf_end); + if (!cp) + return -ENOEXEC; /* Entire buf is spaces/tabs */ + /* + * If there is no later space/tab/NUL we must assume the + * interpreter path is truncated. + */ + if (!next_terminator(cp, buf_end)) + return -ENOEXEC; + cp = buf_end; + } + /* NUL-terminate the buffer and any trailing spaces/tabs. */ *cp = '\0'; while (cp > bprm->buf) { cp--; From 9addc92730df55e2c05e8d3f69267a89d65bcba8 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 18 Feb 2019 15:24:02 +0200 Subject: [PATCH 1344/1436] qed: Fix iWARP buffer size provided for syn packet processing. The assumption that the maximum size of a syn packet is 128 bytes is wrong. Tunneling headers were not accounted for. Allocate buffers large enough for mtu. Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 12 ++++++------ drivers/net/ethernet/qlogic/qed/qed_iwarp.h | 1 - 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index beb8e5d6401a..e84fb01b91fd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -2605,7 +2605,7 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, struct qed_iwarp_info *iwarp_info; struct qed_ll2_acquire_data data; struct qed_ll2_cbs cbs; - u32 mpa_buff_size; + u32 buff_size; u16 n_ooo_bufs; int rc = 0; int i; @@ -2632,7 +2632,7 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, memset(&data, 0, sizeof(data)); data.input.conn_type = QED_LL2_TYPE_IWARP; - data.input.mtu = QED_IWARP_MAX_SYN_PKT_SIZE; + data.input.mtu = params->max_mtu; data.input.rx_num_desc = QED_IWARP_LL2_SYN_RX_SIZE; data.input.tx_num_desc = QED_IWARP_LL2_SYN_TX_SIZE; data.input.tx_max_bds_per_packet = 1; /* will never be fragmented */ @@ -2654,9 +2654,10 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, goto err; } + buff_size = QED_IWARP_MAX_BUF_SIZE(params->max_mtu); rc = qed_iwarp_ll2_alloc_buffers(p_hwfn, QED_IWARP_LL2_SYN_RX_SIZE, - QED_IWARP_MAX_SYN_PKT_SIZE, + buff_size, iwarp_info->ll2_syn_handle); if (rc) goto err; @@ -2710,10 +2711,9 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, if (rc) goto err; - mpa_buff_size = QED_IWARP_MAX_BUF_SIZE(params->max_mtu); rc = qed_iwarp_ll2_alloc_buffers(p_hwfn, data.input.rx_num_desc, - mpa_buff_size, + buff_size, iwarp_info->ll2_mpa_handle); if (rc) goto err; @@ -2726,7 +2726,7 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, iwarp_info->max_num_partial_fpdus = (u16)p_hwfn->p_rdma_info->num_qps; - iwarp_info->mpa_intermediate_buf = kzalloc(mpa_buff_size, GFP_KERNEL); + iwarp_info->mpa_intermediate_buf = kzalloc(buff_size, GFP_KERNEL); if (!iwarp_info->mpa_intermediate_buf) goto err; diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h index b8f612d00241..7ac959038324 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h @@ -46,7 +46,6 @@ enum qed_iwarp_qp_state qed_roce2iwarp_state(enum qed_roce_qp_state state); #define QED_IWARP_LL2_SYN_TX_SIZE (128) #define QED_IWARP_LL2_SYN_RX_SIZE (256) -#define QED_IWARP_MAX_SYN_PKT_SIZE (128) #define QED_IWARP_LL2_OOO_DEF_TX_SIZE (256) #define QED_IWARP_MAX_OOO (16) From 8be3dadf04050c2907760ec1955ca1c8fbc25585 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 18 Feb 2019 15:24:03 +0200 Subject: [PATCH 1345/1436] qed: Fix iWARP syn packet mac address validation. The ll2 forwards all syn packets to the driver without validating the mac address. Add validation check in the driver's iWARP listener flow and drop the packet if it isn't intended for the device. Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index e84fb01b91fd..ded556b7bab5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1688,6 +1688,15 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, eth_hlen = ETH_HLEN + (vlan_valid ? sizeof(u32) : 0); + if (!ether_addr_equal(ethh->h_dest, + p_hwfn->p_rdma_info->iwarp.mac_addr)) { + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "Got unexpected mac %pM instead of %pM\n", + ethh->h_dest, p_hwfn->p_rdma_info->iwarp.mac_addr); + return -EINVAL; + } + ether_addr_copy(remote_mac_addr, ethh->h_source); ether_addr_copy(local_mac_addr, ethh->h_dest); From 8a7493e58ad688eb23b81e45461c5d314f4402f1 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Mon, 18 Feb 2019 14:35:03 +0100 Subject: [PATCH 1346/1436] net: stmmac: Fix a race in EEE enable callback We are saving the status of EEE even before we try to enable it. This leads to a race with XMIT function that tries to arm EEE timer before we set it up. Fix this by only saving the EEE parameters after all operations are performed with success. Signed-off-by: Jose Abreu Fixes: d765955d2ae0 ("stmmac: add the Energy Efficient Ethernet support") Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- .../ethernet/stmicro/stmmac/stmmac_ethtool.c | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 5d85742a2be0..3c749c327cbd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -696,25 +696,27 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev, struct ethtool_eee *edata) { struct stmmac_priv *priv = netdev_priv(dev); + int ret; - priv->eee_enabled = edata->eee_enabled; - - if (!priv->eee_enabled) + if (!edata->eee_enabled) { stmmac_disable_eee_mode(priv); - else { + } else { /* We are asking for enabling the EEE but it is safe * to verify all by invoking the eee_init function. * In case of failure it will return an error. */ - priv->eee_enabled = stmmac_eee_init(priv); - if (!priv->eee_enabled) + edata->eee_enabled = stmmac_eee_init(priv); + if (!edata->eee_enabled) return -EOPNOTSUPP; - - /* Do not change tx_lpi_timer in case of failure */ - priv->tx_lpi_timer = edata->tx_lpi_timer; } - return phy_ethtool_set_eee(dev->phydev, edata); + ret = phy_ethtool_set_eee(dev->phydev, edata); + if (ret) + return ret; + + priv->eee_enabled = edata->eee_enabled; + priv->tx_lpi_timer = edata->tx_lpi_timer; + return 0; } static u32 stmmac_usec2riwt(u32 usec, struct stmmac_priv *priv) From 4d96e13ee9cd1f7f801e8c7f4b12f09d1da4a5d8 Mon Sep 17 00:00:00 2001 From: Salil Mehta Date: Mon, 18 Feb 2019 17:40:32 +0000 Subject: [PATCH 1347/1436] net: hns: Fixes the missing put_device in positive leg for roce reset This patch fixes the missing device reference release-after-use in the positive leg of the roce reset API of the HNS DSAF. Fixes: c969c6e7ab8c ("net: hns: Fix object reference leaks in hns_dsaf_roce_reset()") Reported-by: John Garry Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index b8155f5e71b4..ac55db065f16 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -3128,6 +3128,9 @@ int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset) dsaf_set_bit(credit, DSAF_SBM_ROCEE_CFG_CRD_EN_B, 1); dsaf_write_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG, credit); } + + put_device(&pdev->dev); + return 0; } EXPORT_SYMBOL(hns_dsaf_roce_reset); From 1f43f400a2cbb02f3d34de8fe30075c070254816 Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Mon, 18 Feb 2019 15:10:51 -0500 Subject: [PATCH 1348/1436] net: netcp: Fix ethss driver probe issue Recent commit below has introduced a bug in netcp driver that causes the ethss driver probe failure and thus break the networking function on K2 SoCs such as K2HK, K2L, K2E etc. This patch fixes the issue to restore networking on the above SoCs. Fixes: 21c328dcecfc ("net: ethernet: Convert to using %pOFn instead of device_node.name") Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/netcp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 1f612268c998..d847f672a705 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -259,7 +259,7 @@ static int netcp_module_probe(struct netcp_device *netcp_device, const char *name; char node_name[32]; - if (of_property_read_string(node, "label", &name) < 0) { + if (of_property_read_string(child, "label", &name) < 0) { snprintf(node_name, sizeof(node_name), "%pOFn", child); name = node_name; } From 254c5dbe15d44c9d76a3ccd3724eb1befb7adf99 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Thu, 14 Feb 2019 18:29:53 -0800 Subject: [PATCH 1349/1436] 6lowpan: use rbtree for IP frag queue This patch aligns IP defragmenation logic in 6lowpan with that of IPv4 and IPv6: see commit d4289fcc9b16 ("net: IP6 defrag: use rbtrees for IPv6 defrag") Modifying ip_defrag selftest seemed like an overkill, as I suspect most kernel test setups do not have 6lowpan hwsim enabled. So I ran the following code/script manually: insmod ./mac802154_hwsim.ko iwpan dev wpan0 set pan_id 0xbeef ip link add link wpan0 name lowpan0 type lowpan ip link set wpan0 up ip link set lowpan0 up iwpan dev wpan1 set pan_id 0xbeef ip netns add foo iwpan phy1 set netns name foo ip netns exec foo ip link add link wpan1 name lowpan1 type lowpan ip netns exec foo ip link set wpan1 up ip netns exec foo ip link set lowpan1 up ip -6 addr add "fb01::1/128" nodad dev lowpan0 ip -netns foo -6 addr add "fb02::1/128" nodad dev lowpan1 ip -6 route add "fb02::1/128" dev lowpan0 ip -netns foo -6 route add "fb01::1/128" dev lowpan1 # then in term1: ip netns exec foo bash ./udp_stream -6 # in term2: ./udp_stream -c -6 -H fb02::1 # pr_warn_once showed that the code changed by this patch # was invoked. Signed-off-by: Peter Oskolkov Acked-by: Alexander Aring Signed-off-by: Stefan Schmidt --- net/ieee802154/6lowpan/reassembly.c | 141 ++++++---------------------- 1 file changed, 29 insertions(+), 112 deletions(-) diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index d14226ecfde4..bd61633d2c32 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "6lowpan_i.h" @@ -34,8 +35,8 @@ static const char lowpan_frags_cache_name[] = "lowpan-frags"; static struct inet_frags lowpan_frags; -static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, - struct sk_buff *prev, struct net_device *ldev); +static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev, struct net_device *ldev); static void lowpan_frag_init(struct inet_frag_queue *q, const void *a) { @@ -88,9 +89,15 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb, static int lowpan_frag_queue(struct lowpan_frag_queue *fq, struct sk_buff *skb, u8 frag_type) { - struct sk_buff *prev, *next; + struct sk_buff *prev_tail; struct net_device *ldev; - int end, offset; + int end, offset, err; + + /* inet_frag_queue_* functions use skb->cb; see struct ipfrag_skb_cb + * in inet_fragment.c + */ + BUILD_BUG_ON(sizeof(struct lowpan_802154_cb) > sizeof(struct inet_skb_parm)); + BUILD_BUG_ON(sizeof(struct lowpan_802154_cb) > sizeof(struct inet6_skb_parm)); if (fq->q.flags & INET_FRAG_COMPLETE) goto err; @@ -117,38 +124,15 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq, } } - /* Find out which fragments are in front and at the back of us - * in the chain of fragments so far. We must know where to put - * this fragment, right? - */ - prev = fq->q.fragments_tail; - if (!prev || - lowpan_802154_cb(prev)->d_offset < - lowpan_802154_cb(skb)->d_offset) { - next = NULL; - goto found; - } - prev = NULL; - for (next = fq->q.fragments; next != NULL; next = next->next) { - if (lowpan_802154_cb(next)->d_offset >= - lowpan_802154_cb(skb)->d_offset) - break; /* bingo! */ - prev = next; - } - -found: - /* Insert this fragment in the chain of fragments. */ - skb->next = next; - if (!next) - fq->q.fragments_tail = skb; - if (prev) - prev->next = skb; - else - fq->q.fragments = skb; - ldev = skb->dev; if (ldev) skb->dev = NULL; + barrier(); + + prev_tail = fq->q.fragments_tail; + err = inet_frag_queue_insert(&fq->q, skb, offset, end); + if (err) + goto err; fq->q.stamp = skb->tstamp; if (frag_type == LOWPAN_DISPATCH_FRAG1) @@ -163,10 +147,11 @@ found: unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - res = lowpan_frag_reasm(fq, prev, ldev); + res = lowpan_frag_reasm(fq, skb, prev_tail, ldev); skb->_skb_refdst = orefdst; return res; } + skb_dst_drop(skb); return -1; err: @@ -175,97 +160,29 @@ err: } /* Check if this packet is complete. - * Returns NULL on failure by any reason, and pointer - * to current nexthdr field in reassembled frame. * * It is called with locked fq, and caller must check that * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. */ -static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, - struct net_device *ldev) +static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *ldev) { - struct sk_buff *fp, *head = fq->q.fragments; - int sum_truesize; + void *reasm_data; inet_frag_kill(&fq->q); - /* Make the one we just received the head. */ - if (prev) { - head = prev->next; - fp = skb_clone(head, GFP_ATOMIC); - - if (!fp) - goto out_oom; - - fp->next = head->next; - if (!fp->next) - fq->q.fragments_tail = fp; - prev->next = fp; - - skb_morph(head, fq->q.fragments); - head->next = fq->q.fragments->next; - - consume_skb(fq->q.fragments); - fq->q.fragments = head; - } - - /* Head of list must not be cloned. */ - if (skb_unclone(head, GFP_ATOMIC)) + reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); + if (!reasm_data) goto out_oom; + inet_frag_reasm_finish(&fq->q, skb, reasm_data); - /* If the first fragment is fragmented itself, we split - * it to two chunks: the first with data and paged part - * and the second, holding only fragments. - */ - if (skb_has_frag_list(head)) { - struct sk_buff *clone; - int i, plen = 0; - - clone = alloc_skb(0, GFP_ATOMIC); - if (!clone) - goto out_oom; - clone->next = head->next; - head->next = clone; - skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; - skb_frag_list_init(head); - for (i = 0; i < skb_shinfo(head)->nr_frags; i++) - plen += skb_frag_size(&skb_shinfo(head)->frags[i]); - clone->len = head->data_len - plen; - clone->data_len = clone->len; - head->data_len -= clone->len; - head->len -= clone->len; - add_frag_mem_limit(fq->q.net, clone->truesize); - } - - WARN_ON(head == NULL); - - sum_truesize = head->truesize; - for (fp = head->next; fp;) { - bool headstolen; - int delta; - struct sk_buff *next = fp->next; - - sum_truesize += fp->truesize; - if (skb_try_coalesce(head, fp, &headstolen, &delta)) { - kfree_skb_partial(fp, headstolen); - } else { - if (!skb_shinfo(head)->frag_list) - skb_shinfo(head)->frag_list = fp; - head->data_len += fp->len; - head->len += fp->len; - head->truesize += fp->truesize; - } - fp = next; - } - sub_frag_mem_limit(fq->q.net, sum_truesize); - - skb_mark_not_on_list(head); - head->dev = ldev; - head->tstamp = fq->q.stamp; - + skb->dev = ldev; + skb->tstamp = fq->q.stamp; fq->q.fragments = NULL; + fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; + fq->q.last_run_head = NULL; return 1; out_oom: From 4509de14680084141d3514c3b87bd9d070fc366d Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Thu, 14 Feb 2019 07:11:35 +0000 Subject: [PATCH 1350/1436] net/tls: Move protocol constants from cipher context to tls context Each tls context maintains two cipher contexts (one each for tx and rx directions). For each tls session, the constants such as protocol version, ciphersuite, iv size, associated data size etc are same for both the directions and need to be stored only once per tls context. Hence these are moved from 'struct cipher_context' to 'struct tls_prot_info' and stored only once in 'struct tls_context'. Signed-off-by: Vakul Garg Signed-off-by: David S. Miller --- include/net/tls.h | 46 +++++++----- net/tls/tls_device.c | 24 +++--- net/tls/tls_main.c | 17 ++++- net/tls/tls_sw.c | 172 +++++++++++++++++++++++-------------------- 4 files changed, 149 insertions(+), 110 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index a93a8ed8f716..a8b37226a287 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -199,15 +199,8 @@ enum { }; struct cipher_context { - u16 prepend_size; - u16 tag_size; - u16 overhead_size; - u16 iv_size; char *iv; - u16 rec_seq_size; char *rec_seq; - u16 aad_size; - u16 tail_size; }; union tls_crypto_context { @@ -218,7 +211,21 @@ union tls_crypto_context { }; }; +struct tls_prot_info { + u16 version; + u16 cipher_type; + u16 prepend_size; + u16 tag_size; + u16 overhead_size; + u16 iv_size; + u16 rec_seq_size; + u16 aad_size; + u16 tail_size; +}; + struct tls_context { + struct tls_prot_info prot_info; + union tls_crypto_context crypto_send; union tls_crypto_context crypto_recv; @@ -401,16 +408,26 @@ static inline bool tls_bigint_increment(unsigned char *seq, int len) return (i == -1); } +static inline struct tls_context *tls_get_ctx(const struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ulp_data; +} + static inline void tls_advance_record_sn(struct sock *sk, struct cipher_context *ctx, int version) { - if (tls_bigint_increment(ctx->rec_seq, ctx->rec_seq_size)) + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_prot_info *prot = &tls_ctx->prot_info; + + if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size)) tls_err_abort(sk, EBADMSG); if (version != TLS_1_3_VERSION) { tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, - ctx->iv_size); + prot->iv_size); } } @@ -420,9 +437,10 @@ static inline void tls_fill_prepend(struct tls_context *ctx, unsigned char record_type, int version) { - size_t pkt_len, iv_size = ctx->tx.iv_size; + struct tls_prot_info *prot = &ctx->prot_info; + size_t pkt_len, iv_size = prot->iv_size; - pkt_len = plaintext_len + ctx->tx.tag_size; + pkt_len = plaintext_len + prot->tag_size; if (version != TLS_1_3_VERSION) { pkt_len += iv_size; @@ -475,12 +493,6 @@ static inline void xor_iv_with_seq(int version, char *iv, char *seq) } } -static inline struct tls_context *tls_get_ctx(const struct sock *sk) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - return icsk->icsk_ulp_data; -} static inline struct tls_sw_context_rx *tls_sw_ctx_rx( const struct tls_context *tls_ctx) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 7ee9008b2187..a5c17c47d08a 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -247,6 +247,7 @@ static int tls_push_record(struct sock *sk, int flags, unsigned char record_type) { + struct tls_prot_info *prot = &ctx->prot_info; struct tcp_sock *tp = tcp_sk(sk); struct page_frag dummy_tag_frag; skb_frag_t *frag; @@ -256,7 +257,7 @@ static int tls_push_record(struct sock *sk, frag = &record->frags[0]; tls_fill_prepend(ctx, skb_frag_address(frag), - record->len - ctx->tx.prepend_size, + record->len - prot->prepend_size, record_type, ctx->crypto_send.info.version); @@ -264,7 +265,7 @@ static int tls_push_record(struct sock *sk, dummy_tag_frag.page = skb_frag_page(frag); dummy_tag_frag.offset = 0; - tls_append_frag(record, &dummy_tag_frag, ctx->tx.tag_size); + tls_append_frag(record, &dummy_tag_frag, prot->tag_size); record->end_seq = tp->write_seq + record->len; spin_lock_irq(&offload_ctx->lock); list_add_tail(&record->list, &offload_ctx->records_list); @@ -347,6 +348,7 @@ static int tls_push_data(struct sock *sk, unsigned char record_type) { struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx); int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST; int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE); @@ -376,10 +378,10 @@ static int tls_push_data(struct sock *sk, * we need to leave room for an authentication tag. */ max_open_record_len = TLS_MAX_PAYLOAD_SIZE + - tls_ctx->tx.prepend_size; + prot->prepend_size; do { rc = tls_do_allocation(sk, ctx, pfrag, - tls_ctx->tx.prepend_size); + prot->prepend_size); if (rc) { rc = sk_stream_wait_memory(sk, &timeo); if (!rc) @@ -397,7 +399,7 @@ handle_error: size = orig_size; destroy_record(record); ctx->open_record = NULL; - } else if (record->len > tls_ctx->tx.prepend_size) { + } else if (record->len > prot->prepend_size) { goto last_record; } @@ -658,6 +660,8 @@ int tls_device_decrypted(struct sock *sk, struct sk_buff *skb) int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) { u16 nonce_size, tag_size, iv_size, rec_seq_size; + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_record_info *start_marker_record; struct tls_offload_context_tx *offload_ctx; struct tls_crypto_info *crypto_info; @@ -703,10 +707,10 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) goto free_offload_ctx; } - ctx->tx.prepend_size = TLS_HEADER_SIZE + nonce_size; - ctx->tx.tag_size = tag_size; - ctx->tx.overhead_size = ctx->tx.prepend_size + ctx->tx.tag_size; - ctx->tx.iv_size = iv_size; + prot->prepend_size = TLS_HEADER_SIZE + nonce_size; + prot->tag_size = tag_size; + prot->overhead_size = prot->prepend_size + prot->tag_size; + prot->iv_size = iv_size; ctx->tx.iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, GFP_KERNEL); if (!ctx->tx.iv) { @@ -716,7 +720,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) memcpy(ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size); - ctx->tx.rec_seq_size = rec_seq_size; + prot->rec_seq_size = rec_seq_size; ctx->tx.rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL); if (!ctx->tx.rec_seq) { rc = -ENOMEM; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index d1c2fd9a3f63..caff15b2f9b2 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -435,6 +435,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, unsigned int optlen, int tx) { struct tls_crypto_info *crypto_info; + struct tls_crypto_info *alt_crypto_info; struct tls_context *ctx = tls_get_ctx(sk); size_t optsize; int rc = 0; @@ -445,10 +446,13 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, goto out; } - if (tx) + if (tx) { crypto_info = &ctx->crypto_send.info; - else + alt_crypto_info = &ctx->crypto_recv.info; + } else { crypto_info = &ctx->crypto_recv.info; + alt_crypto_info = &ctx->crypto_send.info; + } /* Currently we don't support set crypto info more than one time */ if (TLS_CRYPTO_INFO_READY(crypto_info)) { @@ -469,6 +473,15 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, goto err_crypto_info; } + /* Ensure that TLS version and ciphers are same in both directions */ + if (TLS_CRYPTO_INFO_READY(alt_crypto_info)) { + if (alt_crypto_info->version != crypto_info->version || + alt_crypto_info->cipher_type != crypto_info->cipher_type) { + rc = -EINVAL; + goto err_crypto_info; + } + } + switch (crypto_info->cipher_type) { case TLS_CIPHER_AES_GCM_128: case TLS_CIPHER_AES_GCM_256: { diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index ae4784734547..71be8acfbc9b 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -127,7 +127,7 @@ static int padding_length(struct tls_sw_context_rx *ctx, int sub = 0; /* Determine zero-padding length */ - if (tls_ctx->crypto_recv.info.version == TLS_1_3_VERSION) { + if (tls_ctx->prot_info.version == TLS_1_3_VERSION) { char content_type = 0; int err; int back = 17; @@ -155,6 +155,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) struct scatterlist *sgin = aead_req->src; struct tls_sw_context_rx *ctx; struct tls_context *tls_ctx; + struct tls_prot_info *prot; struct scatterlist *sg; struct sk_buff *skb; unsigned int pages; @@ -163,6 +164,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) skb = (struct sk_buff *)req->data; tls_ctx = tls_get_ctx(skb->sk); ctx = tls_sw_ctx_rx(tls_ctx); + prot = &tls_ctx->prot_info; /* Propagate if there was an err */ if (err) { @@ -171,8 +173,8 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) } else { struct strp_msg *rxm = strp_msg(skb); rxm->full_len -= padding_length(ctx, tls_ctx, skb); - rxm->offset += tls_ctx->rx.prepend_size; - rxm->full_len -= tls_ctx->rx.overhead_size; + rxm->offset += prot->prepend_size; + rxm->full_len -= prot->overhead_size; } /* After using skb->sk to propagate sk through crypto async callback @@ -209,13 +211,14 @@ static int tls_do_decryption(struct sock *sk, bool async) { struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); int ret; aead_request_set_tfm(aead_req, ctx->aead_recv); - aead_request_set_ad(aead_req, tls_ctx->rx.aad_size); + aead_request_set_ad(aead_req, prot->aad_size); aead_request_set_crypt(aead_req, sgin, sgout, - data_len + tls_ctx->rx.tag_size, + data_len + prot->tag_size, (u8 *)iv_recv); if (async) { @@ -253,12 +256,13 @@ static int tls_do_decryption(struct sock *sk, static void tls_trim_both_msgs(struct sock *sk, int target_size) { struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec = ctx->open_rec; sk_msg_trim(sk, &rec->msg_plaintext, target_size); if (target_size > 0) - target_size += tls_ctx->tx.overhead_size; + target_size += prot->overhead_size; sk_msg_trim(sk, &rec->msg_encrypted, target_size); } @@ -275,6 +279,7 @@ static int tls_alloc_encrypted_msg(struct sock *sk, int len) static int tls_clone_plaintext_msg(struct sock *sk, int required) { struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec = ctx->open_rec; struct sk_msg *msg_pl = &rec->msg_plaintext; @@ -290,7 +295,7 @@ static int tls_clone_plaintext_msg(struct sock *sk, int required) /* Skip initial bytes in msg_en's data to be able to use * same offset of both plain and encrypted data. */ - skip = tls_ctx->tx.prepend_size + msg_pl->sg.size; + skip = prot->prepend_size + msg_pl->sg.size; return sk_msg_clone(sk, msg_pl, msg_en, skip, len); } @@ -298,6 +303,7 @@ static int tls_clone_plaintext_msg(struct sock *sk, int required) static struct tls_rec *tls_get_rec(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct sk_msg *msg_pl, *msg_en; struct tls_rec *rec; @@ -316,13 +322,11 @@ static struct tls_rec *tls_get_rec(struct sock *sk) sk_msg_init(msg_en); sg_init_table(rec->sg_aead_in, 2); - sg_set_buf(&rec->sg_aead_in[0], rec->aad_space, - tls_ctx->tx.aad_size); + sg_set_buf(&rec->sg_aead_in[0], rec->aad_space, prot->aad_size); sg_unmark_end(&rec->sg_aead_in[1]); sg_init_table(rec->sg_aead_out, 2); - sg_set_buf(&rec->sg_aead_out[0], rec->aad_space, - tls_ctx->tx.aad_size); + sg_set_buf(&rec->sg_aead_out[0], rec->aad_space, prot->aad_size); sg_unmark_end(&rec->sg_aead_out[1]); return rec; @@ -411,6 +415,7 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err) struct aead_request *aead_req = (struct aead_request *)req; struct sock *sk = req->data; struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct scatterlist *sge; struct sk_msg *msg_en; @@ -422,8 +427,8 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err) msg_en = &rec->msg_encrypted; sge = sk_msg_elem(msg_en, msg_en->sg.curr); - sge->offset -= tls_ctx->tx.prepend_size; - sge->length += tls_ctx->tx.prepend_size; + sge->offset -= prot->prepend_size; + sge->length += prot->prepend_size; /* Check if error is previously set on socket */ if (err || sk->sk_err) { @@ -470,22 +475,23 @@ static int tls_do_encryption(struct sock *sk, struct aead_request *aead_req, size_t data_len, u32 start) { + struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_rec *rec = ctx->open_rec; struct sk_msg *msg_en = &rec->msg_encrypted; struct scatterlist *sge = sk_msg_elem(msg_en, start); int rc; memcpy(rec->iv_data, tls_ctx->tx.iv, sizeof(rec->iv_data)); - xor_iv_with_seq(tls_ctx->crypto_send.info.version, rec->iv_data, + xor_iv_with_seq(prot->version, rec->iv_data, tls_ctx->tx.rec_seq); - sge->offset += tls_ctx->tx.prepend_size; - sge->length -= tls_ctx->tx.prepend_size; + sge->offset += prot->prepend_size; + sge->length -= prot->prepend_size; msg_en->sg.curr = start; aead_request_set_tfm(aead_req, ctx->aead_send); - aead_request_set_ad(aead_req, tls_ctx->tx.aad_size); + aead_request_set_ad(aead_req, prot->aad_size); aead_request_set_crypt(aead_req, rec->sg_aead_in, rec->sg_aead_out, data_len, rec->iv_data); @@ -500,8 +506,8 @@ static int tls_do_encryption(struct sock *sk, rc = crypto_aead_encrypt(aead_req); if (!rc || rc != -EINPROGRESS) { atomic_dec(&ctx->encrypt_pending); - sge->offset -= tls_ctx->tx.prepend_size; - sge->length += tls_ctx->tx.prepend_size; + sge->offset -= prot->prepend_size; + sge->length += prot->prepend_size; } if (!rc) { @@ -513,8 +519,7 @@ static int tls_do_encryption(struct sock *sk, /* Unhook the record from context if encryption is not failure */ ctx->open_rec = NULL; - tls_advance_record_sn(sk, &tls_ctx->tx, - tls_ctx->crypto_send.info.version); + tls_advance_record_sn(sk, &tls_ctx->tx, prot->version); return rc; } @@ -640,6 +645,7 @@ static int tls_push_record(struct sock *sk, int flags, unsigned char record_type) { struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec = ctx->open_rec, *tmp = NULL; u32 i, split_point, uninitialized_var(orig_end); @@ -658,12 +664,12 @@ static int tls_push_record(struct sock *sk, int flags, split = split_point && split_point < msg_pl->sg.size; if (split) { rc = tls_split_open_record(sk, rec, &tmp, msg_pl, msg_en, - split_point, tls_ctx->tx.overhead_size, + split_point, prot->overhead_size, &orig_end); if (rc < 0) return rc; sk_msg_trim(sk, msg_en, msg_pl->sg.size + - tls_ctx->tx.overhead_size); + prot->overhead_size); } rec->tx_flags = flags; @@ -673,7 +679,7 @@ static int tls_push_record(struct sock *sk, int flags, sk_msg_iter_var_prev(i); rec->content_type = record_type; - if (tls_ctx->crypto_send.info.version == TLS_1_3_VERSION) { + if (prot->version == TLS_1_3_VERSION) { /* Add content type to end of message. No padding added */ sg_set_buf(&rec->sg_content_type, &rec->content_type, 1); sg_mark_end(&rec->sg_content_type); @@ -694,22 +700,20 @@ static int tls_push_record(struct sock *sk, int flags, i = msg_en->sg.start; sg_chain(rec->sg_aead_out, 2, &msg_en->sg.data[i]); - tls_make_aad(rec->aad_space, msg_pl->sg.size + tls_ctx->tx.tail_size, - tls_ctx->tx.rec_seq, tls_ctx->tx.rec_seq_size, - record_type, - tls_ctx->crypto_send.info.version); + tls_make_aad(rec->aad_space, msg_pl->sg.size + prot->tail_size, + tls_ctx->tx.rec_seq, prot->rec_seq_size, + record_type, prot->version); tls_fill_prepend(tls_ctx, page_address(sg_page(&msg_en->sg.data[i])) + msg_en->sg.data[i].offset, - msg_pl->sg.size + tls_ctx->tx.tail_size, - record_type, - tls_ctx->crypto_send.info.version); + msg_pl->sg.size + prot->tail_size, + record_type, prot->version); tls_ctx->pending_open_record_frags = false; rc = tls_do_encryption(sk, tls_ctx, ctx, req, - msg_pl->sg.size + tls_ctx->tx.tail_size, i); + msg_pl->sg.size + prot->tail_size, i); if (rc < 0) { if (rc != -EINPROGRESS) { tls_err_abort(sk, EBADMSG); @@ -723,8 +727,7 @@ static int tls_push_record(struct sock *sk, int flags, } else if (split) { msg_pl = &tmp->msg_plaintext; msg_en = &tmp->msg_encrypted; - sk_msg_trim(sk, msg_en, msg_pl->sg.size + - tls_ctx->tx.overhead_size); + sk_msg_trim(sk, msg_en, msg_pl->sg.size + prot->overhead_size); tls_ctx->pending_open_record_frags = true; ctx->open_rec = tmp; } @@ -859,6 +862,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); bool async_capable = ctx->async_capable; unsigned char record_type = TLS_RECORD_TYPE_DATA; @@ -925,7 +929,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) } required_size = msg_pl->sg.size + try_to_copy + - tls_ctx->tx.overhead_size; + prot->overhead_size; if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; @@ -994,8 +998,8 @@ fallback_to_reg_send: */ try_to_copy -= required_size - msg_pl->sg.size; full_record = true; - sk_msg_trim(sk, msg_en, msg_pl->sg.size + - tls_ctx->tx.overhead_size); + sk_msg_trim(sk, msg_en, + msg_pl->sg.size + prot->overhead_size); } if (try_to_copy) { @@ -1081,6 +1085,7 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page, long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + struct tls_prot_info *prot = &tls_ctx->prot_info; unsigned char record_type = TLS_RECORD_TYPE_DATA; struct sk_msg *msg_pl; struct tls_rec *rec; @@ -1130,8 +1135,7 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page, full_record = true; } - required_size = msg_pl->sg.size + copy + - tls_ctx->tx.overhead_size; + required_size = msg_pl->sg.size + copy + prot->overhead_size; if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; @@ -1330,6 +1334,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + struct tls_prot_info *prot = &tls_ctx->prot_info; struct strp_msg *rxm = strp_msg(skb); int n_sgin, n_sgout, nsg, mem_size, aead_size, err, pages = 0; struct aead_request *aead_req; @@ -1337,16 +1342,16 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, u8 *aad, *iv, *mem = NULL; struct scatterlist *sgin = NULL; struct scatterlist *sgout = NULL; - const int data_len = rxm->full_len - tls_ctx->rx.overhead_size + - tls_ctx->rx.tail_size; + const int data_len = rxm->full_len - prot->overhead_size + + prot->tail_size; if (*zc && (out_iov || out_sg)) { if (out_iov) n_sgout = iov_iter_npages(out_iov, INT_MAX) + 1; else n_sgout = sg_nents(out_sg); - n_sgin = skb_nsg(skb, rxm->offset + tls_ctx->rx.prepend_size, - rxm->full_len - tls_ctx->rx.prepend_size); + n_sgin = skb_nsg(skb, rxm->offset + prot->prepend_size, + rxm->full_len - prot->prepend_size); } else { n_sgout = 0; *zc = false; @@ -1363,7 +1368,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv); mem_size = aead_size + (nsg * sizeof(struct scatterlist)); - mem_size = mem_size + tls_ctx->rx.aad_size; + mem_size = mem_size + prot->aad_size; mem_size = mem_size + crypto_aead_ivsize(ctx->aead_recv); /* Allocate a single block of memory which contains @@ -1379,37 +1384,35 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, sgin = (struct scatterlist *)(mem + aead_size); sgout = sgin + n_sgin; aad = (u8 *)(sgout + n_sgout); - iv = aad + tls_ctx->rx.aad_size; + iv = aad + prot->aad_size; /* Prepare IV */ err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE, iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, - tls_ctx->rx.iv_size); + prot->iv_size); if (err < 0) { kfree(mem); return err; } - if (tls_ctx->crypto_recv.info.version == TLS_1_3_VERSION) + if (prot->version == TLS_1_3_VERSION) memcpy(iv, tls_ctx->rx.iv, crypto_aead_ivsize(ctx->aead_recv)); else memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE); - xor_iv_with_seq(tls_ctx->crypto_recv.info.version, iv, - tls_ctx->rx.rec_seq); + xor_iv_with_seq(prot->version, iv, tls_ctx->rx.rec_seq); /* Prepare AAD */ - tls_make_aad(aad, rxm->full_len - tls_ctx->rx.overhead_size + - tls_ctx->rx.tail_size, - tls_ctx->rx.rec_seq, tls_ctx->rx.rec_seq_size, - ctx->control, - tls_ctx->crypto_recv.info.version); + tls_make_aad(aad, rxm->full_len - prot->overhead_size + + prot->tail_size, + tls_ctx->rx.rec_seq, prot->rec_seq_size, + ctx->control, prot->version); /* Prepare sgin */ sg_init_table(sgin, n_sgin); - sg_set_buf(&sgin[0], aad, tls_ctx->rx.aad_size); + sg_set_buf(&sgin[0], aad, prot->aad_size); err = skb_to_sgvec(skb, &sgin[1], - rxm->offset + tls_ctx->rx.prepend_size, - rxm->full_len - tls_ctx->rx.prepend_size); + rxm->offset + prot->prepend_size, + rxm->full_len - prot->prepend_size); if (err < 0) { kfree(mem); return err; @@ -1418,7 +1421,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, if (n_sgout) { if (out_iov) { sg_init_table(sgout, n_sgout); - sg_set_buf(&sgout[0], aad, tls_ctx->rx.aad_size); + sg_set_buf(&sgout[0], aad, prot->aad_size); *chunk = 0; err = tls_setup_from_iter(sk, out_iov, data_len, @@ -1459,7 +1462,8 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - int version = tls_ctx->crypto_recv.info.version; + struct tls_prot_info *prot = &tls_ctx->prot_info; + int version = prot->version; struct strp_msg *rxm = strp_msg(skb); int err = 0; @@ -1480,8 +1484,8 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, rxm->full_len -= padding_length(ctx, tls_ctx, skb); - rxm->offset += tls_ctx->rx.prepend_size; - rxm->full_len -= tls_ctx->rx.overhead_size; + rxm->offset += prot->prepend_size; + rxm->full_len -= prot->overhead_size; tls_advance_record_sn(sk, &tls_ctx->rx, version); ctx->decrypted = true; ctx->saved_data_ready(sk); @@ -1605,6 +1609,7 @@ int tls_sw_recvmsg(struct sock *sk, { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + struct tls_prot_info *prot = &tls_ctx->prot_info; struct sk_psock *psock; unsigned char control = 0; ssize_t decrypted = 0; @@ -1667,11 +1672,11 @@ int tls_sw_recvmsg(struct sock *sk, rxm = strp_msg(skb); - to_decrypt = rxm->full_len - tls_ctx->rx.overhead_size; + to_decrypt = rxm->full_len - prot->overhead_size; if (to_decrypt <= len && !is_kvec && !is_peek && ctx->control == TLS_RECORD_TYPE_DATA && - tls_ctx->crypto_recv.info.version != TLS_1_3_VERSION) + prot->version != TLS_1_3_VERSION) zc = true; /* Do not use async mode if record is non-data */ @@ -1875,6 +1880,7 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb) { struct tls_context *tls_ctx = tls_get_ctx(strp->sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + struct tls_prot_info *prot = &tls_ctx->prot_info; char header[TLS_HEADER_SIZE + MAX_IV_SIZE]; struct strp_msg *rxm = strp_msg(skb); size_t cipher_overhead; @@ -1882,17 +1888,17 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb) int ret; /* Verify that we have a full TLS header, or wait for more data */ - if (rxm->offset + tls_ctx->rx.prepend_size > skb->len) + if (rxm->offset + prot->prepend_size > skb->len) return 0; /* Sanity-check size of on-stack buffer. */ - if (WARN_ON(tls_ctx->rx.prepend_size > sizeof(header))) { + if (WARN_ON(prot->prepend_size > sizeof(header))) { ret = -EINVAL; goto read_failure; } /* Linearize header to local buffer */ - ret = skb_copy_bits(skb, rxm->offset, header, tls_ctx->rx.prepend_size); + ret = skb_copy_bits(skb, rxm->offset, header, prot->prepend_size); if (ret < 0) goto read_failure; @@ -1901,12 +1907,12 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb) data_len = ((header[4] & 0xFF) | (header[3] << 8)); - cipher_overhead = tls_ctx->rx.tag_size; - if (tls_ctx->crypto_recv.info.version != TLS_1_3_VERSION) - cipher_overhead += tls_ctx->rx.iv_size; + cipher_overhead = prot->tag_size; + if (prot->version != TLS_1_3_VERSION) + cipher_overhead += prot->iv_size; if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead + - tls_ctx->rx.tail_size) { + prot->tail_size) { ret = -EMSGSIZE; goto read_failure; } @@ -2066,6 +2072,8 @@ static void tx_work_handler(struct work_struct *work) int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) { + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_crypto_info *crypto_info; struct tls12_crypto_info_aes_gcm_128 *gcm_128_info; struct tls12_crypto_info_aes_gcm_256 *gcm_256_info; @@ -2171,18 +2179,20 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) if (crypto_info->version == TLS_1_3_VERSION) { nonce_size = 0; - cctx->aad_size = TLS_HEADER_SIZE; - cctx->tail_size = 1; + prot->aad_size = TLS_HEADER_SIZE; + prot->tail_size = 1; } else { - cctx->aad_size = TLS_AAD_SPACE_SIZE; - cctx->tail_size = 0; + prot->aad_size = TLS_AAD_SPACE_SIZE; + prot->tail_size = 0; } - cctx->prepend_size = TLS_HEADER_SIZE + nonce_size; - cctx->tag_size = tag_size; - cctx->overhead_size = cctx->prepend_size + cctx->tag_size + - cctx->tail_size; - cctx->iv_size = iv_size; + prot->version = crypto_info->version; + prot->cipher_type = crypto_info->cipher_type; + prot->prepend_size = TLS_HEADER_SIZE + nonce_size; + prot->tag_size = tag_size; + prot->overhead_size = prot->prepend_size + + prot->tag_size + prot->tail_size; + prot->iv_size = iv_size; cctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, GFP_KERNEL); if (!cctx->iv) { @@ -2192,7 +2202,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) /* Note: 128 & 256 bit salt are the same size */ memcpy(cctx->iv, salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size); - cctx->rec_seq_size = rec_seq_size; + prot->rec_seq_size = rec_seq_size; cctx->rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL); if (!cctx->rec_seq) { rc = -ENOMEM; @@ -2215,7 +2225,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) if (rc) goto free_aead; - rc = crypto_aead_setauthsize(*aead, cctx->tag_size); + rc = crypto_aead_setauthsize(*aead, prot->tag_size); if (rc) goto free_aead; From 3293ec232123e9a648ec5b02225cd32091ae243f Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 19 Feb 2019 05:31:12 -0500 Subject: [PATCH 1351/1436] bnxt_en: Update firmware interface spec. to 1.10.0.47. Firmware error recover is the major change in this spec. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h | 196 ++++++++++++++++-- 1 file changed, 177 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index 0a0995894ddb..b6c610339501 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -1,7 +1,7 @@ /* Broadcom NetXtreme-C/E network driver. * * Copyright (c) 2014-2016 Broadcom Corporation - * Copyright (c) 2016-2018 Broadcom Limited + * Copyright (c) 2016-2019 Broadcom Limited * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -98,6 +98,7 @@ struct hwrm_short_input { struct cmd_nums { __le16 req_type; #define HWRM_VER_GET 0x0UL + #define HWRM_ERROR_RECOVERY_QCFG 0xcUL #define HWRM_FUNC_DRV_IF_CHANGE 0xdUL #define HWRM_FUNC_BUF_UNRGTR 0xeUL #define HWRM_FUNC_VF_CFG 0xfUL @@ -221,6 +222,7 @@ struct cmd_nums { #define HWRM_CFA_METER_PROFILE_CFG 0xf7UL #define HWRM_CFA_METER_INSTANCE_ALLOC 0xf8UL #define HWRM_CFA_METER_INSTANCE_FREE 0xf9UL + #define HWRM_CFA_METER_INSTANCE_CFG 0xfaUL #define HWRM_CFA_VFR_ALLOC 0xfdUL #define HWRM_CFA_VFR_FREE 0xfeUL #define HWRM_CFA_VF_PAIR_ALLOC 0x100UL @@ -269,6 +271,7 @@ struct cmd_nums { #define HWRM_ENGINE_CKV_FLUSH 0x133UL #define HWRM_ENGINE_CKV_RNG_GET 0x134UL #define HWRM_ENGINE_CKV_KEY_GEN 0x135UL + #define HWRM_ENGINE_CKV_KEY_LABEL_CFG 0x136UL #define HWRM_ENGINE_QG_CONFIG_QUERY 0x13cUL #define HWRM_ENGINE_QG_QUERY 0x13dUL #define HWRM_ENGINE_QG_METER_PROFILE_CONFIG_QUERY 0x13eUL @@ -296,6 +299,7 @@ struct cmd_nums { #define HWRM_ENGINE_NQ_ALLOC 0x162UL #define HWRM_ENGINE_NQ_FREE 0x163UL #define HWRM_ENGINE_ON_DIE_RQE_CREDITS 0x164UL + #define HWRM_ENGINE_FUNC_QCFG 0x165UL #define HWRM_FUNC_RESOURCE_QCAPS 0x190UL #define HWRM_FUNC_VF_RESOURCE_CFG 0x191UL #define HWRM_FUNC_BACKING_STORE_QCAPS 0x192UL @@ -379,15 +383,15 @@ struct hwrm_err_output { }; #define HWRM_NA_SIGNATURE ((__le32)(-1)) #define HWRM_MAX_REQ_LEN 128 -#define HWRM_MAX_RESP_LEN 280 +#define HWRM_MAX_RESP_LEN 704 #define HW_HASH_INDEX_SIZE 0x80 #define HW_HASH_KEY_SIZE 40 #define HWRM_RESP_VALID_KEY 1 #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 #define HWRM_VERSION_UPDATE 0 -#define HWRM_VERSION_RSVD 35 -#define HWRM_VERSION_STR "1.10.0.35" +#define HWRM_VERSION_RSVD 47 +#define HWRM_VERSION_STR "1.10.0.47" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -580,6 +584,7 @@ struct hwrm_async_event_cmpl { #define ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE 0x6UL #define ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE 0x7UL #define ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY 0x8UL + #define ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY 0x9UL #define ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD 0x10UL #define ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD 0x11UL #define ASYNC_EVENT_CMPL_EVENT_ID_FUNC_FLR_PROC_CMPLT 0x12UL @@ -595,6 +600,9 @@ struct hwrm_async_event_cmpl { #define ASYNC_EVENT_CMPL_EVENT_ID_DEBUG_NOTIFICATION 0x37UL #define ASYNC_EVENT_CMPL_EVENT_ID_EEM_CACHE_FLUSH_REQ 0x38UL #define ASYNC_EVENT_CMPL_EVENT_ID_EEM_CACHE_FLUSH_DONE 0x39UL + #define ASYNC_EVENT_CMPL_EVENT_ID_TCP_FLAG_ACTION_CHANGE 0x3aUL + #define ASYNC_EVENT_CMPL_EVENT_ID_EEM_FLOW_ACTIVE 0x3bUL + #define ASYNC_EVENT_CMPL_EVENT_ID_EEM_CFG_CHANGE 0x3cUL #define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG 0xfeUL #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL #define ASYNC_EVENT_CMPL_EVENT_ID_LAST ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR @@ -724,6 +732,30 @@ struct hwrm_async_event_cmpl_reset_notify { #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_SFT 16 }; +/* hwrm_async_event_cmpl_error_recovery (size:128b/16B) */ +struct hwrm_async_event_cmpl_error_recovery { + __le16 type; + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_RECOVERY_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_ID_ERROR_RECOVERY 0x9UL + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_ID_LAST ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_ID_ERROR_RECOVERY + __le32 event_data2; + u8 opaque_v; + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_V 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_MASK 0xffUL + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_MASTER_FUNC 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_RECOVERY_ENABLED 0x2UL +}; + /* hwrm_async_event_cmpl_vf_cfg_change (size:128b/16B) */ struct hwrm_async_event_cmpl_vf_cfg_change { __le16 type; @@ -1014,6 +1046,7 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_WCB_PUSH_MODE 0x100000UL #define FUNC_QCAPS_RESP_FLAGS_DYNAMIC_TX_RING_ALLOC 0x200000UL #define FUNC_QCAPS_RESP_FLAGS_HOT_RESET_CAPABLE 0x400000UL + #define FUNC_QCAPS_RESP_FLAGS_ERROR_RECOVERY_CAPABLE 0x800000UL u8 mac_address[6]; __le16 max_rsscos_ctx; __le16 max_cmpl_rings; @@ -1185,6 +1218,7 @@ struct hwrm_func_cfg_input { #define FUNC_CFG_REQ_FLAGS_TRUSTED_VF_ENABLE 0x200000UL #define FUNC_CFG_REQ_FLAGS_DYNAMIC_TX_RING_ALLOC 0x400000UL #define FUNC_CFG_REQ_FLAGS_NQ_ASSETS_TEST 0x800000UL + #define FUNC_CFG_REQ_FLAGS_TRUSTED_VF_DISABLE 0x1000000UL __le32 enables; #define FUNC_CFG_REQ_ENABLES_MTU 0x1UL #define FUNC_CFG_REQ_ENABLES_MRU 0x2UL @@ -1390,6 +1424,7 @@ struct hwrm_func_drv_rgtr_input { #define FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE 0x4UL #define FUNC_DRV_RGTR_REQ_FLAGS_FLOW_HANDLE_64BIT_MODE 0x8UL #define FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT 0x10UL + #define FUNC_DRV_RGTR_REQ_FLAGS_ERROR_RECOVERY_SUPPORT 0x20UL __le32 enables; #define FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE 0x1UL #define FUNC_DRV_RGTR_REQ_ENABLES_VER 0x2UL @@ -2024,6 +2059,89 @@ struct hwrm_func_backing_store_cfg_output { u8 valid; }; +/* hwrm_error_recovery_qcfg_input (size:192b/24B) */ +struct hwrm_error_recovery_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 unused_0[8]; +}; + +/* hwrm_error_recovery_qcfg_output (size:1664b/208B) */ +struct hwrm_error_recovery_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 flags; + #define ERROR_RECOVERY_QCFG_RESP_FLAGS_HOST 0x1UL + #define ERROR_RECOVERY_QCFG_RESP_FLAGS_CO_CPU 0x2UL + __le32 driver_polling_freq; + __le32 master_func_wait_period; + __le32 normal_func_wait_period; + __le32 master_func_wait_period_after_reset; + __le32 max_bailout_time_after_reset; + __le32 fw_health_status_reg; + #define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_MASK 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_SFT 0 + #define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_PCIE_CFG 0x0UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_GRC 0x1UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_BAR0 0x2UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_BAR1 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_LAST ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SPACE_BAR1 + #define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_MASK 0xfffffffcUL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEALTH_STATUS_REG_ADDR_SFT 2 + __le32 fw_heartbeat_reg; + #define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_MASK 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_SFT 0 + #define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_PCIE_CFG 0x0UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_GRC 0x1UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_BAR0 0x2UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_BAR1 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_LAST ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SPACE_BAR1 + #define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_MASK 0xfffffffcUL + #define ERROR_RECOVERY_QCFG_RESP_FW_HEARTBEAT_REG_ADDR_SFT 2 + __le32 fw_reset_cnt_reg; + #define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_MASK 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_SFT 0 + #define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_PCIE_CFG 0x0UL + #define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_GRC 0x1UL + #define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_BAR0 0x2UL + #define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_BAR1 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_LAST ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SPACE_BAR1 + #define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_MASK 0xfffffffcUL + #define ERROR_RECOVERY_QCFG_RESP_FW_RESET_CNT_REG_ADDR_SFT 2 + __le32 reset_inprogress_reg; + #define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_MASK 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_SFT 0 + #define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_PCIE_CFG 0x0UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_GRC 0x1UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_BAR0 0x2UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_BAR1 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_LAST ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SPACE_BAR1 + #define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_MASK 0xfffffffcUL + #define ERROR_RECOVERY_QCFG_RESP_RESET_INPROGRESS_REG_ADDR_SFT 2 + __le32 reset_inprogress_reg_mask; + u8 unused_0[3]; + u8 reg_array_cnt; + __le32 reset_reg[16]; + #define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_MASK 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_SFT 0 + #define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_PCIE_CFG 0x0UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_GRC 0x1UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_BAR0 0x2UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_BAR1 0x3UL + #define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_LAST ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SPACE_BAR1 + #define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_MASK 0xfffffffcUL + #define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SFT 2 + __le32 reset_reg_val[16]; + u8 delay_after_reset[16]; + u8 unused_1[7]; + u8 valid; +}; + /* hwrm_func_drv_if_change_input (size:192b/24B) */ struct hwrm_func_drv_if_change_input { __le16 req_type; @@ -2955,6 +3073,7 @@ struct hwrm_port_phy_qcaps_output { #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100GB 0x800UL #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_10MBHD 0x1000UL #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_10MB 0x2000UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_200GB 0x4000UL __le16 supported_speeds_auto_mode; #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_100MBHD 0x1UL #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_100MB 0x2UL @@ -2970,6 +3089,7 @@ struct hwrm_port_phy_qcaps_output { #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_100GB 0x800UL #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_10MBHD 0x1000UL #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_10MB 0x2000UL + #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_AUTO_MODE_200GB 0x4000UL __le16 supported_speeds_eee_mode; #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_EEE_MODE_RSVD1 0x1UL #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_EEE_MODE_100MB 0x2UL @@ -4919,6 +5039,35 @@ struct hwrm_ring_free_output { u8 valid; }; +/* hwrm_ring_reset_input (size:192b/24B) */ +struct hwrm_ring_reset_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 ring_type; + #define RING_RESET_REQ_RING_TYPE_L2_CMPL 0x0UL + #define RING_RESET_REQ_RING_TYPE_TX 0x1UL + #define RING_RESET_REQ_RING_TYPE_RX 0x2UL + #define RING_RESET_REQ_RING_TYPE_ROCE_CMPL 0x3UL + #define RING_RESET_REQ_RING_TYPE_LAST RING_RESET_REQ_RING_TYPE_ROCE_CMPL + u8 unused_0; + __le16 ring_id; + u8 unused_1[4]; +}; + +/* hwrm_ring_reset_output (size:128b/16B) */ +struct hwrm_ring_reset_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[4]; + u8 consumer_idx[3]; + u8 valid; +}; + /* hwrm_ring_aggint_qcaps_input (size:128b/16B) */ struct hwrm_ring_aggint_qcaps_input { __le16 req_type; @@ -5446,19 +5595,21 @@ struct hwrm_cfa_encap_record_alloc_input { __le64 resp_addr; __le32 flags; #define CFA_ENCAP_RECORD_ALLOC_REQ_FLAGS_LOOPBACK 0x1UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_FLAGS_EXTERNAL 0x2UL u8 encap_type; - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN 0x1UL - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_NVGRE 0x2UL - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2GRE 0x3UL - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPIP 0x4UL - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_GENEVE 0x5UL - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_MPLS 0x6UL - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VLAN 0x7UL - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE 0x8UL - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN_V4 0x9UL - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE_V1 0xaUL - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2_ETYPE 0xbUL - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_LAST CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2_ETYPE + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN 0x1UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_NVGRE 0x2UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2GRE 0x3UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPIP 0x4UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_GENEVE 0x5UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_MPLS 0x6UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VLAN 0x7UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE 0x8UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN_V4 0x9UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE_V1 0xaUL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2_ETYPE 0xbUL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN_GPE_V6 0xcUL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_LAST CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN_GPE_V6 u8 unused_0[3]; __le32 encap_data[20]; }; @@ -5506,6 +5657,7 @@ struct hwrm_cfa_ntuple_filter_alloc_input { #define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_LOOPBACK 0x1UL #define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DROP 0x2UL #define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_METER 0x4UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DEST_FID 0x8UL __le32 enables; #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_L2_FILTER_ID 0x1UL #define CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE 0x2UL @@ -5627,7 +5779,8 @@ struct hwrm_cfa_ntuple_filter_cfg_input { #define CFA_NTUPLE_FILTER_CFG_REQ_ENABLES_NEW_DST_ID 0x1UL #define CFA_NTUPLE_FILTER_CFG_REQ_ENABLES_NEW_MIRROR_VNIC_ID 0x2UL #define CFA_NTUPLE_FILTER_CFG_REQ_ENABLES_NEW_METER_INSTANCE_ID 0x4UL - u8 unused_0[4]; + __le32 flags; + #define CFA_NTUPLE_FILTER_CFG_REQ_FLAGS_DEST_FID 0x1UL __le64 ntuple_filter_id; __le32 new_dst_id; __le32 new_mirror_vnic_id; @@ -5892,13 +6045,15 @@ struct hwrm_cfa_flow_info_input { __le64 ext_flow_handle; }; -/* hwrm_cfa_flow_info_output (size:448b/56B) */ +/* hwrm_cfa_flow_info_output (size:5632b/704B) */ struct hwrm_cfa_flow_info_output { __le16 error_code; __le16 req_type; __le16 seq_id; __le16 resp_len; u8 flags; + #define CFA_FLOW_INFO_RESP_FLAGS_PATH_TX 0x1UL + #define CFA_FLOW_INFO_RESP_FLAGS_PATH_RX 0x2UL u8 profile; __le16 src_fid; __le16 dst_fid; @@ -5910,7 +6065,10 @@ struct hwrm_cfa_flow_info_output { __le16 flow_handle; __le32 tunnel_handle; __le16 flow_timer; - u8 unused_0[5]; + u8 unused_0[6]; + __le32 flow_key_data[130]; + __le32 flow_action_info[30]; + u8 unused_1[7]; u8 valid; }; From c6cc32a2133cb1eb5aa28ced1852aab2aeaf357a Mon Sep 17 00:00:00 2001 From: Erik Burrows Date: Tue, 19 Feb 2019 05:31:13 -0500 Subject: [PATCH 1352/1436] bnxt_en: Add support for BCM957504 Add support for BCM957504 with device ID 1751 Signed-off-by: Erik Burrows Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 92d73453a318..0f7a34a3b004 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1,7 +1,7 @@ /* Broadcom NetXtreme-C/E network driver. * * Copyright (c) 2014-2016 Broadcom Corporation - * Copyright (c) 2016-2018 Broadcom Limited + * Copyright (c) 2016-2019 Broadcom Limited * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -112,6 +112,7 @@ enum board_idx { BCM57454, BCM5745x_NPAR, BCM57508, + BCM57504, BCM58802, BCM58804, BCM58808, @@ -155,6 +156,7 @@ static const struct { [BCM57454] = { "Broadcom BCM57454 NetXtreme-E 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" }, [BCM5745x_NPAR] = { "Broadcom BCM5745x NetXtreme-E Ethernet Partition" }, [BCM57508] = { "Broadcom BCM57508 NetXtreme-E 10Gb/25Gb/50Gb/100Gb/200Gb Ethernet" }, + [BCM57504] = { "Broadcom BCM57504 NetXtreme-E 10Gb/25Gb/50Gb/100Gb/200Gb Ethernet" }, [BCM58802] = { "Broadcom BCM58802 NetXtreme-S 10Gb/25Gb/40Gb/50Gb Ethernet" }, [BCM58804] = { "Broadcom BCM58804 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" }, [BCM58808] = { "Broadcom BCM58808 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" }, @@ -201,6 +203,7 @@ static const struct pci_device_id bnxt_pci_tbl[] = { { PCI_VDEVICE(BROADCOM, 0x16f0), .driver_data = BCM58808 }, { PCI_VDEVICE(BROADCOM, 0x16f1), .driver_data = BCM57452 }, { PCI_VDEVICE(BROADCOM, 0x1750), .driver_data = BCM57508 }, + { PCI_VDEVICE(BROADCOM, 0x1751), .driver_data = BCM57504 }, { PCI_VDEVICE(BROADCOM, 0xd802), .driver_data = BCM58802 }, { PCI_VDEVICE(BROADCOM, 0xd804), .driver_data = BCM58804 }, #ifdef CONFIG_BNXT_SRIOV From 2a516444434ffa4419e67c5289d5f53272cb9674 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 19 Feb 2019 05:31:14 -0500 Subject: [PATCH 1353/1436] bnxt_en: Propagate trusted VF attribute to firmware. Newer firmware understands the concept of a trusted VF, so propagate the trusted VF attribute set by the PF admin. to the firmware. Also, check the firmware trusted setting when considering the VF MAC address change and reporting the trusted setting to the user. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 + .../net/ethernet/broadcom/bnxt/bnxt_sriov.c | 58 +++++++++++++++++-- 3 files changed, 60 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0f7a34a3b004..9700891a26b8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6683,6 +6683,10 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp) VER_GET_RESP_DEV_CAPS_CFG_FLOW_HANDLE_64BIT_SUPPORTED) bp->fw_cap |= BNXT_FW_CAP_OVS_64BIT_HANDLE; + if (dev_caps_cfg & + VER_GET_RESP_DEV_CAPS_CFG_TRUSTED_VF_SUPPORTED) + bp->fw_cap |= BNXT_FW_CAP_TRUSTED_VF; + hwrm_ver_get_exit: mutex_unlock(&bp->hwrm_cmd_lock); return rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 17554d4be651..ecbe7d28a723 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -945,6 +945,7 @@ struct bnxt_vf_info { * stored by PF. */ u16 vlan; + u16 func_qcfg_flags; u32 flags; #define BNXT_VF_QOS 0x1 #define BNXT_VF_SPOOFCHK 0x2 @@ -1478,6 +1479,7 @@ struct bnxt { #define BNXT_FW_CAP_IF_CHANGE 0x00000010 #define BNXT_FW_CAP_KONG_MB_CHNL 0x00000080 #define BNXT_FW_CAP_OVS_64BIT_HANDLE 0x00000400 + #define BNXT_FW_CAP_TRUSTED_VF 0x00000800 #define BNXT_NEW_RM(bp) ((bp)->fw_cap & BNXT_FW_CAP_NEW_RM) u32 hwrm_spec_code; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index d80f5c981d90..2b90a2bb1a1d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -121,6 +121,54 @@ int bnxt_set_vf_spoofchk(struct net_device *dev, int vf_id, bool setting) return rc; } +static int bnxt_hwrm_func_qcfg_flags(struct bnxt *bp, struct bnxt_vf_info *vf) +{ + struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_func_qcfg_input req = {0}; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1); + req.fid = cpu_to_le16(vf->fw_fid); + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc) { + mutex_unlock(&bp->hwrm_cmd_lock); + return -EIO; + } + vf->func_qcfg_flags = le16_to_cpu(resp->flags); + mutex_unlock(&bp->hwrm_cmd_lock); + return 0; +} + +static bool bnxt_is_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf) +{ + if (!(bp->fw_cap & BNXT_FW_CAP_TRUSTED_VF)) + return !!(vf->flags & BNXT_VF_TRUST); + + bnxt_hwrm_func_qcfg_flags(bp, vf); + return !!(vf->func_qcfg_flags & FUNC_QCFG_RESP_FLAGS_TRUSTED_VF); +} + +static int bnxt_hwrm_set_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf) +{ + struct hwrm_func_cfg_input req = {0}; + int rc; + + if (!(bp->fw_cap & BNXT_FW_CAP_TRUSTED_VF)) + return 0; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1); + req.fid = cpu_to_le16(vf->fw_fid); + if (vf->flags & BNXT_VF_TRUST) + req.flags = cpu_to_le32(FUNC_CFG_REQ_FLAGS_TRUSTED_VF_ENABLE); + else + req.flags = cpu_to_le32(FUNC_CFG_REQ_FLAGS_TRUSTED_VF_DISABLE); + rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc) + return -EIO; + return 0; +} + int bnxt_set_vf_trust(struct net_device *dev, int vf_id, bool trusted) { struct bnxt *bp = netdev_priv(dev); @@ -135,6 +183,7 @@ int bnxt_set_vf_trust(struct net_device *dev, int vf_id, bool trusted) else vf->flags &= ~BNXT_VF_TRUST; + bnxt_hwrm_set_trusted_vf(bp, vf); return 0; } @@ -164,7 +213,7 @@ int bnxt_get_vf_config(struct net_device *dev, int vf_id, else ivi->qos = 0; ivi->spoofchk = !!(vf->flags & BNXT_VF_SPOOFCHK); - ivi->trusted = !!(vf->flags & BNXT_VF_TRUST); + ivi->trusted = bnxt_is_trusted_vf(bp, vf); if (!(vf->flags & BNXT_VF_LINK_FORCED)) ivi->linkstate = IFLA_VF_LINK_STATE_AUTO; else if (vf->flags & BNXT_VF_LINK_UP) @@ -935,9 +984,10 @@ static int bnxt_vf_configure_mac(struct bnxt *bp, struct bnxt_vf_info *vf) * if the PF assigned MAC address is zero */ if (req->enables & cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_DFLT_MAC_ADDR)) { + bool trust = bnxt_is_trusted_vf(bp, vf); + if (is_valid_ether_addr(req->dflt_mac_addr) && - ((vf->flags & BNXT_VF_TRUST) || - !is_valid_ether_addr(vf->mac_addr) || + (trust || !is_valid_ether_addr(vf->mac_addr) || ether_addr_equal(req->dflt_mac_addr, vf->mac_addr))) { ether_addr_copy(vf->vf_mac_addr, req->dflt_mac_addr); return bnxt_hwrm_exec_fwd_resp(bp, vf, msg_size); @@ -962,7 +1012,7 @@ static int bnxt_vf_validate_set_mac(struct bnxt *bp, struct bnxt_vf_info *vf) * Otherwise, it must match the VF MAC address if firmware spec >= * 1.2.2 */ - if (vf->flags & BNXT_VF_TRUST) { + if (bnxt_is_trusted_vf(bp, vf)) { mac_ok = true; } else if (is_valid_ether_addr(vf->mac_addr)) { if (ether_addr_equal((const u8 *)req->l2_addr, vf->mac_addr)) From 0ca12be99667265ef8100860601d510e78f22cea Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Tue, 19 Feb 2019 05:31:15 -0500 Subject: [PATCH 1354/1436] bnxt_en: Add support for mdio read/write to external PHY Add support for SIOCGMIIREG and SIOCSMIIREG ioctls to mdio read/write to external PHY. Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 69 ++++++++++++++++++++++- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 9700891a26b8..a9edf946d3bf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -8621,24 +8622,88 @@ static int bnxt_close(struct net_device *dev) return 0; } +static int bnxt_hwrm_port_phy_read(struct bnxt *bp, u16 phy_addr, u16 reg, + u16 *val) +{ + struct hwrm_port_phy_mdio_read_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_port_phy_mdio_read_input req = {0}; + int rc; + + if (bp->hwrm_spec_code < 0x10a00) + return -EOPNOTSUPP; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_MDIO_READ, -1, -1); + req.port_id = cpu_to_le16(bp->pf.port_id); + req.phy_addr = phy_addr; + req.reg_addr = cpu_to_le16(reg & 0x1f); + if (bp->link_info.support_speeds & BNXT_LINK_SPEED_MSK_10GB) { + req.cl45_mdio = 1; + req.phy_addr = mdio_phy_id_prtad(phy_addr); + req.dev_addr = mdio_phy_id_devad(phy_addr); + req.reg_addr = cpu_to_le16(reg); + } + + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) + *val = le16_to_cpu(resp->reg_data); + mutex_unlock(&bp->hwrm_cmd_lock); + return rc; +} + +static int bnxt_hwrm_port_phy_write(struct bnxt *bp, u16 phy_addr, u16 reg, + u16 val) +{ + struct hwrm_port_phy_mdio_write_input req = {0}; + + if (bp->hwrm_spec_code < 0x10a00) + return -EOPNOTSUPP; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_MDIO_WRITE, -1, -1); + req.port_id = cpu_to_le16(bp->pf.port_id); + req.phy_addr = phy_addr; + req.reg_addr = cpu_to_le16(reg & 0x1f); + if (bp->link_info.support_speeds & BNXT_LINK_SPEED_MSK_10GB) { + req.cl45_mdio = 1; + req.phy_addr = mdio_phy_id_prtad(phy_addr); + req.dev_addr = mdio_phy_id_devad(phy_addr); + req.reg_addr = cpu_to_le16(reg); + } + req.reg_data = cpu_to_le16(val); + + return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); +} + /* rtnl_lock held */ static int bnxt_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { + struct mii_ioctl_data *mdio = if_mii(ifr); + struct bnxt *bp = netdev_priv(dev); + int rc; + switch (cmd) { case SIOCGMIIPHY: + mdio->phy_id = bp->link_info.phy_addr; + /* fallthru */ case SIOCGMIIREG: { + u16 mii_regval = 0; + if (!netif_running(dev)) return -EAGAIN; - return 0; + rc = bnxt_hwrm_port_phy_read(bp, mdio->phy_id, mdio->reg_num, + &mii_regval); + mdio->val_out = mii_regval; + return rc; } case SIOCSMIIREG: if (!netif_running(dev)) return -EAGAIN; - return 0; + return bnxt_hwrm_port_phy_write(bp, mdio->phy_id, mdio->reg_num, + mdio->val_in); default: /* do nothing */ From b2d69122fd627aa3c6c39c99b2b8706703a92634 Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Tue, 19 Feb 2019 05:31:16 -0500 Subject: [PATCH 1355/1436] bnxt_en: Return relevant error code when offload fails The driver returns -ENOSPC when tc_can_offload() check fails. Since that routine checks for flow parameters that are not supported by the driver, we should return the more appropriate -EOPNOTSUPP. Signed-off-by: Sriharsha Basavapatna Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 61a3457ed71b..44d6c5743fb9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -1290,7 +1290,7 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid, bnxt_tc_set_flow_dir(bp, flow, src_fid); if (!bnxt_tc_can_offload(bp, flow)) { - rc = -ENOSPC; + rc = -EOPNOTSUPP; goto free_node; } From 816db7663565cd23f74ed3d5c9240522e3fb0dda Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 19 Feb 2019 14:53:44 +0800 Subject: [PATCH 1356/1436] vhost: correctly check the return value of translate_desc() in log_used() When fail, translate_desc() returns negative value, otherwise the number of iovs. So we should fail when the return value is negative instead of a blindly check against zero. Detected by CoverityScan, CID# 1442593: Control flow issues (DEADCODE) Fixes: cc5e71075947 ("vhost: log dirty page correctly") Acked-by: Michael S. Tsirkin Reported-by: Stephen Hemminger Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 24a129fcdd61..a2e5dc7716e2 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1788,7 +1788,7 @@ static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len) ret = translate_desc(vq, (uintptr_t)vq->used + used_offset, len, iov, 64, VHOST_ACCESS_WO); - if (ret) + if (ret < 0) return ret; for (i = 0; i < ret; i++) { From a968b5e9d5879f9535d6099505f9e14abcafb623 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 19 Feb 2019 12:29:43 +0530 Subject: [PATCH 1357/1436] net: dsa: qca8k: Enable delay for RGMII_ID mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RGMII_ID specifies that we should have internal delay, so resurrect the delay addition routine but under the RGMII_ID mode. Fixes: 40269aa9f40a ("net: dsa: qca8k: disable delay for RGMII mode") Tested-by: Michal Vokáč Signed-off-by: Vinod Koul Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 12 ++++++++++++ drivers/net/dsa/qca8k.h | 1 + 2 files changed, 13 insertions(+) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index a4b6cda38016..195a8a87b984 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -443,6 +443,18 @@ qca8k_set_pad_ctrl(struct qca8k_priv *priv, int port, int mode) val = QCA8K_PORT_PAD_RGMII_EN; qca8k_write(priv, reg, val); break; + case PHY_INTERFACE_MODE_RGMII_ID: + /* RGMII_ID needs internal delay. This is enabled through + * PORT5_PAD_CTRL for all ports, rather than individual port + * registers + */ + qca8k_write(priv, reg, + QCA8K_PORT_PAD_RGMII_EN | + QCA8K_PORT_PAD_RGMII_TX_DELAY(QCA8K_MAX_DELAY) | + QCA8K_PORT_PAD_RGMII_RX_DELAY(QCA8K_MAX_DELAY)); + qca8k_write(priv, QCA8K_REG_PORT5_PAD_CTRL, + QCA8K_PORT_PAD_RGMII_RX_DELAY_EN); + break; case PHY_INTERFACE_MODE_SGMII: qca8k_write(priv, reg, QCA8K_PORT_PAD_SGMII_EN); break; diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h index 613fe5c50236..d146e54c8a6c 100644 --- a/drivers/net/dsa/qca8k.h +++ b/drivers/net/dsa/qca8k.h @@ -40,6 +40,7 @@ ((0x8 + (x & 0x3)) << 22) #define QCA8K_PORT_PAD_RGMII_RX_DELAY(x) \ ((0x10 + (x & 0x3)) << 20) +#define QCA8K_MAX_DELAY 3 #define QCA8K_PORT_PAD_RGMII_RX_DELAY_EN BIT(24) #define QCA8K_PORT_PAD_SGMII_EN BIT(7) #define QCA8K_REG_MODULE_EN 0x030 From 3b9c9f3b0b0293a52c0edd033d9df3a6b0e06f62 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 19 Feb 2019 16:06:22 +0800 Subject: [PATCH 1358/1436] net: rose: add missing dev_put() on error in rose_bind when capable check failed, dev_put should be call before return -EACCES. Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- net/rose/af_rose.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index d00a0ef39a56..c96f63ffe31e 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -689,8 +689,10 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) rose->source_call = user->call; ax25_uid_put(user); } else { - if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) + if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) { + dev_put(dev); return -EACCES; + } rose->source_call = *source; } From c2a5994fbb5bf6ae3fcdcc9bbe323d62f8e28618 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Tue, 19 Feb 2019 21:45:10 +0800 Subject: [PATCH 1359/1436] net: ns83820: code cleanup for ns83820_probe_phy() This patch is to do code cleanup for ns83820_probe_phy(). It deletes unused variable 'first', commented out code, and the pointless 'for' loop. Signed-off-by: Mao Wenan Acked-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/ns83820.c | 64 ++++++++------------------ 1 file changed, 18 insertions(+), 46 deletions(-) diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 000009e18ff8..9098ee7fe0d1 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -1869,56 +1869,28 @@ static unsigned ns83820_mii_write_reg(struct ns83820 *dev, unsigned phy, unsigne static void ns83820_probe_phy(struct net_device *ndev) { struct ns83820 *dev = PRIV(ndev); - static int first; - int i; -#define MII_PHYIDR1 0x02 -#define MII_PHYIDR2 0x03 + int j; + unsigned a, b; -#if 0 - if (!first) { - unsigned tmp; - ns83820_mii_read_reg(dev, 1, 0x09); - ns83820_mii_write_reg(dev, 1, 0x10, 0x0d3e); - - tmp = ns83820_mii_read_reg(dev, 1, 0x00); - ns83820_mii_write_reg(dev, 1, 0x00, tmp | 0x8000); - udelay(1300); - ns83820_mii_read_reg(dev, 1, 0x09); + for (j = 0; j < 0x16; j += 4) { + dprintk("%s: [0x%02x] %04x %04x %04x %04x\n", + ndev->name, j, + ns83820_mii_read_reg(dev, 1, 0 + j), + ns83820_mii_read_reg(dev, 1, 1 + j), + ns83820_mii_read_reg(dev, 1, 2 + j), + ns83820_mii_read_reg(dev, 1, 3 + j) + ); } -#endif - first = 1; - for (i=1; i<2; i++) { - int j; - unsigned a, b; - a = ns83820_mii_read_reg(dev, i, MII_PHYIDR1); - b = ns83820_mii_read_reg(dev, i, MII_PHYIDR2); + /* read firmware version: memory addr is 0x8402 and 0x8403 */ + ns83820_mii_write_reg(dev, 1, 0x16, 0x000d); + ns83820_mii_write_reg(dev, 1, 0x1e, 0x810e); + a = ns83820_mii_read_reg(dev, 1, 0x1d); - //printk("%s: phy %d: 0x%04x 0x%04x\n", - // ndev->name, i, a, b); - - for (j=0; j<0x16; j+=4) { - dprintk("%s: [0x%02x] %04x %04x %04x %04x\n", - ndev->name, j, - ns83820_mii_read_reg(dev, i, 0 + j), - ns83820_mii_read_reg(dev, i, 1 + j), - ns83820_mii_read_reg(dev, i, 2 + j), - ns83820_mii_read_reg(dev, i, 3 + j) - ); - } - } - { - unsigned a, b; - /* read firmware version: memory addr is 0x8402 and 0x8403 */ - ns83820_mii_write_reg(dev, 1, 0x16, 0x000d); - ns83820_mii_write_reg(dev, 1, 0x1e, 0x810e); - a = ns83820_mii_read_reg(dev, 1, 0x1d); - - ns83820_mii_write_reg(dev, 1, 0x16, 0x000d); - ns83820_mii_write_reg(dev, 1, 0x1e, 0x810e); - b = ns83820_mii_read_reg(dev, 1, 0x1d); - dprintk("version: 0x%04x 0x%04x\n", a, b); - } + ns83820_mii_write_reg(dev, 1, 0x16, 0x000d); + ns83820_mii_write_reg(dev, 1, 0x1e, 0x810e); + b = ns83820_mii_read_reg(dev, 1, 0x1d); + dprintk("version: 0x%04x 0x%04x\n", a, b); } #endif From 2736d94f351b92749c07efef01f7c10548e39ad8 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 22 Jan 2019 10:50:10 +0200 Subject: [PATCH 1360/1436] ethtool: Added support for 50Gbps per lane link modes Added support for 50Gbps per lane link modes. Define various 50G, 100G and 200G link modes using it. Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/uapi/linux/ethtool.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 17be76aeb468..378c52308d89 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1453,6 +1453,21 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_FEC_NONE_BIT = 49, ETHTOOL_LINK_MODE_FEC_RS_BIT = 50, ETHTOOL_LINK_MODE_FEC_BASER_BIT = 51, + ETHTOOL_LINK_MODE_50000baseKR_Full_BIT = 52, + ETHTOOL_LINK_MODE_50000baseSR_Full_BIT = 53, + ETHTOOL_LINK_MODE_50000baseCR_Full_BIT = 54, + ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT = 55, + ETHTOOL_LINK_MODE_50000baseDR_Full_BIT = 56, + ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT = 57, + ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT = 58, + ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT = 59, + ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT = 60, + ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT = 61, + ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT = 62, + ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT = 63, + ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT = 64, + ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT = 65, + ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT = 66, /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_* @@ -1461,7 +1476,7 @@ enum ethtool_link_mode_bit_indices { */ __ETHTOOL_LINK_MODE_LAST - = ETHTOOL_LINK_MODE_FEC_BASER_BIT, + = ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT, }; #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \ @@ -1569,6 +1584,7 @@ enum ethtool_link_mode_bit_indices { #define SPEED_50000 50000 #define SPEED_56000 56000 #define SPEED_100000 100000 +#define SPEED_200000 200000 #define SPEED_UNKNOWN -1 From 6a897372417e8c9e0e03c4c438fc1f48dd096ea9 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 22 Jan 2019 10:29:57 +0200 Subject: [PATCH 1361/1436] net/mlx5: ethtool, Add ethtool support for 50Gbps per lane link modes In previous patch, driver added new speed modes: 50Gbps per lane support for 50G/100G/200G. This patch modifies mlx5e_get_link_ksettings and mlx5e_set_link_ksettings to set and get these link modes via ethtool. In order to do so, added mapping of new HW bits to ethtool bitmap and enforce mutual exclusion between extended link modes and previously defined link modes. Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 245 +++++++++++++----- 1 file changed, 182 insertions(+), 63 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 3cd73254d020..ee5dc8e354d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -63,76 +63,147 @@ struct ptys2ethtool_config { __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised); }; -static struct ptys2ethtool_config ptys2ethtool_table[MLX5E_LINK_MODES_NUMBER]; +static +struct ptys2ethtool_config ptys2legacy_ethtool_table[MLX5E_LINK_MODES_NUMBER]; +static +struct ptys2ethtool_config ptys2ext_ethtool_table[MLX5E_EXT_LINK_MODES_NUMBER]; -#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, ...) \ +#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, table, ...) \ ({ \ struct ptys2ethtool_config *cfg; \ const unsigned int modes[] = { __VA_ARGS__ }; \ - unsigned int i; \ - cfg = &ptys2ethtool_table[reg_]; \ + unsigned int i, bit, idx; \ + cfg = &ptys2##table##_ethtool_table[reg_]; \ bitmap_zero(cfg->supported, \ __ETHTOOL_LINK_MODE_MASK_NBITS); \ bitmap_zero(cfg->advertised, \ __ETHTOOL_LINK_MODE_MASK_NBITS); \ for (i = 0 ; i < ARRAY_SIZE(modes) ; ++i) { \ - __set_bit(modes[i], cfg->supported); \ - __set_bit(modes[i], cfg->advertised); \ + bit = modes[i] % 64; \ + idx = modes[i] / 64; \ + __set_bit(bit, &cfg->supported[idx]); \ + __set_bit(bit, &cfg->advertised[idx]); \ } \ }) void mlx5e_build_ptys2ethtool_map(void) { - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII, + memset(ptys2legacy_ethtool_table, 0, sizeof(ptys2legacy_ethtool_table)); + memset(ptys2ext_ethtool_table, 0, sizeof(ptys2ext_ethtool_table)); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII, legacy, ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX, legacy, ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4, legacy, ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4, legacy, ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR, legacy, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2, legacy, ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4, legacy, ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4, legacy, ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4, legacy, ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR, legacy, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR, legacy, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER, legacy, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4, legacy, ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4, legacy, ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2, legacy, ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4, legacy, ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4, legacy, ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4, legacy, ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, legacy, ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, legacy, ETHTOOL_LINK_MODE_10000baseT_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, legacy, ETHTOOL_LINK_MODE_25000baseCR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR, legacy, ETHTOOL_LINK_MODE_25000baseKR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR, legacy, ETHTOOL_LINK_MODE_25000baseSR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2, legacy, ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2, legacy, ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_SGMII_100M, ext, + ETHTOOL_LINK_MODE_100baseT_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_X_SGMII, ext, + ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + ETHTOOL_LINK_MODE_1000baseX_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_5GBASE_R, ext, + ETHTOOL_LINK_MODE_5000baseT_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_XFI_XAUI_1, ext, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseR_FEC_BIT, + ETHTOOL_LINK_MODE_10000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseER_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_XLAUI_4_XLPPI_4, ext, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GAUI_1_25GBASE_CR_KR, ext, + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2, + ext, + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR, ext, + ETHTOOL_LINK_MODE_50000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseDR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_CAUI_4_100GBASE_CR4_KR4, ext, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GAUI_2_100GBASE_CR2_KR2, ext, + ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_200GAUI_4_200GBASE_CR4_KR4, ext, + ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT); +} + +static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev, + struct ptys2ethtool_config **arr, + u32 *size) +{ + bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + + *arr = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table; + *size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) : + ARRAY_SIZE(ptys2legacy_ethtool_table); } typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable); @@ -526,27 +597,35 @@ static int mlx5e_set_coalesce(struct net_device *netdev, return mlx5e_ethtool_set_coalesce(priv, coal); } -static void ptys2ethtool_supported_link(unsigned long *supported_modes, +static void ptys2ethtool_supported_link(struct mlx5_core_dev *mdev, + unsigned long *supported_modes, u32 eth_proto_cap) { unsigned long proto_cap = eth_proto_cap; + struct ptys2ethtool_config *table; + u32 max_size; int proto; - for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER) + mlx5e_ethtool_get_speed_arr(mdev, &table, &max_size); + for_each_set_bit(proto, &proto_cap, max_size) bitmap_or(supported_modes, supported_modes, - ptys2ethtool_table[proto].supported, + table[proto].supported, __ETHTOOL_LINK_MODE_MASK_NBITS); } -static void ptys2ethtool_adver_link(unsigned long *advertising_modes, +static void ptys2ethtool_adver_link(struct mlx5_core_dev *mdev, + unsigned long *advertising_modes, u32 eth_proto_cap) { unsigned long proto_cap = eth_proto_cap; + struct ptys2ethtool_config *table; + u32 max_size; int proto; - for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER) + mlx5e_ethtool_get_speed_arr(mdev, &table, &max_size); + for_each_set_bit(proto, &proto_cap, max_size) bitmap_or(advertising_modes, advertising_modes, - ptys2ethtool_table[proto].advertised, + table[proto].advertised, __ETHTOOL_LINK_MODE_MASK_NBITS); } @@ -716,22 +795,22 @@ out: link_ksettings->base.duplex = duplex; } -static void get_supported(u32 eth_proto_cap, +static void get_supported(struct mlx5_core_dev *mdev, u32 eth_proto_cap, struct ethtool_link_ksettings *link_ksettings) { unsigned long *supported = link_ksettings->link_modes.supported; + ptys2ethtool_supported_link(mdev, supported, eth_proto_cap); - ptys2ethtool_supported_link(supported, eth_proto_cap); ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause); } -static void get_advertising(u32 eth_proto_cap, u8 tx_pause, - u8 rx_pause, +static void get_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_cap, + u8 tx_pause, u8 rx_pause, struct ethtool_link_ksettings *link_ksettings) { unsigned long *advertising = link_ksettings->link_modes.advertising; + ptys2ethtool_adver_link(mdev, advertising, eth_proto_cap); - ptys2ethtool_adver_link(advertising, eth_proto_cap); if (rx_pause) ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause); if (tx_pause ^ rx_pause) @@ -781,12 +860,12 @@ static u8 get_connector_port(u32 eth_proto, u8 connector_type) return PORT_OTHER; } -static void get_lp_advertising(u32 eth_proto_lp, +static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp, struct ethtool_link_ksettings *link_ksettings) { unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising; - ptys2ethtool_adver_link(lp_advertising, eth_proto_lp); + ptys2ethtool_adver_link(mdev, lp_advertising, eth_proto_lp); } int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, @@ -803,6 +882,7 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, u8 an_disable_admin; u8 an_status; u8 connector_type; + bool ext; int err; err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); @@ -811,22 +891,25 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, __func__, err); goto err_query_regs; } - - eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); - eth_proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); - eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); - an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); - an_status = MLX5_GET(ptys_reg, out, an_status); - connector_type = MLX5_GET(ptys_reg, out, connector_type); + ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + eth_proto_cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_capability); + eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_admin); + eth_proto_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_oper); + eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); + an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); + an_status = MLX5_GET(ptys_reg, out, an_status); + connector_type = MLX5_GET(ptys_reg, out, connector_type); mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); - get_supported(eth_proto_cap, link_ksettings); - get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings); + get_supported(mdev, eth_proto_cap, link_ksettings); + get_advertising(mdev, eth_proto_admin, tx_pause, rx_pause, link_ksettings); get_speed_duplex(priv->netdev, eth_proto_oper, link_ksettings); eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; @@ -835,7 +918,7 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, connector_type); ptys2ethtool_supported_advertised_port(link_ksettings, eth_proto_admin, connector_type); - get_lp_advertising(eth_proto_lp, link_ksettings); + get_lp_advertising(mdev, eth_proto_lp, link_ksettings); if (an_status == MLX5_AN_COMPLETE) ethtool_link_ksettings_add_link_mode(link_ksettings, @@ -874,7 +957,9 @@ static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes) u32 i, ptys_modes = 0; for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { - if (bitmap_intersects(ptys2ethtool_table[i].advertised, + if (*ptys2legacy_ethtool_table[i].advertised == 0) + continue; + if (bitmap_intersects(ptys2legacy_ethtool_table[i].advertised, link_modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) ptys_modes |= MLX5E_PROT_MASK(i); @@ -883,6 +968,25 @@ static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes) return ptys_modes; } +static u32 mlx5e_ethtool2ptys_ext_adver_link(const unsigned long *link_modes) +{ + u32 i, ptys_modes = 0; + unsigned long modes[2]; + + for (i = 0; i < MLX5E_EXT_LINK_MODES_NUMBER; ++i) { + if (*ptys2ext_ethtool_table[i].advertised == 0) + continue; + memset(modes, 0, sizeof(modes)); + bitmap_and(modes, ptys2ext_ethtool_table[i].advertised, + link_modes, __ETHTOOL_LINK_MODE_MASK_NBITS); + + if (modes[0] == ptys2ext_ethtool_table[i].advertised[0] && + modes[1] == ptys2ext_ethtool_table[i].advertised[1]) + ptys_modes |= MLX5E_PROT_MASK(i); + } + return ptys_modes; +} + int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, const struct ethtool_link_ksettings *link_ksettings) { @@ -890,6 +994,8 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, struct mlx5e_port_eth_proto eproto; bool an_changes = false; u8 an_disable_admin; + bool ext_supported; + bool ext_requested; u8 an_disable_cap; bool an_disable; u32 link_modes; @@ -897,18 +1003,31 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, u32 speed; int err; + u32 (*ethtool2ptys_adver_func)(const unsigned long *adver); + +#define MLX5E_PTYS_EXT ((1ULL << ETHTOOL_LINK_MODE_50000baseKR_Full_BIT) - 1) + + ext_requested = (link_ksettings->link_modes.advertising[0] > + MLX5E_PTYS_EXT); + ext_supported = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + + /*when ptys_extended_ethernet is set legacy link modes are deprecated */ + if (ext_requested != ext_supported) + return -EPROTONOSUPPORT; + speed = link_ksettings->base.speed; - - link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ? - mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) : - mlx5e_port_speed2linkmodes(mdev, speed); - - err = mlx5_port_query_eth_proto(mdev, 1, false, &eproto); + ethtool2ptys_adver_func = ext_requested ? + mlx5e_ethtool2ptys_ext_adver_link : + mlx5e_ethtool2ptys_adver_link; + err = mlx5_port_query_eth_proto(mdev, 1, ext_supported, &eproto); if (err) { netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n", __func__, err); goto out; } + link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ? + ethtool2ptys_adver_func(link_ksettings->link_modes.advertising) : + mlx5e_port_speed2linkmodes(mdev, speed); link_modes = link_modes & eproto.cap; if (!link_modes) { @@ -928,7 +1047,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, if (!an_changes && link_modes == eproto.admin) goto out; - mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, false); + mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext_supported); mlx5_toggle_port_link(mdev); out: From 877662e27276f345ea07e49f153aa88e9a1e313c Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 26 Nov 2018 17:22:16 +0200 Subject: [PATCH 1362/1436] net/mlx5e: Wrap the open and apply of channels in one fail-safe function Take into a function the common code structure of opening a side set of channels followed by a call to apply them. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 +-- .../ethernet/mellanox/mlx5/core/en_dcbnl.c | 4 +- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 54 +++++-------------- .../net/ethernet/mellanox/mlx5/core/en_main.c | 33 +++++++----- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 4 +- 5 files changed, 39 insertions(+), 62 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index e9acfa9aa069..71c65cc17904 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -858,9 +858,9 @@ void mlx5e_close_channels(struct mlx5e_channels *chs); * switching channels */ typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv); -void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, - struct mlx5e_channels *new_chs, - mlx5e_fp_hw_modify hw_modify); +int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_hw_modify hw_modify); void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 722998d68564..554672edf8c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1126,9 +1126,7 @@ static void mlx5e_trust_update_sq_inline_mode(struct mlx5e_priv *priv) priv->channels.params.tx_min_inline_mode) goto out; - if (mlx5e_open_channels(priv, &new_channels)) - goto out; - mlx5e_switch_priv_channels(priv, &new_channels, NULL); + mlx5e_safe_switch_channels(priv, &new_channels, NULL); out: mutex_unlock(&priv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index ee5dc8e354d6..0804b478ad19 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -369,11 +369,7 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, goto unlock; } - err = mlx5e_open_channels(priv, &new_channels); - if (err) - goto unlock; - - mlx5e_switch_priv_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); unlock: mutex_unlock(&priv->state_lock); @@ -431,11 +427,6 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, goto out; } - /* Create fresh channels with new parameters */ - err = mlx5e_open_channels(priv, &new_channels); - if (err) - goto out; - arfs_enabled = priv->netdev->features & NETIF_F_NTUPLE; if (arfs_enabled) mlx5e_arfs_disable(priv); @@ -445,13 +436,14 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, MLX5E_INDIR_RQT_SIZE, count); /* Switch to new channels, set new parameters and close old ones */ - mlx5e_switch_priv_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); if (arfs_enabled) { - err = mlx5e_arfs_enable(priv); - if (err) + int err2 = mlx5e_arfs_enable(priv); + + if (err2) netdev_err(priv->netdev, "%s: mlx5e_arfs_enable failed: %d\n", - __func__, err); + __func__, err2); } out: @@ -577,12 +569,7 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, goto out; } - /* open fresh channels with new coal parameters */ - err = mlx5e_open_channels(priv, &new_channels); - if (err) - goto out; - - mlx5e_switch_priv_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); out: mutex_unlock(&priv->state_lock); @@ -1635,7 +1622,6 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, struct mlx5e_channels new_channels = {}; bool mode_changed; u8 cq_period_mode, current_cq_period_mode; - int err = 0; cq_period_mode = enable ? MLX5_CQ_PERIOD_MODE_START_FROM_CQE : @@ -1663,12 +1649,7 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, return 0; } - err = mlx5e_open_channels(priv, &new_channels); - if (err) - return err; - - mlx5e_switch_priv_channels(priv, &new_channels, NULL); - return 0; + return mlx5e_safe_switch_channels(priv, &new_channels, NULL); } static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable) @@ -1701,11 +1682,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val return 0; } - err = mlx5e_open_channels(priv, &new_channels); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); if (err) return err; - mlx5e_switch_priv_channels(priv, &new_channels, NULL); mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n", MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF"); @@ -1738,7 +1718,6 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channels new_channels = {}; - int err; if (enable) { if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) @@ -1760,12 +1739,7 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) return 0; } - err = mlx5e_open_channels(priv, &new_channels); - if (err) - return err; - - mlx5e_switch_priv_channels(priv, &new_channels, NULL); - return 0; + return mlx5e_safe_switch_channels(priv, &new_channels, NULL); } static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) @@ -1808,12 +1782,8 @@ static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable) return 0; } - err = mlx5e_open_channels(priv, &new_channels); - if (err) - return err; - - mlx5e_switch_priv_channels(priv, &new_channels, NULL); - return 0; + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + return err; } static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 83510ca0bcd8..878b3467e459 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2885,13 +2885,14 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) mlx5e_deactivate_channels(&priv->channels); } -void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, - struct mlx5e_channels *new_chs, - mlx5e_fp_hw_modify hw_modify) +static void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_hw_modify hw_modify) { struct net_device *netdev = priv->netdev; int new_num_txqs; int carrier_ok; + new_num_txqs = new_chs->num * new_chs->params.num_tc; carrier_ok = netif_carrier_ok(netdev); @@ -2917,6 +2918,20 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, netif_carrier_on(netdev); } +int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_hw_modify hw_modify) +{ + int err; + + err = mlx5e_open_channels(priv, new_chs); + if (err) + return err; + + mlx5e_switch_priv_channels(priv, new_chs, hw_modify); + return 0; +} + void mlx5e_timestamp_init(struct mlx5e_priv *priv) { priv->tstamp.tx_type = HWTSTAMP_TX_OFF; @@ -3333,13 +3348,12 @@ static int mlx5e_setup_tc_mqprio(struct net_device *netdev, goto out; } - err = mlx5e_open_channels(priv, &new_channels); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); if (err) goto out; priv->max_opened_tc = max_t(u8, priv->max_opened_tc, new_channels.params.num_tc); - mlx5e_switch_priv_channels(priv, &new_channels, NULL); out: mutex_unlock(&priv->state_lock); return err; @@ -3549,11 +3563,7 @@ static int set_feature_lro(struct net_device *netdev, bool enable) goto out; } - err = mlx5e_open_channels(priv, &new_channels); - if (err) - goto out; - - mlx5e_switch_priv_channels(priv, &new_channels, mlx5e_modify_tirs_lro); + err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_modify_tirs_lro); out: mutex_unlock(&priv->state_lock); return err; @@ -3771,11 +3781,10 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, goto out; } - err = mlx5e_open_channels(priv, &new_channels); + err = mlx5e_safe_switch_channels(priv, &new_channels, set_mtu_cb); if (err) goto out; - mlx5e_switch_priv_channels(priv, &new_channels, set_mtu_cb); netdev->mtu = new_channels.params.sw_mtu; out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index bfc0f6581729..4eac42555c7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -446,11 +446,11 @@ static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu) new_channels.params = *params; new_channels.params.sw_mtu = new_mtu; - err = mlx5e_open_channels(priv, &new_channels); + + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); if (err) goto out; - mlx5e_switch_priv_channels(priv, &new_channels, NULL); netdev->mtu = new_channels.params.sw_mtu; out: From 73c718fbb32adc2df8a7ee00c1a880b97d547d24 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 12 Feb 2019 19:45:16 +0200 Subject: [PATCH 1363/1436] net/mlx5e: Remove wrong and superfluous tc pedit header type check With recent introduction of flow_rule infrastructure drivers no longer directly include action headers, so it is no longer possible to use constants defined in them. Instead, one of flow_rule patches substituted pedit action header constant with hardcoded value '2' in mlx5 set_pedit_val() function conditional which verifies that header type is in range of values allowed by pedit action. That conditional is now both wrong (hardcoded value is '2' but __PEDIT_HDR_TYPE_MAX is 6 in current version) and superfluous (pedit action already verifies that header type is in allowed range during init). Remove the described check from mlx5 code. Fixes: 738678817573 ("drivers: net: use flow action infrastructure") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Reviewed-by: Dmytro Linkin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 05892fc4e03f..43b7191e94b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1827,9 +1827,6 @@ static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, { u32 *curr_pmask, *curr_pval; - if (hdr_type >= 2) - goto out_err; - curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset); curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset); From 988ab9c7363a5b24e9221ea52227b65b3ce3e5db Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 11 Feb 2019 19:39:42 -0800 Subject: [PATCH 1364/1436] net/mlx5e: Introduce mlx5e_flow_esw_attr_init() helper Introduce the mlx5e_flow_esw_attr_init() helper for simplifying codes. Signed-off-by: Tonghao Zhang Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 41 +++++++++++++------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 43b7191e94b2..1df7e7d6431b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2733,6 +2733,30 @@ err_free: return err; } +static void +mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr, + struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct tc_cls_flower_offload *f, + struct mlx5_eswitch_rep *in_rep, + struct mlx5_core_dev *in_mdev) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + esw_attr->parse_attr = parse_attr; + esw_attr->chain = f->common.chain_index; + esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16; + + esw_attr->in_rep = in_rep; + esw_attr->in_mdev = in_mdev; + + if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) == + MLX5_COUNTER_SOURCE_ESWITCH) + esw_attr->counter_dev = in_mdev; + else + esw_attr->counter_dev = priv->mdev; +} + static struct mlx5e_tc_flow * __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, @@ -2754,28 +2778,21 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, &parse_attr, &flow); if (err) goto out; + parse_attr->filter_dev = filter_dev; - flow->esw_attr->parse_attr = parse_attr; + mlx5e_flow_esw_attr_init(flow->esw_attr, + priv, parse_attr, + f, in_rep, in_mdev); + err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, f, filter_dev); if (err) goto err_free; - flow->esw_attr->chain = f->common.chain_index; - flow->esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16; err = parse_tc_fdb_actions(priv, &rule->action, parse_attr, flow, extack); if (err) goto err_free; - flow->esw_attr->in_rep = in_rep; - flow->esw_attr->in_mdev = in_mdev; - - if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) == - MLX5_COUNTER_SOURCE_ESWITCH) - flow->esw_attr->counter_dev = in_mdev; - else - flow->esw_attr->counter_dev = priv->mdev; - err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack); if (err) goto err_free; From 7040632df58481424dd958a26ac0fa2d97721f22 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 11 Feb 2019 19:39:43 -0800 Subject: [PATCH 1365/1436] net/mlx5e: Remove 'parse_attr' argument in mlx5e_tc_add_fdb_flow() This patch is a little improvement. Simplify the mlx5e_tc_add_fdb_flow(). Signed-off-by: Tonghao Zhang Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 1df7e7d6431b..0b3cd3f7f18a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -929,13 +929,13 @@ mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, static int mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, - struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; u32 max_chain = mlx5_eswitch_get_chain_range(esw); struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; u16 max_prio = mlx5_eswitch_get_prio_range(esw); struct net_device *out_dev, *encap_dev = NULL; struct mlx5_fc *counter = NULL; @@ -967,7 +967,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) continue; - mirred_ifindex = attr->parse_attr->mirred_ifindex[out_index]; + mirred_ifindex = parse_attr->mirred_ifindex[out_index]; out_dev = __dev_get_by_index(dev_net(priv->netdev), mirred_ifindex); err = mlx5e_attach_encap(priv, @@ -2793,7 +2793,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; - err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack); + err = mlx5e_tc_add_fdb_flow(priv, flow, extack); if (err) goto err_free; From 36a73471e55967248a4911517ab8deb410882060 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 17 Feb 2019 15:21:27 +0200 Subject: [PATCH 1366/1436] net/mlx5e: Add missing static function annotation Compilation with W=1 produces following warning: drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c:69:6: warning: no previous prototype for _mlx5e_monitor_counter_start_ [-Wmissing-prototypes] void mlx5e_monitor_counter_start(struct mlx5e_priv *priv) ^~~~~~~~~~~~~~~~~~~~~~~~~~~ Avoid it by declaring mlx5e_monitor_counter_start() as a static function. Fixes: 5c7e8bbb0257 ("net/mlx5e: Use monitor counters for update stats") Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c index 2ce420851e77..7cd5b02e0f10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c @@ -66,7 +66,7 @@ static int mlx5e_monitor_event_handler(struct notifier_block *nb, return NOTIFY_OK; } -void mlx5e_monitor_counter_start(struct mlx5e_priv *priv) +static void mlx5e_monitor_counter_start(struct mlx5e_priv *priv) { MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler, MONITOR_COUNTER); From 566428375a53619196e31803130dd1a7010c4d7f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 17 Feb 2019 15:21:28 +0200 Subject: [PATCH 1367/1436] net/mlx5: Delete unused FPGA QPN variable fpga_qpn was assigned but never used and compilation with W=1 produced the following warning: drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c: In function _mlx5_fpga_event_: drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c:320:6: warning: variable _fpga_qpn_ set but not used [-Wunused-but-set-variable] u32 fpga_qpn; ^~~~~~~~ Fixes: 98db16bab59f ("net/mlx5: FPGA, Handle QP error event") Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c index 27c5f6c7d36a..d046d1ec2a86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c @@ -317,7 +317,6 @@ static int mlx5_fpga_event(struct mlx5_fpga_device *fdev, const char *event_name; bool teardown = false; unsigned long flags; - u32 fpga_qpn; u8 syndrome; switch (event) { @@ -328,7 +327,6 @@ static int mlx5_fpga_event(struct mlx5_fpga_device *fdev, case MLX5_EVENT_TYPE_FPGA_QP_ERROR: syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome); event_name = mlx5_fpga_qp_syndrome_to_string(syndrome); - fpga_qpn = MLX5_GET(fpga_qp_error_event, data, fpga_qpn); break; default: return NOTIFY_DONE; From e87636117e9a093ae221c1b935bf1a698be50df0 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 19 Feb 2019 12:32:53 -0800 Subject: [PATCH 1368/1436] =?UTF-8?q?net/mlx5e:=20Remove=20unused=20variab?= =?UTF-8?q?le=20=E2=80=98esw=E2=80=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following compiler warning: drivers/net/ethernet/mellanox/mlx5/core/en_tc.c:2770: warning: unused variable ‘esw’ [-Wunused-variable] Fixes: 1cd3ab86b713 ("net/mlx5e: Introduce mlx5e_flow_esw_attr_init() helper") Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 0b3cd3f7f18a..b38986e18dd7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2767,7 +2767,6 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, { struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; int attr_size, err; From acad70731e63aa3a901bc04a81a4a152cadbe9bf Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Sun, 17 Feb 2019 22:23:16 -0600 Subject: [PATCH 1369/1436] net/mlx5: E-Switch, Fix the warning on vport index out of range When eswitch gets vport data structure, the index should not be out of the range of the vport array. Driver mistakenly used vport number to check the range. Fixes: 22b8ddc86bf4 ("net/mlx5: E-Switch, Assign a different position for uplink rep and vport") Signed-off-by: Bodong Wang Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index e18af31336e6..d4f6859bf58c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -87,7 +87,7 @@ static struct mlx5_vport *mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, { u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); - WARN_ON(vport_num > esw->total_vports - 1); + WARN_ON(idx > esw->total_vports - 1); return &esw->vports[idx]; } From 1c50d369f560809d45e75fa9d7d6b3901192c18b Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Sun, 17 Feb 2019 21:04:42 -0600 Subject: [PATCH 1370/1436] net/mlx5: E-Switch, Disable esw manager vport correctly When disabling vport, relevant vport configurations will be cleaned up. These cleanups should be done to the vports which had these configs applied at vport enablement. As esw manager vport didn't have such vport config applied, cleanup should not touch it. Fixes: de9e6a8136c5 ("net/mlx5: E-Switch, Properly refer to host PF vport as other vport") Signed-off-by: Bodong Wang Reported-by: Eli Britstein Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d4f6859bf58c..6cb9710f76d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1606,7 +1606,8 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, esw_vport_change_handle_locked(vport); vport->enabled_events = 0; esw_vport_disable_qos(esw, vport_num); - if (esw->mode == SRIOV_LEGACY) { + if (esw->manager_vport != vport_num && + esw->mode == SRIOV_LEGACY) { mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, vport_num, 1, From 58066ac9d7f5dcde4ef08c03b7e127f0522d9ea0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 19 Feb 2019 14:21:20 +0000 Subject: [PATCH 1371/1436] ptp_qoriq: don't pass a large struct by value but instead pass it by reference Passing the struct ptp_clock_info caps by parameter is passing over 130 bytes of data by value on the stack. Optimize this by passing it by reference instead. Also shinks the object code size: Before: text data bss dec hex filename 12596 2160 64 14820 39e4 drivers/ptp/ptp_qoriq.o After: text data bss dec hex filename 12567 2160 64 14791 39c7 drivers/ptp/ptp_qoriq.o Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc_ptp.c | 2 +- drivers/ptp/ptp_qoriq.c | 6 +++--- include/linux/fsl/ptp_qoriq.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c index dc2f58a7c9e5..8c1497e7d9c5 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c @@ -92,7 +92,7 @@ static int enetc_ptp_probe(struct pci_dev *pdev, ptp_qoriq->dev = &pdev->dev; - err = ptp_qoriq_init(ptp_qoriq, base, enetc_ptp_caps); + err = ptp_qoriq_init(ptp_qoriq, base, &enetc_ptp_caps); if (err) goto err_no_clock; diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index 42d3654f77f0..53775362aac6 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -459,7 +459,7 @@ static int ptp_qoriq_auto_config(struct ptp_qoriq *ptp_qoriq, } int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, - const struct ptp_clock_info caps) + const struct ptp_clock_info *caps) { struct device_node *node = ptp_qoriq->dev->of_node; struct ptp_qoriq_registers *regs; @@ -468,7 +468,7 @@ int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, u32 tmr_ctrl; ptp_qoriq->base = base; - ptp_qoriq->caps = caps; + ptp_qoriq->caps = *caps; if (of_property_read_u32(node, "fsl,cksel", &ptp_qoriq->cksel)) ptp_qoriq->cksel = DEFAULT_CKSEL; @@ -605,7 +605,7 @@ static int ptp_qoriq_probe(struct platform_device *dev) goto no_ioremap; } - err = ptp_qoriq_init(ptp_qoriq, base, ptp_qoriq_caps); + err = ptp_qoriq_init(ptp_qoriq, base, &ptp_qoriq_caps); if (err) goto no_clock; diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index f127adb71041..992bf9fa1729 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -183,7 +183,7 @@ static inline void qoriq_write_le(unsigned __iomem *addr, u32 val) irqreturn_t ptp_qoriq_isr(int irq, void *priv); int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, - const struct ptp_clock_info caps); + const struct ptp_clock_info *caps); void ptp_qoriq_free(struct ptp_qoriq *ptp_qoriq); int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm); int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta); From 1765f5dcd00963e33f1b8a4e0f34061fbc0e2f7f Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 19 Feb 2019 23:45:29 +0800 Subject: [PATCH 1372/1436] sky2: Increase D3 delay again Another platform requires even longer delay to make the device work correctly after S3. So increase the delay to 300ms. BugLink: https://bugs.launchpad.net/bugs/1798921 Signed-off-by: Kai-Heng Feng Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/sky2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index f3a5fa84860f..57727fe1501e 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -5073,7 +5073,7 @@ static int sky2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&hw->restart_work, sky2_restart); pci_set_drvdata(pdev, hw); - pdev->d3_delay = 200; + pdev->d3_delay = 300; return 0; From 203ef5f1ff9f6abb558310550c2597c5432d5509 Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Wed, 20 Feb 2019 08:43:00 +0000 Subject: [PATCH 1373/1436] selftest/tls: Add test to verify received 'type' of non-data record Test case 'control_msg' has been updated to peek non-data record and then verify the type of record received. Subsequently, the same record is retrieved without MSG_PEEK flag in recvmsg(). Signed-off-by: Vakul Garg Signed-off-by: David S. Miller --- tools/testing/selftests/net/tls.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 4ac50ccb3272..47ddfc154036 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -753,6 +753,20 @@ TEST_F(tls, control_msg) EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); vec.iov_base = buf; + EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL | MSG_PEEK), send_len); + + cmsg = CMSG_FIRSTHDR(&msg); + EXPECT_NE(cmsg, NULL); + EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); + EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); + record_type = *((unsigned char *)CMSG_DATA(cmsg)); + EXPECT_EQ(record_type, 100); + EXPECT_EQ(memcmp(buf, test_str, send_len), 0); + + /* Recv the message again without MSG_PEEK */ + record_type = 0; + memset(buf, 0, sizeof(buf)); + EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL), send_len); cmsg = CMSG_FIRSTHDR(&msg); EXPECT_NE(cmsg, NULL); From 418e897e0716b238ea4252ed22a73ca37d3cbbc1 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 20 Feb 2019 10:52:12 -0500 Subject: [PATCH 1374/1436] gso: validate gso_type on ipip style tunnels Commit 121d57af308d ("gso: validate gso_type in GSO handlers") added gso_type validation to existing gso_segment callback functions, to filter out illegal and potentially dangerous SKB_GSO_DODGY packets. Convert tunnels that now call inet_gso_segment and ipv6_gso_segment directly to have their own callbacks and extend validation to these. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 11 ++++++++++- net/ipv6/ip6_offload.c | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 0dfb72c46671..eab3ebde981e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1385,6 +1385,15 @@ out: } EXPORT_SYMBOL(inet_gso_segment); +static struct sk_buff *ipip_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP4)) + return ERR_PTR(-EINVAL); + + return inet_gso_segment(skb, features); +} + INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, @@ -1861,7 +1870,7 @@ static struct packet_offload ip_packet_offload __read_mostly = { static const struct net_offload ipip_offload = { .callbacks = { - .gso_segment = inet_gso_segment, + .gso_segment = ipip_gso_segment, .gro_receive = ipip_gro_receive, .gro_complete = ipip_gro_complete, }, diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 5c045691c302..345882d9c061 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -383,9 +383,36 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { }, }; +static struct sk_buff *sit_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP4)) + return ERR_PTR(-EINVAL); + + return ipv6_gso_segment(skb, features); +} + +static struct sk_buff *ip4ip6_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP6)) + return ERR_PTR(-EINVAL); + + return inet_gso_segment(skb, features); +} + +static struct sk_buff *ip6ip6_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP6)) + return ERR_PTR(-EINVAL); + + return ipv6_gso_segment(skb, features); +} + static const struct net_offload sit_offload = { .callbacks = { - .gso_segment = ipv6_gso_segment, + .gso_segment = sit_gso_segment, .gro_receive = sit_ip6ip6_gro_receive, .gro_complete = sit_gro_complete, }, @@ -393,7 +420,7 @@ static const struct net_offload sit_offload = { static const struct net_offload ip4ip6_offload = { .callbacks = { - .gso_segment = inet_gso_segment, + .gso_segment = ip4ip6_gso_segment, .gro_receive = ip4ip6_gro_receive, .gro_complete = ip4ip6_gro_complete, }, @@ -401,7 +428,7 @@ static const struct net_offload ip4ip6_offload = { static const struct net_offload ip6ip6_offload = { .callbacks = { - .gso_segment = ipv6_gso_segment, + .gso_segment = ip6ip6_gso_segment, .gro_receive = sit_ip6ip6_gro_receive, .gro_complete = ip6ip6_gro_complete, }, From 3d210534cc93dab39a9d1a8d674aa2872c268d38 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 16 Feb 2019 10:58:26 -0800 Subject: [PATCH 1375/1436] net_sched: fix a race condition in tcindex_destroy() (cherry picked from commit 8015d93ebd27484418d4952284fd02172fa4b0b2) tcindex_destroy() invokes tcindex_destroy_element() via a walker to delete each filter result in its perfect hash table, and tcindex_destroy_element() calls tcindex_delete() which schedules tcf RCU works to do the final deletion work. Unfortunately this races with the RCU callback __tcindex_destroy(), which could lead to use-after-free as reported by Adrian. Fix this by migrating this RCU callback to tcf RCU work too, as that workqueue is ordered, we will not have use-after-free. Note, we don't need to hold netns refcnt because we don't call tcf_exts_destroy() here. Fixes: 27ce4f05e2ab ("net_sched: use tcf_queue_work() in tcindex filter") Reported-by: Adrian Cc: Ben Hutchings Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index e1981628047b..81a433ae31b3 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -48,7 +48,7 @@ struct tcindex_data { u32 hash; /* hash table size; 0 if undefined */ u32 alloc_hash; /* allocated size */ u32 fall_through; /* 0: only classify if explicit match */ - struct rcu_head rcu; + struct rcu_work rwork; }; static inline int tcindex_filter_is_set(struct tcindex_filter_result *r) @@ -229,9 +229,11 @@ static int tcindex_destroy_element(struct tcf_proto *tp, return tcindex_delete(tp, arg, &last, false, NULL); } -static void __tcindex_destroy(struct rcu_head *head) +static void tcindex_destroy_work(struct work_struct *work) { - struct tcindex_data *p = container_of(head, struct tcindex_data, rcu); + struct tcindex_data *p = container_of(to_rcu_work(work), + struct tcindex_data, + rwork); kfree(p->perfect); kfree(p->h); @@ -258,9 +260,11 @@ static int tcindex_filter_result_init(struct tcindex_filter_result *r) return tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); } -static void __tcindex_partial_destroy(struct rcu_head *head) +static void tcindex_partial_destroy_work(struct work_struct *work) { - struct tcindex_data *p = container_of(head, struct tcindex_data, rcu); + struct tcindex_data *p = container_of(to_rcu_work(work), + struct tcindex_data, + rwork); kfree(p->perfect); kfree(p); @@ -480,7 +484,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } if (oldp) - call_rcu(&oldp->rcu, __tcindex_partial_destroy); + tcf_queue_work(&oldp->rwork, tcindex_partial_destroy_work); return 0; errout_alloc: @@ -572,7 +576,7 @@ static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held, walker.fn = tcindex_destroy_element; tcindex_walk(tp, &walker, true); - call_rcu(&p->rcu, __tcindex_destroy); + tcf_queue_work(&p->rwork, tcindex_destroy_work); } From 51dcb69de67a5141a4f39b03402b2c5160e91040 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 16 Feb 2019 10:58:27 -0800 Subject: [PATCH 1376/1436] net_sched: fix a memory leak in cls_tcindex (cherry picked from commit 033b228e7f26b29ae37f8bfa1bc6b209a5365e9f) When tcindex_destroy() destroys all the filter results in the perfect hash table, it invokes the walker to delete each of them. However, results with class==0 are skipped in either tcindex_walk() or tcindex_delete(), which causes a memory leak reported by kmemleak. This patch fixes it by skipping the walker and directly deleting these filter results so we don't miss any filter result. As a result of this change, we have to initialize exts->net properly in tcindex_alloc_perfect_hash(). For net-next, we need to consider whether we should initialize ->net in tcf_exts_init() instead, before that just directly test CONFIG_NET_CLS_ACT=y. Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 81a433ae31b3..fbf3519a12d8 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -221,14 +221,6 @@ found: return 0; } -static int tcindex_destroy_element(struct tcf_proto *tp, - void *arg, struct tcf_walker *walker) -{ - bool last; - - return tcindex_delete(tp, arg, &last, false, NULL); -} - static void tcindex_destroy_work(struct work_struct *work) { struct tcindex_data *p = container_of(to_rcu_work(work), @@ -568,13 +560,32 @@ static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcf_walker walker; + int i; pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p); - walker.count = 0; - walker.skip = 0; - walker.fn = tcindex_destroy_element; - tcindex_walk(tp, &walker, true); + + if (p->perfect) { + for (i = 0; i < p->hash; i++) { + struct tcindex_filter_result *r = p->perfect + i; + + tcf_unbind_filter(tp, &r->res); + if (tcf_exts_get_net(&r->exts)) + tcf_queue_work(&r->rwork, + tcindex_destroy_rexts_work); + else + __tcindex_destroy_rexts(r); + } + } + + for (i = 0; p->h && i < p->hash; i++) { + struct tcindex_filter *f, *next; + bool last; + + for (f = rtnl_dereference(p->h[i]); f; f = next) { + next = rtnl_dereference(f->next); + tcindex_delete(tp, &f->result, &last, rtnl_held, NULL); + } + } tcf_queue_work(&p->rwork, tcindex_destroy_work); } From 29f000f7751e3c7f4d26decfe5094090504fd8ba Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 16 Feb 2019 20:44:16 +0100 Subject: [PATCH 1377/1436] net: phy: disable aneg in genphy_c45_pma_setup_forced When genphy_c45_pma_setup_forced() is called the "aneg enabled" bit may still be set, therefore clear it. This is also in line with what genphy_setup_forced() does for Clause 22. v2: - fix typo Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy-c45.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 1cf5e8ae46de..16636d49bd14 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -70,7 +70,11 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev) if (ret < 0) return ret; - return phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL2, ctrl2); + ret = phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL2, ctrl2); + if (ret < 0) + return ret; + + return genphy_c45_an_disable_aneg(phydev); } EXPORT_SYMBOL_GPL(genphy_c45_pma_setup_forced); From 30de65c3d5a3559abc80d4b508c646438c06790c Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 16 Feb 2019 20:44:59 +0100 Subject: [PATCH 1378/1436] net: phy: marvell10g: improve mv3310_config_aneg Now that genphy_c45_pma_setup_forced() makes sure the "aneg enabled" bit is cleared, the call to genphy_c45_an_disable_aneg() isn't needed any longer. And the code pattern is now the same as in genphy_config_aneg(). Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index f9e0a2fc0277..9ea27acf05ad 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -268,13 +268,8 @@ static int mv3310_config_aneg(struct phy_device *phydev) /* We don't support manual MDI control */ phydev->mdix_ctrl = ETH_TP_MDI_AUTO; - if (phydev->autoneg == AUTONEG_DISABLE) { - ret = genphy_c45_pma_setup_forced(phydev); - if (ret < 0) - return ret; - - return genphy_c45_an_disable_aneg(phydev); - } + if (phydev->autoneg == AUTONEG_DISABLE) + return genphy_c45_pma_setup_forced(phydev); ret = genphy_c45_an_config_aneg(phydev); if (ret < 0) From 64ccfd2dbbdff4946e114e36cb8d58f432cccc50 Mon Sep 17 00:00:00 2001 From: Vishal Kulkarni Date: Sun, 17 Feb 2019 09:45:30 +0530 Subject: [PATCH 1379/1436] cxgb4: Mask out interrupts that are not enabled. There are rare cases where a PL_INT_CAUSE bit may end up getting set when the corresponding PL_INT_ENABLE bit isn't set. Signed-off-by: Vishal Kulkarni Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 27af347be4af..49e4374d584c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -4962,7 +4962,13 @@ static void pl_intr_handler(struct adapter *adap) */ int t4_slow_intr_handler(struct adapter *adapter) { - u32 cause = t4_read_reg(adapter, PL_INT_CAUSE_A); + /* There are rare cases where a PL_INT_CAUSE bit may end up getting + * set when the corresponding PL_INT_ENABLE bit isn't set. It's + * easiest just to mask that case here. + */ + u32 raw_cause = t4_read_reg(adapter, PL_INT_CAUSE_A); + u32 enable = t4_read_reg(adapter, PL_INT_ENABLE_A); + u32 cause = raw_cause & enable; if (!(cause & GLBL_INTR_MASK)) return 0; @@ -5014,7 +5020,7 @@ int t4_slow_intr_handler(struct adapter *adapter) ulptx_intr_handler(adapter); /* Clear the interrupts just processed for which we are the master. */ - t4_write_reg(adapter, PL_INT_CAUSE_A, cause & GLBL_INTR_MASK); + t4_write_reg(adapter, PL_INT_CAUSE_A, raw_cause & GLBL_INTR_MASK); (void)t4_read_reg(adapter, PL_INT_CAUSE_A); /* flush */ return 1; } From cecc7a317defb1b396630c513cdf2ad2d12c7bd3 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 21 Feb 2019 13:00:58 +0100 Subject: [PATCH 1380/1436] net/smc: cleanup for smcr_tx_sndbuf_nonempty Use local variable pflags from the beginning of function smcr_tx_sndbuf_nonempty Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_tx.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index a3bff08ff8c8..f0de323d15d6 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -483,7 +483,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn, */ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) { - struct smc_cdc_producer_flags *pflags; + struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags; struct smc_rdma_wr *wr_rdma_buf; struct smc_cdc_tx_pend *pend; struct smc_wr_buf *wr_buf; @@ -506,7 +506,7 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) } spin_lock_bh(&conn->send_lock); - if (!conn->local_tx_ctrl.prod_flags.urg_data_present) { + if (!pflags->urg_data_present) { rc = smc_tx_rdma_writes(conn, wr_rdma_buf); if (rc) { smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], @@ -516,7 +516,6 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) } rc = smc_cdc_msg_send(conn, wr_buf, pend); - pflags = &conn->local_tx_ctrl.prod_flags; if (!rc && pflags->urg_data_present) { pflags->urg_data_pending = 0; pflags->urg_data_present = 0; From 390dde08446ececfaafe63d0db14ceff15f03886 Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 21 Feb 2019 13:00:59 +0100 Subject: [PATCH 1381/1436] s390/net: convert pnetids to ascii Pnetids are retrieved from the underlying hardware as EBCDIC. This patch converts pnetids to ASCII. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- arch/s390/net/pnet.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/net/pnet.c b/arch/s390/net/pnet.c index 9ecdbdf59781..79211bec0fc8 100644 --- a/arch/s390/net/pnet.c +++ b/arch/s390/net/pnet.c @@ -12,6 +12,7 @@ #include #include #include +#include #define PNETIDS_LEN 64 /* Total utility string length in bytes * to cover up to 4 PNETIDs of 16 bytes @@ -48,6 +49,7 @@ static int pnet_ids_by_device(struct device *dev, u8 *pnetids) if (!util_str) return -ENOMEM; memcpy(pnetids, util_str, PNETIDS_LEN); + EBCASC(pnetids, PNETIDS_LEN); kfree(util_str); return 0; } @@ -55,6 +57,7 @@ static int pnet_ids_by_device(struct device *dev, u8 *pnetids) struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); memcpy(pnetids, zdev->util_str, sizeof(zdev->util_str)); + EBCASC(pnetids, sizeof(zdev->util_str)); return 0; } return -EOPNOTSUPP; From 890a2cb4a966a097961fad0747d4f63e09b79da7 Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 21 Feb 2019 13:01:00 +0100 Subject: [PATCH 1382/1436] net/smc: rework pnet table If a device does not have a pnetid, users can set a temporary pnetid for said device in the pnet table. This patch reworks the pnet table to make it more flexible. Multiple entries with the same pnetid but differing devices are now allowed. Additionally, the netlink interface now sends each mapping from pnetid to device separately to the user while maintaining the message format existing applications might expect. Also, the SMC data structure for ib devices already has a pnetid attribute. So, it is used to store the user defined pnetids. As a result, the pnet table entries are only used for netdevs. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_ib.c | 1 - net/smc/smc_ib.h | 2 + net/smc/smc_pnet.c | 445 +++++++++++++++++++++++++-------------------- net/smc/smc_pnet.h | 1 - 4 files changed, 252 insertions(+), 197 deletions(-) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 0b244be24fe0..53f429c04843 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -564,7 +564,6 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) spin_lock(&smc_ib_devices.lock); list_del_init(&smcibdev->list); /* remove from smc_ib_devices */ spin_unlock(&smc_ib_devices.lock); - smc_pnet_remove_by_ibdev(smcibdev); smc_ib_cleanup_per_ibdev(smcibdev); ib_unregister_event_handler(&smcibdev->event_handler); kfree(smcibdev); diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index bac7fd65a4c0..da60ab9e8d70 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -42,6 +42,8 @@ struct smc_ib_device { /* ib-device infos for smc */ /* mac address per port*/ u8 pnetid[SMC_MAX_PORTS][SMC_MAX_PNETID_LEN]; /* pnetid per port */ + bool pnetid_by_user[SMC_MAX_PORTS]; + /* pnetid defined by user? */ u8 initialized : 1; /* ib dev CQ, evthdl done */ struct work_struct port_event_work; unsigned long port_event_mask; diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 632c3109dee5..327bf2afe820 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -24,6 +24,10 @@ #include "smc_ib.h" #include "smc_ism.h" +#define SMC_ASCII_BLANK 32 + +static struct net_device *pnet_find_base_ndev(struct net_device *ndev); + static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { [SMC_PNETID_NAME] = { .type = NLA_NUL_STRING, @@ -56,13 +60,14 @@ static struct smc_pnettable { }; /** - * struct smc_pnetentry - pnet identifier name entry + * struct smc_user_pnetentry - pnet identifier name entry for/from user * @list: List node. * @pnet_name: Pnet identifier name * @ndev: pointer to network device. * @smcibdev: Pointer to IB device. + * @ib_port: Port of IB device. */ -struct smc_pnetentry { +struct smc_user_pnetentry { struct list_head list; char pnet_name[SMC_MAX_PNETID_LEN + 1]; struct net_device *ndev; @@ -70,33 +75,26 @@ struct smc_pnetentry { u8 ib_port; }; -/* Check if two RDMA device entries are identical. Use device name and port - * number for comparison. - */ -static bool smc_pnet_same_ibname(struct smc_pnetentry *pnetelem, char *ibname, - u8 ibport) -{ - return pnetelem->ib_port == ibport && - !strncmp(pnetelem->smcibdev->ibdev->name, ibname, - sizeof(pnetelem->smcibdev->ibdev->name)); -} +/* pnet entry stored in pnet table */ +struct smc_pnetentry { + struct list_head list; + char pnet_name[SMC_MAX_PNETID_LEN + 1]; + struct net_device *ndev; +}; -/* Find a pnetid in the pnet table. - */ -static struct smc_pnetentry *smc_pnet_find_pnetid(char *pnet_name) +/* Check if two given pnetids match */ +static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2) { - struct smc_pnetentry *pnetelem, *found_pnetelem = NULL; + int i; - read_lock(&smc_pnettable.lock); - list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { - if (!strncmp(pnetelem->pnet_name, pnet_name, - sizeof(pnetelem->pnet_name))) { - found_pnetelem = pnetelem; + for (i = 0; i < SMC_MAX_PNETID_LEN; i++) { + if ((pnetid1[i] == 0 || pnetid1[i] == SMC_ASCII_BLANK) && + (pnetid2[i] == 0 || pnetid2[i] == SMC_ASCII_BLANK)) break; - } + if (pnetid1[i] != pnetid2[i]) + return false; } - read_unlock(&smc_pnettable.lock); - return found_pnetelem; + return true; } /* Remove a pnetid from the pnet table. @@ -104,21 +102,39 @@ static struct smc_pnetentry *smc_pnet_find_pnetid(char *pnet_name) static int smc_pnet_remove_by_pnetid(char *pnet_name) { struct smc_pnetentry *pnetelem, *tmp_pe; + struct smc_ib_device *ibdev; int rc = -ENOENT; + int ibport; + /* remove netdevices */ write_lock(&smc_pnettable.lock); list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, list) { - if (!strncmp(pnetelem->pnet_name, pnet_name, - sizeof(pnetelem->pnet_name))) { + if (!pnet_name || + smc_pnet_match(pnetelem->pnet_name, pnet_name)) { list_del(&pnetelem->list); dev_put(pnetelem->ndev); kfree(pnetelem); rc = 0; - break; } } write_unlock(&smc_pnettable.lock); + /* remove ib devices */ + spin_lock(&smc_ib_devices.lock); + list_for_each_entry(ibdev, &smc_ib_devices.list, list) { + for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { + if (ibdev->pnetid_by_user[ibport] && + (!pnet_name || + smc_pnet_match(pnet_name, + ibdev->pnetid[ibport]))) { + memset(ibdev->pnetid[ibport], 0, + SMC_MAX_PNETID_LEN); + ibdev->pnetid_by_user[ibport] = false; + rc = 0; + } + } + } + spin_unlock(&smc_ib_devices.lock); return rc; } @@ -144,53 +160,66 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev) return rc; } -/* Remove a pnet entry mentioning a given ib device from the pnet table. - */ -int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev) -{ - struct smc_pnetentry *pnetelem, *tmp_pe; - int rc = -ENOENT; - - write_lock(&smc_pnettable.lock); - list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, - list) { - if (pnetelem->smcibdev == ibdev) { - list_del(&pnetelem->list); - dev_put(pnetelem->ndev); - kfree(pnetelem); - rc = 0; - break; - } - } - write_unlock(&smc_pnettable.lock); - return rc; -} - /* Append a pnetid to the end of the pnet table if not already on this list. */ -static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem) +static int smc_pnet_enter(struct smc_user_pnetentry *new_pnetelem) { + u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; + u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; + struct smc_pnetentry *tmp_pnetelem; struct smc_pnetentry *pnetelem; - int rc = -EEXIST; + struct net_device *ndev; + bool new_netdev = true; + bool new_ibdev = false; + + if (new_pnetelem->smcibdev) { + struct smc_ib_device *ib_dev = new_pnetelem->smcibdev; + int ib_port = new_pnetelem->ib_port; + + spin_lock(&smc_ib_devices.lock); + if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) { + memcpy(ib_dev->pnetid[ib_port - 1], + new_pnetelem->pnet_name, SMC_MAX_PNETID_LEN); + ib_dev->pnetid_by_user[ib_port - 1] = true; + new_ibdev = true; + } + spin_unlock(&smc_ib_devices.lock); + } + + if (!new_pnetelem->ndev) + return new_ibdev ? 0 : -EEXIST; + + /* check if (base) netdev already has a pnetid. If there is one, we do + * not want to add a pnet table entry + */ + ndev = pnet_find_base_ndev(new_pnetelem->ndev); + if (!smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, + ndev_pnetid)) + return new_ibdev ? 0 : -EEXIST; + + /* add a new netdev entry to the pnet table if there isn't one */ + tmp_pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL); + if (!tmp_pnetelem) + return -ENOMEM; + memcpy(tmp_pnetelem->pnet_name, new_pnetelem->pnet_name, + SMC_MAX_PNETID_LEN); + tmp_pnetelem->ndev = new_pnetelem->ndev; write_lock(&smc_pnettable.lock); list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { - if (!strncmp(pnetelem->pnet_name, new_pnetelem->pnet_name, - sizeof(new_pnetelem->pnet_name)) || - !strncmp(pnetelem->ndev->name, new_pnetelem->ndev->name, - sizeof(new_pnetelem->ndev->name)) || - smc_pnet_same_ibname(pnetelem, - new_pnetelem->smcibdev->ibdev->name, - new_pnetelem->ib_port)) { - dev_put(pnetelem->ndev); - goto found; - } + if (pnetelem->ndev == new_pnetelem->ndev) + new_netdev = false; } - list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist); - rc = 0; -found: - write_unlock(&smc_pnettable.lock); - return rc; + if (new_netdev) { + dev_hold(tmp_pnetelem->ndev); + list_add_tail(&tmp_pnetelem->list, &smc_pnettable.pnetlist); + write_unlock(&smc_pnettable.lock); + } else { + write_unlock(&smc_pnettable.lock); + kfree(tmp_pnetelem); + } + + return (new_netdev || new_ibdev) ? 0 : -EEXIST; } /* The limit for pnetid is 16 characters. @@ -241,7 +270,8 @@ out: /* Parse the supplied netlink attributes and fill a pnetentry structure. * For ethernet and infiniband device names verify that the devices exist. */ -static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem, +static int smc_pnet_fill_entry(struct net *net, + struct smc_user_pnetentry *pnetelem, struct nlattr *tb[]) { char *string, *ibname; @@ -258,30 +288,29 @@ static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem, goto error; rc = -EINVAL; - if (!tb[SMC_PNETID_ETHNAME]) - goto error; - rc = -ENOENT; - string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); - pnetelem->ndev = dev_get_by_name(net, string); - if (!pnetelem->ndev) - goto error; + if (tb[SMC_PNETID_ETHNAME]) { + string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); + pnetelem->ndev = dev_get_by_name(net, string); + if (!pnetelem->ndev) + goto error; + } rc = -EINVAL; - if (!tb[SMC_PNETID_IBNAME]) - goto error; - rc = -ENOENT; - ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); - ibname = strim(ibname); - pnetelem->smcibdev = smc_pnet_find_ib(ibname); - if (!pnetelem->smcibdev) - goto error; - - rc = -EINVAL; - if (!tb[SMC_PNETID_IBPORT]) - goto error; - pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); - if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS) - goto error; + if (tb[SMC_PNETID_IBNAME]) { + ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); + ibname = strim(ibname); + pnetelem->smcibdev = smc_pnet_find_ib(ibname); + if (!pnetelem->smcibdev) + goto error; + if (pnetelem->smcibdev) { + if (!tb[SMC_PNETID_IBPORT]) + goto error; + pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); + if (pnetelem->ib_port < 1 || + pnetelem->ib_port > SMC_MAX_PORTS) + goto error; + } + } return 0; @@ -292,71 +321,44 @@ error: } /* Convert an smc_pnetentry to a netlink attribute sequence */ -static int smc_pnet_set_nla(struct sk_buff *msg, struct smc_pnetentry *pnetelem) +static int smc_pnet_set_nla(struct sk_buff *msg, + struct smc_user_pnetentry *pnetelem) { - if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name) || - nla_put_string(msg, SMC_PNETID_ETHNAME, pnetelem->ndev->name) || - nla_put_string(msg, SMC_PNETID_IBNAME, - pnetelem->smcibdev->ibdev->name) || - nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port)) + if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name)) return -1; + if (pnetelem->ndev) { + if (nla_put_string(msg, SMC_PNETID_ETHNAME, + pnetelem->ndev->name)) + return -1; + } else { + if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a")) + return -1; + } + if (pnetelem->smcibdev) { + if (nla_put_string(msg, SMC_PNETID_IBNAME, + pnetelem->smcibdev->ibdev->name) || + nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port)) + return -1; + } else { + if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") || + nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff)) + return -1; + } + return 0; } -/* Retrieve one PNETID entry */ -static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) -{ - struct smc_pnetentry *pnetelem; - struct sk_buff *msg; - void *hdr; - int rc; - - if (!info->attrs[SMC_PNETID_NAME]) - return -EINVAL; - pnetelem = smc_pnet_find_pnetid( - (char *)nla_data(info->attrs[SMC_PNETID_NAME])); - if (!pnetelem) - return -ENOENT; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; - - hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, - &smc_pnet_nl_family, 0, SMC_PNETID_GET); - if (!hdr) { - rc = -EMSGSIZE; - goto err_out; - } - - if (smc_pnet_set_nla(msg, pnetelem)) { - rc = -ENOBUFS; - goto err_out; - } - - genlmsg_end(msg, hdr); - return genlmsg_reply(msg, info); - -err_out: - nlmsg_free(msg); - return rc; -} - static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); - struct smc_pnetentry *pnetelem; + struct smc_user_pnetentry pnetelem; int rc; - pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL); - if (!pnetelem) - return -ENOMEM; - rc = smc_pnet_fill_entry(net, pnetelem, info->attrs); + rc = smc_pnet_fill_entry(net, &pnetelem, info->attrs); if (!rc) - rc = smc_pnet_enter(pnetelem); - if (rc) { - kfree(pnetelem); - return rc; - } + rc = smc_pnet_enter(&pnetelem); + if (pnetelem.ndev) + dev_put(pnetelem.ndev); return rc; } @@ -376,7 +378,7 @@ static int smc_pnet_dump_start(struct netlink_callback *cb) static int smc_pnet_dumpinfo(struct sk_buff *skb, u32 portid, u32 seq, u32 flags, - struct smc_pnetentry *pnetelem) + struct smc_user_pnetentry *pnetelem) { void *hdr; @@ -392,42 +394,107 @@ static int smc_pnet_dumpinfo(struct sk_buff *skb, return 0; } -static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) +static int _smc_pnet_dump(struct sk_buff *skb, u32 portid, u32 seq, u8 *pnetid, + int start_idx) { + struct smc_user_pnetentry tmp_entry; struct smc_pnetentry *pnetelem; + struct smc_ib_device *ibdev; int idx = 0; + int ibport; + /* dump netdevices */ read_lock(&smc_pnettable.lock); list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { - if (idx++ < cb->args[0]) + if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid)) continue; - if (smc_pnet_dumpinfo(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - pnetelem)) { + if (idx++ < start_idx) + continue; + memset(&tmp_entry, 0, sizeof(tmp_entry)); + memcpy(&tmp_entry.pnet_name, pnetelem->pnet_name, + SMC_MAX_PNETID_LEN); + tmp_entry.ndev = pnetelem->ndev; + if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI, + &tmp_entry)) { --idx; break; } } - cb->args[0] = idx; read_unlock(&smc_pnettable.lock); + + /* dump ib devices */ + spin_lock(&smc_ib_devices.lock); + list_for_each_entry(ibdev, &smc_ib_devices.list, list) { + for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { + if (ibdev->pnetid_by_user[ibport]) { + if (pnetid && + !smc_pnet_match(ibdev->pnetid[ibport], + pnetid)) + continue; + if (idx++ < start_idx) + continue; + memset(&tmp_entry, 0, sizeof(tmp_entry)); + memcpy(&tmp_entry.pnet_name, + ibdev->pnetid[ibport], + SMC_MAX_PNETID_LEN); + tmp_entry.smcibdev = ibdev; + tmp_entry.ib_port = ibport + 1; + if (smc_pnet_dumpinfo(skb, portid, seq, + NLM_F_MULTI, + &tmp_entry)) { + --idx; + break; + } + } + } + } + spin_unlock(&smc_ib_devices.lock); + + return idx; +} + +static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int idx; + + idx = _smc_pnet_dump(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NULL, cb->args[0]); + + cb->args[0] = idx; return skb->len; } +/* Retrieve one PNETID entry */ +static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + void *hdr; + + if (!info->attrs[SMC_PNETID_NAME]) + return -EINVAL; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + _smc_pnet_dump(msg, info->snd_portid, info->snd_seq, + nla_data(info->attrs[SMC_PNETID_NAME]), 0); + + /* finish multi part message and send it */ + hdr = nlmsg_put(msg, info->snd_portid, info->snd_seq, NLMSG_DONE, 0, + NLM_F_MULTI); + if (!hdr) { + nlmsg_free(msg); + return -EMSGSIZE; + } + return genlmsg_reply(msg, info); +} + /* Remove and delete all pnetids from pnet table. */ static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) { - struct smc_pnetentry *pnetelem, *tmp_pe; - - write_lock(&smc_pnettable.lock); - list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, - list) { - list_del(&pnetelem->list); - dev_put(pnetelem->ndev); - kfree(pnetelem); - } - write_unlock(&smc_pnettable.lock); - return 0; + return smc_pnet_remove_by_pnetid(NULL); } /* SMC_PNETID generic netlink operation definition */ @@ -534,6 +601,25 @@ static struct net_device *pnet_find_base_ndev(struct net_device *ndev) return ndev; } +static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *netdev, + u8 *pnetid) +{ + struct smc_pnetentry *pnetelem; + int rc = -ENOENT; + + read_lock(&smc_pnettable.lock); + list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { + if (netdev == pnetelem->ndev) { + /* get pnetid of netdev device */ + memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN); + rc = 0; + break; + } + } + read_unlock(&smc_pnettable.lock); + return rc; +} + /* Determine the corresponding IB device port based on the hardware PNETID. * Searching stops at the first matching active IB device port with vlan_id * configured. @@ -549,7 +635,8 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, ndev = pnet_find_base_ndev(ndev); if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, - ndev_pnetid)) + ndev_pnetid) && + smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) return; /* pnetid could not be determined */ spin_lock(&smc_ib_devices.lock); @@ -557,8 +644,7 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, for (i = 1; i <= SMC_MAX_PORTS; i++) { if (!rdma_is_port_valid(ibdev->ibdev, i)) continue; - if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid, - SMC_MAX_PNETID_LEN) && + if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) && smc_ib_port_active(ibdev, i) && !smc_ib_determine_gid(ibdev, i, vlan_id, gid, NULL)) { @@ -593,31 +679,6 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, spin_unlock(&smcd_dev_list.lock); } -/* Lookup of coupled ib_device via SMC pnet table */ -static void smc_pnet_find_roce_by_table(struct net_device *netdev, - struct smc_ib_device **smcibdev, - u8 *ibport, unsigned short vlan_id, - u8 gid[]) -{ - struct smc_pnetentry *pnetelem; - - read_lock(&smc_pnettable.lock); - list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { - if (netdev == pnetelem->ndev) { - if (smc_ib_port_active(pnetelem->smcibdev, - pnetelem->ib_port) && - !smc_ib_determine_gid(pnetelem->smcibdev, - pnetelem->ib_port, vlan_id, - gid, NULL)) { - *smcibdev = pnetelem->smcibdev; - *ibport = pnetelem->ib_port; - } - break; - } - } - read_unlock(&smc_pnettable.lock); -} - /* PNET table analysis for a given sock: * determine ib_device and port belonging to used internal TCP socket * ethernet interface. @@ -636,13 +697,7 @@ void smc_pnet_find_roce_resource(struct sock *sk, if (!dst->dev) goto out_rel; - /* if possible, lookup via hardware-defined pnetid */ smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport, vlan_id, gid); - if (*smcibdev) - goto out_rel; - - /* lookup via SMC PNET table */ - smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport, vlan_id, gid); out_rel: dst_release(dst); diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index 8ff777636e32..37044e4ee50f 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -31,7 +31,6 @@ static inline int smc_pnetid_by_dev_port(struct device *dev, int smc_pnet_init(void) __init; void smc_pnet_exit(void); -int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev); void smc_pnet_find_roce_resource(struct sock *sk, struct smc_ib_device **smcibdev, u8 *ibport, unsigned short vlan_id, u8 gid[]); From f3d74b2245a0e8b994f53df1d7982d367fc63dfe Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 21 Feb 2019 13:01:01 +0100 Subject: [PATCH 1383/1436] net/smc: add smcd support to the pnet table Currently, users can only set pnetids for netdevs and ib devices in the pnet table. This patch adds support for smcd devices to the pnet table. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- include/net/smc.h | 1 + net/smc/smc_pnet.c | 87 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 81 insertions(+), 7 deletions(-) diff --git a/include/net/smc.h b/include/net/smc.h index 9ef49f8b1002..bd9c0fb3b577 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -74,6 +74,7 @@ struct smcd_dev { struct list_head vlan; struct workqueue_struct *event_wq; u8 pnetid[SMC_MAX_PNETID_LEN]; + bool pnetid_by_user; }; struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 327bf2afe820..5497a8b44287 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -66,6 +66,7 @@ static struct smc_pnettable { * @ndev: pointer to network device. * @smcibdev: Pointer to IB device. * @ib_port: Port of IB device. + * @smcd_dev: Pointer to smcd device. */ struct smc_user_pnetentry { struct list_head list; @@ -73,6 +74,7 @@ struct smc_user_pnetentry { struct net_device *ndev; struct smc_ib_device *smcibdev; u8 ib_port; + struct smcd_dev *smcd_dev; }; /* pnet entry stored in pnet table */ @@ -103,6 +105,7 @@ static int smc_pnet_remove_by_pnetid(char *pnet_name) { struct smc_pnetentry *pnetelem, *tmp_pe; struct smc_ib_device *ibdev; + struct smcd_dev *smcd_dev; int rc = -ENOENT; int ibport; @@ -135,6 +138,18 @@ static int smc_pnet_remove_by_pnetid(char *pnet_name) } } spin_unlock(&smc_ib_devices.lock); + /* remove smcd devices */ + spin_lock(&smcd_dev_list.lock); + list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { + if (smcd_dev->pnetid_by_user && + (!pnet_name || + smc_pnet_match(pnet_name, smcd_dev->pnetid))) { + memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN); + smcd_dev->pnetid_by_user = false; + rc = 0; + } + } + spin_unlock(&smcd_dev_list.lock); return rc; } @@ -168,6 +183,7 @@ static int smc_pnet_enter(struct smc_user_pnetentry *new_pnetelem) u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; struct smc_pnetentry *tmp_pnetelem; struct smc_pnetentry *pnetelem; + bool new_smcddev = false; struct net_device *ndev; bool new_netdev = true; bool new_ibdev = false; @@ -185,9 +201,21 @@ static int smc_pnet_enter(struct smc_user_pnetentry *new_pnetelem) } spin_unlock(&smc_ib_devices.lock); } + if (new_pnetelem->smcd_dev) { + struct smcd_dev *smcd_dev = new_pnetelem->smcd_dev; + + spin_lock(&smcd_dev_list.lock); + if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) { + memcpy(smcd_dev->pnetid, new_pnetelem->pnet_name, + SMC_MAX_PNETID_LEN); + smcd_dev->pnetid_by_user = true; + new_smcddev = true; + } + spin_unlock(&smcd_dev_list.lock); + } if (!new_pnetelem->ndev) - return new_ibdev ? 0 : -EEXIST; + return (new_ibdev || new_smcddev) ? 0 : -EEXIST; /* check if (base) netdev already has a pnetid. If there is one, we do * not want to add a pnet table entry @@ -195,7 +223,7 @@ static int smc_pnet_enter(struct smc_user_pnetentry *new_pnetelem) ndev = pnet_find_base_ndev(new_pnetelem->ndev); if (!smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, ndev_pnetid)) - return new_ibdev ? 0 : -EEXIST; + return (new_ibdev || new_smcddev) ? 0 : -EEXIST; /* add a new netdev entry to the pnet table if there isn't one */ tmp_pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL); @@ -219,7 +247,7 @@ static int smc_pnet_enter(struct smc_user_pnetentry *new_pnetelem) kfree(tmp_pnetelem); } - return (new_netdev || new_ibdev) ? 0 : -EEXIST; + return (new_netdev || new_ibdev || new_smcddev) ? 0 : -EEXIST; } /* The limit for pnetid is 16 characters. @@ -267,6 +295,23 @@ out: return ibdev; } +/* Find an smcd device by a given name. The device might not exist. */ +static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name) +{ + struct smcd_dev *smcd_dev; + + spin_lock(&smcd_dev_list.lock); + list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { + if (!strncmp(dev_name(&smcd_dev->dev), smcd_name, + IB_DEVICE_NAME_MAX - 1)) + goto out; + } + smcd_dev = NULL; +out: + spin_unlock(&smcd_dev_list.lock); + return smcd_dev; +} + /* Parse the supplied netlink attributes and fill a pnetentry structure. * For ethernet and infiniband device names verify that the devices exist. */ @@ -300,7 +345,8 @@ static int smc_pnet_fill_entry(struct net *net, ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); ibname = strim(ibname); pnetelem->smcibdev = smc_pnet_find_ib(ibname); - if (!pnetelem->smcibdev) + pnetelem->smcd_dev = smc_pnet_find_smcd(ibname); + if (!pnetelem->smcibdev && !pnetelem->smcd_dev) goto error; if (pnetelem->smcibdev) { if (!tb[SMC_PNETID_IBPORT]) @@ -339,6 +385,11 @@ static int smc_pnet_set_nla(struct sk_buff *msg, pnetelem->smcibdev->ibdev->name) || nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port)) return -1; + } else if (pnetelem->smcd_dev) { + if (nla_put_string(msg, SMC_PNETID_IBNAME, + dev_name(&pnetelem->smcd_dev->dev)) || + nla_put_u8(msg, SMC_PNETID_IBPORT, 1)) + return -1; } else { if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") || nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff)) @@ -400,6 +451,7 @@ static int _smc_pnet_dump(struct sk_buff *skb, u32 portid, u32 seq, u8 *pnetid, struct smc_user_pnetentry tmp_entry; struct smc_pnetentry *pnetelem; struct smc_ib_device *ibdev; + struct smcd_dev *smcd_dev; int idx = 0; int ibport; @@ -450,6 +502,27 @@ static int _smc_pnet_dump(struct sk_buff *skb, u32 portid, u32 seq, u8 *pnetid, } spin_unlock(&smc_ib_devices.lock); + /* dump smcd devices */ + spin_lock(&smcd_dev_list.lock); + list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { + if (smcd_dev->pnetid_by_user) { + if (pnetid && !smc_pnet_match(smcd_dev->pnetid, pnetid)) + continue; + if (idx++ < start_idx) + continue; + memset(&tmp_entry, 0, sizeof(tmp_entry)); + memcpy(&tmp_entry.pnet_name, smcd_dev->pnetid, + SMC_MAX_PNETID_LEN); + tmp_entry.smcd_dev = smcd_dev; + if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI, + &tmp_entry)) { + --idx; + break; + } + } + } + spin_unlock(&smcd_dev_list.lock); + return idx; } @@ -666,12 +739,13 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, ndev = pnet_find_base_ndev(ndev); if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, - ndev_pnetid)) + ndev_pnetid) && + smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) return; /* pnetid could not be determined */ spin_lock(&smcd_dev_list.lock); list_for_each_entry(ismdev, &smcd_dev_list.list, list) { - if (!memcmp(ismdev->pnetid, ndev_pnetid, SMC_MAX_PNETID_LEN)) { + if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) { *smcismdev = ismdev; break; } @@ -715,7 +789,6 @@ void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev) if (!dst->dev) goto out_rel; - /* if possible, lookup via hardware-defined pnetid */ smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev); out_rel: From 64e28b52c7a6616217bee67ba2ad886f478f9737 Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 21 Feb 2019 13:01:02 +0100 Subject: [PATCH 1384/1436] net/smc: add pnet table namespace support This patch adds namespace support to the pnet table code. Each network namespace gets its own pnet table. Infiniband and smcd device pnetids can only be modified in the initial namespace. In other namespaces they can still be used as if they were set by the underlying hardware. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 28 +++++++++ net/smc/smc_netns.h | 20 ++++++ net/smc/smc_pnet.c | 145 +++++++++++++++++++++++++++++++------------- net/smc/smc_pnet.h | 12 ++++ 4 files changed, 162 insertions(+), 43 deletions(-) create mode 100644 net/smc/smc_netns.h diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 46fa9f3016cc..77ef53596d18 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -30,6 +30,10 @@ #include #include +#include +#include +#include "smc_netns.h" + #include "smc.h" #include "smc_clc.h" #include "smc_llc.h" @@ -1966,10 +1970,33 @@ static const struct net_proto_family smc_sock_family_ops = { .create = smc_create, }; +unsigned int smc_net_id; + +static __net_init int smc_net_init(struct net *net) +{ + return smc_pnet_net_init(net); +} + +static void __net_exit smc_net_exit(struct net *net) +{ + smc_pnet_net_exit(net); +} + +static struct pernet_operations smc_net_ops = { + .init = smc_net_init, + .exit = smc_net_exit, + .id = &smc_net_id, + .size = sizeof(struct smc_net), +}; + static int __init smc_init(void) { int rc; + rc = register_pernet_subsys(&smc_net_ops); + if (rc) + return rc; + rc = smc_pnet_init(); if (rc) return rc; @@ -2035,6 +2062,7 @@ static void __exit smc_exit(void) proto_unregister(&smc_proto6); proto_unregister(&smc_proto); smc_pnet_exit(); + unregister_pernet_subsys(&smc_net_ops); } module_init(smc_init); diff --git a/net/smc/smc_netns.h b/net/smc/smc_netns.h new file mode 100644 index 000000000000..e7a8fc4ae02f --- /dev/null +++ b/net/smc/smc_netns.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Shared Memory Communications + * + * Network namespace definitions. + * + * Copyright IBM Corp. 2018 + */ + +#ifndef SMC_NETNS_H +#define SMC_NETNS_H + +#include "smc_pnet.h" + +extern unsigned int smc_net_id; + +/* per-network namespace private data */ +struct smc_net { + struct smc_pnettable pnettable; +}; +#endif diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 5497a8b44287..878f5c085444 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -20,6 +20,9 @@ #include +#include +#include "smc_netns.h" + #include "smc_pnet.h" #include "smc_ib.h" #include "smc_ism.h" @@ -46,19 +49,6 @@ static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { static struct genl_family smc_pnet_nl_family; -/** - * struct smc_pnettable - SMC PNET table anchor - * @lock: Lock for list action - * @pnetlist: List of PNETIDs - */ -static struct smc_pnettable { - rwlock_t lock; - struct list_head pnetlist; -} smc_pnettable = { - .pnetlist = LIST_HEAD_INIT(smc_pnettable.pnetlist), - .lock = __RW_LOCK_UNLOCKED(smc_pnettable.lock) -}; - /** * struct smc_user_pnetentry - pnet identifier name entry for/from user * @list: List node. @@ -101,17 +91,23 @@ static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2) /* Remove a pnetid from the pnet table. */ -static int smc_pnet_remove_by_pnetid(char *pnet_name) +static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) { struct smc_pnetentry *pnetelem, *tmp_pe; + struct smc_pnettable *pnettable; struct smc_ib_device *ibdev; struct smcd_dev *smcd_dev; + struct smc_net *sn; int rc = -ENOENT; int ibport; + /* get pnettable for namespace */ + sn = net_generic(net, smc_net_id); + pnettable = &sn->pnettable; + /* remove netdevices */ - write_lock(&smc_pnettable.lock); - list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, + write_lock(&pnettable->lock); + list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (!pnet_name || smc_pnet_match(pnetelem->pnet_name, pnet_name)) { @@ -121,7 +117,12 @@ static int smc_pnet_remove_by_pnetid(char *pnet_name) rc = 0; } } - write_unlock(&smc_pnettable.lock); + write_unlock(&pnettable->lock); + + /* if this is not the initial namespace, stop here */ + if (net != &init_net) + return rc; + /* remove ib devices */ spin_lock(&smc_ib_devices.lock); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { @@ -158,11 +159,17 @@ static int smc_pnet_remove_by_pnetid(char *pnet_name) static int smc_pnet_remove_by_ndev(struct net_device *ndev) { struct smc_pnetentry *pnetelem, *tmp_pe; + struct smc_pnettable *pnettable; + struct net *net = dev_net(ndev); + struct smc_net *sn; int rc = -ENOENT; - write_lock(&smc_pnettable.lock); - list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, - list) { + /* get pnettable for namespace */ + sn = net_generic(net, smc_net_id); + pnettable = &sn->pnettable; + + write_lock(&pnettable->lock); + list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->ndev == ndev) { list_del(&pnetelem->list); dev_put(pnetelem->ndev); @@ -171,13 +178,14 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev) break; } } - write_unlock(&smc_pnettable.lock); + write_unlock(&pnettable->lock); return rc; } /* Append a pnetid to the end of the pnet table if not already on this list. */ -static int smc_pnet_enter(struct smc_user_pnetentry *new_pnetelem) +static int smc_pnet_enter(struct smc_pnettable *pnettable, + struct smc_user_pnetentry *new_pnetelem) { u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; @@ -233,17 +241,17 @@ static int smc_pnet_enter(struct smc_user_pnetentry *new_pnetelem) SMC_MAX_PNETID_LEN); tmp_pnetelem->ndev = new_pnetelem->ndev; - write_lock(&smc_pnettable.lock); - list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { + write_lock(&pnettable->lock); + list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { if (pnetelem->ndev == new_pnetelem->ndev) new_netdev = false; } if (new_netdev) { dev_hold(tmp_pnetelem->ndev); - list_add_tail(&tmp_pnetelem->list, &smc_pnettable.pnetlist); - write_unlock(&smc_pnettable.lock); + list_add_tail(&tmp_pnetelem->list, &pnettable->pnetlist); + write_unlock(&pnettable->lock); } else { - write_unlock(&smc_pnettable.lock); + write_unlock(&pnettable->lock); kfree(tmp_pnetelem); } @@ -340,6 +348,10 @@ static int smc_pnet_fill_entry(struct net *net, goto error; } + /* if this is not the initial namespace, stop here */ + if (net != &init_net) + return 0; + rc = -EINVAL; if (tb[SMC_PNETID_IBNAME]) { ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); @@ -403,11 +415,17 @@ static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); struct smc_user_pnetentry pnetelem; + struct smc_pnettable *pnettable; + struct smc_net *sn; int rc; + /* get pnettable for namespace */ + sn = net_generic(net, smc_net_id); + pnettable = &sn->pnettable; + rc = smc_pnet_fill_entry(net, &pnetelem, info->attrs); if (!rc) - rc = smc_pnet_enter(&pnetelem); + rc = smc_pnet_enter(pnettable, &pnetelem); if (pnetelem.ndev) dev_put(pnetelem.ndev); return rc; @@ -415,9 +433,11 @@ static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) { + struct net *net = genl_info_net(info); + if (!info->attrs[SMC_PNETID_NAME]) return -EINVAL; - return smc_pnet_remove_by_pnetid( + return smc_pnet_remove_by_pnetid(net, (char *)nla_data(info->attrs[SMC_PNETID_NAME])); } @@ -445,19 +465,25 @@ static int smc_pnet_dumpinfo(struct sk_buff *skb, return 0; } -static int _smc_pnet_dump(struct sk_buff *skb, u32 portid, u32 seq, u8 *pnetid, - int start_idx) +static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid, + u32 seq, u8 *pnetid, int start_idx) { struct smc_user_pnetentry tmp_entry; + struct smc_pnettable *pnettable; struct smc_pnetentry *pnetelem; struct smc_ib_device *ibdev; struct smcd_dev *smcd_dev; + struct smc_net *sn; int idx = 0; int ibport; + /* get pnettable for namespace */ + sn = net_generic(net, smc_net_id); + pnettable = &sn->pnettable; + /* dump netdevices */ - read_lock(&smc_pnettable.lock); - list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { + read_lock(&pnettable->lock); + list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid)) continue; if (idx++ < start_idx) @@ -472,7 +498,11 @@ static int _smc_pnet_dump(struct sk_buff *skb, u32 portid, u32 seq, u8 *pnetid, break; } } - read_unlock(&smc_pnettable.lock); + read_unlock(&pnettable->lock); + + /* if this is not the initial namespace, stop here */ + if (net != &init_net) + return idx; /* dump ib devices */ spin_lock(&smc_ib_devices.lock); @@ -528,9 +558,10 @@ static int _smc_pnet_dump(struct sk_buff *skb, u32 portid, u32 seq, u8 *pnetid, static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); int idx; - idx = _smc_pnet_dump(skb, NETLINK_CB(cb->skb).portid, + idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NULL, cb->args[0]); cb->args[0] = idx; @@ -540,6 +571,7 @@ static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) /* Retrieve one PNETID entry */ static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) { + struct net *net = genl_info_net(info); struct sk_buff *msg; void *hdr; @@ -550,7 +582,7 @@ static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - _smc_pnet_dump(msg, info->snd_portid, info->snd_seq, + _smc_pnet_dump(net, msg, info->snd_portid, info->snd_seq, nla_data(info->attrs[SMC_PNETID_NAME]), 0); /* finish multi part message and send it */ @@ -567,7 +599,9 @@ static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) */ static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) { - return smc_pnet_remove_by_pnetid(NULL); + struct net *net = genl_info_net(info); + + return smc_pnet_remove_by_pnetid(net, NULL); } /* SMC_PNETID generic netlink operation definition */ @@ -631,6 +665,18 @@ static struct notifier_block smc_netdev_notifier = { .notifier_call = smc_pnet_netdev_event }; +/* init network namespace */ +int smc_pnet_net_init(struct net *net) +{ + struct smc_net *sn = net_generic(net, smc_net_id); + struct smc_pnettable *pnettable = &sn->pnettable; + + INIT_LIST_HEAD(&pnettable->pnetlist); + rwlock_init(&pnettable->lock); + + return 0; +} + int __init smc_pnet_init(void) { int rc; @@ -644,9 +690,15 @@ int __init smc_pnet_init(void) return rc; } +/* exit network namespace */ +void smc_pnet_net_exit(struct net *net) +{ + /* flush pnet table */ + smc_pnet_remove_by_pnetid(net, NULL); +} + void smc_pnet_exit(void) { - smc_pnet_flush(NULL, NULL); unregister_netdevice_notifier(&smc_netdev_notifier); genl_unregister_family(&smc_pnet_nl_family); } @@ -674,22 +726,29 @@ static struct net_device *pnet_find_base_ndev(struct net_device *ndev) return ndev; } -static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *netdev, +static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, u8 *pnetid) { + struct smc_pnettable *pnettable; + struct net *net = dev_net(ndev); struct smc_pnetentry *pnetelem; + struct smc_net *sn; int rc = -ENOENT; - read_lock(&smc_pnettable.lock); - list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { - if (netdev == pnetelem->ndev) { + /* get pnettable for namespace */ + sn = net_generic(net, smc_net_id); + pnettable = &sn->pnettable; + + read_lock(&pnettable->lock); + list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { + if (ndev == pnetelem->ndev) { /* get pnetid of netdev device */ memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN); rc = 0; break; } } - read_unlock(&smc_pnettable.lock); + read_unlock(&pnettable->lock); return rc; } diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index 37044e4ee50f..5eac42fb45d0 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -19,6 +19,16 @@ struct smc_ib_device; struct smcd_dev; +/** + * struct smc_pnettable - SMC PNET table anchor + * @lock: Lock for list action + * @pnetlist: List of PNETIDs + */ +struct smc_pnettable { + rwlock_t lock; + struct list_head pnetlist; +}; + static inline int smc_pnetid_by_dev_port(struct device *dev, unsigned short port, u8 *pnetid) { @@ -30,7 +40,9 @@ static inline int smc_pnetid_by_dev_port(struct device *dev, } int smc_pnet_init(void) __init; +int smc_pnet_net_init(struct net *net); void smc_pnet_exit(void); +void smc_pnet_net_exit(struct net *net); void smc_pnet_find_roce_resource(struct sock *sk, struct smc_ib_device **smcibdev, u8 *ibport, unsigned short vlan_id, u8 gid[]); From af5f60c7e3d593c2fa31b9a62c16eae75f074de3 Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 21 Feb 2019 13:01:03 +0100 Subject: [PATCH 1385/1436] net/smc: allow PCI IDs as ib device names in the pnet table SMC-D devices are identified by their PCI IDs in the pnet table. In order to make usage of the pnet table more consistent for users, this patch adds this form of identification for ib devices as well. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_pnet.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 878f5c085444..25dfbe343e26 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -293,7 +293,9 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) spin_lock(&smc_ib_devices.lock); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { if (!strncmp(ibdev->ibdev->name, ib_name, - sizeof(ibdev->ibdev->name))) { + sizeof(ibdev->ibdev->name)) || + !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, + IB_DEVICE_NAME_MAX - 1)) { goto out; } } @@ -394,7 +396,7 @@ static int smc_pnet_set_nla(struct sk_buff *msg, } if (pnetelem->smcibdev) { if (nla_put_string(msg, SMC_PNETID_IBNAME, - pnetelem->smcibdev->ibdev->name) || + dev_name(pnetelem->smcibdev->ibdev->dev.parent)) || nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port)) return -1; } else if (pnetelem->smcd_dev) { From 54719527fd06e80fce52b98537414035cd21e8d4 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 21 Feb 2019 14:12:01 +0200 Subject: [PATCH 1386/1436] devlink: Rename devlink health attributes Rename devlink health attributes for better reflect the attributes use. Add COUNT prefix on error counter attribute and recovery counter attribute. Fixes: 7afe335a8bed ("devlink: Add health get command") Signed-off-by: Aya Levin Signed-off-by: Eran Ben Elisha Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 4 ++-- net/core/devlink.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 53de8802a000..5bb4ea67d84f 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -323,8 +323,8 @@ enum devlink_attr { DEVLINK_ATTR_HEALTH_REPORTER, /* nested */ DEVLINK_ATTR_HEALTH_REPORTER_NAME, /* string */ DEVLINK_ATTR_HEALTH_REPORTER_STATE, /* u8 */ - DEVLINK_ATTR_HEALTH_REPORTER_ERR, /* u64 */ - DEVLINK_ATTR_HEALTH_REPORTER_RECOVER, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT, /* u64 */ + DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT, /* u64 */ DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, /* u64 */ DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, /* u64 */ DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, /* u8 */ diff --git a/net/core/devlink.c b/net/core/devlink.c index 04d98550c78c..5135997ecbe7 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4650,10 +4650,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg, if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE, reporter->health_state)) goto reporter_nest_cancel; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR, + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT, reporter->error_count, DEVLINK_ATTR_PAD)) goto reporter_nest_cancel; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER, + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT, reporter->recovery_count, DEVLINK_ATTR_PAD)) goto reporter_nest_cancel; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, From 574b1e1f457c631d356e55aaef26ee638a96d548 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 21 Feb 2019 14:12:02 +0200 Subject: [PATCH 1387/1436] devlink: Modify reply of DEVLINK_CMD_HEALTH_REPORTER_GET Avoid sending attributes related to recovery: DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD and DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER in reply to DEVLINK_CMD_HEALTH_REPORTER_GET for a reporter which didn't register a recover operation. These parameters can't be configured on a reporter that did not provide a recover operation, thus not needed to return them. Fixes: 7afe335a8bed ("devlink: Add health get command") Signed-off-by: Aya Levin Signed-off-by: Eran Ben Elisha Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 5135997ecbe7..4f31ddc883e7 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4656,11 +4656,13 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg, if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT, reporter->recovery_count, DEVLINK_ATTR_PAD)) goto reporter_nest_cancel; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, + if (reporter->ops->recover && + nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, reporter->graceful_period, DEVLINK_ATTR_PAD)) goto reporter_nest_cancel; - if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, + if (reporter->ops->recover && + nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, reporter->auto_recover)) goto reporter_nest_cancel; if (reporter->dump_fmsg && From 0ebcebbef1cc50fb94ae17917208b04868de9c38 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Thu, 21 Feb 2019 06:03:31 -0800 Subject: [PATCH 1388/1436] qed: Read device port count from the shmem Read port count from the shared memory instead of driver deriving this value. This change simplifies the driver implementation and also avoids any dependencies for finding the port-count. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 6 +- drivers/net/ethernet/qlogic/qed/qed_dev.c | 103 ++++++++-------------- drivers/net/ethernet/qlogic/qed/qed_l2.c | 4 +- drivers/net/ethernet/qlogic/qed/qed_mcp.h | 4 - drivers/net/ethernet/qlogic/qed/qed_ptp.c | 2 +- 5 files changed, 48 insertions(+), 71 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 2d21c94f19c2..43a57ec296fd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -753,6 +753,7 @@ struct qed_dev { #define CHIP_BOND_ID_SHIFT 0 u8 num_engines; + u8 num_ports; u8 num_ports_in_engine; u8 num_funcs_in_port; @@ -892,7 +893,6 @@ void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev, void qed_clean_wfq_db(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); int qed_device_num_engines(struct qed_dev *cdev); -int qed_device_get_port_id(struct qed_dev *cdev); void qed_set_fw_mac_addr(__le16 *fw_msb, __le16 *fw_mid, __le16 *fw_lsb, u8 *mac); @@ -939,6 +939,10 @@ bool qed_edpm_enabled(struct qed_hwfn *p_hwfn); writel((u32)val, (void __iomem *)((u8 __iomem *)\ (cdev->doorbells) + (db_addr))) +#define MFW_PORT(_p_hwfn) ((_p_hwfn)->abs_pf_id % \ + qed_device_num_ports((_p_hwfn)->cdev)) +int qed_device_num_ports(struct qed_dev *cdev); + /* Prototypes */ int qed_fill_dev_info(struct qed_dev *cdev, struct qed_dev_info *dev_info); diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index e2cbd77646a2..9df8c4b3b54e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -3269,55 +3269,43 @@ static void qed_get_num_funcs(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) p_hwfn->enabled_func_idx, p_hwfn->num_funcs_on_engine); } -static void qed_hw_info_port_num_bb(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) -{ - u32 port_mode; - - port_mode = qed_rd(p_hwfn, p_ptt, CNIG_REG_NW_PORT_MODE_BB); - - if (port_mode < 3) { - p_hwfn->cdev->num_ports_in_engine = 1; - } else if (port_mode <= 5) { - p_hwfn->cdev->num_ports_in_engine = 2; - } else { - DP_NOTICE(p_hwfn, "PORT MODE: %d not supported\n", - p_hwfn->cdev->num_ports_in_engine); - - /* Default num_ports_in_engine to something */ - p_hwfn->cdev->num_ports_in_engine = 1; - } -} - -static void qed_hw_info_port_num_ah(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) -{ - u32 port; - int i; - - p_hwfn->cdev->num_ports_in_engine = 0; - - for (i = 0; i < MAX_NUM_PORTS_K2; i++) { - port = qed_rd(p_hwfn, p_ptt, - CNIG_REG_NIG_PORT0_CONF_K2 + (i * 4)); - if (port & 1) - p_hwfn->cdev->num_ports_in_engine++; - } - - if (!p_hwfn->cdev->num_ports_in_engine) { - DP_NOTICE(p_hwfn, "All NIG ports are inactive\n"); - - /* Default num_ports_in_engine to something */ - p_hwfn->cdev->num_ports_in_engine = 1; - } -} - static void qed_hw_info_port_num(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - if (QED_IS_BB(p_hwfn->cdev)) - qed_hw_info_port_num_bb(p_hwfn, p_ptt); - else - qed_hw_info_port_num_ah(p_hwfn, p_ptt); + u32 addr, global_offsize, global_addr, port_mode; + struct qed_dev *cdev = p_hwfn->cdev; + + /* In CMT there is always only one port */ + if (cdev->num_hwfns > 1) { + cdev->num_ports_in_engine = 1; + cdev->num_ports = 1; + return; + } + + /* Determine the number of ports per engine */ + port_mode = qed_rd(p_hwfn, p_ptt, MISC_REG_PORT_MODE); + switch (port_mode) { + case 0x0: + cdev->num_ports_in_engine = 1; + break; + case 0x1: + cdev->num_ports_in_engine = 2; + break; + case 0x2: + cdev->num_ports_in_engine = 4; + break; + default: + DP_NOTICE(p_hwfn, "Unknown port mode 0x%08x\n", port_mode); + cdev->num_ports_in_engine = 1; /* Default to something */ + break; + } + + /* Get the total number of ports of the device */ + addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base, + PUBLIC_GLOBAL); + global_offsize = qed_rd(p_hwfn, p_ptt, addr); + global_addr = SECTION_ADDR(global_offsize, 0); + addr = global_addr + offsetof(struct public_global, max_ports); + cdev->num_ports = (u8)qed_rd(p_hwfn, p_ptt, addr); } static void qed_get_eee_caps(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) @@ -3355,7 +3343,8 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn, return rc; } - qed_hw_info_port_num(p_hwfn, p_ptt); + if (IS_LEAD_HWFN(p_hwfn)) + qed_hw_info_port_num(p_hwfn, p_ptt); qed_mcp_get_capabilities(p_hwfn, p_ptt); @@ -4760,23 +4749,9 @@ void qed_clean_wfq_db(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) sizeof(*p_hwfn->qm_info.wfq_data) * p_hwfn->qm_info.num_vports); } -int qed_device_num_engines(struct qed_dev *cdev) +int qed_device_num_ports(struct qed_dev *cdev) { - return QED_IS_BB(cdev) ? 2 : 1; -} - -static int qed_device_num_ports(struct qed_dev *cdev) -{ - /* in CMT always only one port */ - if (cdev->num_hwfns > 1) - return 1; - - return cdev->num_ports_in_engine * qed_device_num_engines(cdev); -} - -int qed_device_get_port_id(struct qed_dev *cdev) -{ - return (QED_LEADING_HWFN(cdev)->abs_pf_id) % qed_device_num_ports(cdev); + return cdev->num_ports; } void qed_set_fw_mac_addr(__le16 *fw_msb, diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 58be1c4c6668..57641728df69 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1898,6 +1898,7 @@ static void _qed_get_vport_stats(struct qed_dev *cdev, struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; struct qed_ptt *p_ptt = IS_PF(cdev) ? qed_ptt_acquire(p_hwfn) : NULL; + bool b_get_port_stats; if (IS_PF(cdev)) { /* The main vport index is relative first */ @@ -1912,8 +1913,9 @@ static void _qed_get_vport_stats(struct qed_dev *cdev, continue; } + b_get_port_stats = IS_PF(cdev) && IS_LEAD_HWFN(p_hwfn); __qed_get_vport_stats(p_hwfn, p_ptt, stats, fw_vport, - IS_PF(cdev) ? true : false); + b_get_port_stats); out: if (IS_PF(cdev) && p_ptt) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 2799e6741765..261c1a392e2c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -691,10 +691,6 @@ int qed_mfw_process_tlv_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); rel_pfid) #define MCP_PF_ID(p_hwfn) MCP_PF_ID_BY_REL(p_hwfn, (p_hwfn)->rel_pf_id) -#define MFW_PORT(_p_hwfn) ((_p_hwfn)->abs_pf_id % \ - ((_p_hwfn)->cdev->num_ports_in_engine * \ - qed_device_num_engines((_p_hwfn)->cdev))) - struct qed_mcp_info { /* List for mailbox commands which were sent and wait for a response */ struct list_head cmd_list; diff --git a/drivers/net/ethernet/qlogic/qed/qed_ptp.c b/drivers/net/ethernet/qlogic/qed/qed_ptp.c index 5a90d69dc2f8..1302b308bd87 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ptp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ptp.c @@ -47,7 +47,7 @@ static enum qed_resc_lock qed_ptcdev_to_resc(struct qed_hwfn *p_hwfn) { - switch (qed_device_get_port_id(p_hwfn->cdev)) { + switch (MFW_PORT(p_hwfn)) { case 0: return QED_RESC_LOCK_PTP_PORT0; case 1: From 40d5432cd50edf2b8a8a466a2ad48b487fec17f6 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 18 Feb 2019 20:28:54 +0100 Subject: [PATCH 1389/1436] net: phy: remove orphaned register read in genphy_read_status After recent changes to genphy_read_status() this orphaned register read remained as leftover. So remove it. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 7e71124bb20e..803197fdd87d 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1785,10 +1785,6 @@ int genphy_read_status(struct phy_device *phydev) mii_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, lpa); - adv = phy_read(phydev, MII_ADVERTISE); - if (adv < 0) - return adv; - phydev->speed = SPEED_10; phydev->duplex = DUPLEX_HALF; phydev->pause = 0; From 51f9f234dac5284333a047b7906f3095d410e6c3 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 18 Feb 2019 20:29:36 +0100 Subject: [PATCH 1390/1436] net: phy: don't use 10BaseT/half as default in genphy_read_status If link partner and we can't agree on any mode, then it doesn't make sense to pretend we would have agreed on 10/half. Therefore set a proper default. Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 803197fdd87d..4bb3b6c2894e 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1785,8 +1785,8 @@ int genphy_read_status(struct phy_device *phydev) mii_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, lpa); - phydev->speed = SPEED_10; - phydev->duplex = DUPLEX_HALF; + phydev->speed = SPEED_UNKNOWN; + phydev->duplex = DUPLEX_UNKNOWN; phydev->pause = 0; phydev->asym_pause = 0; From cd34499cacf3c34e2e094ff2a347b9378417970c Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 18 Feb 2019 21:26:58 +0100 Subject: [PATCH 1391/1436] net: phy: export genphy_config_eee_advert We want to use this function in phy-c45.c too, therefore export it. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 3 ++- include/linux/phy.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 4bb3b6c2894e..49fdd1ee798e 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1575,7 +1575,7 @@ static int genphy_config_advert(struct phy_device *phydev) * efficent ethernet modes. Returns 0 if the PHY's advertisement hasn't * changed, and 1 if it has changed. */ -static int genphy_config_eee_advert(struct phy_device *phydev) +int genphy_config_eee_advert(struct phy_device *phydev) { int err; @@ -1588,6 +1588,7 @@ static int genphy_config_eee_advert(struct phy_device *phydev) /* If the call failed, we assume that EEE is not supported */ return err < 0 ? 0 : err; } +EXPORT_SYMBOL(genphy_config_eee_advert); /** * genphy_setup_forced - configures/forces speed/duplex from @phydev diff --git a/include/linux/phy.h b/include/linux/phy.h index 3db507e68191..761131de4971 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1077,6 +1077,7 @@ void phy_attached_info(struct phy_device *phydev); int genphy_config_init(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); +int genphy_config_eee_advert(struct phy_device *phydev); int genphy_config_aneg(struct phy_device *phydev); int genphy_aneg_done(struct phy_device *phydev); int genphy_update_link(struct phy_device *phydev); From cc429d529126e5bbe867e839d7de4f75b9daad44 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 18 Feb 2019 21:27:12 +0100 Subject: [PATCH 1392/1436] net: phy: use genphy_config_eee_advert in genphy_c45_an_config_aneg Like in genphy_config_aneg() for clause 22 PHY's, we should keep modes from being advertised that are known to be broken with EEE. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy-c45.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 16636d49bd14..2f5721430e05 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -89,12 +89,14 @@ EXPORT_SYMBOL_GPL(genphy_c45_pma_setup_forced); */ int genphy_c45_an_config_aneg(struct phy_device *phydev) { - int changed = 0, ret; + int changed, ret; u32 adv; linkmode_and(phydev->advertising, phydev->advertising, phydev->supported); + changed = genphy_config_eee_advert(phydev); + adv = linkmode_adv_to_mii_adv_t(phydev->advertising); ret = phy_modify_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE, From 1af9f16840e920c6193490ae58371fc5cc28bb01 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 18 Feb 2019 21:27:18 +0100 Subject: [PATCH 1393/1436] net: phy: add genphy_c45_check_and_restart_aneg This function will be used by config_aneg callback implementations of PHY drivers and allows to reduce boilerplate code. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy-c45.c | 30 ++++++++++++++++++++++++++++++ include/linux/phy.h | 1 + 2 files changed, 31 insertions(+) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 2f5721430e05..fc3173cc078b 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -156,6 +156,36 @@ int genphy_c45_restart_aneg(struct phy_device *phydev) } EXPORT_SYMBOL_GPL(genphy_c45_restart_aneg); +/** + * genphy_c45_check_and_restart_aneg - Enable and restart auto-negotiation + * @phydev: target phy_device struct + * @restart: whether aneg restart is requested + * + * This assumes that the auto-negotiation MMD is present. + * + * Check, and restart auto-negotiation if needed. + */ +int genphy_c45_check_and_restart_aneg(struct phy_device *phydev, bool restart) +{ + int ret = 0; + + if (!restart) { + /* Configure and restart aneg if it wasn't set before */ + ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1); + if (ret < 0) + return ret; + + if (!(ret & MDIO_AN_CTRL1_ENABLE)) + restart = true; + } + + if (restart) + ret = genphy_c45_restart_aneg(phydev); + + return ret; +} +EXPORT_SYMBOL_GPL(genphy_c45_check_and_restart_aneg); + /** * genphy_c45_aneg_done - return auto-negotiation complete status * @phydev: target phy_device struct diff --git a/include/linux/phy.h b/include/linux/phy.h index 761131de4971..8e9fc576472b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1097,6 +1097,7 @@ int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, /* Clause 45 PHY */ int genphy_c45_restart_aneg(struct phy_device *phydev); +int genphy_c45_check_and_restart_aneg(struct phy_device *phydev, bool restart); int genphy_c45_aneg_done(struct phy_device *phydev); int genphy_c45_read_link(struct phy_device *phydev); int genphy_c45_read_lpa(struct phy_device *phydev); From 6b4cb6cb1385337d72a2bc0bd3b264f54a7bfaae Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 18 Feb 2019 21:27:46 +0100 Subject: [PATCH 1394/1436] net: phy: marvell10g: use genphy_c45_check_and_restart_aneg in mv3310_config_aneg Use new function genphy_c45_check_and_restart_aneg() to reduce boilerplate code. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 9ea27acf05ad..586ae1bc5a50 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -288,20 +288,7 @@ static int mv3310_config_aneg(struct phy_device *phydev) if (ret > 0) changed = true; - if (!changed) { - /* Configure and restart aneg if it wasn't set before */ - ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1); - if (ret < 0) - return ret; - - if (!(ret & MDIO_AN_CTRL1_ENABLE)) - changed = 1; - } - - if (changed) - ret = genphy_c45_restart_aneg(phydev); - - return ret; + return genphy_c45_check_and_restart_aneg(phydev, changed); } static int mv3310_aneg_done(struct phy_device *phydev) From ca8d4794f669e721fb5198f6d142e42dd8080239 Mon Sep 17 00:00:00 2001 From: Callum Sinclair Date: Mon, 18 Feb 2019 10:07:52 +1300 Subject: [PATCH 1395/1436] ipmr: ip6mr: Create new sockopt to clear mfc cache or vifs Currently the only way to clear the forwarding cache was to delete the entries one by one using the MRT_DEL_MFC socket option or to destroy and recreate the socket. Create a new socket option which with the use of optional flags can clear any combination of multicast entries (static or not static) and multicast vifs (static or not static). Calling the new socket option MRT_FLUSH with the flags MRT_FLUSH_MFC and MRT_FLUSH_VIFS will clear all entries and vifs on the socket except for static entries. Signed-off-by: Callum Sinclair Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/mroute.h | 9 ++++- include/uapi/linux/mroute6.h | 9 ++++- net/ipv4/ipmr.c | 75 +++++++++++++++++++++------------- net/ipv6/ip6mr.c | 78 +++++++++++++++++++++++------------- 4 files changed, 115 insertions(+), 56 deletions(-) diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h index 5d37a9ccce63..11c8c1fc1124 100644 --- a/include/uapi/linux/mroute.h +++ b/include/uapi/linux/mroute.h @@ -28,12 +28,19 @@ #define MRT_TABLE (MRT_BASE+9) /* Specify mroute table ID */ #define MRT_ADD_MFC_PROXY (MRT_BASE+10) /* Add a (*,*|G) mfc entry */ #define MRT_DEL_MFC_PROXY (MRT_BASE+11) /* Del a (*,*|G) mfc entry */ -#define MRT_MAX (MRT_BASE+11) +#define MRT_FLUSH (MRT_BASE+12) /* Flush all mfc entries and/or vifs */ +#define MRT_MAX (MRT_BASE+12) #define SIOCGETVIFCNT SIOCPROTOPRIVATE /* IP protocol privates */ #define SIOCGETSGCNT (SIOCPROTOPRIVATE+1) #define SIOCGETRPF (SIOCPROTOPRIVATE+2) +/* MRT_FLUSH optional flags */ +#define MRT_FLUSH_MFC 1 /* Flush multicast entries */ +#define MRT_FLUSH_MFC_STATIC 2 /* Flush static multicast entries */ +#define MRT_FLUSH_VIFS 4 /* Flush multicast vifs */ +#define MRT_FLUSH_VIFS_STATIC 8 /* Flush static multicast vifs */ + #define MAXVIFS 32 typedef unsigned long vifbitmap_t; /* User mode code depends on this lot */ typedef unsigned short vifi_t; diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h index 9999cc006390..c36177a86516 100644 --- a/include/uapi/linux/mroute6.h +++ b/include/uapi/linux/mroute6.h @@ -31,12 +31,19 @@ #define MRT6_TABLE (MRT6_BASE+9) /* Specify mroute table ID */ #define MRT6_ADD_MFC_PROXY (MRT6_BASE+10) /* Add a (*,*|G) mfc entry */ #define MRT6_DEL_MFC_PROXY (MRT6_BASE+11) /* Del a (*,*|G) mfc entry */ -#define MRT6_MAX (MRT6_BASE+11) +#define MRT6_FLUSH (MRT6_BASE+12) /* Flush all mfc entries and/or vifs */ +#define MRT6_MAX (MRT6_BASE+12) #define SIOCGETMIFCNT_IN6 SIOCPROTOPRIVATE /* IP protocol privates */ #define SIOCGETSGCNT_IN6 (SIOCPROTOPRIVATE+1) #define SIOCGETRPF (SIOCPROTOPRIVATE+2) +/* MRT6_FLUSH optional flags */ +#define MRT6_FLUSH_MFC 1 /* Flush multicast entries */ +#define MRT6_FLUSH_MFC_STATIC 2 /* Flush static multicast entries */ +#define MRT6_FLUSH_MIFS 4 /* Flushing multicast vifs */ +#define MRT6_FLUSH_MIFS_STATIC 8 /* Flush static multicast vifs */ + #define MAXMIFS 32 typedef unsigned long mifbitmap_t; /* User mode code depends on this lot */ typedef unsigned short mifi_t; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index e536970557dd..2c931120c494 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -110,7 +110,7 @@ static int ipmr_cache_report(struct mr_table *mrt, static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, int cmd); static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); -static void mroute_clean_tables(struct mr_table *mrt, bool all); +static void mroute_clean_tables(struct mr_table *mrt, int flags); static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES @@ -415,7 +415,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) static void ipmr_free_table(struct mr_table *mrt) { del_timer_sync(&mrt->ipmr_expire_timer); - mroute_clean_tables(mrt, true); + mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | + MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC); rhltable_destroy(&mrt->mfc_hash); kfree(mrt); } @@ -1296,7 +1297,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, } /* Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct mr_table *mrt, bool all) +static void mroute_clean_tables(struct mr_table *mrt, int flags) { struct net *net = read_pnet(&mrt->net); struct mr_mfc *c, *tmp; @@ -1305,35 +1306,44 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all) int i; /* Shut down all active vif entries */ - for (i = 0; i < mrt->maxvif; i++) { - if (!all && (mrt->vif_table[i].flags & VIFF_STATIC)) - continue; - vif_delete(mrt, i, 0, &list); + if (flags & (MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC)) { + for (i = 0; i < mrt->maxvif; i++) { + if (((mrt->vif_table[i].flags & VIFF_STATIC) && + !(flags & MRT_FLUSH_VIFS_STATIC)) || + (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS))) + continue; + vif_delete(mrt, i, 0, &list); + } + unregister_netdevice_many(&list); } - unregister_netdevice_many(&list); /* Wipe the cache */ - list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { - if (!all && (c->mfc_flags & MFC_STATIC)) - continue; - rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); - list_del_rcu(&c->list); - cache = (struct mfc_cache *)c; - call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache, - mrt->id); - mroute_netlink_event(mrt, cache, RTM_DELROUTE); - mr_cache_put(c); + if (flags & (MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC)) { + list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { + if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC_STATIC)) || + (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC))) + continue; + rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); + list_del_rcu(&c->list); + cache = (struct mfc_cache *)c; + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache, + mrt->id); + mroute_netlink_event(mrt, cache, RTM_DELROUTE); + mr_cache_put(c); + } } - if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { - spin_lock_bh(&mfc_unres_lock); - list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { - list_del(&c->list); - cache = (struct mfc_cache *)c; - mroute_netlink_event(mrt, cache, RTM_DELROUTE); - ipmr_destroy_unres(mrt, cache); + if (flags & MRT_FLUSH_MFC) { + if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { + spin_lock_bh(&mfc_unres_lock); + list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { + list_del(&c->list); + cache = (struct mfc_cache *)c; + mroute_netlink_event(mrt, cache, RTM_DELROUTE); + ipmr_destroy_unres(mrt, cache); + } + spin_unlock_bh(&mfc_unres_lock); } - spin_unlock_bh(&mfc_unres_lock); } } @@ -1354,7 +1364,7 @@ static void mrtsock_destruct(struct sock *sk) NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); RCU_INIT_POINTER(mrt->mroute_sk, NULL); - mroute_clean_tables(mrt, false); + mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_MFC); } } rtnl_unlock(); @@ -1479,6 +1489,17 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, sk == rtnl_dereference(mrt->mroute_sk), parent); break; + case MRT_FLUSH: + if (optlen != sizeof(val)) { + ret = -EINVAL; + break; + } + if (get_user(val, (int __user *)optval)) { + ret = -EFAULT; + break; + } + mroute_clean_tables(mrt, val); + break; /* Control PIM assert. */ case MRT_ASSERT: if (optlen != sizeof(val)) { diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index cc01aa3f2b5e..3594f1d9c68c 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -97,7 +97,7 @@ static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb); -static void mroute_clean_tables(struct mr_table *mrt, bool all); +static void mroute_clean_tables(struct mr_table *mrt, int flags); static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES @@ -393,7 +393,8 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id) static void ip6mr_free_table(struct mr_table *mrt) { del_timer_sync(&mrt->ipmr_expire_timer); - mroute_clean_tables(mrt, true); + mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC | + MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC); rhltable_destroy(&mrt->mfc_hash); kfree(mrt); } @@ -1496,42 +1497,51 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct mr_table *mrt, bool all) +static void mroute_clean_tables(struct mr_table *mrt, int flags) { struct mr_mfc *c, *tmp; LIST_HEAD(list); int i; /* Shut down all active vif entries */ - for (i = 0; i < mrt->maxvif; i++) { - if (!all && (mrt->vif_table[i].flags & VIFF_STATIC)) - continue; - mif6_delete(mrt, i, 0, &list); + if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) { + for (i = 0; i < mrt->maxvif; i++) { + if (((mrt->vif_table[i].flags & VIFF_STATIC) && + !(flags & MRT6_FLUSH_MIFS_STATIC)) || + (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS))) + continue; + mif6_delete(mrt, i, 0, &list); + } + unregister_netdevice_many(&list); } - unregister_netdevice_many(&list); /* Wipe the cache */ - list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { - if (!all && (c->mfc_flags & MFC_STATIC)) - continue; - rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); - list_del_rcu(&c->list); - call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), - FIB_EVENT_ENTRY_DEL, - (struct mfc6_cache *)c, mrt->id); - mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); - mr_cache_put(c); + if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) { + list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { + if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) || + (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC))) + continue; + rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); + list_del_rcu(&c->list); + call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), + FIB_EVENT_ENTRY_DEL, + (struct mfc6_cache *)c, mrt->id); + mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); + mr_cache_put(c); + } } - if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { - spin_lock_bh(&mfc_unres_lock); - list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { - list_del(&c->list); - mr6_netlink_event(mrt, (struct mfc6_cache *)c, - RTM_DELROUTE); - ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); + if (flags & MRT6_FLUSH_MFC) { + if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { + spin_lock_bh(&mfc_unres_lock); + list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { + list_del(&c->list); + mr6_netlink_event(mrt, (struct mfc6_cache *)c, + RTM_DELROUTE); + ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); + } + spin_unlock_bh(&mfc_unres_lock); } - spin_unlock_bh(&mfc_unres_lock); } } @@ -1587,7 +1597,7 @@ int ip6mr_sk_done(struct sock *sk) NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); - mroute_clean_tables(mrt, false); + mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC); err = 0; break; } @@ -1703,6 +1713,20 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns rtnl_unlock(); return ret; + case MRT6_FLUSH: + { + int flags; + + if (optlen != sizeof(flags)) + return -EINVAL; + if (get_user(flags, (int __user *)optval)) + return -EFAULT; + rtnl_lock(); + mroute_clean_tables(mrt, flags); + rtnl_unlock(); + return 0; + } + /* * Control PIM assert (to activate pim will activate assert) */ From 56b90fa02214e74263a12a227e71ddd748c3fab0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 17 Feb 2019 22:52:09 +0000 Subject: [PATCH 1396/1436] lib/test_rhashtable: fix spelling mistake "existant" -> "existent" There are spelling mistakes in warning macro messages. Fix them. Signed-off-by: Colin Ian King Acked-by: Herbert Xu Signed-off-by: David S. Miller --- lib/test_rhashtable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index e52f8cafe227..2c0c53a99734 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -395,7 +395,7 @@ static int __init test_rhltable(unsigned int entries) if (WARN(err, "cannot remove element at slot %d", i)) continue; } else { - if (WARN(err != -ENOENT, "removed non-existant element %d, error %d not %d", + if (WARN(err != -ENOENT, "removed non-existent element %d, error %d not %d", i, err, -ENOENT)) continue; } @@ -440,7 +440,7 @@ static int __init test_rhltable(unsigned int entries) if (WARN(err, "cannot remove element at slot %d", i)) continue; } else { - if (WARN(err != -ENOENT, "removed non-existant element, error %d not %d", + if (WARN(err != -ENOENT, "removed non-existent element, error %d not %d", err, -ENOENT)) continue; } From af736bf071e8ccf395fa7f7d9d105e020cf609cc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 18 Feb 2019 12:26:32 +0300 Subject: [PATCH 1397/1436] net: sched: potential NULL dereference in tcf_block_find() The error code isn't set on this path so it would result in returning ERR_PTR(0) and a NULL dereference in the caller. Fixes: 18d3eefb17cf ("net: sched: refactor tcf_block_find() into standalone functions") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/sched/cls_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 9ad53895e604..28592e9f803f 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1309,8 +1309,10 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, goto errout_qdisc; block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack); - if (IS_ERR(block)) + if (IS_ERR(block)) { + err = PTR_ERR(block); goto errout_qdisc; + } return block; From a2b5a3fa2ce10411130b496ad0e55ef5a4971fd9 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 19 Feb 2019 10:15:56 +0800 Subject: [PATCH 1398/1436] net: remove unneeded switch fall-through This case block has been terminated by a return, so not need a switch fall-through Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 1 - net/ipv6/mcast_snoop.c | 1 - 2 files changed, 2 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index b448cf32296c..6c2febc39dca 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1541,7 +1541,6 @@ static int ip_mc_check_igmp_msg(struct sk_buff *skb) case IGMP_HOST_LEAVE_MESSAGE: case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: - /* fall through */ return 0; case IGMPV3_HOST_MEMBERSHIP_REPORT: return ip_mc_check_igmp_reportv3(skb); diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c index 55e2ac179f28..dddd75d1be0e 100644 --- a/net/ipv6/mcast_snoop.c +++ b/net/ipv6/mcast_snoop.c @@ -128,7 +128,6 @@ static int ipv6_mc_check_mld_msg(struct sk_buff *skb) switch (mld->mld_type) { case ICMPV6_MGM_REDUCTION: case ICMPV6_MGM_REPORT: - /* fall through */ return 0; case ICMPV6_MLD2_REPORT: return ipv6_mc_check_mld_reportv2(skb); From 08e71623c8223723b19d079933974645d52967c6 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 19 Feb 2019 10:17:09 +0800 Subject: [PATCH 1399/1436] bridge: remove redundant check on err in br_multicast_ipv4_rcv br_ip4_multicast_mrd_rcv only return 0 and -ENOMSG, no other negative value Signed-off-by: Li RongQing Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 4a048fd1cbea..fe9f2d8ca2c1 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1615,12 +1615,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, if (ip_hdr(skb)->protocol == IPPROTO_PIM) br_multicast_pim(br, port, skb); } else if (ipv4_is_all_snoopers(ip_hdr(skb)->daddr)) { - err = br_ip4_multicast_mrd_rcv(br, port, skb); - - if (err < 0 && err != -ENOMSG) { - br_multicast_err_count(br, port, skb->protocol); - return err; - } + br_ip4_multicast_mrd_rcv(br, port, skb); } return 0; From 57652796aa979d5754406c8177f716cb2cf60616 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 20 Feb 2019 15:35:04 -0800 Subject: [PATCH 1400/1436] net: dsa: add support for bridge flags The Linux bridge implementation allows various properties of the bridge to be controlled, such as flooding unknown unicast and multicast frames. This patch adds the necessary DSA infrastructure to allow the Linux bridge support to control these properties for DSA switches. Reviewed-by: Vivien Didelot Signed-off-by: Russell King [florian: Add missing dp and ds variables declaration to fix build] Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 2 ++ net/dsa/dsa_priv.h | 2 ++ net/dsa/port.c | 17 +++++++++++++++++ net/dsa/slave.c | 9 +++++++++ 4 files changed, 30 insertions(+) diff --git a/include/net/dsa.h b/include/net/dsa.h index 7f2a668ef2cc..2c2c10812814 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -400,6 +400,8 @@ struct dsa_switch_ops { void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); void (*port_fast_age)(struct dsa_switch *ds, int port); + int (*port_egress_floods)(struct dsa_switch *ds, int port, + bool unicast, bool multicast); /* * VLAN support diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 1f4972dab9f2..f4f99ec29f5d 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -160,6 +160,8 @@ int dsa_port_mdb_add(const struct dsa_port *dp, struct switchdev_trans *trans); int dsa_port_mdb_del(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb); +int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags, + struct switchdev_trans *trans); int dsa_port_vlan_add(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans); diff --git a/net/dsa/port.c b/net/dsa/port.c index 2d7e01b23572..6df29bddf37e 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -177,6 +177,23 @@ int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info); } +int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags, + struct switchdev_trans *trans) +{ + struct dsa_switch *ds = dp->ds; + int port = dp->index; + int err = 0; + + if (switchdev_trans_ph_prepare(trans)) + return 0; + + if (ds->ops->port_egress_floods) + err = ds->ops->port_egress_floods(ds, port, flags & BR_FLOOD, + flags & BR_MCAST_FLOOD); + + return err; +} + int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, u16 vid) { diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 2e5e7c04821b..85dc68611002 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -295,6 +295,9 @@ static int dsa_slave_port_attr_set(struct net_device *dev, case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: ret = dsa_port_ageing_time(dp, attr->u.ageing_time, trans); break; + case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: + ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, trans); + break; default: ret = -EOPNOTSUPP; break; @@ -381,9 +384,15 @@ static int dsa_slave_get_port_parent_id(struct net_device *dev, static int dsa_slave_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) { + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: attr->u.brport_flags_support = 0; + if (ds->ops->port_egress_floods) + attr->u.brport_flags_support |= BR_FLOOD | + BR_MCAST_FLOOD; break; default: return -EOPNOTSUPP; From 4f85901f0063e6f435125f8eb54d12e3108ab064 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 20 Feb 2019 15:35:05 -0800 Subject: [PATCH 1401/1436] net: dsa: mv88e6xxx: add support for bridge flags Add support for the bridge flags to Marvell 88e6xxx bridges, allowing the multicast and unicast flood properties to be controlled. These can be controlled on a per-port basis via commands such as: bridge link set dev lan1 flood on|off bridge link set dev lan1 mcast_flood on|off Reviewed-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 32e7af5caa69..cc7ce06b6d58 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -4692,6 +4692,22 @@ static int mv88e6xxx_port_mdb_del(struct dsa_switch *ds, int port, return err; } +static int mv88e6xxx_port_egress_floods(struct dsa_switch *ds, int port, + bool unicast, bool multicast) +{ + struct mv88e6xxx_chip *chip = ds->priv; + int err = -EOPNOTSUPP; + + mutex_lock(&chip->reg_lock); + if (chip->info->ops->port_set_egress_floods) + err = chip->info->ops->port_set_egress_floods(chip, port, + unicast, + multicast); + mutex_unlock(&chip->reg_lock); + + return err; +} + static const struct dsa_switch_ops mv88e6xxx_switch_ops = { #if IS_ENABLED(CONFIG_NET_DSA_LEGACY) .probe = mv88e6xxx_drv_probe, @@ -4719,6 +4735,7 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .set_ageing_time = mv88e6xxx_set_ageing_time, .port_bridge_join = mv88e6xxx_port_bridge_join, .port_bridge_leave = mv88e6xxx_port_bridge_leave, + .port_egress_floods = mv88e6xxx_port_egress_floods, .port_stp_state_set = mv88e6xxx_port_stp_state_set, .port_fast_age = mv88e6xxx_port_fast_age, .port_vlan_filtering = mv88e6xxx_port_vlan_filtering, From c138806344855cd7c094abbe7507832107e8171e Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 20 Feb 2019 15:35:06 -0800 Subject: [PATCH 1402/1436] net: dsa: enable flooding for bridge ports Switches work by learning the MAC address for each attached station by monitoring traffic from each station. When a station sends a packet, the switch records which port the MAC address is connected to. With IPv4 networking, before communication commences with a neighbour, an ARP packet is broadcasted to all stations asking for the MAC address corresponding with the IPv4. The desired station responds with an ARP reply, and the ARP reply causes the switch to learn which port the station is connected to. With IPv6 networking, the situation is rather different. Rather than broadcasting ARP packets, a "neighbour solicitation" is multicasted rather than broadcasted. This multicast needs to reach the intended station in order for the neighbour to be discovered. Once a neighbour has been discovered, and entered into the sending stations neighbour cache, communication can restart at a point later without sending a new neighbour solicitation, even if the entry in the neighbour cache is marked as stale. This can be after the MAC address has expired from the forwarding cache of the DSA switch - when that occurs, there is a long pause in communication. Our DSA implementation for mv88e6xxx switches disables flooding of multicast and unicast frames for bridged ports. As per the above description, this is fine for IPv4 networking, since the broadcasted ARP queries will be sent to and received by all stations on the same network. However, this breaks IPv6 very badly - blocking neighbour solicitations and later causing connections to stall. The defaults that the Linux bridge code expect from bridges are for unknown unicast and unknown multicast frames to be flooded to all ports on the bridge, which is at odds to the defaults adopted by our DSA implementation for mv88e6xxx switches. This commit enables by default flooding of both unknown unicast and unknown multicast frames whenever a port is added to a bridge, and disables the flooding when a port leaves the bridge. This means that mv88e6xxx DSA switches now behave as per the bridge(8) man page, and IPv6 works flawlessly through such a switch. Reviewed-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: Russell King Signed-off-by: David S. Miller --- net/dsa/port.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/net/dsa/port.c b/net/dsa/port.c index 6df29bddf37e..7bc2a5ad95c6 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -105,16 +105,23 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br) }; int err; - /* Here the port is already bridged. Reflect the current configuration - * so that drivers can program their chips accordingly. + /* Set the flooding mode before joining the port in the switch */ + err = dsa_port_bridge_flags(dp, BR_FLOOD | BR_MCAST_FLOOD, NULL); + if (err) + return err; + + /* Here the interface is already bridged. Reflect the current + * configuration so that drivers can program their chips accordingly. */ dp->bridge_dev = br; err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_JOIN, &info); /* The bridging is rolled back on error */ - if (err) + if (err) { + dsa_port_bridge_flags(dp, 0, NULL); dp->bridge_dev = NULL; + } return err; } @@ -137,6 +144,9 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) if (err) pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n"); + /* Port is leaving the bridge, disable flooding */ + dsa_port_bridge_flags(dp, 0, NULL); + /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional */ From 746dc184ba59aa2c843b9e888be3f1302d227b74 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 20 Feb 2019 16:58:19 -0800 Subject: [PATCH 1403/1436] net: switchdev: Add PORT_PRE_BRIDGE_FLAGS In preparation for removing switchdev_port_attr_get(), introduce PORT_PRE_BRIDGE_FLAGS which will be called through switchdev_port_attr_set(), in the caller's context (possibly atomic) and which must be checked by the switchdev driver in order to return whether the operation is supported or not. This is entirely analoguous to how the BRIDGE_FLAGS_SUPPORT works, except it goes through a set() instead of get(). Signed-off-by: Florian Fainelli Reviewed-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 5e87b54c5dc5..de72b0a3867f 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -46,6 +46,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_PORT_STP_STATE, SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT, + SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS, SWITCHDEV_ATTR_ID_PORT_MROUTER, SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING, @@ -61,7 +62,7 @@ struct switchdev_attr { void (*complete)(struct net_device *dev, int err, void *priv); union { u8 stp_state; /* PORT_STP_STATE */ - unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */ + unsigned long brport_flags; /* PORT_{PRE}_BRIDGE_FLAGS */ unsigned long brport_flags_support; /* PORT_BRIDGE_FLAGS_SUPPORT */ bool mrouter; /* PORT_MROUTER */ clock_t ageing_time; /* BRIDGE_AGEING_TIME */ From c19c44f867ca3c0bb3b820c9234590dbd0f7afbe Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 20 Feb 2019 16:58:20 -0800 Subject: [PATCH 1404/1436] mlxsw: spectrum: Handle PORT_PRE_BRIDGE_FLAGS In preparation for getting rid of switchdev_port_attr_get(), have mlxsw check for the bridge flags being set through switchdev_port_attr_set() when the SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS attribute identifier is used. Signed-off-by: Florian Fainelli Reviewed-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 1f492b7dbea8..9a8798f74d2b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -595,6 +595,17 @@ err_port_bridge_vlan_learning_set: return err; } +static int mlxsw_sp_port_attr_br_pre_flags_set(struct mlxsw_sp_port + *mlxsw_sp_port, + struct switchdev_trans *trans, + unsigned long brport_flags) +{ + if (brport_flags & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD)) + return -EINVAL; + + return 0; +} + static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, struct switchdev_trans *trans, struct net_device *orig_dev, @@ -841,6 +852,11 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, attr->orig_dev, attr->u.stp_state); break; + case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: + err = mlxsw_sp_port_attr_br_pre_flags_set(mlxsw_sp_port, + trans, + attr->u.brport_flags); + break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: err = mlxsw_sp_port_attr_br_flags_set(mlxsw_sp_port, trans, attr->orig_dev, From e1230246d0ad7571dba93a136f16e90daad187eb Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 20 Feb 2019 16:58:21 -0800 Subject: [PATCH 1405/1436] staging: fsl-dpaa2: ethsw: Handle PORT_PRE_BRIDGE_FLAGS In preparation for removing SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT, handle the SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS attribute and check that the bridge port flags being configured are supported. Signed-off-by: Florian Fainelli Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c index 1b3943b71254..331625137717 100644 --- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c +++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c @@ -666,6 +666,16 @@ static int port_attr_stp_state_set(struct net_device *netdev, return ethsw_port_set_stp_state(port_priv, state); } +static int port_attr_br_flags_pre_set(struct net_device *netdev, + struct switchdev_trans *trans, + unsigned long flags) +{ + if (flags & ~(BR_LEARNING | BR_FLOOD)) + return -EINVAL; + + return 0; +} + static int port_attr_br_flags_set(struct net_device *netdev, struct switchdev_trans *trans, unsigned long flags) @@ -698,6 +708,10 @@ static int swdev_port_attr_set(struct net_device *netdev, err = port_attr_stp_state_set(netdev, trans, attr->u.stp_state); break; + case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: + err = port_attr_br_flags_pre_set(netdev, trans, + attr->u.brport_flags); + break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: err = port_attr_br_flags_set(netdev, trans, attr->u.brport_flags); From ea87005a00f750c00d2aa59031c1110ea4952382 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 20 Feb 2019 16:58:22 -0800 Subject: [PATCH 1406/1436] net: dsa: Add setter for SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS In preparation for removing SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT, add support for a function that processes the SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS and SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS attributes and returns not supported for any flag set, since DSA does not currently support toggling those bridge port attributes (yet). Signed-off-by: Florian Fainelli Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/dsa/dsa_priv.h | 2 ++ net/dsa/port.c | 12 ++++++++++++ net/dsa/slave.c | 4 ++++ 3 files changed, 18 insertions(+) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index f4f99ec29f5d..47a1d1379d15 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -160,6 +160,8 @@ int dsa_port_mdb_add(const struct dsa_port *dp, struct switchdev_trans *trans); int dsa_port_mdb_del(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb); +int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags, + struct switchdev_trans *trans); int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags, struct switchdev_trans *trans); int dsa_port_vlan_add(struct dsa_port *dp, diff --git a/net/dsa/port.c b/net/dsa/port.c index 7bc2a5ad95c6..e9b5b50f8cf1 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -187,6 +187,18 @@ int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info); } +int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags, + struct switchdev_trans *trans) +{ + struct dsa_switch *ds = dp->ds; + + if (!ds->ops->port_egress_floods || + (flags & ~(BR_FLOOD | BR_MCAST_FLOOD))) + return -EINVAL; + + return 0; +} + int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags, struct switchdev_trans *trans) { diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 85dc68611002..44cc4e50dd5a 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -295,6 +295,10 @@ static int dsa_slave_port_attr_set(struct net_device *dev, case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: ret = dsa_port_ageing_time(dp, attr->u.ageing_time, trans); break; + case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: + ret = dsa_port_pre_bridge_flags(dp, attr->u.brport_flags, + trans); + break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, trans); break; From 93700458ff63836ff73d3301e5e98d2e0bb01e98 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 20 Feb 2019 16:58:23 -0800 Subject: [PATCH 1407/1436] rocker: Check Handle PORT_PRE_BRIDGE_FLAGS In preparation for getting rid of switchdev_port_attr_get(), have rocker check for the bridge flags being set through switchdev_port_attr_set() with the SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS attribute identifier. Signed-off-by: Florian Fainelli Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker_main.c | 55 +++++++++++++++++------ 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 5ce8d5aba603..25129f7b5583 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -1565,6 +1565,43 @@ static int rocker_world_port_attr_stp_state_set(struct rocker_port *rocker_port, return wops->port_attr_stp_state_set(rocker_port, state); } +static int +rocker_world_port_attr_bridge_flags_support_get(const struct rocker_port * + rocker_port, + unsigned long * + p_brport_flags_support) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + + if (!wops->port_attr_bridge_flags_support_get) + return -EOPNOTSUPP; + return wops->port_attr_bridge_flags_support_get(rocker_port, + p_brport_flags_support); +} + +static int +rocker_world_port_attr_pre_bridge_flags_set(struct rocker_port *rocker_port, + unsigned long brport_flags, + struct switchdev_trans *trans) +{ + struct rocker_world_ops *wops = rocker_port->rocker->wops; + unsigned long brport_flags_s; + int err; + + if (!wops->port_attr_bridge_flags_set) + return -EOPNOTSUPP; + + err = rocker_world_port_attr_bridge_flags_support_get(rocker_port, + &brport_flags_s); + if (err) + return err; + + if (brport_flags & ~brport_flags_s) + return -EINVAL; + + return 0; +} + static int rocker_world_port_attr_bridge_flags_set(struct rocker_port *rocker_port, unsigned long brport_flags, @@ -1582,20 +1619,6 @@ rocker_world_port_attr_bridge_flags_set(struct rocker_port *rocker_port, trans); } -static int -rocker_world_port_attr_bridge_flags_support_get(const struct rocker_port * - rocker_port, - unsigned long * - p_brport_flags_support) -{ - struct rocker_world_ops *wops = rocker_port->rocker->wops; - - if (!wops->port_attr_bridge_flags_support_get) - return -EOPNOTSUPP; - return wops->port_attr_bridge_flags_support_get(rocker_port, - p_brport_flags_support); -} - static int rocker_world_port_attr_bridge_ageing_time_set(struct rocker_port *rocker_port, u32 ageing_time, @@ -2074,6 +2097,10 @@ static int rocker_port_attr_set(struct net_device *dev, attr->u.stp_state, trans); break; + case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: + err = rocker_world_port_attr_pre_bridge_flags_set(rocker_port, + attr->u.brport_flags, + trans); case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: err = rocker_world_port_attr_bridge_flags_set(rocker_port, attr->u.brport_flags, From 1ef0764486fa6b22d7e211e56486acb416a9ea8d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 20 Feb 2019 16:58:24 -0800 Subject: [PATCH 1408/1436] net: bridge: Stop calling switchdev_port_attr_get() Now that all switchdev drivers have been converted to check the SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS flags and report flags that they do not support accordingly, we can migrate the bridge code to try to set that attribute first, check the results and then do the actual setting. Signed-off-by: Florian Fainelli Reviewed-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/bridge/br_switchdev.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index db9e8ab96d48..af57c4a2b78a 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -64,21 +64,19 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p, { struct switchdev_attr attr = { .orig_dev = p->dev, - .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT, + .id = SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS, + .u.brport_flags = mask, }; int err; if (mask & ~BR_PORT_FLAGS_HW_OFFLOAD) return 0; - err = switchdev_port_attr_get(p->dev, &attr); + err = switchdev_port_attr_set(p->dev, &attr); if (err == -EOPNOTSUPP) return 0; - if (err) - return err; - /* Check if specific bridge flag attribute offload is supported */ - if (!(attr.u.brport_flags_support & mask)) { + if (err) { br_warn(p->br, "bridge flag offload is not supported %u(%s)\n", (unsigned int)p->port_no, p->dev->name); return -EOPNOTSUPP; @@ -87,6 +85,7 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p, attr.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS; attr.flags = SWITCHDEV_F_DEFER; attr.u.brport_flags = flags; + err = switchdev_port_attr_set(p->dev, &attr); if (err) { br_warn(p->br, "error setting offload flag on port %u(%s)\n", From cc0c207a5d18333fbfecc964a47ddb182fbcb720 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 20 Feb 2019 16:58:25 -0800 Subject: [PATCH 1409/1436] net: Remove SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT Now that we have converted the bridge code and the drivers to check for bridge port(s) flags at the time we try to set them, there is no need for a get() -> set() sequence anymore and SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT therefore becomes unused. Reviewed-by: Ido Schimmel Signed-off-by: Florian Fainelli Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- Documentation/networking/switchdev.txt | 6 ++---- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 11 +---------- drivers/net/ethernet/rocker/rocker_main.c | 14 +------------- drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 10 +--------- include/net/switchdev.h | 2 -- net/dsa/slave.c | 16 +--------------- 6 files changed, 6 insertions(+), 53 deletions(-) diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index f3244d87512a..79c8b0f16aee 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -232,10 +232,8 @@ Learning_sync attribute enables syncing of the learned/forgotten FDB entry to the bridge's FDB. It's possible, but not optimal, to enable learning on the device port and on the bridge port, and disable learning_sync. -To support learning and learning_sync port attributes, the driver implements -switchdev op switchdev_port_attr_get/set for -SWITCHDEV_ATTR_PORT_ID_BRIDGE_FLAGS. The driver should initialize the attributes -to the hardware defaults. +To support learning, the driver implements switchdev op +switchdev_port_attr_get/set for SWITCHDEV_ATTR_PORT_ID_BRIDGE_FLAGS. FDB Ageing ^^^^^^^^^^ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 9a8798f74d2b..bbb5a406232e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -434,16 +434,7 @@ static void mlxsw_sp_bridge_vlan_put(struct mlxsw_sp_bridge_vlan *bridge_vlan) static int mlxsw_sp_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) { - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: - attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD | - BR_MCAST_FLOOD; - break; - default: - return -EOPNOTSUPP; - } - - return 0; + return -EOPNOTSUPP; } static int diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 25129f7b5583..6b8273e2057d 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2069,19 +2069,7 @@ static const struct net_device_ops rocker_port_netdev_ops = { static int rocker_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) { - const struct rocker_port *rocker_port = netdev_priv(dev); - int err = 0; - - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: - err = rocker_world_port_attr_bridge_flags_support_get(rocker_port, - &attr->u.brport_flags_support); - break; - default: - return -EOPNOTSUPP; - } - - return err; + return -EOPNOTSUPP; } static int rocker_port_attr_set(struct net_device *dev, diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c index 331625137717..de4dcabbc29a 100644 --- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c +++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c @@ -643,15 +643,7 @@ static void ethsw_teardown_irqs(struct fsl_mc_device *sw_dev) static int swdev_port_attr_get(struct net_device *netdev, struct switchdev_attr *attr) { - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: - attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD; - break; - default: - return -EOPNOTSUPP; - } - - return 0; + return -EOPNOTSUPP; } static int port_attr_stp_state_set(struct net_device *netdev, diff --git a/include/net/switchdev.h b/include/net/switchdev.h index de72b0a3867f..0f352019ef99 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -45,7 +45,6 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_UNDEFINED, SWITCHDEV_ATTR_ID_PORT_STP_STATE, SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, - SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT, SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS, SWITCHDEV_ATTR_ID_PORT_MROUTER, SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, @@ -63,7 +62,6 @@ struct switchdev_attr { union { u8 stp_state; /* PORT_STP_STATE */ unsigned long brport_flags; /* PORT_{PRE}_BRIDGE_FLAGS */ - unsigned long brport_flags_support; /* PORT_BRIDGE_FLAGS_SUPPORT */ bool mrouter; /* PORT_MROUTER */ clock_t ageing_time; /* BRIDGE_AGEING_TIME */ bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */ diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 44cc4e50dd5a..db0a2651070f 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -388,21 +388,7 @@ static int dsa_slave_get_port_parent_id(struct net_device *dev, static int dsa_slave_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) { - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_switch *ds = dp->ds; - - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: - attr->u.brport_flags_support = 0; - if (ds->ops->port_egress_floods) - attr->u.brport_flags_support |= BR_FLOOD | - BR_MCAST_FLOOD; - break; - default: - return -EOPNOTSUPP; - } - - return 0; + return -EOPNOTSUPP; } static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev, From 010c8f01aa7fe18ea97d302e1c7e9ca83bc27433 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 20 Feb 2019 16:58:26 -0800 Subject: [PATCH 1410/1436] net: Get rid of switchdev_port_attr_get() With the bridge no longer calling switchdev_port_attr_get() to obtain the supported bridge port flags from a driver but instead trying to set the bridge port flags directly and relying on driver to reject unsupported configurations, we can effectively get rid of switchdev_port_attr_get() entirely since this was the only place where it was called. Signed-off-by: Florian Fainelli Reviewed-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- Documentation/networking/switchdev.txt | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 7 ------- drivers/net/ethernet/rocker/rocker_main.c | 7 ------- drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 7 ------- include/net/switchdev.h | 8 -------- net/dsa/slave.c | 7 ------- 6 files changed, 1 insertion(+), 37 deletions(-) diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index 79c8b0f16aee..413abbae952f 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -233,7 +233,7 @@ the bridge's FDB. It's possible, but not optimal, to enable learning on the device port and on the bridge port, and disable learning_sync. To support learning, the driver implements switchdev op -switchdev_port_attr_get/set for SWITCHDEV_ATTR_PORT_ID_BRIDGE_FLAGS. +switchdev_port_attr_set for SWITCHDEV_ATTR_PORT_ID_{PRE}_BRIDGE_FLAGS. FDB Ageing ^^^^^^^^^^ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index bbb5a406232e..766f5b5f1cf5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -431,12 +431,6 @@ static void mlxsw_sp_bridge_vlan_put(struct mlxsw_sp_bridge_vlan *bridge_vlan) mlxsw_sp_bridge_vlan_destroy(bridge_vlan); } -static int mlxsw_sp_port_attr_get(struct net_device *dev, - struct switchdev_attr *attr) -{ - return -EOPNOTSUPP; -} - static int mlxsw_sp_port_bridge_vlan_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_vlan *bridge_vlan, @@ -1945,7 +1939,6 @@ static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp, } static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = { - .switchdev_port_attr_get = mlxsw_sp_port_attr_get, .switchdev_port_attr_set = mlxsw_sp_port_attr_set, }; diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 6b8273e2057d..8200fbf91306 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2066,12 +2066,6 @@ static const struct net_device_ops rocker_port_netdev_ops = { * swdev interface ********************/ -static int rocker_port_attr_get(struct net_device *dev, - struct switchdev_attr *attr) -{ - return -EOPNOTSUPP; -} - static int rocker_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct switchdev_trans *trans) @@ -2148,7 +2142,6 @@ static int rocker_port_obj_del(struct net_device *dev, } static const struct switchdev_ops rocker_port_switchdev_ops = { - .switchdev_port_attr_get = rocker_port_attr_get, .switchdev_port_attr_set = rocker_port_attr_set, }; diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c index de4dcabbc29a..018399ee8731 100644 --- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c +++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c @@ -640,12 +640,6 @@ static void ethsw_teardown_irqs(struct fsl_mc_device *sw_dev) fsl_mc_free_irqs(sw_dev); } -static int swdev_port_attr_get(struct net_device *netdev, - struct switchdev_attr *attr) -{ - return -EOPNOTSUPP; -} - static int port_attr_stp_state_set(struct net_device *netdev, struct switchdev_trans *trans, u8 state) @@ -932,7 +926,6 @@ static int swdev_port_obj_del(struct net_device *netdev, } static const struct switchdev_ops ethsw_port_switchdev_ops = { - .switchdev_port_attr_get = swdev_port_attr_get, .switchdev_port_attr_set = swdev_port_attr_set, }; diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 0f352019ef99..45310ddf2d7e 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -179,8 +179,6 @@ switchdev_notifier_info_to_extack(const struct switchdev_notifier_info *info) #ifdef CONFIG_NET_SWITCHDEV void switchdev_deferred_process(void); -int switchdev_port_attr_get(struct net_device *dev, - struct switchdev_attr *attr); int switchdev_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr); int switchdev_port_obj_add(struct net_device *dev, @@ -225,12 +223,6 @@ static inline void switchdev_deferred_process(void) { } -static inline int switchdev_port_attr_get(struct net_device *dev, - struct switchdev_attr *attr) -{ - return -EOPNOTSUPP; -} - static inline int switchdev_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr) { diff --git a/net/dsa/slave.c b/net/dsa/slave.c index db0a2651070f..a78b2bba0332 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -385,12 +385,6 @@ static int dsa_slave_get_port_parent_id(struct net_device *dev, return 0; } -static int dsa_slave_port_attr_get(struct net_device *dev, - struct switchdev_attr *attr) -{ - return -EOPNOTSUPP; -} - static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev, struct sk_buff *skb) { @@ -1057,7 +1051,6 @@ static const struct net_device_ops dsa_slave_netdev_ops = { }; static const struct switchdev_ops dsa_slave_switchdev_ops = { - .switchdev_port_attr_get = dsa_slave_port_attr_get, .switchdev_port_attr_set = dsa_slave_port_attr_set, }; From 4ccb45857c2c0776d0f72e39768295062c1a0de1 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Tue, 19 Feb 2019 10:38:47 +0100 Subject: [PATCH 1411/1436] net: stmmac: Fix NAPI poll in TX path when in multi-queue Commit 8fce33317023 introduced the concept of NAPI per-channel and independent cleaning of TX path. This is currently breaking performance in some cases. The scenario happens when all packets are being received in Queue 0 but the TX is performed in Queue != 0. Fix this by using different NAPI instances per each TX and RX queue, as suggested by Florian. Changes from v2: - Only force restart transmission if there are pending packets Changes from v1: - Pass entire ring size to TX clean path (Florian) Signed-off-by: Jose Abreu Cc: Florian Fainelli Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 5 +- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 119 ++++++++++-------- 2 files changed, 70 insertions(+), 54 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index f9b42b0c20f4..dd95d959c1ce 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -79,11 +79,10 @@ struct stmmac_rx_queue { }; struct stmmac_channel { - struct napi_struct napi ____cacheline_aligned_in_smp; + struct napi_struct rx_napi ____cacheline_aligned_in_smp; + struct napi_struct tx_napi ____cacheline_aligned_in_smp; struct stmmac_priv *priv_data; u32 index; - int has_rx; - int has_tx; }; struct stmmac_tc_entry { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f12dd59c85cf..e2a13ec2e30b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -155,7 +155,10 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv) for (queue = 0; queue < maxq; queue++) { struct stmmac_channel *ch = &priv->channel[queue]; - napi_disable(&ch->napi); + if (queue < rx_queues_cnt) + napi_disable(&ch->rx_napi); + if (queue < tx_queues_cnt) + napi_disable(&ch->tx_napi); } } @@ -173,7 +176,10 @@ static void stmmac_enable_all_queues(struct stmmac_priv *priv) for (queue = 0; queue < maxq; queue++) { struct stmmac_channel *ch = &priv->channel[queue]; - napi_enable(&ch->napi); + if (queue < rx_queues_cnt) + napi_enable(&ch->rx_napi); + if (queue < tx_queues_cnt) + napi_enable(&ch->tx_napi); } } @@ -1955,6 +1961,10 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer)); } + /* We still have pending packets, let's call for a new scheduling */ + if (tx_q->dirty_tx != tx_q->cur_tx) + mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10)); + __netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue)); return count; @@ -2045,23 +2055,15 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan) int status = stmmac_dma_interrupt_status(priv, priv->ioaddr, &priv->xstats, chan); struct stmmac_channel *ch = &priv->channel[chan]; - bool needs_work = false; - if ((status & handle_rx) && ch->has_rx) { - needs_work = true; - } else { - status &= ~handle_rx; - } - - if ((status & handle_tx) && ch->has_tx) { - needs_work = true; - } else { - status &= ~handle_tx; - } - - if (needs_work && napi_schedule_prep(&ch->napi)) { + if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) { stmmac_disable_dma_irq(priv, priv->ioaddr, chan); - __napi_schedule(&ch->napi); + napi_schedule_irqoff(&ch->rx_napi); + } + + if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) { + stmmac_disable_dma_irq(priv, priv->ioaddr, chan); + napi_schedule_irqoff(&ch->tx_napi); } return status; @@ -2257,8 +2259,14 @@ static void stmmac_tx_timer(struct timer_list *t) ch = &priv->channel[tx_q->queue_index]; - if (likely(napi_schedule_prep(&ch->napi))) - __napi_schedule(&ch->napi); + /* + * If NAPI is already running we can miss some events. Let's rearm + * the timer and try again. + */ + if (likely(napi_schedule_prep(&ch->tx_napi))) + __napi_schedule(&ch->tx_napi); + else + mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10)); } /** @@ -3514,7 +3522,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) else skb->ip_summed = CHECKSUM_UNNECESSARY; - napi_gro_receive(&ch->napi, skb); + napi_gro_receive(&ch->rx_napi, skb); priv->dev->stats.rx_packets++; priv->dev->stats.rx_bytes += frame_len; @@ -3529,40 +3537,45 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) return count; } -/** - * stmmac_poll - stmmac poll method (NAPI) - * @napi : pointer to the napi structure. - * @budget : maximum number of packets that the current CPU can receive from - * all interfaces. - * Description : - * To look at the incoming frames and clear the tx resources. - */ -static int stmmac_napi_poll(struct napi_struct *napi, int budget) +static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget) { struct stmmac_channel *ch = - container_of(napi, struct stmmac_channel, napi); + container_of(napi, struct stmmac_channel, rx_napi); struct stmmac_priv *priv = ch->priv_data; - int work_done, rx_done = 0, tx_done = 0; u32 chan = ch->index; + int work_done; priv->xstats.napi_poll++; - if (ch->has_tx) - tx_done = stmmac_tx_clean(priv, budget, chan); - if (ch->has_rx) - rx_done = stmmac_rx(priv, budget, chan); + work_done = stmmac_rx(priv, budget, chan); + if (work_done < budget && napi_complete_done(napi, work_done)) + stmmac_enable_dma_irq(priv, priv->ioaddr, chan); + return work_done; +} - work_done = max(rx_done, tx_done); +static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) +{ + struct stmmac_channel *ch = + container_of(napi, struct stmmac_channel, tx_napi); + struct stmmac_priv *priv = ch->priv_data; + struct stmmac_tx_queue *tx_q; + u32 chan = ch->index; + int work_done; + + priv->xstats.napi_poll++; + + work_done = stmmac_tx_clean(priv, DMA_TX_SIZE, chan); work_done = min(work_done, budget); - if (work_done < budget && napi_complete_done(napi, work_done)) { - int stat; - + if (work_done < budget && napi_complete_done(napi, work_done)) stmmac_enable_dma_irq(priv, priv->ioaddr, chan); - stat = stmmac_dma_interrupt_status(priv, priv->ioaddr, - &priv->xstats, chan); - if (stat && napi_reschedule(napi)) - stmmac_disable_dma_irq(priv, priv->ioaddr, chan); + + /* Force transmission restart */ + tx_q = &priv->tx_queue[chan]; + if (tx_q->cur_tx != tx_q->dirty_tx) { + stmmac_enable_dma_transmission(priv, priv->ioaddr); + stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, + chan); } return work_done; @@ -4342,13 +4355,14 @@ int stmmac_dvr_probe(struct device *device, ch->priv_data = priv; ch->index = queue; - if (queue < priv->plat->rx_queues_to_use) - ch->has_rx = true; - if (queue < priv->plat->tx_queues_to_use) - ch->has_tx = true; - - netif_napi_add(ndev, &ch->napi, stmmac_napi_poll, - NAPI_POLL_WEIGHT); + if (queue < priv->plat->rx_queues_to_use) { + netif_napi_add(ndev, &ch->rx_napi, stmmac_napi_poll_rx, + NAPI_POLL_WEIGHT); + } + if (queue < priv->plat->tx_queues_to_use) { + netif_napi_add(ndev, &ch->tx_napi, stmmac_napi_poll_tx, + NAPI_POLL_WEIGHT); + } } mutex_init(&priv->lock); @@ -4404,7 +4418,10 @@ error_mdio_register: for (queue = 0; queue < maxq; queue++) { struct stmmac_channel *ch = &priv->channel[queue]; - netif_napi_del(&ch->napi); + if (queue < priv->plat->rx_queues_to_use) + netif_napi_del(&ch->rx_napi); + if (queue < priv->plat->tx_queues_to_use) + netif_napi_del(&ch->tx_napi); } error_hw_init: destroy_workqueue(priv->wq); From 1103d3a5531ce60fa1f8ecaaa95d6f3558b77a29 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Tue, 19 Feb 2019 10:38:48 +0100 Subject: [PATCH 1412/1436] net: stmmac: dwmac4: Also use TBU interrupt to clean TX path TBU interrupt is a normal interrupt and can be used to trigger the cleaning of TX path. Lets check if it's active in DMA interrupt handler. While at it, refactor a little bit the function: - Don't check if RI is enabled because at function exit we will only clear the interrupts that are enabled so, no event will be missed. In my tests with GMAC5 this increased performance. Signed-off-by: Jose Abreu Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/dwmac4_lib.c | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index 49f5687879df..545cb9c47433 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -124,9 +124,9 @@ void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan) int dwmac4_dma_interrupt(void __iomem *ioaddr, struct stmmac_extra_stats *x, u32 chan) { - int ret = 0; - u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(chan)); + u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); + int ret = 0; /* ABNORMAL interrupts */ if (unlikely(intr_status & DMA_CHAN_STATUS_AIS)) { @@ -151,16 +151,11 @@ int dwmac4_dma_interrupt(void __iomem *ioaddr, if (likely(intr_status & DMA_CHAN_STATUS_NIS)) { x->normal_irq_n++; if (likely(intr_status & DMA_CHAN_STATUS_RI)) { - u32 value; - - value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); - /* to schedule NAPI on real RIE event. */ - if (likely(value & DMA_CHAN_INTR_ENA_RIE)) { - x->rx_normal_irq_n++; - ret |= handle_rx; - } + x->rx_normal_irq_n++; + ret |= handle_rx; } - if (likely(intr_status & DMA_CHAN_STATUS_TI)) { + if (likely(intr_status & (DMA_CHAN_STATUS_TI | + DMA_CHAN_STATUS_TBU))) { x->tx_normal_irq_n++; ret |= handle_tx; } @@ -168,12 +163,7 @@ int dwmac4_dma_interrupt(void __iomem *ioaddr, x->rx_early_irq++; } - /* Clear the interrupt by writing a logic 1 to the chanX interrupt - * status [21-0] expect reserved bits [5-3] - */ - writel((intr_status & 0x3fffc7), - ioaddr + DMA_CHAN_STATUS(chan)); - + writel(intr_status & intr_en, ioaddr + DMA_CHAN_STATUS(chan)); return ret; } From ae9f346dd38cc10d083fce4175ba6c8f8e06fe00 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Tue, 19 Feb 2019 10:38:49 +0100 Subject: [PATCH 1413/1436] net: stmmac: dwxgmac2: Also use TBU interrupt to clean TX path TBU interrupt is a normal interrupt and can be used to trigger the cleaning of TX path. Lets check if it's active in DMA interrupt handler. While at it, refactor a little bit the function: - Don't check if RI is enabled because at function exit we will only clear the interrupts that are enabled so, no event will be missed. In my tests withe XGMAC2 this increased performance. Signed-off-by: Jose Abreu Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h | 4 +++- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 8 +++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index d6bb953685fa..37d5e6fe7473 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -193,9 +193,10 @@ #define XGMAC_AIE BIT(14) #define XGMAC_RBUE BIT(7) #define XGMAC_RIE BIT(6) +#define XGMAC_TBUE BIT(2) #define XGMAC_TIE BIT(0) #define XGMAC_DMA_INT_DEFAULT_EN (XGMAC_NIE | XGMAC_AIE | XGMAC_RBUE | \ - XGMAC_RIE | XGMAC_TIE) + XGMAC_RIE | XGMAC_TBUE | XGMAC_TIE) #define XGMAC_DMA_CH_Rx_WATCHDOG(x) (0x0000313c + (0x80 * (x))) #define XGMAC_RWT GENMASK(7, 0) #define XGMAC_DMA_CH_STATUS(x) (0x00003160 + (0x80 * (x))) @@ -204,6 +205,7 @@ #define XGMAC_FBE BIT(12) #define XGMAC_RBU BIT(7) #define XGMAC_RI BIT(6) +#define XGMAC_TBU BIT(2) #define XGMAC_TPS BIT(1) #define XGMAC_TI BIT(0) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index c5e25580a43f..2ba712b48a89 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -283,12 +283,10 @@ static int dwxgmac2_dma_interrupt(void __iomem *ioaddr, x->normal_irq_n++; if (likely(intr_status & XGMAC_RI)) { - if (likely(intr_en & XGMAC_RIE)) { - x->rx_normal_irq_n++; - ret |= handle_rx; - } + x->rx_normal_irq_n++; + ret |= handle_rx; } - if (likely(intr_status & XGMAC_TI)) { + if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) { x->tx_normal_irq_n++; ret |= handle_tx; } From c39f3e0e4f15714f29cf3c089cdb6e179997a415 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 20 Feb 2019 19:32:12 +0000 Subject: [PATCH 1414/1436] mlxsw: spectrum: Add struct mlxsw_sp_sb_vals Spectrum-2 will be configured with a different shared buffer configuration than Spectrum-1. Therefore introduce a structure for keeping the chip-specific default and immutable configuration. Configuration mutable in runtime will still be kept in struct mlxsw_sp_sb. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 ++ drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 5 +++++ drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c | 9 +++++++++ 3 files changed, 16 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 7ee6d747e97b..c1ec4f89973b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4092,6 +4092,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr; mlxsw_sp->mac_mask = mlxsw_sp1_mac_mask; mlxsw_sp->rif_ops_arr = mlxsw_sp1_rif_ops_arr; + mlxsw_sp->sb_vals = &mlxsw_sp1_sb_vals; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -4109,6 +4110,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr; mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask; mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr; + mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index ceebc91f4f1d..976843917c95 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -133,6 +133,7 @@ struct mlxsw_sp_kvdl_ops; struct mlxsw_sp_mr_tcam_ops; struct mlxsw_sp_acl_tcam_ops; struct mlxsw_sp_nve_ops; +struct mlxsw_sp_sb_vals; struct mlxsw_sp { struct mlxsw_sp_port **ports; @@ -167,6 +168,7 @@ struct mlxsw_sp { const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops; const struct mlxsw_sp_nve_ops **nve_ops_arr; const struct mlxsw_sp_rif_ops **rif_ops_arr; + const struct mlxsw_sp_sb_vals *sb_vals; }; static inline struct mlxsw_sp_upper * @@ -372,6 +374,9 @@ int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells); u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes); +extern const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals; +extern const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals; + /* spectrum_switchdev.c */ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 80066f437a65..cd23ec9268bc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -73,6 +73,9 @@ struct mlxsw_sp_sb { u64 sb_size; }; +struct mlxsw_sp_sb_vals { +}; + u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells) { return mlxsw_sp->sb->cell_size * cells; @@ -625,6 +628,12 @@ out: *p_egress_len = MLXSW_SP_SB_POOL_DESS_LEN - i; } +const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals = { +}; + +const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals = { +}; + int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) { u16 ing_pool_count; From 93d201f7757e0ec079bc72344fe3577b7333a585 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 20 Feb 2019 19:32:14 +0000 Subject: [PATCH 1415/1436] mlxsw: spectrum_buffers: Allocate prs & pms dynamically Spectrum-2 will be configured with a different set of pools than Spectrum-1. The size of prs and pms buffers will therefore depend on the chip type of the device. Therefore, instead of reserving an array directly in a structure definition, allocate the buffer in mlxsw_sp_sb_port{,s}_init(). Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_buffers.c | 52 ++++++++++++++++++- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index cd23ec9268bc..7fa291ebcddf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -63,11 +63,11 @@ static const struct mlxsw_sp_sb_pool_des mlxsw_sp_sb_pool_dess[] = { struct mlxsw_sp_sb_port { struct mlxsw_sp_sb_cm ing_cms[MLXSW_SP_SB_ING_TC_COUNT]; struct mlxsw_sp_sb_cm eg_cms[MLXSW_SP_SB_EG_TC_COUNT]; - struct mlxsw_sp_sb_pm pms[MLXSW_SP_SB_POOL_DESS_LEN]; + struct mlxsw_sp_sb_pm *pms; }; struct mlxsw_sp_sb { - struct mlxsw_sp_sb_pr prs[MLXSW_SP_SB_POOL_DESS_LEN]; + struct mlxsw_sp_sb_pr *prs; struct mlxsw_sp_sb_port *ports; u32 cell_size; u64 sb_size; @@ -283,20 +283,68 @@ static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port) return mlxsw_sp_port_pb_prio_init(mlxsw_sp_port); } +static int mlxsw_sp_sb_port_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_sb_port *sb_port) +{ + struct mlxsw_sp_sb_pm *pms; + + pms = kcalloc(MLXSW_SP_SB_POOL_DESS_LEN, sizeof(*pms), GFP_KERNEL); + if (!pms) + return -ENOMEM; + sb_port->pms = pms; + return 0; +} + +static void mlxsw_sp_sb_port_fini(struct mlxsw_sp_sb_port *sb_port) +{ + kfree(sb_port->pms); +} + static int mlxsw_sp_sb_ports_init(struct mlxsw_sp *mlxsw_sp) { unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); + struct mlxsw_sp_sb_pr *prs; + int i; + int err; mlxsw_sp->sb->ports = kcalloc(max_ports, sizeof(struct mlxsw_sp_sb_port), GFP_KERNEL); if (!mlxsw_sp->sb->ports) return -ENOMEM; + + prs = kcalloc(MLXSW_SP_SB_POOL_DESS_LEN, sizeof(*prs), GFP_KERNEL); + if (!prs) { + err = -ENOMEM; + goto err_alloc_prs; + } + mlxsw_sp->sb->prs = prs; + + for (i = 0; i < max_ports; i++) { + err = mlxsw_sp_sb_port_init(mlxsw_sp, &mlxsw_sp->sb->ports[i]); + if (err) + goto err_sb_port_init; + } + return 0; + +err_sb_port_init: + for (i--; i >= 0; i--) + mlxsw_sp_sb_port_fini(&mlxsw_sp->sb->ports[i]); + kfree(mlxsw_sp->sb->prs); +err_alloc_prs: + kfree(mlxsw_sp->sb->ports); + return err; } static void mlxsw_sp_sb_ports_fini(struct mlxsw_sp *mlxsw_sp) { + int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); + int i; + + for (i = max_ports - 1; i >= 0; i--) + mlxsw_sp_sb_port_fini(&mlxsw_sp->sb->ports[i]); + kfree(mlxsw_sp->sb->prs); kfree(mlxsw_sp->sb->ports); } From 5d65f5f45eaff091a2409e67225983dad4a33423 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 20 Feb 2019 19:32:16 +0000 Subject: [PATCH 1416/1436] mlxsw: spectrum_buffers: Keep pool descriptors in mlxsw_sp_sb_vals Keep the table of pool descriptors and its length in struct mlxsw_sp_sb_vals so that it can be specialized per chip type. Redirect all users from the global definitions to the mlxsw_sp_sb fields. Give mlxsw_sp_pool_count() an extra mlxsw_sp parameter so that it can access the descriptor table. Drop the now unnecessary MLXSW_SP_SB_POOL_DESS_LEN. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_buffers.c | 50 +++++++++++-------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 7fa291ebcddf..72126d904013 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -55,8 +55,6 @@ static const struct mlxsw_sp_sb_pool_des mlxsw_sp_sb_pool_dess[] = { {MLXSW_REG_SBXX_DIR_EGRESS, 15}, }; -#define MLXSW_SP_SB_POOL_DESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pool_dess) - #define MLXSW_SP_SB_ING_TC_COUNT 8 #define MLXSW_SP_SB_EG_TC_COUNT 16 @@ -74,6 +72,8 @@ struct mlxsw_sp_sb { }; struct mlxsw_sp_sb_vals { + unsigned int pool_count; + const struct mlxsw_sp_sb_pool_des *pool_dess; }; u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells) @@ -124,7 +124,7 @@ static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u16 pool_index, u32 size, bool infi_size) { const struct mlxsw_sp_sb_pool_des *des = - &mlxsw_sp_sb_pool_dess[pool_index]; + &mlxsw_sp->sb_vals->pool_dess[pool_index]; char sbpr_pl[MLXSW_REG_SBPR_LEN]; struct mlxsw_sp_sb_pr *pr; int err; @@ -148,7 +148,7 @@ static int mlxsw_sp_sb_cm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port, bool infi_max, u16 pool_index) { const struct mlxsw_sp_sb_pool_des *des = - &mlxsw_sp_sb_pool_dess[pool_index]; + &mlxsw_sp->sb_vals->pool_dess[pool_index]; char sbcm_pl[MLXSW_REG_SBCM_LEN]; struct mlxsw_sp_sb_cm *cm; int err; @@ -177,7 +177,7 @@ static int mlxsw_sp_sb_pm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port, u16 pool_index, u32 min_buff, u32 max_buff) { const struct mlxsw_sp_sb_pool_des *des = - &mlxsw_sp_sb_pool_dess[pool_index]; + &mlxsw_sp->sb_vals->pool_dess[pool_index]; char sbpm_pl[MLXSW_REG_SBPM_LEN]; struct mlxsw_sp_sb_pm *pm; int err; @@ -198,7 +198,7 @@ static int mlxsw_sp_sb_pm_occ_clear(struct mlxsw_sp *mlxsw_sp, u8 local_port, u16 pool_index, struct list_head *bulk_list) { const struct mlxsw_sp_sb_pool_des *des = - &mlxsw_sp_sb_pool_dess[pool_index]; + &mlxsw_sp->sb_vals->pool_dess[pool_index]; char sbpm_pl[MLXSW_REG_SBPM_LEN]; mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir, @@ -220,7 +220,7 @@ static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port, u16 pool_index, struct list_head *bulk_list) { const struct mlxsw_sp_sb_pool_des *des = - &mlxsw_sp_sb_pool_dess[pool_index]; + &mlxsw_sp->sb_vals->pool_dess[pool_index]; char sbpm_pl[MLXSW_REG_SBPM_LEN]; struct mlxsw_sp_sb_pm *pm; @@ -288,7 +288,8 @@ static int mlxsw_sp_sb_port_init(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_sb_pm *pms; - pms = kcalloc(MLXSW_SP_SB_POOL_DESS_LEN, sizeof(*pms), GFP_KERNEL); + pms = kcalloc(mlxsw_sp->sb_vals->pool_count, sizeof(*pms), + GFP_KERNEL); if (!pms) return -ENOMEM; sb_port->pms = pms; @@ -313,7 +314,8 @@ static int mlxsw_sp_sb_ports_init(struct mlxsw_sp *mlxsw_sp) if (!mlxsw_sp->sb->ports) return -ENOMEM; - prs = kcalloc(MLXSW_SP_SB_POOL_DESS_LEN, sizeof(*prs), GFP_KERNEL); + prs = kcalloc(mlxsw_sp->sb_vals->pool_count, sizeof(*prs), + GFP_KERNEL); if (!prs) { err = -ENOMEM; goto err_alloc_prs; @@ -498,6 +500,7 @@ static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, const struct mlxsw_sp_sb_cm *cms, size_t cms_len) { + const struct mlxsw_sp_sb_vals *sb_vals = mlxsw_sp->sb_vals; int i; int err; @@ -509,7 +512,7 @@ static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, if (i == 8 && dir == MLXSW_REG_SBXX_DIR_INGRESS) continue; /* PG number 8 does not exist, skip it */ cm = &cms[i]; - if (WARN_ON(mlxsw_sp_sb_pool_dess[cm->pool_index].dir != dir)) + if (WARN_ON(sb_vals->pool_dess[cm->pool_index].dir != dir)) continue; min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, cm->min_buff); @@ -648,7 +651,7 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) u32 min_buff; mc = &mlxsw_sp_sb_mms[i]; - des = &mlxsw_sp_sb_pool_dess[mc->pool_index]; + des = &mlxsw_sp->sb_vals->pool_dess[mc->pool_index]; /* All pools used by sb_mm's are initialized using dynamic * thresholds, therefore 'max_buff' isn't specified in cells. */ @@ -662,24 +665,30 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) return 0; } -static void mlxsw_sp_pool_count(u16 *p_ingress_len, u16 *p_egress_len) +static void mlxsw_sp_pool_count(struct mlxsw_sp *mlxsw_sp, + u16 *p_ingress_len, u16 *p_egress_len) { int i; - for (i = 0; i < MLXSW_SP_SB_POOL_DESS_LEN; ++i) - if (mlxsw_sp_sb_pool_dess[i].dir == MLXSW_REG_SBXX_DIR_EGRESS) + for (i = 0; i < mlxsw_sp->sb_vals->pool_count; ++i) + if (mlxsw_sp->sb_vals->pool_dess[i].dir == + MLXSW_REG_SBXX_DIR_EGRESS) goto out; WARN(1, "No egress pools\n"); out: *p_ingress_len = i; - *p_egress_len = MLXSW_SP_SB_POOL_DESS_LEN - i; + *p_egress_len = mlxsw_sp->sb_vals->pool_count - i; } const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals = { + .pool_count = ARRAY_SIZE(mlxsw_sp_sb_pool_dess), + .pool_dess = mlxsw_sp_sb_pool_dess, }; const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals = { + .pool_count = ARRAY_SIZE(mlxsw_sp_sb_pool_dess), + .pool_dess = mlxsw_sp_sb_pool_dess, }; int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) @@ -714,7 +723,7 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) err = mlxsw_sp_sb_mms_init(mlxsw_sp); if (err) goto err_sb_mms_init; - mlxsw_sp_pool_count(&ing_pool_count, &eg_pool_count); + mlxsw_sp_pool_count(mlxsw_sp, &ing_pool_count, &eg_pool_count); err = devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, mlxsw_sp->sb->sb_size, ing_pool_count, @@ -762,10 +771,11 @@ int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, unsigned int sb_index, u16 pool_index, struct devlink_sb_pool_info *pool_info) { - enum mlxsw_reg_sbxx_dir dir = mlxsw_sp_sb_pool_dess[pool_index].dir; struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + enum mlxsw_reg_sbxx_dir dir; struct mlxsw_sp_sb_pr *pr; + dir = mlxsw_sp->sb_vals->pool_dess[pool_index].dir; pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index); pool_info->pool_type = (enum devlink_sb_pool_type) dir; pool_info->size = mlxsw_sp_cells_bytes(mlxsw_sp, pr->size); @@ -891,7 +901,7 @@ int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, u32 max_buff; int err; - if (dir != mlxsw_sp_sb_pool_dess[pool_index].dir) + if (dir != mlxsw_sp->sb_vals->pool_dess[pool_index].dir) return -EINVAL; err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool_index, @@ -989,7 +999,7 @@ next_batch: continue; mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); - for (i = 0; i < MLXSW_SP_SB_POOL_DESS_LEN; i++) { + for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) { err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i, &bulk_list); if (err) @@ -1048,7 +1058,7 @@ next_batch: continue; mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); - for (i = 0; i < MLXSW_SP_SB_POOL_DESS_LEN; i++) { + for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) { err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i, &bulk_list); if (err) From cc1ce6ff3453d7b188bd0c4ca186cf9ebd071e82 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 20 Feb 2019 19:32:18 +0000 Subject: [PATCH 1417/1436] mlxsw: spectrum_buffers: Keep mlxsw_sp_sb_pms in mlxsw_sp_sb_vals The SBPM register can be used to configure quotas for packets ingressing from a certain pool to a certain port, and egressing from a certain pool to a certain port. The default configuration depends on the chip type. Therefore keep it in struct mlxsw_sp_sb_vals. Redirect the one reference from the global array to the field. Because the pool descriptor ID is implicit in the ordering of array members, both this array and the pool descriptor array have the same length. Therefore reuse mlxsw_sp_sb.pool_dess_len for the purpose of determining the length of SBPM array. Drop the now useless MLXSW_SP_SB_PMS_LEN. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 72126d904013..672c27c79e8a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -74,6 +74,7 @@ struct mlxsw_sp_sb { struct mlxsw_sp_sb_vals { unsigned int pool_count; const struct mlxsw_sp_sb_pool_des *pool_dess; + const struct mlxsw_sp_sb_pm *pms; }; u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells) @@ -581,16 +582,14 @@ static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms[] = { MLXSW_SP_SB_PM(10000, 90000), }; -#define MLXSW_SP_SB_PMS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms) - static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; int i; int err; - for (i = 0; i < MLXSW_SP_SB_PMS_LEN; i++) { - const struct mlxsw_sp_sb_pm *pm = &mlxsw_sp_sb_pms[i]; + for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) { + const struct mlxsw_sp_sb_pm *pm = &mlxsw_sp->sb_vals->pms[i]; u32 max_buff; u32 min_buff; @@ -684,11 +683,13 @@ out: const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals = { .pool_count = ARRAY_SIZE(mlxsw_sp_sb_pool_dess), .pool_dess = mlxsw_sp_sb_pool_dess, + .pms = mlxsw_sp_sb_pms, }; const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals = { .pool_count = ARRAY_SIZE(mlxsw_sp_sb_pool_dess), .pool_dess = mlxsw_sp_sb_pool_dess, + .pms = mlxsw_sp_sb_pms, }; int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) From 5d25232eb925b73c4f632923821006d0d9ffb9aa Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 20 Feb 2019 19:32:20 +0000 Subject: [PATCH 1418/1436] mlxsw: spectrum_buffers: Keep mlxsw_sp_sb_prs in mlxsw_sp_sb_vals The SBPR register configures shared buffer pools. The default configuration depends on the chip type. Therefore keep it in struct mlxsw_sp_sb_vals. Redirect the one reference from the global array to the field. Because the pool descriptor ID is implicit in the ordering of array members, both this array and the pool descriptor array have the same length. Therefore reuse mlxsw_sp_sb.pool_dess_len for the purpose of determining the length of SBPR array. Drop the now useless MLXSW_SP_SB_PRS_LEN. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 672c27c79e8a..c100ce11417a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -75,6 +75,7 @@ struct mlxsw_sp_sb_vals { unsigned int pool_count; const struct mlxsw_sp_sb_pool_des *pool_dess; const struct mlxsw_sp_sb_pm *pms; + const struct mlxsw_sp_sb_pr *prs; }; u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells) @@ -377,8 +378,6 @@ static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs[] = { MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI), }; -#define MLXSW_SP_SB_PRS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs) - static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_sb_pr *prs, size_t prs_len) @@ -684,12 +683,14 @@ const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals = { .pool_count = ARRAY_SIZE(mlxsw_sp_sb_pool_dess), .pool_dess = mlxsw_sp_sb_pool_dess, .pms = mlxsw_sp_sb_pms, + .prs = mlxsw_sp_sb_prs, }; const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals = { .pool_count = ARRAY_SIZE(mlxsw_sp_sb_pool_dess), .pool_dess = mlxsw_sp_sb_pool_dess, .pms = mlxsw_sp_sb_pms, + .prs = mlxsw_sp_sb_prs, }; int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) @@ -710,12 +711,11 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) mlxsw_sp->sb->cell_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, CELL_SIZE); mlxsw_sp->sb->sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE); - err = mlxsw_sp_sb_ports_init(mlxsw_sp); if (err) goto err_sb_ports_init; - err = mlxsw_sp_sb_prs_init(mlxsw_sp, mlxsw_sp_sb_prs, - MLXSW_SP_SB_PRS_LEN); + err = mlxsw_sp_sb_prs_init(mlxsw_sp, mlxsw_sp->sb_vals->prs, + mlxsw_sp->sb_vals->pool_count); if (err) goto err_sb_prs_init; err = mlxsw_sp_cpu_port_sb_cms_init(mlxsw_sp); From bb60a62e0247571e3f4224b792930711e18950ce Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 20 Feb 2019 19:32:22 +0000 Subject: [PATCH 1419/1436] mlxsw: spectrum_buffers: Keep mlxsw_sp_sb_cm in sb_vals The SBCM register configures shared buffer quota according to port-priority resp. port-TC. The default configuration depends on the chip type. Therefore keep the tables and their lengths in struct mlxsw_sp_sb_vals. Redirect the references from the global definitions to the fields. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_buffers.c | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index c100ce11417a..18b182656df2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -76,6 +76,12 @@ struct mlxsw_sp_sb_vals { const struct mlxsw_sp_sb_pool_des *pool_dess; const struct mlxsw_sp_sb_pm *pms; const struct mlxsw_sp_sb_pr *prs; + const struct mlxsw_sp_sb_cm *cms_ingress; + const struct mlxsw_sp_sb_cm *cms_egress; + const struct mlxsw_sp_sb_cm *cms_cpu; + unsigned int cms_ingress_count; + unsigned int cms_egress_count; + unsigned int cms_cpu_count; }; u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells) @@ -423,8 +429,6 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = { MLXSW_SP_SB_CM(20000, 1, 3), }; -#define MLXSW_SP_SB_CMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_ingress) - static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = { MLXSW_SP_SB_CM(1500, 9, 4), MLXSW_SP_SB_CM(1500, 9, 4), @@ -445,8 +449,6 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = { MLXSW_SP_SB_CM(1, 0xff, 4), }; -#define MLXSW_SP_SB_CMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_egress) - #define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, 4) static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { @@ -484,9 +486,6 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { MLXSW_SP_CPU_PORT_SB_CM, }; -#define MLXSW_SP_CPU_PORT_SB_MCS_LEN \ - ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms) - static bool mlxsw_sp_sb_pool_is_static(struct mlxsw_sp *mlxsw_sp, u16 pool_index) { @@ -538,27 +537,28 @@ static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, static int mlxsw_sp_port_sb_cms_init(struct mlxsw_sp_port *mlxsw_sp_port) { + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; int err; - err = __mlxsw_sp_sb_cms_init(mlxsw_sp_port->mlxsw_sp, + err = __mlxsw_sp_sb_cms_init(mlxsw_sp, mlxsw_sp_port->local_port, MLXSW_REG_SBXX_DIR_INGRESS, - mlxsw_sp_sb_cms_ingress, - MLXSW_SP_SB_CMS_INGRESS_LEN); + mlxsw_sp->sb_vals->cms_ingress, + mlxsw_sp->sb_vals->cms_ingress_count); if (err) return err; return __mlxsw_sp_sb_cms_init(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_port->local_port, MLXSW_REG_SBXX_DIR_EGRESS, - mlxsw_sp_sb_cms_egress, - MLXSW_SP_SB_CMS_EGRESS_LEN); + mlxsw_sp->sb_vals->cms_egress, + mlxsw_sp->sb_vals->cms_egress_count); } static int mlxsw_sp_cpu_port_sb_cms_init(struct mlxsw_sp *mlxsw_sp) { return __mlxsw_sp_sb_cms_init(mlxsw_sp, 0, MLXSW_REG_SBXX_DIR_EGRESS, - mlxsw_sp_cpu_port_sb_cms, - MLXSW_SP_CPU_PORT_SB_MCS_LEN); + mlxsw_sp->sb_vals->cms_cpu, + mlxsw_sp->sb_vals->cms_cpu_count); } #define MLXSW_SP_SB_PM(_min_buff, _max_buff) \ @@ -684,6 +684,12 @@ const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals = { .pool_dess = mlxsw_sp_sb_pool_dess, .pms = mlxsw_sp_sb_pms, .prs = mlxsw_sp_sb_prs, + .cms_ingress = mlxsw_sp_sb_cms_ingress, + .cms_egress = mlxsw_sp_sb_cms_egress, + .cms_cpu = mlxsw_sp_cpu_port_sb_cms, + .cms_ingress_count = ARRAY_SIZE(mlxsw_sp_sb_cms_ingress), + .cms_egress_count = ARRAY_SIZE(mlxsw_sp_sb_cms_egress), + .cms_cpu_count = ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms), }; const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals = { @@ -691,6 +697,12 @@ const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals = { .pool_dess = mlxsw_sp_sb_pool_dess, .pms = mlxsw_sp_sb_pms, .prs = mlxsw_sp_sb_prs, + .cms_ingress = mlxsw_sp_sb_cms_ingress, + .cms_egress = mlxsw_sp_sb_cms_egress, + .cms_cpu = mlxsw_sp_cpu_port_sb_cms, + .cms_ingress_count = ARRAY_SIZE(mlxsw_sp_sb_cms_ingress), + .cms_egress_count = ARRAY_SIZE(mlxsw_sp_sb_cms_egress), + .cms_cpu_count = ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms), }; int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) From 13f35cc4245c8de3bdae3e5529c0904f3f80da3f Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 20 Feb 2019 19:32:23 +0000 Subject: [PATCH 1420/1436] mlxsw: spectrum_buffers: Keep mlxsw_sp_sb_mm in sb_vals The SBMM register configures the shared buffer quota for MC packets according to Switch-Priority. The default configuration depends on the chip type. Therefore keep the table and length in struct mlxsw_sp_sb_vals. Redirect the references from the global definitions to the fields. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_buffers.c | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 18b182656df2..5194fc8f80cc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -37,6 +37,12 @@ struct mlxsw_sp_sb_pm { struct mlxsw_cp_sb_occ occ; }; +struct mlxsw_sp_sb_mm { + u32 min_buff; + u32 max_buff; + u16 pool_index; +}; + struct mlxsw_sp_sb_pool_des { enum mlxsw_reg_sbxx_dir dir; u8 pool; @@ -76,9 +82,11 @@ struct mlxsw_sp_sb_vals { const struct mlxsw_sp_sb_pool_des *pool_dess; const struct mlxsw_sp_sb_pm *pms; const struct mlxsw_sp_sb_pr *prs; + const struct mlxsw_sp_sb_mm *mms; const struct mlxsw_sp_sb_cm *cms_ingress; const struct mlxsw_sp_sb_cm *cms_egress; const struct mlxsw_sp_sb_cm *cms_cpu; + unsigned int mms_count; unsigned int cms_ingress_count; unsigned int cms_egress_count; unsigned int cms_cpu_count; @@ -604,12 +612,6 @@ static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) return 0; } -struct mlxsw_sp_sb_mm { - u32 min_buff; - u32 max_buff; - u16 pool_index; -}; - #define MLXSW_SP_SB_MM(_min_buff, _max_buff, _pool) \ { \ .min_buff = _min_buff, \ @@ -635,20 +637,18 @@ static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = { MLXSW_SP_SB_MM(0, 6, 4), }; -#define MLXSW_SP_SB_MMS_LEN ARRAY_SIZE(mlxsw_sp_sb_mms) - static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) { char sbmm_pl[MLXSW_REG_SBMM_LEN]; int i; int err; - for (i = 0; i < MLXSW_SP_SB_MMS_LEN; i++) { + for (i = 0; i < mlxsw_sp->sb_vals->mms_count; i++) { const struct mlxsw_sp_sb_pool_des *des; const struct mlxsw_sp_sb_mm *mc; u32 min_buff; - mc = &mlxsw_sp_sb_mms[i]; + mc = &mlxsw_sp->sb_vals->mms[i]; des = &mlxsw_sp->sb_vals->pool_dess[mc->pool_index]; /* All pools used by sb_mm's are initialized using dynamic * thresholds, therefore 'max_buff' isn't specified in cells. @@ -684,9 +684,11 @@ const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals = { .pool_dess = mlxsw_sp_sb_pool_dess, .pms = mlxsw_sp_sb_pms, .prs = mlxsw_sp_sb_prs, + .mms = mlxsw_sp_sb_mms, .cms_ingress = mlxsw_sp_sb_cms_ingress, .cms_egress = mlxsw_sp_sb_cms_egress, .cms_cpu = mlxsw_sp_cpu_port_sb_cms, + .mms_count = ARRAY_SIZE(mlxsw_sp_sb_mms), .cms_ingress_count = ARRAY_SIZE(mlxsw_sp_sb_cms_ingress), .cms_egress_count = ARRAY_SIZE(mlxsw_sp_sb_cms_egress), .cms_cpu_count = ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms), @@ -697,9 +699,11 @@ const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals = { .pool_dess = mlxsw_sp_sb_pool_dess, .pms = mlxsw_sp_sb_pms, .prs = mlxsw_sp_sb_prs, + .mms = mlxsw_sp_sb_mms, .cms_ingress = mlxsw_sp_sb_cms_ingress, .cms_egress = mlxsw_sp_sb_cms_egress, .cms_cpu = mlxsw_sp_cpu_port_sb_cms, + .mms_count = ARRAY_SIZE(mlxsw_sp_sb_mms), .cms_ingress_count = ARRAY_SIZE(mlxsw_sp_sb_cms_ingress), .cms_egress_count = ARRAY_SIZE(mlxsw_sp_sb_cms_egress), .cms_cpu_count = ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms), From fe099bf682abbec77d0be73f62423ad794d88a04 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 20 Feb 2019 19:32:25 +0000 Subject: [PATCH 1421/1436] mlxsw: spectrum_buffers: Add Spectrum-2 shared buffer configuration Customize the tables related to shared buffer configuration to match the current recommendation for Spectrum-2 systems. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_buffers.c | 134 ++++++++++++++---- 1 file changed, 106 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 5194fc8f80cc..bb327dfe1336 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -49,7 +49,7 @@ struct mlxsw_sp_sb_pool_des { }; /* Order ingress pools before egress pools. */ -static const struct mlxsw_sp_sb_pool_des mlxsw_sp_sb_pool_dess[] = { +static const struct mlxsw_sp_sb_pool_des mlxsw_sp1_sb_pool_dess[] = { {MLXSW_REG_SBXX_DIR_INGRESS, 0}, {MLXSW_REG_SBXX_DIR_INGRESS, 1}, {MLXSW_REG_SBXX_DIR_INGRESS, 2}, @@ -61,6 +61,17 @@ static const struct mlxsw_sp_sb_pool_des mlxsw_sp_sb_pool_dess[] = { {MLXSW_REG_SBXX_DIR_EGRESS, 15}, }; +static const struct mlxsw_sp_sb_pool_des mlxsw_sp2_sb_pool_dess[] = { + {MLXSW_REG_SBXX_DIR_INGRESS, 0}, + {MLXSW_REG_SBXX_DIR_INGRESS, 1}, + {MLXSW_REG_SBXX_DIR_INGRESS, 2}, + {MLXSW_REG_SBXX_DIR_INGRESS, 3}, + {MLXSW_REG_SBXX_DIR_EGRESS, 0}, + {MLXSW_REG_SBXX_DIR_EGRESS, 1}, + {MLXSW_REG_SBXX_DIR_EGRESS, 2}, + {MLXSW_REG_SBXX_DIR_EGRESS, 3}, +}; + #define MLXSW_SP_SB_ING_TC_COUNT 8 #define MLXSW_SP_SB_EG_TC_COUNT 16 @@ -366,32 +377,53 @@ static void mlxsw_sp_sb_ports_fini(struct mlxsw_sp *mlxsw_sp) kfree(mlxsw_sp->sb->ports); } -#define MLXSW_SP_SB_PR_INGRESS_SIZE 12440000 -#define MLXSW_SP_SB_PR_INGRESS_MNG_SIZE (200 * 1000) -#define MLXSW_SP_SB_PR_EGRESS_SIZE 13232000 - #define MLXSW_SP_SB_PR(_mode, _size) \ { \ .mode = _mode, \ .size = _size, \ } -static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs[] = { +#define MLXSW_SP1_SB_PR_INGRESS_SIZE 12440000 +#define MLXSW_SP1_SB_PR_INGRESS_MNG_SIZE (200 * 1000) +#define MLXSW_SP1_SB_PR_EGRESS_SIZE 13232000 + +static const struct mlxsw_sp_sb_pr mlxsw_sp1_sb_prs[] = { /* Ingress pools. */ MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, - MLXSW_SP_SB_PR_INGRESS_SIZE), + MLXSW_SP1_SB_PR_INGRESS_SIZE), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, - MLXSW_SP_SB_PR_INGRESS_MNG_SIZE), + MLXSW_SP1_SB_PR_INGRESS_MNG_SIZE), /* Egress pools. */ - MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_PR_EGRESS_SIZE), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP1_SB_PR_EGRESS_SIZE), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI), }; +#define MLXSW_SP2_SB_PR_INGRESS_SIZE 40960000 +#define MLXSW_SP2_SB_PR_INGRESS_MNG_SIZE (200 * 1000) +#define MLXSW_SP2_SB_PR_EGRESS_SIZE 40960000 + +static const struct mlxsw_sp_sb_pr mlxsw_sp2_sb_prs[] = { + /* Ingress pools. */ + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP2_SB_PR_INGRESS_SIZE), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP2_SB_PR_INGRESS_MNG_SIZE), + /* Egress pools. */ + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP2_SB_PR_EGRESS_SIZE), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), +}; + static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_sb_pr *prs, size_t prs_len) @@ -424,7 +456,7 @@ static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, .pool_index = _pool, \ } -static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = { +static const struct mlxsw_sp_sb_cm mlxsw_sp1_sb_cms_ingress[] = { MLXSW_SP_SB_CM(10000, 8, 0), MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), @@ -437,7 +469,20 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = { MLXSW_SP_SB_CM(20000, 1, 3), }; -static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = { +static const struct mlxsw_sp_sb_cm mlxsw_sp2_sb_cms_ingress[] = { + MLXSW_SP_SB_CM(0, 7, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, 0, 0), /* dummy, this PG does not exist */ + MLXSW_SP_SB_CM(20000, 1, 3), +}; + +static const struct mlxsw_sp_sb_cm mlxsw_sp1_sb_cms_egress[] = { MLXSW_SP_SB_CM(1500, 9, 4), MLXSW_SP_SB_CM(1500, 9, 4), MLXSW_SP_SB_CM(1500, 9, 4), @@ -457,6 +502,26 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = { MLXSW_SP_SB_CM(1, 0xff, 4), }; +static const struct mlxsw_sp_sb_cm mlxsw_sp2_sb_cms_egress[] = { + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(1, 0xff, 4), +}; + #define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, 4) static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { @@ -575,7 +640,7 @@ static int mlxsw_sp_cpu_port_sb_cms_init(struct mlxsw_sp *mlxsw_sp) .max_buff = _max_buff, \ } -static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms[] = { +static const struct mlxsw_sp_sb_pm mlxsw_sp1_sb_pms[] = { /* Ingress pools. */ MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX), MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), @@ -589,6 +654,19 @@ static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms[] = { MLXSW_SP_SB_PM(10000, 90000), }; +static const struct mlxsw_sp_sb_pm mlxsw_sp2_sb_pms[] = { + /* Ingress pools. */ + MLXSW_SP_SB_PM(0, 7), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX), + /* Egress pools. */ + MLXSW_SP_SB_PM(0, 7), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, 0), +}; + static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; @@ -680,32 +758,32 @@ out: } const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals = { - .pool_count = ARRAY_SIZE(mlxsw_sp_sb_pool_dess), - .pool_dess = mlxsw_sp_sb_pool_dess, - .pms = mlxsw_sp_sb_pms, - .prs = mlxsw_sp_sb_prs, + .pool_count = ARRAY_SIZE(mlxsw_sp1_sb_pool_dess), + .pool_dess = mlxsw_sp1_sb_pool_dess, + .pms = mlxsw_sp1_sb_pms, + .prs = mlxsw_sp1_sb_prs, .mms = mlxsw_sp_sb_mms, - .cms_ingress = mlxsw_sp_sb_cms_ingress, - .cms_egress = mlxsw_sp_sb_cms_egress, + .cms_ingress = mlxsw_sp1_sb_cms_ingress, + .cms_egress = mlxsw_sp1_sb_cms_egress, .cms_cpu = mlxsw_sp_cpu_port_sb_cms, .mms_count = ARRAY_SIZE(mlxsw_sp_sb_mms), - .cms_ingress_count = ARRAY_SIZE(mlxsw_sp_sb_cms_ingress), - .cms_egress_count = ARRAY_SIZE(mlxsw_sp_sb_cms_egress), + .cms_ingress_count = ARRAY_SIZE(mlxsw_sp1_sb_cms_ingress), + .cms_egress_count = ARRAY_SIZE(mlxsw_sp1_sb_cms_egress), .cms_cpu_count = ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms), }; const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals = { - .pool_count = ARRAY_SIZE(mlxsw_sp_sb_pool_dess), - .pool_dess = mlxsw_sp_sb_pool_dess, - .pms = mlxsw_sp_sb_pms, - .prs = mlxsw_sp_sb_prs, + .pool_count = ARRAY_SIZE(mlxsw_sp2_sb_pool_dess), + .pool_dess = mlxsw_sp2_sb_pool_dess, + .pms = mlxsw_sp2_sb_pms, + .prs = mlxsw_sp2_sb_prs, .mms = mlxsw_sp_sb_mms, - .cms_ingress = mlxsw_sp_sb_cms_ingress, - .cms_egress = mlxsw_sp_sb_cms_egress, + .cms_ingress = mlxsw_sp2_sb_cms_ingress, + .cms_egress = mlxsw_sp2_sb_cms_egress, .cms_cpu = mlxsw_sp_cpu_port_sb_cms, .mms_count = ARRAY_SIZE(mlxsw_sp_sb_mms), - .cms_ingress_count = ARRAY_SIZE(mlxsw_sp_sb_cms_ingress), - .cms_egress_count = ARRAY_SIZE(mlxsw_sp_sb_cms_egress), + .cms_ingress_count = ARRAY_SIZE(mlxsw_sp2_sb_cms_ingress), + .cms_egress_count = ARRAY_SIZE(mlxsw_sp2_sb_cms_egress), .cms_cpu_count = ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms), }; From edf777f55aee61d1021fc496f16ef121380cfa67 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 20 Feb 2019 19:32:27 +0000 Subject: [PATCH 1422/1436] mlxsw: spectrum_buffers: Update port headroom configuration The recommendation for headroom size for 100Gbps port and 100m cable is 101.6KB, reduced accordingly for split ports. The closest higher number evenly divisible by cell size for both Spectrum-1 and Spectrum-2, and such that the number of cells can be further divided by maximum split factor of 4, is 102528 bytes, or 25632 bytes per lane. Update mlxsw_sp_port_pb_init() to compute the headroom taking into account this recommended per-lane value and number of lanes actually dedicated to a given port. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_buffers.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index bb327dfe1336..65ef4a9f6afb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -260,24 +260,24 @@ static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port, (unsigned long) pm); } -static const u16 mlxsw_sp_pbs[] = { - [0] = 2 * ETH_FRAME_LEN, - [9] = 2 * MLXSW_PORT_MAX_MTU, -}; - -#define MLXSW_SP_PBS_LEN ARRAY_SIZE(mlxsw_sp_pbs) +/* 1/4 of a headroom necessary for 100Gbps port and 100m cable. */ +#define MLXSW_SP_PB_HEADROOM 25632 #define MLXSW_SP_PB_UNUSED 8 static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port) { + const u32 pbs[] = { + [0] = MLXSW_SP_PB_HEADROOM * mlxsw_sp_port->mapping.width, + [9] = 2 * MLXSW_PORT_MAX_MTU, + }; struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char pbmc_pl[MLXSW_REG_PBMC_LEN]; int i; mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0xffff, 0xffff / 2); - for (i = 0; i < MLXSW_SP_PBS_LEN; i++) { - u16 size = mlxsw_sp_bytes_cells(mlxsw_sp, mlxsw_sp_pbs[i]); + for (i = 0; i < ARRAY_SIZE(pbs); i++) { + u16 size = mlxsw_sp_bytes_cells(mlxsw_sp, pbs[i]); if (i == MLXSW_SP_PB_UNUSED) continue; From bb6c346cef6fb0414865aaa6e2f3a11ceeeae14a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 20 Feb 2019 19:32:29 +0000 Subject: [PATCH 1423/1436] mlxsw: spectrum_buffers: Reject overlarge headroom size requests cap_max_headroom_size holds maximum headroom size supported. Overstepping that limit might under certain conditions lead to ASIC freeze. Query and store the value, and add mlxsw_sp_sb_max_headroom_cells() for obtaining the stored value. In __mlxsw_sp_port_headroom_set(), reject requests where the total port buffer is larger than the advertised maximum. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/resources.h | 2 ++ drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 13 ++++++++++++- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 1 + .../ethernet/mellanox/mlxsw/spectrum_buffers.c | 16 ++++++++++++++++ 4 files changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index b8b3a01c2a9e..773ef7fdb285 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -26,6 +26,7 @@ enum mlxsw_res_id { MLXSW_RES_ID_MAX_LAG_MEMBERS, MLXSW_RES_ID_MAX_BUFFER_SIZE, MLXSW_RES_ID_CELL_SIZE, + MLXSW_RES_ID_MAX_HEADROOM_SIZE, MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS, MLXSW_RES_ID_ACL_MAX_TCAM_RULES, MLXSW_RES_ID_ACL_MAX_REGIONS, @@ -79,6 +80,7 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_MAX_LAG_MEMBERS] = 0x2521, [MLXSW_RES_ID_MAX_BUFFER_SIZE] = 0x2802, /* Bytes */ [MLXSW_RES_ID_CELL_SIZE] = 0x2803, /* Bytes */ + [MLXSW_RES_ID_MAX_HEADROOM_SIZE] = 0x2811, /* Bytes */ [MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS] = 0x2901, [MLXSW_RES_ID_ACL_MAX_TCAM_RULES] = 0x2902, [MLXSW_RES_ID_ACL_MAX_REGIONS] = 0x2903, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index c1ec4f89973b..f018b0607dac 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -852,8 +852,12 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, u8 pfc_en = !!my_pfc ? my_pfc->pfc_en : 0; u16 delay = !!my_pfc ? my_pfc->delay : 0; char pbmc_pl[MLXSW_REG_PBMC_LEN]; + u32 taken_headroom_cells = 0; + u32 max_headroom_cells; int i, j, err; + max_headroom_cells = mlxsw_sp_sb_max_headroom_cells(mlxsw_sp); + mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0, 0); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); if (err) @@ -864,6 +868,7 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, bool pfc = false; u16 thres_cells; u16 delay_cells; + u16 total_cells; bool lossy; for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++) { @@ -881,7 +886,13 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, thres_cells = mlxsw_sp_pg_buf_threshold_get(mlxsw_sp, mtu); delay_cells = mlxsw_sp_pg_buf_delay_get(mlxsw_sp, mtu, delay, pfc, pause_en); - mlxsw_sp_pg_buf_pack(pbmc_pl, i, thres_cells + delay_cells, + total_cells = thres_cells + delay_cells; + + taken_headroom_cells += total_cells; + if (taken_headroom_cells > max_headroom_cells) + return -ENOBUFS; + + mlxsw_sp_pg_buf_pack(pbmc_pl, i, total_cells, thres_cells, lossy); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 976843917c95..8bb83d0facc2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -373,6 +373,7 @@ int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, u32 *p_cur, u32 *p_max); u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells); u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes); +u32 mlxsw_sp_sb_max_headroom_cells(const struct mlxsw_sp *mlxsw_sp); extern const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals; extern const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 65ef4a9f6afb..9a79b5e11597 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -85,6 +85,7 @@ struct mlxsw_sp_sb { struct mlxsw_sp_sb_pr *prs; struct mlxsw_sp_sb_port *ports; u32 cell_size; + u32 max_headroom_cells; u64 sb_size; }; @@ -113,6 +114,11 @@ u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes) return DIV_ROUND_UP(bytes, mlxsw_sp->sb->cell_size); } +u32 mlxsw_sp_sb_max_headroom_cells(const struct mlxsw_sp *mlxsw_sp) +{ + return mlxsw_sp->sb->max_headroom_cells; +} + static struct mlxsw_sp_sb_pr *mlxsw_sp_sb_pr_get(struct mlxsw_sp *mlxsw_sp, u16 pool_index) { @@ -789,6 +795,7 @@ const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals = { int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) { + u32 max_headroom_size; u16 ing_pool_count; u16 eg_pool_count; int err; @@ -799,12 +806,21 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_BUFFER_SIZE)) return -EIO; + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_HEADROOM_SIZE)) + return -EIO; + mlxsw_sp->sb = kzalloc(sizeof(*mlxsw_sp->sb), GFP_KERNEL); if (!mlxsw_sp->sb) return -ENOMEM; mlxsw_sp->sb->cell_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, CELL_SIZE); mlxsw_sp->sb->sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE); + max_headroom_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MAX_HEADROOM_SIZE); + /* Round down, because this limit must not be overstepped. */ + mlxsw_sp->sb->max_headroom_cells = max_headroom_size / + mlxsw_sp->sb->cell_size; + err = mlxsw_sp_sb_ports_init(mlxsw_sp); if (err) goto err_sb_ports_init; From bdd59d661153a59af79112676565276ae2a1f457 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Wed, 20 Feb 2019 10:32:40 +0800 Subject: [PATCH 1424/1436] net: hns3: add pointer checking at the beginning of the exported functions. These functions are exported, add pointer checking at the beginning can make them more safe. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c index 50011aafbae4..9f5349b9243f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c @@ -32,6 +32,9 @@ static bool hnae3_client_match(enum hnae3_client_type client_type, void hnae3_set_client_init_flag(struct hnae3_client *client, struct hnae3_ae_dev *ae_dev, int inited) { + if (!client || !ae_dev) + return; + switch (client->type) { case HNAE3_CLIENT_KNIC: hnae3_set_bit(ae_dev->flag, HNAE3_KNIC_CLIENT_INITED_B, inited); @@ -109,6 +112,9 @@ int hnae3_register_client(struct hnae3_client *client) struct hnae3_ae_dev *ae_dev; int ret = 0; + if (!client) + return -ENODEV; + mutex_lock(&hnae3_common_lock); /* one system should only have one client for every type */ list_for_each_entry(client_tmp, &hnae3_client_list, node) { @@ -141,6 +147,9 @@ void hnae3_unregister_client(struct hnae3_client *client) { struct hnae3_ae_dev *ae_dev; + if (!client) + return; + mutex_lock(&hnae3_common_lock); /* un-initialize the client on every matched port */ list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) { @@ -163,6 +172,9 @@ void hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo) struct hnae3_client *client; int ret = 0; + if (!ae_algo) + return; + mutex_lock(&hnae3_common_lock); list_add_tail(&ae_algo->node, &hnae3_ae_algo_list); @@ -209,6 +221,9 @@ void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo) struct hnae3_ae_dev *ae_dev; struct hnae3_client *client; + if (!ae_algo) + return; + mutex_lock(&hnae3_common_lock); /* Check if there are matched ae_dev */ list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) { @@ -245,6 +260,9 @@ int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev) struct hnae3_client *client; int ret = 0; + if (!ae_dev) + return -ENODEV; + mutex_lock(&hnae3_common_lock); list_add_tail(&ae_dev->node, &hnae3_ae_dev_list); @@ -307,6 +325,9 @@ void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev) struct hnae3_ae_algo *ae_algo; struct hnae3_client *client; + if (!ae_dev) + return; + mutex_lock(&hnae3_common_lock); /* Check if there are matched ae_algo */ list_for_each_entry(ae_algo, &hnae3_ae_algo_list, node) { From 676131f7c53ecdd79e29fc8cfcdefe6f9f2485e8 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Wed, 20 Feb 2019 10:32:41 +0800 Subject: [PATCH 1425/1436] net: hns3: Check variable is valid before assigning it to another In hnae3_register_ae_dev(), ae_algo->ops is assigned to ae_dev->ops before check that ae_algo->ops is valid. And in hnae3_register_ae_algo(), missing check for ae_algo->ops. This patch fixes them. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c index 9f5349b9243f..17ab4f4af6ad 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c @@ -185,8 +185,12 @@ void hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo) if (!id) continue; - /* ae_dev init should set flag */ + if (!ae_algo->ops) { + dev_err(&ae_dev->pdev->dev, "ae_algo ops are null\n"); + continue; + } ae_dev->ops = ae_algo->ops; + ret = ae_algo->ops->init_ae_dev(ae_dev); if (ret) { dev_err(&ae_dev->pdev->dev, @@ -194,6 +198,7 @@ void hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo) continue; } + /* ae_dev init should set flag */ hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 1); /* check the client list for the match with this ae_dev type and @@ -273,15 +278,13 @@ int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev) if (!id) continue; - ae_dev->ops = ae_algo->ops; - - if (!ae_dev->ops) { - dev_err(&ae_dev->pdev->dev, "ae_dev ops are null\n"); + if (!ae_algo->ops) { + dev_err(&ae_dev->pdev->dev, "ae_algo ops are null\n"); ret = -EOPNOTSUPP; goto out_err; } + ae_dev->ops = ae_algo->ops; - /* ae_dev init should set flag */ ret = ae_dev->ops->init_ae_dev(ae_dev); if (ret) { dev_err(&ae_dev->pdev->dev, @@ -289,6 +292,7 @@ int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev) goto out_err; } + /* ae_dev init should set flag */ hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 1); break; } From db68ca0ef7c98bb39cb193571c63b22ca99f633f Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Wed, 20 Feb 2019 10:32:42 +0800 Subject: [PATCH 1426/1436] net: hns3: convert mac advertize and supported from u32 to link mode The link mode with bits has been up to more than 31 for some MAC and phy. Convert to using a linkmode bitmap, which can support all link modes. Signed-off-by: Jian Shen Signed-off-by: Peng Li Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_main.c | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 75ec3094bbf9..9a442f3c682a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -839,27 +839,27 @@ static void hclge_parse_fiber_link_mode(struct hclge_dev *hdev, unsigned long *supported = hdev->hw.mac.supported; if (speed_ability & HCLGE_SUPPORT_1G_BIT) - set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT, - supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT, + supported); if (speed_ability & HCLGE_SUPPORT_10G_BIT) - set_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, - supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, + supported); if (speed_ability & HCLGE_SUPPORT_25G_BIT) - set_bit(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, - supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, + supported); if (speed_ability & HCLGE_SUPPORT_50G_BIT) - set_bit(ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT, - supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT, + supported); if (speed_ability & HCLGE_SUPPORT_100G_BIT) - set_bit(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, - supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, + supported); - set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, supported); - set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported); } static void hclge_parse_link_mode(struct hclge_dev *hdev, u8 speed_ability) From f18635d52c23594259da2f741399abc6c6f41f15 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Wed, 20 Feb 2019 10:32:43 +0800 Subject: [PATCH 1427/1436] net: hns3: fix port info query issue for copper port In original codes, for copper port which doesn't connect to phy, it always returns -EOPNOTSUPP when query port information. This patch fixes it by return the port information of MAC. Fixes: 5f373b158523 ("net: hns3: Fix speed/duplex information loss problem when executing ethtool ethx cmd of VF") Signed-off-by: Jian Shen Signed-off-by: Peng Li Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns3/hns3_ethtool.c | 9 ++--- .../hisilicon/hns3/hns3pf/hclge_main.c | 38 +++++++++++++++++-- .../hisilicon/hns3/hns3pf/hclge_main.h | 4 ++ .../hisilicon/hns3/hns3pf/hclge_mdio.c | 17 +-------- 4 files changed, 44 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 63f5f56bda94..d94c90a38563 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -621,12 +621,11 @@ static int hns3_get_link_ksettings(struct net_device *netdev, hns3_get_ksettings(h, cmd); break; case HNAE3_MEDIA_TYPE_COPPER: - if (!netdev->phydev) - return -EOPNOTSUPP; - cmd->base.port = PORT_TP; - phy_ethtool_ksettings_get(netdev->phydev, cmd); - + if (!netdev->phydev) + hns3_get_ksettings(h, cmd); + else + phy_ethtool_ksettings_get(netdev->phydev, cmd); break; default: diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 9a442f3c682a..87edac45c040 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -862,14 +862,44 @@ static void hclge_parse_fiber_link_mode(struct hclge_dev *hdev, linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported); } +static void hclge_parse_copper_link_mode(struct hclge_dev *hdev, + u8 speed_ability) +{ + unsigned long *supported = hdev->hw.mac.supported; + + /* default to support all speed for GE port */ + if (!speed_ability) + speed_ability = HCLGE_SUPPORT_GE; + + if (speed_ability & HCLGE_SUPPORT_1G_BIT) + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + supported); + + if (speed_ability & HCLGE_SUPPORT_100M_BIT) { + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, + supported); + } + + if (speed_ability & HCLGE_SUPPORT_10M_BIT) { + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, supported); + } + + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported); +} + static void hclge_parse_link_mode(struct hclge_dev *hdev, u8 speed_ability) { u8 media_type = hdev->hw.mac.media_type; - if (media_type != HNAE3_MEDIA_TYPE_FIBER) - return; - - hclge_parse_fiber_link_mode(hdev, speed_ability); + if (media_type == HNAE3_MEDIA_TYPE_FIBER) + hclge_parse_fiber_link_mode(hdev, speed_ability); + else if (media_type == HNAE3_MEDIA_TYPE_COPPER) + hclge_parse_copper_link_mode(hdev, speed_ability); } static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index c939f4a7f5f0..3303919d089d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -188,6 +188,10 @@ enum HLCGE_PORT_TYPE { #define HCLGE_SUPPORT_25G_BIT BIT(2) #define HCLGE_SUPPORT_50G_BIT BIT(3) #define HCLGE_SUPPORT_100G_BIT BIT(4) +#define HCLGE_SUPPORT_100M_BIT BIT(6) +#define HCLGE_SUPPORT_10M_BIT BIT(7) +#define HCLGE_SUPPORT_GE \ + (HCLGE_SUPPORT_1G_BIT | HCLGE_SUPPORT_100M_BIT | HCLGE_SUPPORT_10M_BIT) enum HCLGE_DEV_STATE { HCLGE_STATE_REINITING, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index 84f28785ba28..48eda2c6fdae 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -8,12 +8,6 @@ #include "hclge_main.h" #include "hclge_mdio.h" -#define HCLGE_PHY_SUPPORTED_FEATURES (SUPPORTED_Autoneg | \ - SUPPORTED_TP | \ - PHY_10BT_FEATURES | \ - PHY_100BT_FEATURES | \ - SUPPORTED_1000baseT_Full) - enum hclge_mdio_c22_op_seq { HCLGE_MDIO_C22_WRITE = 1, HCLGE_MDIO_C22_READ = 2 @@ -217,16 +211,9 @@ int hclge_mac_connect_phy(struct hnae3_handle *handle) return ret; } - linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, mask); - linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, mask); - linkmode_set_bit_array(phy_10_100_features_array, - ARRAY_SIZE(phy_10_100_features_array), - mask); - linkmode_set_bit_array(phy_gbit_features_array, - ARRAY_SIZE(phy_gbit_features_array), - mask); + linkmode_copy(mask, hdev->hw.mac.supported); linkmode_and(phydev->supported, phydev->supported, mask); - phy_support_asym_pause(phydev); + linkmode_copy(phydev->advertising, phydev->supported); return 0; } From 3d69e59f42bdd57c2a185da52f9579c33dec7121 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Wed, 20 Feb 2019 10:32:44 +0800 Subject: [PATCH 1428/1436] net: hns3: modify print message of ssu common ecc errors This patch add information of specific bit in log to be consistent with other type of errors, so that we can know which memory of ssu has occurred a ecc ras errors. Signed-off-by: Weihang Li Signed-off-by: Peng Li Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_err.c | 45 ++++++++++++++++++- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index d0f654123b9b..c9c2c850aee4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -277,6 +277,45 @@ static const struct hclge_hw_error hclge_ssu_com_err_int[] = { { /* sentinel */ } }; +#define HCLGE_SSU_MEM_ECC_ERR(x) \ + { .int_msk = BIT(x), .msg = "ssu_mem" #x "_ecc_mbit_err" } + +static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = { + HCLGE_SSU_MEM_ECC_ERR(0), + HCLGE_SSU_MEM_ECC_ERR(1), + HCLGE_SSU_MEM_ECC_ERR(2), + HCLGE_SSU_MEM_ECC_ERR(3), + HCLGE_SSU_MEM_ECC_ERR(4), + HCLGE_SSU_MEM_ECC_ERR(5), + HCLGE_SSU_MEM_ECC_ERR(6), + HCLGE_SSU_MEM_ECC_ERR(7), + HCLGE_SSU_MEM_ECC_ERR(8), + HCLGE_SSU_MEM_ECC_ERR(9), + HCLGE_SSU_MEM_ECC_ERR(10), + HCLGE_SSU_MEM_ECC_ERR(11), + HCLGE_SSU_MEM_ECC_ERR(12), + HCLGE_SSU_MEM_ECC_ERR(13), + HCLGE_SSU_MEM_ECC_ERR(14), + HCLGE_SSU_MEM_ECC_ERR(15), + HCLGE_SSU_MEM_ECC_ERR(16), + HCLGE_SSU_MEM_ECC_ERR(17), + HCLGE_SSU_MEM_ECC_ERR(18), + HCLGE_SSU_MEM_ECC_ERR(19), + HCLGE_SSU_MEM_ECC_ERR(20), + HCLGE_SSU_MEM_ECC_ERR(21), + HCLGE_SSU_MEM_ECC_ERR(22), + HCLGE_SSU_MEM_ECC_ERR(23), + HCLGE_SSU_MEM_ECC_ERR(24), + HCLGE_SSU_MEM_ECC_ERR(25), + HCLGE_SSU_MEM_ECC_ERR(26), + HCLGE_SSU_MEM_ECC_ERR(27), + HCLGE_SSU_MEM_ECC_ERR(28), + HCLGE_SSU_MEM_ECC_ERR(29), + HCLGE_SSU_MEM_ECC_ERR(30), + HCLGE_SSU_MEM_ECC_ERR(31), + { /* sentinel */ } +}; + static const struct hclge_hw_error hclge_ssu_port_based_err_int[] = { { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port" }, { .int_msk = BIT(1), .msg = "tpu_pkt_without_key_port" }, @@ -835,13 +874,15 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev, desc_data = (__le32 *)&desc[2]; status = le32_to_cpu(*(desc_data + 2)); if (status) { - dev_warn(dev, "SSU_ECC_MULTI_BIT_INT_0 ssu_ecc_mbit_int[31:0]\n"); + hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0", + &hclge_ssu_mem_ecc_err_int[0], status); HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET); } status = le32_to_cpu(*(desc_data + 3)) & BIT(0); if (status) { - dev_warn(dev, "SSU_ECC_MULTI_BIT_INT_1 ssu_ecc_mbit_int[32]\n"); + dev_warn(dev, "SSU_ECC_MULTI_BIT_INT_1 ssu_mem32_ecc_mbit_err found [error status=0x%x]\n", + status); HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET); } From 747fc3f351baa061b41a1e2a81821bc963e4f794 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Wed, 20 Feb 2019 10:32:45 +0800 Subject: [PATCH 1429/1436] net: hns3: some bugfix of ppu(rcb) ras errors The 3rd and 4th of PPU(RCB) PF Abnormal is RAS errors instead of MSI-X like other bits. This patch adds process of handling and logging this two bits. Otherwise, this patch modifies print message of 28th and 29th bit of PPU MPF Abnormal errors, which keep same with other errors now. Fixes: f69b10b317f9 ("net: hns3: handle hw errors of PPU(RCB)") Signed-off-by: Weihang Li Signed-off-by: Peng Li Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 16 +++++++++++----- .../ethernet/hisilicon/hns3/hns3pf/hclge_err.h | 1 + 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index c9c2c850aee4..4951684cf4d0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -1038,6 +1038,13 @@ static int hclge_handle_pf_ras_error(struct hclge_dev *hdev, hclge_log_error(dev, "IGU_EGU_TNL_INT_STS", &hclge_igu_egu_tnl_int[0], status); + /* log PPU(RCB) errors */ + desc_data = (__le32 *)&desc[3]; + status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_RAS_MASK; + if (status) + hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0", + &hclge_ppu_pf_abnormal_int[0], status); + /* clear all PF RAS errors */ hclge_cmd_reuse_desc(&desc[0], false); desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); @@ -1373,14 +1380,13 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev, set_bit(HNAE3_GLOBAL_RESET, reset_requests); } - /* log PPU(RCB) errors */ + /* log PPU(RCB) MPF errors */ desc_data = (__le32 *)&desc[5]; status = le32_to_cpu(*(desc_data + 2)) & HCLGE_PPU_MPF_INT_ST2_MSIX_MASK; if (status) { - dev_warn(dev, - "PPU_MPF_ABNORMAL_INT_ST2[28:29], err_status(0x%x)\n", - status); + hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2", + &hclge_ppu_mpf_abnormal_int_st2[0], status); set_bit(HNAE3_CORE_RESET, reset_requests); } @@ -1427,7 +1433,7 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev, hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0", &hclge_ppp_pf_abnormal_int[0], status); - /* PPU(RCB) PF errors */ + /* log PPU(RCB) PF errors */ desc_data = (__le32 *)&desc[3]; status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK; if (status) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h index 51a7d4eb066a..86d6143068c1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h @@ -79,6 +79,7 @@ #define HCLGE_PPP_MPF_INT_ST3_MASK GENMASK(5, 0) #define HCLGE_PPU_MPF_INT_ST3_MASK GENMASK(7, 0) #define HCLGE_PPU_MPF_INT_ST2_MSIX_MASK GENMASK(29, 28) +#define HCLGE_PPU_PF_INT_RAS_MASK 0x18 #define HCLGE_PPU_PF_INT_MSIX_MASK 0x27 #define HCLGE_QCN_FIFO_INT_MASK GENMASK(17, 0) #define HCLGE_QCN_ECC_INT_MASK GENMASK(21, 0) From d1f55d6bfcf007691a6392ca421cf5fa1b007d1c Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Wed, 20 Feb 2019 10:32:46 +0800 Subject: [PATCH 1430/1436] net: hns3: enable 8~11th bit of mac common msi-x error These bits are enabled now and have been test. Signed-off-by: Weihang Li Signed-off-by: Peng Li Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 6 ++++++ drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index 4951684cf4d0..f0f1221fae8b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -219,6 +219,12 @@ static const struct hclge_hw_error hclge_mac_afifo_tnl_int[] = { { .int_msk = BIT(5), .msg = "cge_igu_afifo_ecc_mbit_err" }, { .int_msk = BIT(6), .msg = "lge_igu_afifo_ecc_1bit_err" }, { .int_msk = BIT(7), .msg = "lge_igu_afifo_ecc_mbit_err" }, + { .int_msk = BIT(8), .msg = "cge_igu_afifo_overflow_err" }, + { .int_msk = BIT(9), .msg = "lge_igu_afifo_overflow_err" }, + { .int_msk = BIT(10), .msg = "egu_cge_afifo_underrun_err" }, + { .int_msk = BIT(11), .msg = "egu_lge_afifo_underrun_err" }, + { .int_msk = BIT(12), .msg = "egu_ge_afifo_underrun_err" }, + { .int_msk = BIT(13), .msg = "ge_igu_afifo_overflow_err" }, { /* sentinel */ } }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h index 86d6143068c1..fc068280d391 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h @@ -45,8 +45,8 @@ #define HCLGE_TM_QCN_MEM_ERR_INT_EN 0xFFFFFF #define HCLGE_NCSI_ERR_INT_EN 0x3 #define HCLGE_NCSI_ERR_INT_TYPE 0x9 -#define HCLGE_MAC_COMMON_ERR_INT_EN GENMASK(7, 0) -#define HCLGE_MAC_COMMON_ERR_INT_EN_MASK GENMASK(7, 0) +#define HCLGE_MAC_COMMON_ERR_INT_EN 0x107FF +#define HCLGE_MAC_COMMON_ERR_INT_EN_MASK 0x107FF #define HCLGE_PPU_MPF_ABNORMAL_INT0_EN GENMASK(31, 0) #define HCLGE_PPU_MPF_ABNORMAL_INT0_EN_MASK GENMASK(31, 0) #define HCLGE_PPU_MPF_ABNORMAL_INT1_EN GENMASK(31, 0) From 3aff0ac97391fe4181fd68f06b8fec345f7d1bc5 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Wed, 20 Feb 2019 10:32:47 +0800 Subject: [PATCH 1431/1436] net: hns3: fix 6th bit of ppp mpf abnormal errors This patch modify print message of 6th bit of ppp mpf abnormal errors, there is a extra letter e in it. Signed-off-by: Weihang Li Signed-off-by: Peng Li Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index f0f1221fae8b..b9d363fa595a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -80,7 +80,7 @@ static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st1[] = { { .int_msk = BIT(3), .msg = "umv_key_mem1_ecc_mbit_err" }, { .int_msk = BIT(4), .msg = "umv_key_mem2_ecc_mbit_err" }, { .int_msk = BIT(5), .msg = "umv_key_mem3_ecc_mbit_err" }, - { .int_msk = BIT(6), .msg = "umv_ad_mem_ecc_mbit_erre" }, + { .int_msk = BIT(6), .msg = "umv_ad_mem_ecc_mbit_err" }, { .int_msk = BIT(7), .msg = "rss_tc_mode_mem_ecc_mbit_err" }, { .int_msk = BIT(8), .msg = "rss_idt_mem0_ecc_mbit_err" }, { .int_msk = BIT(9), .msg = "rss_idt_mem1_ecc_mbit_err" }, From 6dd86902f21d096ed7ef2dcc921ee80c73807313 Mon Sep 17 00:00:00 2001 From: liuzhongzhu Date: Wed, 20 Feb 2019 10:32:48 +0800 Subject: [PATCH 1432/1436] net: hns3: Record VF unicast and multicast tables Record the unicast and multicast tables that the VF sends to the chip. After the VF exception, the PF actively clears the VF to chip config. Signed-off-by: liuzhongzhu Signed-off-by: Peng Li Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns3/hclge_mbx.h | 2 + .../hisilicon/hns3/hns3pf/hclge_main.c | 102 ++++++++++++++++++ .../hisilicon/hns3/hns3pf/hclge_main.h | 25 +++++ .../hisilicon/hns3/hns3pf/hclge_mbx.c | 28 ++++- 4 files changed, 156 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index 53089cd980d9..9b6b7b45dde3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -42,6 +42,8 @@ enum HCLGE_MBX_OPCODE { HCLGE_MBX_GET_QID_IN_PF, /* (VF -> PF) get queue id in pf */ HCLGE_MBX_LINK_STAT_MODE, /* (PF -> VF) link mode has changed */ HCLGE_MBX_GET_LINK_MODE, /* (VF -> PF) get the link mode of pf */ + + HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf reset status */ }; /* below are per-VF mac-vlan subcodes */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 87edac45c040..b1f5c6fa6954 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1329,6 +1329,8 @@ static int hclge_alloc_vport(struct hclge_dev *hdev) vport->back = hdev; vport->vport_id = i; vport->mps = HCLGE_MAC_DEFAULT_FRAME; + INIT_LIST_HEAD(&vport->uc_mac_list); + INIT_LIST_HEAD(&vport->mc_mac_list); if (i == 0) ret = hclge_vport_setup(vport, tqp_main_vport); @@ -6074,6 +6076,103 @@ int hclge_rm_mc_addr_common(struct hclge_vport *vport, return status; } +void hclge_add_vport_mac_table(struct hclge_vport *vport, const u8 *mac_addr, + enum HCLGE_MAC_ADDR_TYPE mac_type) +{ + struct hclge_vport_mac_addr_cfg *mac_cfg; + struct list_head *list; + + if (!vport->vport_id) + return; + + mac_cfg = kzalloc(sizeof(*mac_cfg), GFP_KERNEL); + if (!mac_cfg) + return; + + mac_cfg->hd_tbl_status = true; + memcpy(mac_cfg->mac_addr, mac_addr, ETH_ALEN); + + list = (mac_type == HCLGE_MAC_ADDR_UC) ? + &vport->uc_mac_list : &vport->mc_mac_list; + + list_add_tail(&mac_cfg->node, list); +} + +void hclge_rm_vport_mac_table(struct hclge_vport *vport, const u8 *mac_addr, + bool is_write_tbl, + enum HCLGE_MAC_ADDR_TYPE mac_type) +{ + struct hclge_vport_mac_addr_cfg *mac_cfg, *tmp; + struct list_head *list; + bool uc_flag, mc_flag; + + list = (mac_type == HCLGE_MAC_ADDR_UC) ? + &vport->uc_mac_list : &vport->mc_mac_list; + + uc_flag = is_write_tbl && mac_type == HCLGE_MAC_ADDR_UC; + mc_flag = is_write_tbl && mac_type == HCLGE_MAC_ADDR_MC; + + list_for_each_entry_safe(mac_cfg, tmp, list, node) { + if (strncmp(mac_cfg->mac_addr, mac_addr, ETH_ALEN) == 0) { + if (uc_flag && mac_cfg->hd_tbl_status) + hclge_rm_uc_addr_common(vport, mac_addr); + + if (mc_flag && mac_cfg->hd_tbl_status) + hclge_rm_mc_addr_common(vport, mac_addr); + + list_del(&mac_cfg->node); + kfree(mac_cfg); + break; + } + } +} + +void hclge_rm_vport_all_mac_table(struct hclge_vport *vport, bool is_del_list, + enum HCLGE_MAC_ADDR_TYPE mac_type) +{ + struct hclge_vport_mac_addr_cfg *mac_cfg, *tmp; + struct list_head *list; + + list = (mac_type == HCLGE_MAC_ADDR_UC) ? + &vport->uc_mac_list : &vport->mc_mac_list; + + list_for_each_entry_safe(mac_cfg, tmp, list, node) { + if (mac_type == HCLGE_MAC_ADDR_UC && mac_cfg->hd_tbl_status) + hclge_rm_uc_addr_common(vport, mac_cfg->mac_addr); + + if (mac_type == HCLGE_MAC_ADDR_MC && mac_cfg->hd_tbl_status) + hclge_rm_mc_addr_common(vport, mac_cfg->mac_addr); + + mac_cfg->hd_tbl_status = false; + if (is_del_list) { + list_del(&mac_cfg->node); + kfree(mac_cfg); + } + } +} + +void hclge_uninit_vport_mac_table(struct hclge_dev *hdev) +{ + struct hclge_vport_mac_addr_cfg *mac, *tmp; + struct hclge_vport *vport; + int i; + + mutex_lock(&hdev->vport_cfg_mutex); + for (i = 0; i < hdev->num_alloc_vport; i++) { + vport = &hdev->vport[i]; + list_for_each_entry_safe(mac, tmp, &vport->uc_mac_list, node) { + list_del(&mac->node); + kfree(mac); + } + + list_for_each_entry_safe(mac, tmp, &vport->mc_mac_list, node) { + list_del(&mac->node); + kfree(mac); + } + } + mutex_unlock(&hdev->vport_cfg_mutex); +} + static int hclge_get_mac_ethertype_cmd_status(struct hclge_dev *hdev, u16 cmdq_resp, u8 resp_code) { @@ -7329,6 +7428,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) hdev->mps = ETH_FRAME_LEN + ETH_FCS_LEN + 2 * VLAN_HLEN; mutex_init(&hdev->vport_lock); + mutex_init(&hdev->vport_cfg_mutex); ret = hclge_pci_init(hdev); if (ret) { @@ -7621,6 +7721,8 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_misc_irq_uninit(hdev); hclge_pci_uninit(hdev); mutex_destroy(&hdev->vport_lock); + hclge_uninit_vport_mac_table(hdev); + mutex_destroy(&hdev->vport_cfg_mutex); ae_dev->priv = NULL; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 3303919d089d..781bd1145e2c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -632,6 +632,17 @@ struct hclge_fd_ad_data { u16 rule_id; }; +struct hclge_vport_mac_addr_cfg { + struct list_head node; + int hd_tbl_status; + u8 mac_addr[ETH_ALEN]; +}; + +enum HCLGE_MAC_ADDR_TYPE { + HCLGE_MAC_ADDR_UC, + HCLGE_MAC_ADDR_MC +}; + /* For each bit of TCAM entry, it uses a pair of 'x' and * 'y' to indicate which value to match, like below: * ---------------------------------- @@ -771,6 +782,8 @@ struct hclge_dev { /* unicast mac vlan space shared by PF and its VFs */ u16 share_umv_size; struct mutex umv_mutex; /* protect share_umv_size */ + + struct mutex vport_cfg_mutex; /* Protect stored vf table */ }; /* VPort level vlan tag configuration for TX direction */ @@ -838,6 +851,9 @@ struct hclge_vport { unsigned long state; unsigned long last_active_jiffies; u32 mps; /* Max packet size */ + + struct list_head uc_mac_list; /* Store VF unicast table */ + struct list_head mc_mac_list; /* Store VF multicast table */ }; void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc, @@ -892,4 +908,13 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, char *cmd_buf); u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, u16 queue_id); int hclge_notify_client(struct hclge_dev *hdev, enum hnae3_reset_notify_type type); +void hclge_add_vport_mac_table(struct hclge_vport *vport, const u8 *mac_addr, + enum HCLGE_MAC_ADDR_TYPE mac_type); +void hclge_rm_vport_mac_table(struct hclge_vport *vport, const u8 *mac_addr, + bool is_write_tbl, + enum HCLGE_MAC_ADDR_TYPE mac_type); +void hclge_rm_vport_all_mac_table(struct hclge_vport *vport, bool is_del_list, + enum HCLGE_MAC_ADDR_TYPE mac_type); +void hclge_uninit_vport_mac_table(struct hclge_dev *hdev); + #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 7e4a104582d6..476ca8399446 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -224,12 +224,24 @@ static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport, hclge_rm_uc_addr_common(vport, old_addr); status = hclge_add_uc_addr_common(vport, mac_addr); - if (status) + if (status) { hclge_add_uc_addr_common(vport, old_addr); + } else { + hclge_rm_vport_mac_table(vport, mac_addr, + false, HCLGE_MAC_ADDR_UC); + hclge_add_vport_mac_table(vport, mac_addr, + HCLGE_MAC_ADDR_UC); + } } else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_ADD) { status = hclge_add_uc_addr_common(vport, mac_addr); + if (!status) + hclge_add_vport_mac_table(vport, mac_addr, + HCLGE_MAC_ADDR_UC); } else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_REMOVE) { status = hclge_rm_uc_addr_common(vport, mac_addr); + if (!status) + hclge_rm_vport_mac_table(vport, mac_addr, + false, HCLGE_MAC_ADDR_UC); } else { dev_err(&hdev->pdev->dev, "failed to set unicast mac addr, unknown subcode %d\n", @@ -255,8 +267,14 @@ static int hclge_set_vf_mc_mac_addr(struct hclge_vport *vport, if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_MC_ADD) { status = hclge_add_mc_addr_common(vport, mac_addr); + if (!status) + hclge_add_vport_mac_table(vport, mac_addr, + HCLGE_MAC_ADDR_MC); } else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_MC_REMOVE) { status = hclge_rm_mc_addr_common(vport, mac_addr); + if (!status) + hclge_rm_vport_mac_table(vport, mac_addr, + false, HCLGE_MAC_ADDR_MC); } else { dev_err(&hdev->pdev->dev, "failed to set mcast mac addr, unknown subcode %d\n", @@ -585,6 +603,14 @@ void hclge_mbx_handler(struct hclge_dev *hdev) case HCLGE_MBX_GET_LINK_MODE: hclge_get_link_mode(vport, req); break; + case HCLGE_MBX_GET_VF_FLR_STATUS: + mutex_lock(&hdev->vport_cfg_mutex); + hclge_rm_vport_all_mac_table(vport, true, + HCLGE_MAC_ADDR_UC); + hclge_rm_vport_all_mac_table(vport, true, + HCLGE_MAC_ADDR_MC); + mutex_unlock(&hdev->vport_cfg_mutex); + break; default: dev_err(&hdev->pdev->dev, "un-supported mailbox message, code = %d\n", From c6075b193462d9a3930fb41f587f94720658752a Mon Sep 17 00:00:00 2001 From: liuzhongzhu Date: Wed, 20 Feb 2019 10:32:49 +0800 Subject: [PATCH 1433/1436] net: hns3: Record VF vlan tables Record the vlan tables that the VF sends to the chip. After the VF exception, the PF actively clears the VF to chip config. Signed-off-by: liuzhongzhu Signed-off-by: Peng Li Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_main.c | 80 +++++++++++++++++++ .../hisilicon/hns3/hns3pf/hclge_main.h | 13 ++- .../hisilicon/hns3/hns3pf/hclge_mbx.c | 4 + 3 files changed, 96 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index b1f5c6fa6954..4a596460271c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1329,6 +1329,7 @@ static int hclge_alloc_vport(struct hclge_dev *hdev) vport->back = hdev; vport->vport_id = i; vport->mps = HCLGE_MAC_DEFAULT_FRAME; + INIT_LIST_HEAD(&vport->vlan_list); INIT_LIST_HEAD(&vport->uc_mac_list); INIT_LIST_HEAD(&vport->mc_mac_list); @@ -6746,6 +6747,84 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev) return hclge_set_vlan_filter(handle, htons(ETH_P_8021Q), 0, false); } +void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id) +{ + struct hclge_vport_vlan_cfg *vlan; + + /* vlan 0 is reserved */ + if (!vlan_id) + return; + + vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); + if (!vlan) + return; + + vlan->hd_tbl_status = true; + vlan->vlan_id = vlan_id; + + list_add_tail(&vlan->node, &vport->vlan_list); +} + +void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, + bool is_write_tbl) +{ + struct hclge_vport_vlan_cfg *vlan, *tmp; + struct hclge_dev *hdev = vport->back; + + list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { + if (vlan->vlan_id == vlan_id) { + if (is_write_tbl && vlan->hd_tbl_status) + hclge_set_vlan_filter_hw(hdev, + htons(ETH_P_8021Q), + vport->vport_id, + vlan_id, 0, + true); + + list_del(&vlan->node); + kfree(vlan); + break; + } + } +} + +void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list) +{ + struct hclge_vport_vlan_cfg *vlan, *tmp; + struct hclge_dev *hdev = vport->back; + + list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { + if (vlan->hd_tbl_status) + hclge_set_vlan_filter_hw(hdev, + htons(ETH_P_8021Q), + vport->vport_id, + vlan->vlan_id, 0, + true); + + vlan->hd_tbl_status = false; + if (is_del_list) { + list_del(&vlan->node); + kfree(vlan); + } + } +} + +void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) +{ + struct hclge_vport_vlan_cfg *vlan, *tmp; + struct hclge_vport *vport; + int i; + + mutex_lock(&hdev->vport_cfg_mutex); + for (i = 0; i < hdev->num_alloc_vport; i++) { + vport = &hdev->vport[i]; + list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { + list_del(&vlan->node); + kfree(vlan); + } + } + mutex_unlock(&hdev->vport_cfg_mutex); +} + int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -7722,6 +7801,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_pci_uninit(hdev); mutex_destroy(&hdev->vport_lock); hclge_uninit_vport_mac_table(hdev); + hclge_uninit_vport_vlan_table(hdev); mutex_destroy(&hdev->vport_cfg_mutex); ae_dev->priv = NULL; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 781bd1145e2c..abbc58ea4864 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -643,6 +643,12 @@ enum HCLGE_MAC_ADDR_TYPE { HCLGE_MAC_ADDR_MC }; +struct hclge_vport_vlan_cfg { + struct list_head node; + int hd_tbl_status; + u16 vlan_id; +}; + /* For each bit of TCAM entry, it uses a pair of 'x' and * 'y' to indicate which value to match, like below: * ---------------------------------- @@ -854,6 +860,7 @@ struct hclge_vport { struct list_head uc_mac_list; /* Store VF unicast table */ struct list_head mc_mac_list; /* Store VF multicast table */ + struct list_head vlan_list; /* Store VF vlan table */ }; void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc, @@ -916,5 +923,9 @@ void hclge_rm_vport_mac_table(struct hclge_vport *vport, const u8 *mac_addr, void hclge_rm_vport_all_mac_table(struct hclge_vport *vport, bool is_del_list, enum HCLGE_MAC_ADDR_TYPE mac_type); void hclge_uninit_vport_mac_table(struct hclge_dev *hdev); - +void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id); +void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, + bool is_write_tbl); +void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list); +void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 476ca8399446..9e0952c329bb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -305,6 +305,9 @@ static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport, memcpy(&proto, &mbx_req->msg[5], sizeof(proto)); status = hclge_set_vlan_filter(handle, cpu_to_be16(proto), vlan, is_kill); + if (!status) + is_kill ? hclge_rm_vport_vlan_table(vport, vlan, false) + : hclge_add_vport_vlan_table(vport, vlan); } else if (mbx_req->msg[1] == HCLGE_MBX_VLAN_RX_OFF_CFG) { struct hnae3_handle *handle = &vport->nic; bool en = mbx_req->msg[2] ? true : false; @@ -609,6 +612,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev) HCLGE_MAC_ADDR_UC); hclge_rm_vport_all_mac_table(vport, true, HCLGE_MAC_ADDR_MC); + hclge_rm_vport_all_vlan_table(vport, true); mutex_unlock(&hdev->vport_cfg_mutex); break; default: From 232d0d55fca6ab589d5267c74a376210f341d71e Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Wed, 20 Feb 2019 10:32:50 +0800 Subject: [PATCH 1434/1436] net: hns3: uninitialize command queue while unloading PF driver According to the hardware's description, the driver should clear the command queue's registers when uloading driver. Otherwise, these existing value may lead the IMP get into a wrong state. Also this patch adds hclge_cmd_uninit() to do the command queue uninitialization which includes clearing registers and freeing memory. Fixes: 68c0a5c70614 ("net: hns3: Add HNS3 IMP(Integrated Mgmt Proc) Cmd Interface Support") Signed-off-by: Huazhong Tan Signed-off-by: Peng Li Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_cmd.c | 26 +++++++++++++++++++ .../hisilicon/hns3/hns3pf/hclge_cmd.h | 2 +- .../hisilicon/hns3/hns3pf/hclge_main.c | 4 +-- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 1dddfcba41a9..3a093a92eac5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -390,6 +390,20 @@ int hclge_cmd_init(struct hclge_dev *hdev) return 0; } +static void hclge_cmd_uninit_regs(struct hclge_hw *hw) +{ + hclge_write_dev(hw, HCLGE_NIC_CSQ_BASEADDR_L_REG, 0); + hclge_write_dev(hw, HCLGE_NIC_CSQ_BASEADDR_H_REG, 0); + hclge_write_dev(hw, HCLGE_NIC_CSQ_DEPTH_REG, 0); + hclge_write_dev(hw, HCLGE_NIC_CSQ_HEAD_REG, 0); + hclge_write_dev(hw, HCLGE_NIC_CSQ_TAIL_REG, 0); + hclge_write_dev(hw, HCLGE_NIC_CRQ_BASEADDR_L_REG, 0); + hclge_write_dev(hw, HCLGE_NIC_CRQ_BASEADDR_H_REG, 0); + hclge_write_dev(hw, HCLGE_NIC_CRQ_DEPTH_REG, 0); + hclge_write_dev(hw, HCLGE_NIC_CRQ_HEAD_REG, 0); + hclge_write_dev(hw, HCLGE_NIC_CRQ_TAIL_REG, 0); +} + static void hclge_destroy_queue(struct hclge_cmq_ring *ring) { spin_lock(&ring->lock); @@ -402,3 +416,15 @@ void hclge_destroy_cmd_queue(struct hclge_hw *hw) hclge_destroy_queue(&hw->cmq.csq); hclge_destroy_queue(&hw->cmq.crq); } + +void hclge_cmd_uninit(struct hclge_dev *hdev) +{ + spin_lock_bh(&hdev->hw.cmq.csq.lock); + spin_lock(&hdev->hw.cmq.crq.lock); + set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); + hclge_cmd_uninit_regs(&hdev->hw); + spin_unlock(&hdev->hw.cmq.crq.lock); + spin_unlock_bh(&hdev->hw.cmq.csq.lock); + + hclge_destroy_cmd_queue(&hdev->hw); +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index ac2cd219a9fb..bad21c89f6b7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -975,6 +975,6 @@ enum hclge_cmd_status hclge_cmd_mdio_write(struct hclge_hw *hw, enum hclge_cmd_status hclge_cmd_mdio_read(struct hclge_hw *hw, struct hclge_desc *desc); -void hclge_destroy_cmd_queue(struct hclge_hw *hw); +void hclge_cmd_uninit(struct hclge_dev *hdev); int hclge_cmd_queue_init(struct hclge_dev *hdev); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 4a596460271c..5c8f2e4d71c9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -7669,7 +7669,7 @@ err_msi_irq_uninit: err_msi_uninit: pci_free_irq_vectors(pdev); err_cmd_uninit: - hclge_destroy_cmd_queue(&hdev->hw); + hclge_cmd_uninit(hdev); err_pci_uninit: pcim_iounmap(pdev, hdev->hw.io_base); pci_clear_master(pdev); @@ -7796,7 +7796,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) synchronize_irq(hdev->misc_vector.vector_irq); hclge_hw_error_set_state(hdev, false); - hclge_destroy_cmd_queue(&hdev->hw); + hclge_cmd_uninit(hdev); hclge_misc_irq_uninit(hdev); hclge_pci_uninit(hdev); mutex_destroy(&hdev->vport_lock); From 34f81f049e354082f638820afe5e59bbd243fb18 Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Wed, 20 Feb 2019 10:32:51 +0800 Subject: [PATCH 1435/1436] net: hns3: clear command queue's registers when unloading VF driver According to the hardware's description, the driver should clear the command queue's registers when uloading VF driver. Otherwise, these existing value may lead the IMP get into a wrong state. Fixes: fedd0c15d288 ("net: hns3: Add HNS3 VF IMP(Integrated Management Proc) cmd interface") Signed-off-by: Huazhong Tan Signed-off-by: Peng Li Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3vf/hclgevf_cmd.c | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c index 4e78e8812a04..9441b453d38d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c @@ -362,8 +362,28 @@ int hclgevf_cmd_init(struct hclgevf_dev *hdev) return 0; } +static void hclgevf_cmd_uninit_regs(struct hclgevf_hw *hw) +{ + hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_BASEADDR_L_REG, 0); + hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_BASEADDR_H_REG, 0); + hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_DEPTH_REG, 0); + hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_HEAD_REG, 0); + hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_TAIL_REG, 0); + hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_BASEADDR_L_REG, 0); + hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_BASEADDR_H_REG, 0); + hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_DEPTH_REG, 0); + hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_HEAD_REG, 0); + hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_TAIL_REG, 0); +} + void hclgevf_cmd_uninit(struct hclgevf_dev *hdev) { + spin_lock_bh(&hdev->hw.cmq.csq.lock); + spin_lock(&hdev->hw.cmq.crq.lock); + clear_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state); + hclgevf_cmd_uninit_regs(&hdev->hw); + spin_unlock(&hdev->hw.cmq.crq.lock); + spin_unlock_bh(&hdev->hw.cmq.csq.lock); hclgevf_free_cmd_desc(&hdev->hw.cmq.csq); hclgevf_free_cmd_desc(&hdev->hw.cmq.crq); } From 7a25c6c0aac85bbc50d3ce49cd08888adb14508b Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 21 Feb 2019 18:26:46 -0800 Subject: [PATCH 1436/1436] rocker: Add missing break for PRE_BRIDGE_FLAGS A missing break keyword should have been added after adding support for PRE_BRIDGE_FLAGS. Reported-by: Stephen Rothwell Fixes: 93700458ff63 ("rocker: Check Handle PORT_PRE_BRIDGE_FLAGS") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 8200fbf91306..309a6bf9130c 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2083,6 +2083,7 @@ static int rocker_port_attr_set(struct net_device *dev, err = rocker_world_port_attr_pre_bridge_flags_set(rocker_port, attr->u.brport_flags, trans); + break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: err = rocker_world_port_attr_bridge_flags_set(rocker_port, attr->u.brport_flags,