From 5647b25c3335a25ba32d73e61850a374a708788a Mon Sep 17 00:00:00 2001 From: Jonathan Liu Date: Mon, 26 Sep 2016 20:21:45 +1000 Subject: [PATCH 001/305] drm/sun4i: rgb: Enable panel after controller The panel should be enabled after the controller so that we do not have visual glitches on the panel while the controller is setup. Similarly, the panel should be disabled before the controller. Signed-off-by: Jonathan Liu Reviewed-by: Sean Paul Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_rgb.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c index c3ff10f559cc..4e4bea6f395c 100644 --- a/drivers/gpu/drm/sun4i/sun4i_rgb.c +++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c @@ -152,15 +152,16 @@ static void sun4i_rgb_encoder_enable(struct drm_encoder *encoder) DRM_DEBUG_DRIVER("Enabling RGB output\n"); - if (!IS_ERR(tcon->panel)) { + if (!IS_ERR(tcon->panel)) drm_panel_prepare(tcon->panel); - drm_panel_enable(tcon->panel); - } /* encoder->bridge can be NULL; drm_bridge_enable checks for it */ drm_bridge_enable(encoder->bridge); sun4i_tcon_channel_enable(tcon, 0); + + if (!IS_ERR(tcon->panel)) + drm_panel_enable(tcon->panel); } static void sun4i_rgb_encoder_disable(struct drm_encoder *encoder) @@ -171,15 +172,16 @@ static void sun4i_rgb_encoder_disable(struct drm_encoder *encoder) DRM_DEBUG_DRIVER("Disabling RGB output\n"); + if (!IS_ERR(tcon->panel)) + drm_panel_disable(tcon->panel); + sun4i_tcon_channel_disable(tcon, 0); /* encoder->bridge can be NULL; drm_bridge_disable checks for it */ drm_bridge_disable(encoder->bridge); - if (!IS_ERR(tcon->panel)) { - drm_panel_disable(tcon->panel); + if (!IS_ERR(tcon->panel)) drm_panel_unprepare(tcon->panel); - } } static void sun4i_rgb_encoder_mode_set(struct drm_encoder *encoder, From 0df03b43035afd0a64916fe4e5bca978562ffa5a Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 29 Sep 2016 14:05:05 +0200 Subject: [PATCH 002/305] drm/sun4i: rgb: Remove the bridge enable/disable functions The atomic helpers already call the drm_bridge_enable on our behalf, there's no need to do it a second time. Reported-by: Sean Paul Reviewed-by: Sean Paul Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_rgb.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c index 4e4bea6f395c..d198ad7e5323 100644 --- a/drivers/gpu/drm/sun4i/sun4i_rgb.c +++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c @@ -155,9 +155,6 @@ static void sun4i_rgb_encoder_enable(struct drm_encoder *encoder) if (!IS_ERR(tcon->panel)) drm_panel_prepare(tcon->panel); - /* encoder->bridge can be NULL; drm_bridge_enable checks for it */ - drm_bridge_enable(encoder->bridge); - sun4i_tcon_channel_enable(tcon, 0); if (!IS_ERR(tcon->panel)) @@ -177,9 +174,6 @@ static void sun4i_rgb_encoder_disable(struct drm_encoder *encoder) sun4i_tcon_channel_disable(tcon, 0); - /* encoder->bridge can be NULL; drm_bridge_disable checks for it */ - drm_bridge_disable(encoder->bridge); - if (!IS_ERR(tcon->panel)) drm_panel_unprepare(tcon->panel); } From 0ce267ff95a0302cf6fb2a552833abbfb7861a43 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 18 Oct 2016 15:36:48 +0200 Subject: [PATCH 003/305] fuse: fix root dentry initialization Add missing dentry initialization to root dentry. Fixes: f75fdf22b0a8 ("fuse: don't use ->d_time") Reported-by: Andreas Reis Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 5 +++++ fs/fuse/fuse_i.h | 1 + fs/fuse/inode.c | 3 ++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 6a4d0e5418a1..b3ebe512d64c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -286,6 +286,11 @@ const struct dentry_operations fuse_dentry_operations = { .d_release = fuse_dentry_release, }; +const struct dentry_operations fuse_root_dentry_operations = { + .d_init = fuse_dentry_init, + .d_release = fuse_dentry_release, +}; + int fuse_valid_type(int m) { return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) || diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 0dfbb136e59a..91307940c8ac 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -692,6 +692,7 @@ static inline u64 get_node_id(struct inode *inode) extern const struct file_operations fuse_dev_operations; extern const struct dentry_operations fuse_dentry_operations; +extern const struct dentry_operations fuse_root_dentry_operations; /** * Inode to nodeid comparison. diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 17141099f2e7..6fe6a88ecb4a 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1131,10 +1131,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) err = -ENOMEM; root = fuse_get_root_inode(sb, d.rootmode); + sb->s_d_op = &fuse_root_dentry_operations; root_dentry = d_make_root(root); if (!root_dentry) goto err_dev_free; - /* only now - we want root dentry with NULL ->d_op */ + /* Root dentry doesn't have .d_revalidate */ sb->s_d_op = &fuse_dentry_operations; init_req = fuse_request_alloc(0); From f95df7d6cd92787d54c9ad3d4843f9bcd137f9db Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 17 Oct 2016 15:16:35 +0000 Subject: [PATCH 004/305] dmaengine: edma: Fix error return code in edma_alloc_chan_resources() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: Vinod Koul --- drivers/dma/edma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index e18a58068bca..77242b37ef87 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -1628,6 +1628,7 @@ static int edma_alloc_chan_resources(struct dma_chan *chan) if (echan->slot[0] < 0) { dev_err(dev, "Entry slot allocation failed for channel %u\n", EDMA_CHAN_SLOT(echan->ch_num)); + ret = echan->slot[0]; goto err_slot; } From d6619761068cf573cae406f176d00b82a39a37fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Lefaure?= Date: Thu, 6 Oct 2016 17:59:53 -0400 Subject: [PATCH 005/305] dmaengine: mmp_tdma: add missing select GENERIC_ALLOCATOR in Kconfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are some compilation errors when CONFIG_MMP_TDMA is enabled and CONFIG_GENERIC_ALLOCATOR is disabled: drivers/built-in.o: In function `mmp_tdma_prep_dma_cyclic': mmp_tdma.c:(.text+0x7890e): undefined reference to `gen_pool_dma_alloc' drivers/built-in.o: In function `mmp_tdma_free_chan_resources': mmp_tdma.c:(.text+0x78aca): undefined reference to `gen_pool_free' drivers/built-in.o: In function `mmp_tdma_probe': mmp_tdma.c:(.text+0x78ea8): undefined reference to `of_gen_pool_get' This commit fix this problem by selecting GENERIC_ALLOCATOR when CONFIG_MMP_TDMA is enabled. Signed-off-by: Jérémy Lefaure Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index af63a6bcf564..141aefbe37ec 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -306,6 +306,7 @@ config MMP_TDMA depends on ARCH_MMP || COMPILE_TEST select DMA_ENGINE select MMP_SRAM if ARCH_MMP + select GENERIC_ALLOCATOR help Support the MMP Two-Channel DMA engine. This engine used for MMP Audio DMA and pxa910 SQU. From 83ba62bc700bab710b22be3a1bf6cf973f754273 Mon Sep 17 00:00:00 2001 From: Bibby Hsieh Date: Tue, 18 Oct 2016 16:23:59 +0800 Subject: [PATCH 006/305] drm/mediatek: fix a typo of OD_CFG to OD_RELAYMODE If we want to set the hardware OD to relay mode, we have to set OD_CFG register rather than OD_RELAYMODE; otherwise, the system will access the wrong address. Fixes: 7216436420414144646f5d8343d061355fd23483 ("drm/mediatek: set mt8173 dithering function") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Bibby Hsieh Acked-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c index df33b3ca6ffd..aa5f20fabd10 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c @@ -123,7 +123,7 @@ static void mtk_od_config(struct mtk_ddp_comp *comp, unsigned int w, unsigned int bpc) { writel(w << 16 | h, comp->regs + DISP_OD_SIZE); - writel(OD_RELAYMODE, comp->regs + OD_RELAYMODE); + writel(OD_RELAYMODE, comp->regs + OD_CFG); mtk_dither_set(comp, bpc, DISP_OD_CFG); } From f752fff611b99f5679224f3990a1f531ea64b1ec Mon Sep 17 00:00:00 2001 From: Bibby Hsieh Date: Thu, 29 Sep 2016 11:29:48 +0800 Subject: [PATCH 007/305] drm/mediatek: set vblank_disable_allowed to true MTK DRM driver didn't set the vblank_disable_allowed to true, it cause that the irq_handler is called every 16.6 ms (every vblank) when the display didn't be updated. Signed-off-by: Bibby Hsieh Acked-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index cf83f6507ec8..0b2ae47eb52c 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -217,6 +217,7 @@ static int mtk_drm_kms_init(struct drm_device *drm) if (ret < 0) goto err_component_unbind; + drm->vblank_disable_allowed = true; drm_kms_helper_poll_init(drm); drm_mode_config_reset(drm); From 56e4b1e183555c74097fa012f1606b22223f027b Mon Sep 17 00:00:00 2001 From: Bibby Hsieh Date: Thu, 29 Sep 2016 11:29:49 +0800 Subject: [PATCH 008/305] drm/mediatek: clear IRQ status before enable OVL interrupt To make sure that the first vblank IRQ after enabling vblank isn't too short or immediate, we have to clear the IRQ status before enable OVL interrupt. Signed-off-by: Bibby Hsieh Acked-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_disp_ovl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index 019b7ca392d7..f75c5b5a536c 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -80,6 +80,7 @@ static void mtk_ovl_enable_vblank(struct mtk_ddp_comp *comp, ddp_comp); priv->crtc = crtc; + writel(0x0, comp->regs + DISP_REG_OVL_INTSTA); writel_relaxed(OVL_FME_CPL_INT, comp->regs + DISP_REG_OVL_INTEN); } From d542b7c473f0eb34455974d66ea93653b3eb40ce Mon Sep 17 00:00:00 2001 From: Junzhi Zhao Date: Thu, 29 Sep 2016 11:02:13 +0800 Subject: [PATCH 009/305] drm/mediatek: do mtk_hdmi_send_infoframe after HDMI clock enable The mtk_hdmi_send_infoframe have to be run after PLL and PIXEL clock of HDMI enable. Make sure that HDMI inforframes can be sent successfully. Signed-off-by: Junzhi Zhao Signed-off-by: Bibby Hsieh --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 71227deef21b..0e8c4d9af340 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1133,12 +1133,6 @@ static int mtk_hdmi_output_set_display_mode(struct mtk_hdmi *hdmi, phy_power_on(hdmi->phy); mtk_hdmi_aud_output_config(hdmi, mode); - mtk_hdmi_setup_audio_infoframe(hdmi); - mtk_hdmi_setup_avi_infoframe(hdmi, mode); - mtk_hdmi_setup_spd_infoframe(hdmi, "mediatek", "On-chip HDMI"); - if (mode->flags & DRM_MODE_FLAG_3D_MASK) - mtk_hdmi_setup_vendor_specific_infoframe(hdmi, mode); - mtk_hdmi_hw_vid_black(hdmi, false); mtk_hdmi_hw_aud_unmute(hdmi); mtk_hdmi_hw_send_av_unmute(hdmi); @@ -1401,6 +1395,16 @@ static void mtk_hdmi_bridge_pre_enable(struct drm_bridge *bridge) hdmi->powered = true; } +static void mtk_hdmi_send_infoframe(struct mtk_hdmi *hdmi, + struct drm_display_mode *mode) +{ + mtk_hdmi_setup_audio_infoframe(hdmi); + mtk_hdmi_setup_avi_infoframe(hdmi, mode); + mtk_hdmi_setup_spd_infoframe(hdmi, "mediatek", "On-chip HDMI"); + if (mode->flags & DRM_MODE_FLAG_3D_MASK) + mtk_hdmi_setup_vendor_specific_infoframe(hdmi, mode); +} + static void mtk_hdmi_bridge_enable(struct drm_bridge *bridge) { struct mtk_hdmi *hdmi = hdmi_ctx_from_bridge(bridge); @@ -1409,6 +1413,7 @@ static void mtk_hdmi_bridge_enable(struct drm_bridge *bridge) clk_prepare_enable(hdmi->clk[MTK_HDMI_CLK_HDMI_PLL]); clk_prepare_enable(hdmi->clk[MTK_HDMI_CLK_HDMI_PIXEL]); phy_power_on(hdmi->phy); + mtk_hdmi_send_infoframe(hdmi, &hdmi->mode); hdmi->enabled = true; } From 968253bd7caae5621f6806dd5055353fe33d366e Mon Sep 17 00:00:00 2001 From: Junzhi Zhao Date: Thu, 29 Sep 2016 11:02:14 +0800 Subject: [PATCH 010/305] drm/mediatek: enhance the HDMI driving current In order to improve 4K resolution performance, we have to enhance the HDMI driving current when clock rate is greater than 165MHz. Signed-off-by: Junzhi Zhao Signed-off-by: Bibby Hsieh --- .../gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c | 42 +++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c index 8a24754b440f..51cb9cfb6646 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c @@ -265,6 +265,9 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); unsigned int pre_div; unsigned int div; + unsigned int pre_ibias; + unsigned int hdmi_ibias; + unsigned int imp_en; dev_dbg(hdmi_phy->dev, "%s: %lu Hz, parent: %lu Hz\n", __func__, rate, parent_rate); @@ -298,18 +301,31 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, (0x1 << PLL_BR_SHIFT), RG_HDMITX_PLL_BP | RG_HDMITX_PLL_BC | RG_HDMITX_PLL_BR); - mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON3, RG_HDMITX_PRD_IMP_EN); + if (rate < 165000000) { + mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON3, + RG_HDMITX_PRD_IMP_EN); + pre_ibias = 0x3; + imp_en = 0x0; + hdmi_ibias = hdmi_phy->ibias; + } else { + mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON3, + RG_HDMITX_PRD_IMP_EN); + pre_ibias = 0x6; + imp_en = 0xf; + hdmi_ibias = hdmi_phy->ibias_up; + } mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON4, - (0x3 << PRD_IBIAS_CLK_SHIFT) | - (0x3 << PRD_IBIAS_D2_SHIFT) | - (0x3 << PRD_IBIAS_D1_SHIFT) | - (0x3 << PRD_IBIAS_D0_SHIFT), + (pre_ibias << PRD_IBIAS_CLK_SHIFT) | + (pre_ibias << PRD_IBIAS_D2_SHIFT) | + (pre_ibias << PRD_IBIAS_D1_SHIFT) | + (pre_ibias << PRD_IBIAS_D0_SHIFT), RG_HDMITX_PRD_IBIAS_CLK | RG_HDMITX_PRD_IBIAS_D2 | RG_HDMITX_PRD_IBIAS_D1 | RG_HDMITX_PRD_IBIAS_D0); mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON3, - (0x0 << DRV_IMP_EN_SHIFT), RG_HDMITX_DRV_IMP_EN); + (imp_en << DRV_IMP_EN_SHIFT), + RG_HDMITX_DRV_IMP_EN); mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON6, (hdmi_phy->drv_imp_clk << DRV_IMP_CLK_SHIFT) | (hdmi_phy->drv_imp_d2 << DRV_IMP_D2_SHIFT) | @@ -318,12 +334,14 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, RG_HDMITX_DRV_IMP_CLK | RG_HDMITX_DRV_IMP_D2 | RG_HDMITX_DRV_IMP_D1 | RG_HDMITX_DRV_IMP_D0); mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON5, - (hdmi_phy->ibias << DRV_IBIAS_CLK_SHIFT) | - (hdmi_phy->ibias << DRV_IBIAS_D2_SHIFT) | - (hdmi_phy->ibias << DRV_IBIAS_D1_SHIFT) | - (hdmi_phy->ibias << DRV_IBIAS_D0_SHIFT), - RG_HDMITX_DRV_IBIAS_CLK | RG_HDMITX_DRV_IBIAS_D2 | - RG_HDMITX_DRV_IBIAS_D1 | RG_HDMITX_DRV_IBIAS_D0); + (hdmi_ibias << DRV_IBIAS_CLK_SHIFT) | + (hdmi_ibias << DRV_IBIAS_D2_SHIFT) | + (hdmi_ibias << DRV_IBIAS_D1_SHIFT) | + (hdmi_ibias << DRV_IBIAS_D0_SHIFT), + RG_HDMITX_DRV_IBIAS_CLK | + RG_HDMITX_DRV_IBIAS_D2 | + RG_HDMITX_DRV_IBIAS_D1 | + RG_HDMITX_DRV_IBIAS_D0); return 0; } From 0d2200794f0a2c1ebb3b6613842914d8ce4b67f9 Mon Sep 17 00:00:00 2001 From: Junzhi Zhao Date: Thu, 29 Sep 2016 11:02:15 +0800 Subject: [PATCH 011/305] drm/mediatek: modify the factor to make the pll_rate set in the 1G-2G range Currently, the code sets the "pll" to the desired multiple of the pixel clock manully(4*3m 8*3,etc). The valid range of the pll is 1G-2G, however, when the pixel clock is bigger than 167MHz, the "pll" will be set to a invalid value( > 2G), then the "pll" will be 2GHz, thus the pixel clock will be in correct. Change the factor to make the "pll" be set in the (1G, 2G) range. Signed-off-by: Junzhi Zhao Signed-off-by: Bibby Hsieh --- drivers/gpu/drm/mediatek/mtk_dpi.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index 0186e500d2a5..90fb831ef031 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -432,11 +432,16 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, unsigned long pll_rate; unsigned int factor; + /* let pll_rate can fix the valid range of tvdpll (1G~2GHz) */ pix_rate = 1000UL * mode->clock; - if (mode->clock <= 74000) + if (mode->clock <= 27000) + factor = 16 * 3; + else if (mode->clock <= 84000) factor = 8 * 3; - else + else if (mode->clock <= 167000) factor = 4 * 3; + else + factor = 2 * 3; pll_rate = pix_rate * factor; dev_dbg(dpi->dev, "Want PLL %lu Hz, pixel clock %lu Hz\n", From faead41cc7213ccef5a58c1bf518ac24816fe8a6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Sep 2016 10:31:41 +0200 Subject: [PATCH 012/305] iwlwifi: pcie: mark command queue lock with separate lockdep class Emmanuel reports that when CMD_WANT_ASYNC_CALLBACK is used by mvm, the callback will be called with the command queue lock held, and mvm will try to stop all (other) TX queues, which acquires their locks - this caused a false lockdep recursive locking report. Suppress this report by marking the command queue lock with a new, separate, lock class so lockdep can tell the difference between the two types of queues. Fixes: 156f92f2b471 ("iwlwifi: block the queues when we send ADD_STA for uAPSD") Reported-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index e9a278b60dfd..5f840f16f40b 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -592,6 +592,7 @@ error: static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq, int slots_num, u32 txq_id) { + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); int ret; txq->need_update = false; @@ -606,6 +607,13 @@ static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq, return ret; spin_lock_init(&txq->lock); + + if (txq_id == trans_pcie->cmd_queue) { + static struct lock_class_key iwl_pcie_cmd_queue_lock_class; + + lockdep_set_class(&txq->lock, &iwl_pcie_cmd_queue_lock_class); + } + __skb_queue_head_init(&txq->overflow_q); /* From 276c4b4b74b6d5bc3cab35534409f3ad32464b78 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 28 Sep 2016 11:32:35 +0300 Subject: [PATCH 013/305] iwlwifi: mvm: use ssize_t for len in iwl_debugfs_mem_read() In iwl_dbgfs_mem_read(), the len variable may become negative and is compared to < 0 (an error case). Comparing size_t (which is unsigned) to < 0 causes a warning on certain platforms (like i386): drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c:1561:5-8: WARNING: Unsigned expression compared with zero: len < 0 To prevent that, use ssize_t for len instead. Fixes: commit 2b55f43f8e47 ("iwlwifi: mvm: Add mem debugfs entry") Reported-by: kbuild test robot Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index 539d718df797..06805a63f091 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -1529,8 +1529,8 @@ static ssize_t iwl_dbgfs_mem_read(struct file *file, char __user *user_buf, .data = { &cmd, }, .len = { sizeof(cmd) }, }; - size_t delta, len; - ssize_t ret; + size_t delta; + ssize_t ret, len; hcmd.id = iwl_cmd_id(*ppos >> 24 ? UMAC_RD_WR : LMAC_RD_WR, DEBUG_GROUP, 0); From 85cd69b8f1f7e289fe931a82889e673fd0f04842 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 5 Oct 2016 11:24:12 +0300 Subject: [PATCH 014/305] iwlwifi: mvm: fix d3_test with unified D0/D3 images When a unified D0/D3 image is used, we don't restart the FW in the D0->D3->D0 transitions. Therefore, the d3_test functionality should not call ieee8021_restart_hw() when the resuming either. Fixes: commit 23ae61282b88 ("iwlwifi: mvm: Do not switch to D3 image on suspend") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 25 ++++++++++++--------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 4fdc3dad3e85..0e17cb238643 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -2271,7 +2271,8 @@ static void iwl_mvm_d3_test_disconn_work_iter(void *_data, u8 *mac, static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file) { struct iwl_mvm *mvm = inode->i_private; - int remaining_time = 10; + bool unified_image = fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG); mvm->d3_test_active = false; @@ -2282,18 +2283,22 @@ static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file) mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_DISABLED; iwl_abort_notification_waits(&mvm->notif_wait); - ieee80211_restart_hw(mvm->hw); + if (!unified_image) { + int remaining_time = 10; - /* wait for restart and disconnect all interfaces */ - while (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) && - remaining_time > 0) { - remaining_time--; - msleep(1000); + ieee80211_restart_hw(mvm->hw); + + /* wait for restart and disconnect all interfaces */ + while (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) && + remaining_time > 0) { + remaining_time--; + msleep(1000); + } + + if (remaining_time == 0) + IWL_ERR(mvm, "Timed out waiting for HW restart!\n"); } - if (remaining_time == 0) - IWL_ERR(mvm, "Timed out waiting for HW restart to finish!\n"); - ieee80211_iterate_active_interfaces_atomic( mvm->hw, IEEE80211_IFACE_ITER_NORMAL, iwl_mvm_d3_test_disconn_work_iter, mvm->keep_vif); From 5bfadc8255e2cd92be7538fd7dfa777c27f58be0 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Mon, 12 Sep 2016 10:24:19 +0300 Subject: [PATCH 015/305] iwlwifi: mvm: comply with fw_restart mod param on suspend If the suspend flow fails, we restart the hardware to go back to the D0 image (with non-unified images), but we don't comply with the fw_restart module parameter. If something goes wrong when starting the D3 image, we may want to debug it, so we should comply with the fw_restart flag to avoid clearing everything up and losing the firmware state when the error occurred. Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 0e17cb238643..03a8fc586548 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -1254,7 +1254,10 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw, out: if (ret < 0) { iwl_mvm_ref(mvm, IWL_MVM_REF_UCODE_DOWN); - ieee80211_restart_hw(mvm->hw); + if (mvm->restart_fw > 0) { + mvm->restart_fw--; + ieee80211_restart_hw(mvm->hw); + } iwl_mvm_free_nd(mvm); } out_noreset: From 3a732c65de427fdae67a243fd331356034b5a1e8 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sun, 9 Oct 2016 17:34:24 +0300 Subject: [PATCH 016/305] iwlwifi: mvm: wake the wait queue when the RX sync counter is zero When we sync the RX queues the driver waits to receive echo notification on all the RX queues. The wait queue is set with timeout until all queues have received the notification. However, iwl_mvm_rx_queue_notif() never woke up the wait queue, with the result of the counter value being checked only when the timeout expired. This may cause a latency of up to 1 second. Fixes: 0636b938214c ("iwlwifi: mvm: implement driver RX queues sync command") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 3 +-- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 1 + drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 1 + drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 3 ++- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 318efd814037..1db1dc13e988 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4121,7 +4121,6 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm, struct iwl_mvm_internal_rxq_notif *notif, u32 size) { - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(notif_waitq); u32 qmask = BIT(mvm->trans->num_rx_queues) - 1; int ret; @@ -4143,7 +4142,7 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm, } if (notif->sync) - ret = wait_event_timeout(notif_waitq, + ret = wait_event_timeout(mvm->rx_sync_waitq, atomic_read(&mvm->queue_sync_counter) == 0, HZ); WARN_ON_ONCE(!ret); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index d17cbf603f7c..c60703e0c246 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -937,6 +937,7 @@ struct iwl_mvm { /* sync d0i3_tx queue and IWL_MVM_STATUS_IN_D0I3 status flag */ spinlock_t d0i3_tx_lock; wait_queue_head_t d0i3_exit_waitq; + wait_queue_head_t rx_sync_waitq; /* BT-Coex */ struct iwl_bt_coex_profile_notif last_bt_notif; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 05fe6dd1a2c8..4d35deb628bc 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -619,6 +619,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, spin_lock_init(&mvm->refs_lock); skb_queue_head_init(&mvm->d0i3_tx); init_waitqueue_head(&mvm->d0i3_exit_waitq); + init_waitqueue_head(&mvm->rx_sync_waitq); atomic_set(&mvm->queue_sync_counter, 0); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index a57c6ef5bc14..6c802cee900c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -547,7 +547,8 @@ void iwl_mvm_rx_queue_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb, "Received expired RX queue sync message\n"); return; } - atomic_dec(&mvm->queue_sync_counter); + if (!atomic_dec_return(&mvm->queue_sync_counter)) + wake_up(&mvm->rx_sync_waitq); } switch (internal_notif->type) { From aa156c8aee22a24865bf94d0c4a5f604f687fa7d Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 14 Oct 2016 10:15:35 -0300 Subject: [PATCH 017/305] rtc: asm9260: fix module autoload If the driver is built as a module, autoload won't work because the module alias information is not filled so user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/rtc/rtc-asm9260.ko | grep alias $ After this patch: $ modinfo drivers/rtc/rtc-asm9260.ko | grep alias alias: of:N*T*Calphascale,asm9260-rtcC* alias: of:N*T*Calphascale,asm9260-rtc Signed-off-by: Javier Martinez Canillas Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-asm9260.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rtc/rtc-asm9260.c b/drivers/rtc/rtc-asm9260.c index 18a93d3e3f93..d36534965635 100644 --- a/drivers/rtc/rtc-asm9260.c +++ b/drivers/rtc/rtc-asm9260.c @@ -327,6 +327,7 @@ static const struct of_device_id asm9260_dt_ids[] = { { .compatible = "alphascale,asm9260-rtc", }, {} }; +MODULE_DEVICE_TABLE(of, asm9260_dt_ids); static struct platform_driver asm9260_rtc_driver = { .probe = asm9260_rtc_probe, From a3a0673b9db6fad2a3f7874c34e4b5cbc5fa01c6 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Tue, 18 Oct 2016 16:39:54 +0200 Subject: [PATCH 018/305] rtc: cmos: remove all __exit_p annotations I got the following stack trace under qemu: [ 7.575243] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 [ 7.596098] IP: [] cmos_set_alarm+0x38/0x280 [ 7.615699] PGD 3ccbe067 [ 7.615923] PUD 3daf2067 [ 7.635156] PMD 0 [ 7.654358] Oops: 0000 [#1] SMP [ 7.673869] Modules linked in: [ 7.693235] CPU: 0 PID: 1701 Comm: hwclock Tainted: G W 4.9.0-rc1+ #24 [ 7.712455] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 [ 7.753569] task: ffff88003d88dc40 task.stack: ffffc90000224000 [ 7.773743] RIP: 0010:[] [] cmos_set_alarm+0x38/0x280 [ 7.794893] RSP: 0018:ffffc90000227c10 EFLAGS: 00010296 [ 7.815890] RAX: 000000000000001d RBX: ffffc90000227d28 RCX: ffffffff8182be78 [ 7.836057] RDX: 0000000000000001 RSI: 0000000000000202 RDI: 0000000000000202 [ 7.856612] RBP: ffffc90000227c48 R08: 0000000000000000 R09: 0000000000000001 [ 7.877561] R10: 00000000000001c0 R11: 00000000000001c0 R12: 0000000000000000 [ 7.897072] R13: ffff88003d96f400 R14: ffff88003dac6410 R15: ffff88003dac6420 [ 7.917403] FS: 00007f77f42d9700(0000) GS:ffff88003fc00000(0000) knlGS:0000000000000000 [ 7.938293] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 7.958364] CR2: 0000000000000010 CR3: 000000003ccbb000 CR4: 00000000000006f0 [ 7.978028] Stack: [ 7.997120] ffff88003dac6000 ffff88003dac6410 0000000058049d01 ffffc90000227d28 [ 8.016993] ffff88003dac6000 ffff88003dac6410 ffff88003dac6420 ffffc90000227c98 [ 8.039505] ffffffff814f225d 0000001800227c98 000000090000002a 0000000900000011 [ 8.059985] Call Trace: [ 8.080110] [] __rtc_set_alarm+0x8d/0xa0 [ 8.099421] [] rtc_timer_enqueue+0x119/0x190 [ 8.119925] [] rtc_update_irq_enable+0xbe/0x100 [ 8.140583] [] rtc_dev_ioctl+0x3c0/0x480 [ 8.161162] [] ? user_path_at_empty+0x3a/0x50 [ 8.182717] [] do_vfs_ioctl+0x96/0x5c0 [ 8.204624] [] ? vfs_stat+0x16/0x20 [ 8.225994] [] ? SyS_newstat+0x15/0x30 [ 8.247043] [] SyS_ioctl+0x47/0x80 [ 8.267191] [] entry_SYSCALL_64_fastpath+0x1a/0xa9 [ 8.288719] Code: 6a 81 48 89 e5 41 57 41 56 41 55 49 89 fd 41 54 53 48 89 f3 48 c7 c6 20 c4 78 81 48 83 ec 10 e8 8f 00 ef ff 4d 8b a5 a0 00 00 00 <41> 8b 44 24 10 85 c0 0f 8e 2b 02 00 00 4c 89 ef 31 c0 b9 53 01 [ 8.335233] RIP [] cmos_set_alarm+0x38/0x280 [ 8.357096] RSP [ 8.379051] CR2: 0000000000000010 [ 8.401736] ---[ end trace 5cbcd83a1f225ed3 ]--- This occur only when CONFIG_DEBUG_TEST_DRIVER_REMOVE is enabled and CONFIG_RTC_DRV_CMOS builtin. When cmos_set_alarm() is called dev is NULL and so trigger the deref via cmos->irq The problem comes from that the device is removed but no remove function are called due to _exit_p(). This patch remove all _exit_p() annotation. Signed-off-by: Corentin Labbe Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cmos.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index dd3d59806ffa..6f0e12e66296 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -776,7 +776,7 @@ static void cmos_do_shutdown(int rtc_irq) spin_unlock_irq(&rtc_lock); } -static void __exit cmos_do_remove(struct device *dev) +static void cmos_do_remove(struct device *dev) { struct cmos_rtc *cmos = dev_get_drvdata(dev); struct resource *ports; @@ -1129,7 +1129,7 @@ static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) pnp_irq(pnp, 0)); } -static void __exit cmos_pnp_remove(struct pnp_dev *pnp) +static void cmos_pnp_remove(struct pnp_dev *pnp) { cmos_do_remove(&pnp->dev); } @@ -1161,7 +1161,7 @@ static struct pnp_driver cmos_pnp_driver = { .name = (char *) driver_name, .id_table = rtc_ids, .probe = cmos_pnp_probe, - .remove = __exit_p(cmos_pnp_remove), + .remove = cmos_pnp_remove, .shutdown = cmos_pnp_shutdown, /* flag ensures resume() gets called, and stops syslog spam */ @@ -1238,7 +1238,7 @@ static int __init cmos_platform_probe(struct platform_device *pdev) return cmos_do_probe(&pdev->dev, resource, irq); } -static int __exit cmos_platform_remove(struct platform_device *pdev) +static int cmos_platform_remove(struct platform_device *pdev) { cmos_do_remove(&pdev->dev); return 0; @@ -1263,7 +1263,7 @@ static void cmos_platform_shutdown(struct platform_device *pdev) MODULE_ALIAS("platform:rtc_cmos"); static struct platform_driver cmos_platform_driver = { - .remove = __exit_p(cmos_platform_remove), + .remove = cmos_platform_remove, .shutdown = cmos_platform_shutdown, .driver = { .name = driver_name, From e0d9727c111a5917a1184c71c1a8e6f78c7fc41d Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 13 Oct 2016 10:07:07 +0300 Subject: [PATCH 019/305] iwlwifi: pcie: fix SPLC structure parsing The SPLC data parsing is too restrictive and was not trying find the correct element for WiFi. This causes problems with some BIOSes where the SPLC method exists, but doesn't have a WiFi entry on the first element of the list. The domain type values are also incorrect according to the specification. Fix this by complying with the actual specification. Additionally, replace all occurrences of SPLX to SPLC, since SPLX is only a structure internal to the ACPI tables, and may not even exist. Fixes: bcb079a14d75 ("iwlwifi: pcie: retrieve and parse ACPI power limitations") Reported-by: Chris Rorvick Tested-by: Paul Bolle Tested-by: Chris Rorvick Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 81 +++++++++++-------- 1 file changed, 49 insertions(+), 32 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 001be406a3d3..2f8134b2a504 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -541,48 +541,64 @@ static const struct pci_device_id iwl_hw_card_ids[] = { MODULE_DEVICE_TABLE(pci, iwl_hw_card_ids); #ifdef CONFIG_ACPI -#define SPL_METHOD "SPLC" -#define SPL_DOMAINTYPE_MODULE BIT(0) -#define SPL_DOMAINTYPE_WIFI BIT(1) -#define SPL_DOMAINTYPE_WIGIG BIT(2) -#define SPL_DOMAINTYPE_RFEM BIT(3) +#define ACPI_SPLC_METHOD "SPLC" +#define ACPI_SPLC_DOMAIN_WIFI (0x07) -static u64 splx_get_pwr_limit(struct iwl_trans *trans, union acpi_object *splx) +static u64 splc_get_pwr_limit(struct iwl_trans *trans, union acpi_object *splc) { - union acpi_object *limits, *domain_type, *power_limit; + union acpi_object *data_pkg, *dflt_pwr_limit; + int i; - if (splx->type != ACPI_TYPE_PACKAGE || - splx->package.count != 2 || - splx->package.elements[0].type != ACPI_TYPE_INTEGER || - splx->package.elements[0].integer.value != 0) { - IWL_ERR(trans, "Unsupported splx structure\n"); + /* We need at least two elements, one for the revision and one + * for the data itself. Also check that the revision is + * supported (currently only revision 0). + */ + if (splc->type != ACPI_TYPE_PACKAGE || + splc->package.count < 2 || + splc->package.elements[0].type != ACPI_TYPE_INTEGER || + splc->package.elements[0].integer.value != 0) { + IWL_DEBUG_INFO(trans, + "Unsupported structure returned by the SPLC method. Ignoring.\n"); return 0; } - limits = &splx->package.elements[1]; - if (limits->type != ACPI_TYPE_PACKAGE || - limits->package.count < 2 || - limits->package.elements[0].type != ACPI_TYPE_INTEGER || - limits->package.elements[1].type != ACPI_TYPE_INTEGER) { - IWL_ERR(trans, "Invalid limits element\n"); + /* loop through all the packages to find the one for WiFi */ + for (i = 1; i < splc->package.count; i++) { + union acpi_object *domain; + + data_pkg = &splc->package.elements[i]; + + /* Skip anything that is not a package with the right + * amount of elements (i.e. at least 2 integers). + */ + if (data_pkg->type != ACPI_TYPE_PACKAGE || + data_pkg->package.count < 2 || + data_pkg->package.elements[0].type != ACPI_TYPE_INTEGER || + data_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) + continue; + + domain = &data_pkg->package.elements[0]; + if (domain->integer.value == ACPI_SPLC_DOMAIN_WIFI) + break; + + data_pkg = NULL; + } + + if (!data_pkg) { + IWL_DEBUG_INFO(trans, + "No element for the WiFi domain returned by the SPLC method.\n"); return 0; } - domain_type = &limits->package.elements[0]; - power_limit = &limits->package.elements[1]; - if (!(domain_type->integer.value & SPL_DOMAINTYPE_WIFI)) { - IWL_DEBUG_INFO(trans, "WiFi power is not limited\n"); - return 0; - } - - return power_limit->integer.value; + dflt_pwr_limit = &data_pkg->package.elements[1]; + return dflt_pwr_limit->integer.value; } static void set_dflt_pwr_limit(struct iwl_trans *trans, struct pci_dev *pdev) { acpi_handle pxsx_handle; acpi_handle handle; - struct acpi_buffer splx = {ACPI_ALLOCATE_BUFFER, NULL}; + struct acpi_buffer splc = {ACPI_ALLOCATE_BUFFER, NULL}; acpi_status status; pxsx_handle = ACPI_HANDLE(&pdev->dev); @@ -593,23 +609,24 @@ static void set_dflt_pwr_limit(struct iwl_trans *trans, struct pci_dev *pdev) } /* Get the method's handle */ - status = acpi_get_handle(pxsx_handle, (acpi_string)SPL_METHOD, &handle); + status = acpi_get_handle(pxsx_handle, (acpi_string)ACPI_SPLC_METHOD, + &handle); if (ACPI_FAILURE(status)) { - IWL_DEBUG_INFO(trans, "SPL method not found\n"); + IWL_DEBUG_INFO(trans, "SPLC method not found\n"); return; } /* Call SPLC with no arguments */ - status = acpi_evaluate_object(handle, NULL, NULL, &splx); + status = acpi_evaluate_object(handle, NULL, NULL, &splc); if (ACPI_FAILURE(status)) { IWL_ERR(trans, "SPLC invocation failed (0x%x)\n", status); return; } - trans->dflt_pwr_limit = splx_get_pwr_limit(trans, splx.pointer); + trans->dflt_pwr_limit = splc_get_pwr_limit(trans, splc.pointer); IWL_DEBUG_INFO(trans, "Default power limit set to %lld\n", trans->dflt_pwr_limit); - kfree(splx.pointer); + kfree(splc.pointer); } #else /* CONFIG_ACPI */ From 5a143db8c4a28dab6423cb6197e9f1389da375f2 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 5 Oct 2016 09:28:53 +0300 Subject: [PATCH 020/305] iwlwifi: mvm: fix netdetect starting/stopping for unified images With unified images, we need to make sure the net-detect scan is stopped after resuming, since we don't restart the FW. Also, we need to make sure we check if there are enough scan slots available to run it, as we do with other scans. Fixes: commit 23ae61282b88 ("iwlwifi: mvm: Do not switch to D3 image on suspend") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 19 +++++++++++ drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 33 +++++++++++++++---- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 03a8fc586548..b88e2048ae0b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -1087,6 +1087,15 @@ iwl_mvm_netdetect_config(struct iwl_mvm *mvm, ret = iwl_mvm_switch_to_d3(mvm); if (ret) return ret; + } else { + /* In theory, we wouldn't have to stop a running sched + * scan in order to start another one (for + * net-detect). But in practice this doesn't seem to + * work properly, so stop any running sched_scan now. + */ + ret = iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_SCHED, true); + if (ret) + return ret; } /* rfkill release can be either for wowlan or netdetect */ @@ -2091,6 +2100,16 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test) iwl_mvm_update_changed_regdom(mvm); if (mvm->net_detect) { + /* If this is a non-unified image, we restart the FW, + * so no need to stop the netdetect scan. If that + * fails, continue and try to get the wake-up reasons, + * but trigger a HW restart by keeping a failure code + * in ret. + */ + if (unified_image) + ret = iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_NETDETECT, + false); + iwl_mvm_query_netdetect_reasons(mvm, vif); /* has unlocked the mutex, so skip that */ goto out; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index f279fdd6eb44..fa9743205491 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -1199,6 +1199,9 @@ static int iwl_mvm_num_scans(struct iwl_mvm *mvm) static int iwl_mvm_check_running_scans(struct iwl_mvm *mvm, int type) { + bool unified_image = fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG); + /* This looks a bit arbitrary, but the idea is that if we run * out of possible simultaneous scans and the userspace is * trying to run a scan type that is already running, we @@ -1225,12 +1228,30 @@ static int iwl_mvm_check_running_scans(struct iwl_mvm *mvm, int type) return -EBUSY; return iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_REGULAR, true); case IWL_MVM_SCAN_NETDETECT: - /* No need to stop anything for net-detect since the - * firmware is restarted anyway. This way, any sched - * scans that were running will be restarted when we - * resume. - */ - return 0; + /* For non-unified images, there's no need to stop + * anything for net-detect since the firmware is + * restarted anyway. This way, any sched scans that + * were running will be restarted when we resume. + */ + if (!unified_image) + return 0; + + /* If this is a unified image and we ran out of scans, + * we need to stop something. Prefer stopping regular + * scans, because the results are useless at this + * point, and we should be able to keep running + * another scheduled scan while suspended. + */ + if (mvm->scan_status & IWL_MVM_SCAN_REGULAR_MASK) + return iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_REGULAR, + true); + if (mvm->scan_status & IWL_MVM_SCAN_SCHED_MASK) + return iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_SCHED, + true); + + /* fall through, something is wrong if no scan was + * running but we ran out of scans. + */ default: WARN_ON(1); break; From 368e21aebe9535c1643b272aaa9819298a6bc3e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 19 Oct 2016 21:02:04 +0300 Subject: [PATCH 021/305] rtc: cmos: Don't enable interrupts in the middle of the interrupt handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using spin_lock_irq()/spin_unlock_irq() from within the interrupt handler is a no-no. Let's save/restore the flags to avoid turning on interrupts prematurely. We hit this in a bunch of our CI systems, but for whatever reason I wasn't able to reproduce on my own machine, so this fix is just based on the backtrace. [ 202.634918] WARNING: CPU: 0 PID: 0 at kernel/locking/lockdep.c:2729 trace_hardirqs_on_caller+0x113/0x1b0 [ 202.634919] DEBUG_LOCKS_WARN_ON(current->hardirq_context) [ 202.634929] Modules linked in: snd_hda_intel i915 x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel lpc_ich snd_hda_codec_realtek snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_codec snd_hwdep i2c_designware_platform i2c_designware_core snd_hda_core mei_me mei snd_pcm r8169 mii sdhci_acpi sdhci mmc_core i2c_hid [last unloaded: i915] [ 202.634930] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G U 4.9.0-rc1-CI-CI_DRM_1734+ #1 [ 202.634931] Hardware name: GIGABYTE M4HM87P-00/M4HM87P-00, BIOS F6 12/10/2014 [ 202.634933] ffff88011ea03d68 ffffffff8142dce5 ffff88011ea03db8 0000000000000000 [ 202.634934] ffff88011ea03da8 ffffffff8107e496 00000aa900000002 ffffffff81e249a0 [ 202.634935] ffffffff81815637 ffffffff82e7c280 0000000000000000 0000000000000004 [ 202.634936] Call Trace: [ 202.634939] [ 202.634939] [] dump_stack+0x67/0x92 [ 202.634941] [] __warn+0xc6/0xe0 [ 202.634944] [] ? _raw_spin_unlock_irq+0x27/0x50 [ 202.634945] [] warn_slowpath_fmt+0x4a/0x50 [ 202.634946] [] trace_hardirqs_on_caller+0x113/0x1b0 [ 202.634948] [] trace_hardirqs_on+0xd/0x10 [ 202.634949] [] _raw_spin_unlock_irq+0x27/0x50 [ 202.634951] [] rtc_handler+0x32/0xa0 [ 202.634954] [] acpi_ev_fixed_event_detect+0xd4/0xfb [ 202.634956] [] acpi_ev_sci_xrupt_handler+0xf/0x2d [ 202.634957] [] acpi_irq+0x11/0x2c [ 202.634960] [] __handle_irq_event_percpu+0x58/0x370 [ 202.634961] [] handle_irq_event_percpu+0x1e/0x50 [ 202.634962] [] handle_irq_event+0x34/0x60 [ 202.634963] [] handle_fasteoi_irq+0xa6/0x170 [ 202.634966] [] handle_irq+0x15/0x20 [ 202.634967] [] do_IRQ+0x68/0x130 [ 202.634968] [] common_interrupt+0x89/0x89 [ 202.634970] [ 202.634970] [] ? mwait_idle+0x93/0x210 [ 202.634971] [] ? mwait_idle+0x8a/0x210 [ 202.634972] [] arch_cpu_idle+0xa/0x10 [ 202.634973] [] default_idle_call+0x1e/0x30 [ 202.634974] [] cpu_startup_entry+0x17c/0x1f0 [ 202.634976] [] rest_init+0x127/0x130 [ 202.634978] [] start_kernel+0x3f6/0x403 [ 202.634980] [] x86_64_start_reservations+0x2a/0x2c [ 202.634981] [] x86_64_start_kernel+0x173/0x186 [ 202.634982] ---[ end trace 293c99618fa08d34 ]--- Cc: Gabriele Mazzotta Cc: Alexandre Belloni Fixes: 983bf1256edb ("rtc: cmos: Clear ACPI-driven alarms upon resume") Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cmos.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 6f0e12e66296..7030d7cd3861 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -996,8 +996,9 @@ static u32 rtc_handler(void *context) struct cmos_rtc *cmos = dev_get_drvdata(dev); unsigned char rtc_control = 0; unsigned char rtc_intr; + unsigned long flags; - spin_lock_irq(&rtc_lock); + spin_lock_irqsave(&rtc_lock, flags); if (cmos_rtc.suspend_ctrl) rtc_control = CMOS_READ(RTC_CONTROL); if (rtc_control & RTC_AIE) { @@ -1006,7 +1007,7 @@ static u32 rtc_handler(void *context) rtc_intr = CMOS_READ(RTC_INTR_FLAGS); rtc_update_irq(cmos->rtc, 1, rtc_intr); } - spin_unlock_irq(&rtc_lock); + spin_unlock_irqrestore(&rtc_lock, flags); pm_wakeup_event(dev, 0); acpi_clear_event(ACPI_EVENT_RTC); From 989cea5c14be024e879c0055dc6d033680a52610 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 21 Oct 2016 01:13:33 +1100 Subject: [PATCH 022/305] kbuild: prevent lib-ksyms.o rebuilds Signed-off-by: Nicholas Piggin Reported-by: Russell King Tested-by: Russell King Signed-off-by: Michal Marek --- scripts/Makefile.build | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index de46ab03f063..e1f25d6d132e 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -430,6 +430,9 @@ cmd_export_list = $(OBJDUMP) -h $< | \ $(obj)/lib-ksyms.o: $(lib-target) FORCE $(call if_changed,export_list) + +targets += $(obj)/lib-ksyms.o + endif # From b7f865ede20c87073216f77fe97f6fc56666e3da Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Tue, 25 Oct 2016 01:08:31 +0800 Subject: [PATCH 023/305] ARM: dts: sun8i: fix the pinmux for UART1 When the patch is applied, the allwinner,driver and allwinner,pull properties are removed. Although they're described to be optional in the devicetree binding, without them, the pinmux cannot be initialized, and the uart cannot be used. Add them back to fix the problem, and makes the bluetooth on iNet D978 Rev2 board work. Fixes: 82eec384249f (ARM: dts: sun8i: add pinmux for UART1 at PG) Signed-off-by: Icenowy Zheng Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun8i-a23-a33.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/sun8i-a23-a33.dtsi b/arch/arm/boot/dts/sun8i-a23-a33.dtsi index 48fc24f36fcb..300a1bd5a6ec 100644 --- a/arch/arm/boot/dts/sun8i-a23-a33.dtsi +++ b/arch/arm/boot/dts/sun8i-a23-a33.dtsi @@ -282,11 +282,15 @@ uart1_pins_a: uart1@0 { allwinner,pins = "PG6", "PG7"; allwinner,function = "uart1"; + allwinner,drive = ; + allwinner,pull = ; }; uart1_pins_cts_rts_a: uart1-cts-rts@0 { allwinner,pins = "PG8", "PG9"; allwinner,function = "uart1"; + allwinner,drive = ; + allwinner,pull = ; }; mmc0_pins_a: mmc0@0 { From d3532ea6ce4ea501e421d130555e59edc2945f99 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 18 Oct 2016 00:13:40 +0200 Subject: [PATCH 024/305] brcmfmac: avoid maybe-uninitialized warning in brcmf_cfg80211_start_ap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A bugfix added a sanity check around the assignment and use of the 'is_11d' variable, which looks correct to me, but as the function is rather complex already, this confuses the compiler to the point where it can no longer figure out if the variable is always initialized correctly: brcm80211/brcmfmac/cfg80211.c: In function ‘brcmf_cfg80211_start_ap’: brcm80211/brcmfmac/cfg80211.c:4586:10: error: ‘is_11d’ may be used uninitialized in this function [-Werror=maybe-uninitialized] This adds an initialization for the newly introduced case in which the variable should not really be used, in order to make the warning go away. Fixes: b3589dfe0212 ("brcmfmac: ignore 11d configuration errors") Cc: Hante Meuleman Cc: Arend van Spriel Cc: Kalle Valo Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index b777e1b2f87a..78d9966a3957 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -4516,7 +4516,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev, /* store current 11d setting */ if (brcmf_fil_cmd_int_get(ifp, BRCMF_C_GET_REGULATORY, &ifp->vif->is_11d)) { - supports_11d = false; + is_11d = supports_11d = false; } else { country_ie = brcmf_parse_tlvs((u8 *)settings->beacon.tail, settings->beacon.tail_len, From bb6a6e8e091353770074608c1d1bfde0e20b8154 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 22 Oct 2016 18:51:24 +0800 Subject: [PATCH 025/305] netfilter: nft_dynset: fix panic if NFT_SET_HASH is not enabled When CONFIG_NFT_SET_HASH is not enabled and I input the following rule: "nft add rule filter output flow table test {ip daddr counter }", kernel panic happened on my system: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [< (null)>] (null) [...] Call Trace: [] ? nft_dynset_eval+0x56/0x100 [nf_tables] [] nft_do_chain+0xfb/0x4e0 [nf_tables] [] ? nf_conntrack_tuple_taken+0x61/0x210 [nf_conntrack] [] ? get_unique_tuple+0x136/0x560 [nf_nat] [] ? __nf_ct_ext_add_length+0x111/0x130 [nf_conntrack] [] ? nf_nat_setup_info+0x87/0x3b0 [nf_nat] [] ? ipt_do_table+0x327/0x610 [] ? __nf_nat_alloc_null_binding+0x57/0x80 [nf_nat] [] nft_ipv4_output+0xaf/0xd0 [nf_tables_ipv4] [] nf_iterate+0x55/0x60 [] nf_hook_slow+0x73/0xd0 Because in rbtree type set, ops->update is not implemented. So just keep it simple, in such case, report -EOPNOTSUPP to the user space. Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_dynset.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 517f08767a3c..bfdb689664b0 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -139,6 +139,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx, return PTR_ERR(set); } + if (set->ops->update == NULL) + return -EOPNOTSUPP; + if (set->flags & NFT_SET_CONSTANT) return -EBUSY; From 61f9e2924f4981d626b3a931fed935f2fa3cb4de Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 22 Oct 2016 18:51:25 +0800 Subject: [PATCH 026/305] netfilter: nf_tables: fix *leak* when expr clone fail When nft_expr_clone failed, a series of problems will happen: 1. module refcnt will leak, we call __module_get at the beginning but we forget to put it back if ops->clone returns fail 2. memory will be leaked, if clone fail, we just return NULL and forget to free the alloced element 3. set->nelems will become incorrect when set->size is specified. If clone fail, we should decrease the set->nelems Now this patch fixes these problems. And fortunately, clone fail will only happen on counter expression when memory is exhausted. Fixes: 086f332167d6 ("netfilter: nf_tables: add clone interface to expression operations") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 ++++-- net/netfilter/nf_tables_api.c | 11 ++++++----- net/netfilter/nft_dynset.c | 16 ++++++++++------ net/netfilter/nft_set_hash.c | 4 ++-- net/netfilter/nft_set_rbtree.c | 2 +- 5 files changed, 23 insertions(+), 16 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5031e072567b..741dcded5b4f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -542,7 +542,8 @@ void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const u32 *key, const u32 *data, u64 timeout, gfp_t gfp); -void nft_set_elem_destroy(const struct nft_set *set, void *elem); +void nft_set_elem_destroy(const struct nft_set *set, void *elem, + bool destroy_expr); /** * struct nft_set_gc_batch_head - nf_tables set garbage collection batch @@ -693,7 +694,6 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) { int err; - __module_get(src->ops->type->owner); if (src->ops->clone) { dst->ops = src->ops; err = src->ops->clone(dst, src); @@ -702,6 +702,8 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) } else { memcpy(dst, src, src->ops->size); } + + __module_get(src->ops->type->owner); return 0; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 24db22257586..86e48aeb20be 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3452,14 +3452,15 @@ void *nft_set_elem_init(const struct nft_set *set, return elem; } -void nft_set_elem_destroy(const struct nft_set *set, void *elem) +void nft_set_elem_destroy(const struct nft_set *set, void *elem, + bool destroy_expr) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_uninit(nft_set_ext_data(ext), set->dtype); - if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) + if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); kfree(elem); @@ -3812,7 +3813,7 @@ void nft_set_gc_batch_release(struct rcu_head *rcu) gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu); for (i = 0; i < gcb->head.cnt; i++) - nft_set_elem_destroy(gcb->head.set, gcb->elems[i]); + nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true); kfree(gcb); } EXPORT_SYMBOL_GPL(nft_set_gc_batch_release); @@ -4030,7 +4031,7 @@ static void nf_tables_commit_release(struct nft_trans *trans) break; case NFT_MSG_DELSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), - nft_trans_elem(trans).priv); + nft_trans_elem(trans).priv, true); break; } kfree(trans); @@ -4171,7 +4172,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), - nft_trans_elem(trans).priv); + nft_trans_elem(trans).priv, true); break; } kfree(trans); diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index bfdb689664b0..31ca94793aa9 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -44,18 +44,22 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr, ®s->data[priv->sreg_key], ®s->data[priv->sreg_data], timeout, GFP_ATOMIC); - if (elem == NULL) { - if (set->size) - atomic_dec(&set->nelems); - return NULL; - } + if (elem == NULL) + goto err1; ext = nft_set_elem_ext(set, elem); if (priv->expr != NULL && nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0) - return NULL; + goto err2; return elem; + +err2: + nft_set_elem_destroy(set, elem, false); +err1: + if (set->size) + atomic_dec(&set->nelems); + return NULL; } static void nft_dynset_eval(const struct nft_expr *expr, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 3794cb2fc788..88d9fc8343e7 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -120,7 +120,7 @@ out: return true; err2: - nft_set_elem_destroy(set, he); + nft_set_elem_destroy(set, he, true); err1: return false; } @@ -332,7 +332,7 @@ static int nft_hash_init(const struct nft_set *set, static void nft_hash_elem_destroy(void *ptr, void *arg) { - nft_set_elem_destroy((const struct nft_set *)arg, ptr); + nft_set_elem_destroy((const struct nft_set *)arg, ptr, true); } static void nft_hash_destroy(const struct nft_set *set) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 38b5bda242f8..36493a7cae88 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -266,7 +266,7 @@ static void nft_rbtree_destroy(const struct nft_set *set) while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_set_elem_destroy(set, rbe); + nft_set_elem_destroy(set, rbe, true); } } From dab45060a56a9732b027d2031c1b6100bc75eea2 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 22 Oct 2016 18:51:26 +0800 Subject: [PATCH 027/305] netfilter: nf_tables: fix race when create new element in dynset Packets may race when create the new element in nft_hash_update: CPU0 CPU1 lookup_fast - fail lookup_fast - fail new - ok new - ok insert - ok insert - fail(EEXIST) So when race happened, we reuse the existing element. Otherwise, these *racing* packets will not be handled properly. Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_hash.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 88d9fc8343e7..a3dface3e6e6 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -98,7 +98,7 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he; + struct nft_hash_elem *he, *prev; struct nft_hash_cmp_arg arg = { .genmask = NFT_GENMASK_ANY, .set = set, @@ -112,9 +112,18 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key, he = new(set, expr, regs); if (he == NULL) goto err1; - if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, - nft_hash_params)) + + prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, + nft_hash_params); + if (IS_ERR(prev)) goto err2; + + /* Another cpu may race to insert the element with the same key */ + if (prev) { + nft_set_elem_destroy(set, he, true); + he = prev; + } + out: *ext = &he->ext; return true; From 444f901742d054a4cd5ff045871eac5131646cfb Mon Sep 17 00:00:00 2001 From: Ulrich Weber Date: Mon, 24 Oct 2016 18:07:23 +0200 Subject: [PATCH 028/305] netfilter: nf_conntrack_sip: extend request line validation on SIP requests, so a fragmented TCP SIP packet from an allow header starting with INVITE,NOTIFY,OPTIONS,REFER,REGISTER,UPDATE,SUBSCRIBE Content-Length: 0 will not bet interpreted as an INVITE request. Also Request-URI must start with an alphabetic character. Confirm with RFC 3261 Request-Line = Method SP Request-URI SP SIP-Version CRLF Fixes: 30f33e6dee80 ("[NETFILTER]: nf_conntrack_sip: support method specific request/response handling") Signed-off-by: Ulrich Weber Acked-by: Marco Angaroni Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 621b81c7bddc..c3fc14e021ec 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1436,9 +1436,12 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, handler = &sip_handlers[i]; if (handler->request == NULL) continue; - if (*datalen < handler->len || + if (*datalen < handler->len + 2 || strncasecmp(*dptr, handler->method, handler->len)) continue; + if ((*dptr)[handler->len] != ' ' || + !isalpha((*dptr)[handler->len+1])) + continue; if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ, &matchoff, &matchlen) <= 0) { From f1d505bb762e30bf316ff5d3b604914649d6aed3 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 25 Oct 2016 15:56:39 -0400 Subject: [PATCH 029/305] netfilter: nf_tables: fix type mismatch with error return from nft_parse_u32_check Commit 36b701fae12ac ("netfilter: nf_tables: validate maximum value of u32 netlink attributes") introduced nft_parse_u32_check with a return value of "unsigned int", yet on error it returns "-ERANGE". This patch corrects the mismatch by changing the return value to "int", which happens to match the actual users of nft_parse_u32_check already. Found by Coverity, CID 1373930. Note that commit 21a9e0f1568ea ("netfilter: nft_exthdr: fix error handling in nft_exthdr_init()) attempted to address the issue, but did not address the return type of nft_parse_u32_check. Signed-off-by: John W. Linville Cc: Laura Garcia Liebana Cc: Pablo Neira Ayuso Cc: Dan Carpenter Fixes: 36b701fae12ac ("netfilter: nf_tables: validate maximum value...") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 741dcded5b4f..d79d1e9b9546 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -145,7 +145,7 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE; } -unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); +int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); unsigned int nft_parse_register(const struct nlattr *attr); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 86e48aeb20be..365d31b86816 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4422,7 +4422,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, * Otherwise a 0 is returned and the attribute value is stored in the * destination variable. */ -unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) +int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) { u32 val; From cdb436d181d21af4d273b49ec7734eecd6a37fe9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Oct 2016 23:46:17 +0200 Subject: [PATCH 030/305] netfilter: conntrack: avoid excess memory allocation This is now a fixed-size extension, so we don't need to pass a variable alloc size. This (harmless) error results in allocating 32 instead of the needed 16 bytes for this extension as the size gets passed twice. Fixes: 23014011ba420 ("netfilter: conntrack: support a fixed size of 128 distinct labels") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_labels.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index 498814626e28..1723a67c0b0a 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -30,8 +30,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) if (net->ct.labels_used == 0) return NULL; - return nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, - sizeof(struct nf_conn_labels), GFP_ATOMIC); + return nf_ct_ext_add(ct, NF_CT_EXT_LABELS, GFP_ATOMIC); #else return NULL; #endif From 5747620257812530adda58cbff591fede6fb261e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Oct 2016 17:34:32 +0200 Subject: [PATCH 031/305] netfilter: ip_vs_sync: fix bogus maybe-uninitialized warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building the ip_vs_sync code with CONFIG_OPTIMIZE_INLINING on x86 confuses the compiler to the point where it produces a rather dubious warning message: net/netfilter/ipvs/ip_vs_sync.c:1073:33: error: ‘opt.init_seq’ may be used uninitialized in this function [-Werror=maybe-uninitialized] struct ip_vs_sync_conn_options opt; ^~~ net/netfilter/ipvs/ip_vs_sync.c:1073:33: error: ‘opt.delta’ may be used uninitialized in this function [-Werror=maybe-uninitialized] net/netfilter/ipvs/ip_vs_sync.c:1073:33: error: ‘opt.previous_delta’ may be used uninitialized in this function [-Werror=maybe-uninitialized] net/netfilter/ipvs/ip_vs_sync.c:1073:33: error: ‘*((void *)&opt+12).init_seq’ may be used uninitialized in this function [-Werror=maybe-uninitialized] net/netfilter/ipvs/ip_vs_sync.c:1073:33: error: ‘*((void *)&opt+12).delta’ may be used uninitialized in this function [-Werror=maybe-uninitialized] net/netfilter/ipvs/ip_vs_sync.c:1073:33: error: ‘*((void *)&opt+12).previous_delta’ may be used uninitialized in this function [-Werror=maybe-uninitialized] The problem appears to be a combination of a number of factors, including the __builtin_bswap32 compiler builtin being slightly odd, having a large amount of code inlined into a single function, and the way that some functions only get partially inlined here. I've spent way too much time trying to work out a way to improve the code, but the best I've come up with is to add an explicit memset right before the ip_vs_seq structure is first initialized here. When the compiler works correctly, this has absolutely no effect, but in the case that produces the warning, the warning disappears. In the process of analysing this warning, I also noticed that we use memcpy to copy the larger ip_vs_sync_conn_options structure over two members of the ip_vs_conn structure. This works because the layout is identical, but seems error-prone, so I'm changing this in the process to directly copy the two members. This change seemed to have no effect on the object code or the warning, but it deals with the same data, so I kept the two changes together. Signed-off-by: Arnd Bergmann Acked-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_sync.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 1b07578bedf3..9350530c16c1 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -283,6 +283,7 @@ struct ip_vs_sync_buff { */ static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho) { + memset(ho, 0, sizeof(*ho)); ho->init_seq = get_unaligned_be32(&no->init_seq); ho->delta = get_unaligned_be32(&no->delta); ho->previous_delta = get_unaligned_be32(&no->previous_delta); @@ -917,8 +918,10 @@ static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *pa kfree(param->pe_data); } - if (opt) - memcpy(&cp->in_seq, opt, sizeof(*opt)); + if (opt) { + cp->in_seq = opt->in_seq; + cp->out_seq = opt->out_seq; + } atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs)); cp->state = state; cp->old_state = cp->state; From b9a321b48af40e0606009df8aff0a8c65dfbbfd8 Mon Sep 17 00:00:00 2001 From: Mark Lord Date: Sun, 30 Oct 2016 19:28:27 -0400 Subject: [PATCH 032/305] r8152: Fix broken RX checksums. The r8152 driver has been broken since (approx) 3.16.xx when support was added for hardware RX checksums on newer chip versions. Symptoms include random segfaults and silent data corruption over NFS. The hardware checksum logig does not work on the VER_02 dongles I have here when used with a slow embedded system CPU. Google reveals others reporting similar issues on Raspberry Pi. So, disable hardware RX checksum support for VER_02, and fix an obvious coding error for IPV6 checksums in the same function. Because this bug results in silent data corruption, it is a good candidate for back-porting to -stable >= 3.16.xx. Signed-off-by: Mark Lord Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 44d439f50961..75c516889645 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1730,7 +1730,7 @@ static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc) u8 checksum = CHECKSUM_NONE; u32 opts2, opts3; - if (tp->version == RTL_VER_01) + if (tp->version == RTL_VER_01 || tp->version == RTL_VER_02) goto return_result; opts2 = le32_to_cpu(rx_desc->opts2); @@ -1745,7 +1745,7 @@ static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc) checksum = CHECKSUM_NONE; else checksum = CHECKSUM_UNNECESSARY; - } else if (RD_IPV6_CS) { + } else if (opts2 & RD_IPV6_CS) { if ((opts2 & RD_UDP_CS) && !(opts3 & UDPF)) checksum = CHECKSUM_UNNECESSARY; else if ((opts2 & RD_TCP_CS) && !(opts3 & TCPF)) From c17c3cdff10b9f59ef1244a14604f10949f17117 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 29 Oct 2016 22:03:05 +0800 Subject: [PATCH 033/305] netfilter: nf_tables: destroy the set if fail to add transaction When the memory is exhausted, then we will fail to add the NFT_MSG_NEWSET transaction. In such case, we should destroy the set before we free it. Fixes: 958bee14d071 ("netfilter: nf_tables: use new transaction infrastructure to handle sets") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 365d31b86816..7d6a626b08f1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2956,12 +2956,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) - goto err2; + goto err3; list_add_tail_rcu(&set->list, &table->sets); table->use++; return 0; +err3: + ops->destroy(set); err2: kfree(set); err1: From b73b8a1ba598236296a46103d81c10d629d9a470 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 29 Oct 2016 22:09:51 +0800 Subject: [PATCH 034/305] netfilter: nft_dup: do not use sreg_dev if the user doesn't specify it The NFTA_DUP_SREG_DEV attribute is not a must option, so we should use it in routing lookup only when the user specify it. Fixes: d877f07112f1 ("netfilter: nf_tables: add nft_dup expression") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nft_dup_ipv4.c | 6 ++++-- net/ipv6/netfilter/nft_dup_ipv6.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index bf855e64fc45..0c01a270bf9f 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -28,7 +28,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr, struct in_addr gw = { .s_addr = (__force __be32)regs->data[priv->sreg_addr], }; - int oif = regs->data[priv->sreg_dev]; + int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1; nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif); } @@ -59,7 +59,9 @@ static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_dup_ipv4 *priv = nft_expr_priv(expr); - if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) || + if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr)) + goto nla_put_failure; + if (priv->sreg_dev && nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev)) goto nla_put_failure; diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index 8bfd470cbe72..831f86e1ec08 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -26,7 +26,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr, { struct nft_dup_ipv6 *priv = nft_expr_priv(expr); struct in6_addr *gw = (struct in6_addr *)®s->data[priv->sreg_addr]; - int oif = regs->data[priv->sreg_dev]; + int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1; nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif); } @@ -57,7 +57,9 @@ static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_dup_ipv6 *priv = nft_expr_priv(expr); - if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) || + if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr)) + goto nla_put_failure; + if (priv->sreg_dev && nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev)) goto nla_put_failure; From f89c56ce710afa65e1b2ead555b52c4807f34ff7 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 26 Oct 2016 11:21:14 +0200 Subject: [PATCH 035/305] ipv6: Don't use ufo handling on later transformed packets Similar to commit c146066ab802 ("ipv4: Don't use ufo handling on later transformed packets"), don't perform UFO on packets that will be IPsec transformed. To detect it we rely on the fact that headerlen in dst_entry is non-zero only for transformation bundles (xfrm_dst objects). Unwanted segmentation can be observed with a NETIF_F_UFO capable device, such as a dummy device: DEV=dum0 LEN=1493 ip li add $DEV type dummy ip addr add fc00::1/64 dev $DEV nodad ip link set $DEV up ip xfrm policy add dir out src fc00::1 dst fc00::2 \ tmpl src fc00::1 dst fc00::2 proto esp spi 1 ip xfrm state add src fc00::1 dst fc00::2 \ proto esp spi 1 enc 'aes' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b tcpdump -n -nn -i $DEV -t & socat /dev/zero,readbytes=$LEN udp6:[fc00::2]:$LEN tcpdump output before: IP6 fc00::1 > fc00::2: frag (0|1448) ESP(spi=0x00000001,seq=0x1), length 1448 IP6 fc00::1 > fc00::2: frag (1448|48) IP6 fc00::1 > fc00::2: ESP(spi=0x00000001,seq=0x2), length 88 ... and after: IP6 fc00::1 > fc00::2: frag (0|1448) ESP(spi=0x00000001,seq=0x1), length 1448 IP6 fc00::1 > fc00::2: frag (1448|80) Fixes: e89e9cf539a2 ("[IPv4/IPv6]: UFO Scatter-gather approach") Signed-off-by: Jakub Sitnicki Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6001e781164e..59eb4ed99ce8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1366,7 +1366,7 @@ emsgsize: if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && + (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, exthdrlen, From 19bda36c4299ce3d7e5bce10bebe01764a655a6d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 28 Oct 2016 18:18:01 +0800 Subject: [PATCH 036/305] ipv6: add mtu lock check in __ip6_rt_update_pmtu Prior to this patch, ipv6 didn't do mtu lock check in ip6_update_pmtu. It leaded to that mtu lock doesn't really work when receiving the pkt of ICMPV6_PKT_TOOBIG. This patch is to add mtu lock check in __ip6_rt_update_pmtu just as ipv4 did in __ip_rt_update_pmtu. Acked-by: Hannes Frederic Sowa Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/route.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 947ed1ded026..7403d90dcb38 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1364,6 +1364,9 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (rt6->rt6i_flags & RTF_LOCAL) return; + if (dst_metric_locked(dst, RTAX_MTU)) + return; + dst_confirm(dst); mtu = max_t(u32, mtu, IPV6_MIN_MTU); if (mtu >= dst_mtu(dst)) From ce6dd23329b1ee6a794acf5f7e40f8e89b8317ee Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 28 Oct 2016 18:43:11 +0200 Subject: [PATCH 037/305] dctcp: avoid bogus doubling of cwnd after loss If a congestion control module doesn't provide .undo_cwnd function, tcp_undo_cwnd_reduction() will set cwnd to tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1); ... which makes sense for reno (it sets ssthresh to half the current cwnd), but it makes no sense for dctcp, which sets ssthresh based on the current congestion estimate. This can cause severe growth of cwnd (eventually overflowing u32). Fix this by saving last cwnd on loss and restore cwnd based on that, similar to cubic and other algorithms. Fixes: e3118e8359bb7c ("net: tcp: add DCTCP congestion control algorithm") Cc: Lawrence Brakmo Cc: Andrew Shewmaker Cc: Glenn Judd Acked-by: Daniel Borkmann Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/tcp_dctcp.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 10d728b6804c..ab37c6775630 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -56,6 +56,7 @@ struct dctcp { u32 next_seq; u32 ce_state; u32 delayed_ack_reserved; + u32 loss_cwnd; }; static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */ @@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk) ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); ca->delayed_ack_reserved = 0; + ca->loss_cwnd = 0; ca->ce_state = 0; dctcp_reset(tp, ca); @@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk) static u32 dctcp_ssthresh(struct sock *sk) { - const struct dctcp *ca = inet_csk_ca(sk); + struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); + ca->loss_cwnd = tp->snd_cwnd; return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U); } @@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, return 0; } +static u32 dctcp_cwnd_undo(struct sock *sk) +{ + const struct dctcp *ca = inet_csk_ca(sk); + + return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); +} + static struct tcp_congestion_ops dctcp __read_mostly = { .init = dctcp_init, .in_ack_event = dctcp_update_alpha, .cwnd_event = dctcp_cwnd_event, .ssthresh = dctcp_ssthresh, .cong_avoid = tcp_reno_cong_avoid, + .undo_cwnd = dctcp_cwnd_undo, .set_state = dctcp_state, .get_info = dctcp_get_info, .flags = TCP_CONG_NEEDS_ECN, From e551c32d57c88923f99f8f010e89ca7ed0735e83 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Oct 2016 13:40:24 -0700 Subject: [PATCH 038/305] net: clear sk_err_soft in sk_clone_lock() At accept() time, it is possible the parent has a non zero sk_err_soft, leftover from a prior error. Make sure we do not leave this value in the child, as it makes future getsockopt(SO_ERROR) calls quite unreliable. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/core/sock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/sock.c b/net/core/sock.c index c73e28fc9c2a..df171acfe232 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1543,6 +1543,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); newsk->sk_err = 0; + newsk->sk_err_soft = 0; newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); From 4f2e4ad56a65f3b7d64c258e373cb71e8d2499f4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 29 Oct 2016 11:02:36 -0700 Subject: [PATCH 039/305] net: mangle zero checksum in skb_checksum_help() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sending zero checksum is ok for TCP, but not for UDP. UDPv6 receiver should by default drop a frame with a 0 checksum, and UDPv4 would not verify the checksum and might accept a corrupted packet. Simply replace such checksum by 0xffff, regardless of transport. This error was caught on SIT tunnels, but seems generic. Signed-off-by: Eric Dumazet Cc: Maciej Żenczykowski Cc: Willem de Bruijn Acked-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 820bac239738..eaad4c28069f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2484,7 +2484,7 @@ int skb_checksum_help(struct sk_buff *skb) goto out; } - *(__sum16 *)(skb->data + offset) = csum_fold(csum); + *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0; out_set_summed: skb->ip_summed = CHECKSUM_NONE; out: From cbbf049a7c346180cc61ae0a9245c5d749d20a12 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Sun, 30 Oct 2016 10:25:42 +0200 Subject: [PATCH 040/305] qede: Fix statistics' strings for Tx/Rx queues When an interface is configured to use Tx/Rx-only queues, the length of the statistics would be shortened to accomodate only the statistics required per-each queue, and the values would be provided accordingly. However, the strings provided would still contain both Tx and Rx strings for each one of the queues [regardless of its configuration], which might lead to out-of-bound access when filling the buffers as well as incorrect statistics presented. Fixes: 9a4d7e86acf3 ("qede: Add support for Tx/Rx-only queues.") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- .../net/ethernet/qlogic/qede/qede_ethtool.c | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 12251a1032d1..7567cc464b88 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -175,16 +175,23 @@ static void qede_get_strings_stats(struct qede_dev *edev, u8 *buf) for (i = 0, k = 0; i < QEDE_QUEUE_CNT(edev); i++) { int tc; - for (j = 0; j < QEDE_NUM_RQSTATS; j++) - sprintf(buf + (k + j) * ETH_GSTRING_LEN, - "%d: %s", i, qede_rqstats_arr[j].string); - k += QEDE_NUM_RQSTATS; - for (tc = 0; tc < edev->num_tc; tc++) { - for (j = 0; j < QEDE_NUM_TQSTATS; j++) + if (edev->fp_array[i].type & QEDE_FASTPATH_RX) { + for (j = 0; j < QEDE_NUM_RQSTATS; j++) sprintf(buf + (k + j) * ETH_GSTRING_LEN, - "%d.%d: %s", i, tc, - qede_tqstats_arr[j].string); - k += QEDE_NUM_TQSTATS; + "%d: %s", i, + qede_rqstats_arr[j].string); + k += QEDE_NUM_RQSTATS; + } + + if (edev->fp_array[i].type & QEDE_FASTPATH_TX) { + for (tc = 0; tc < edev->num_tc; tc++) { + for (j = 0; j < QEDE_NUM_TQSTATS; j++) + sprintf(buf + (k + j) * + ETH_GSTRING_LEN, + "%d.%d: %s", i, tc, + qede_tqstats_arr[j].string); + k += QEDE_NUM_TQSTATS; + } } } From 46d0847cdd4a3fc1920e56827b9189b9a105d362 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 30 Oct 2016 10:09:22 +0100 Subject: [PATCH 041/305] mlxsw: spectrum: Fix incorrect reuse of MID entries In the device, a MID entry represents a group of local ports, which can later be bound to a MDB entry. The lookup of an existing MID entry is currently done using the provided MC MAC address and VID, from the Linux bridge. However, this can result in an incorrect reuse of the same MID index in different VLAN-unaware bridges (same IP MC group and VID 0). Fix this by performing the lookup based on FID instead of VID, which is unique across different bridges. Fixes: 3a49b4fde2a1 ("mlxsw: Adding layer 2 multicast support") Signed-off-by: Ido Schimmel Acked-by: Elad Raz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 +- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 9b22863a924b..97bbc1d21df8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -115,7 +115,7 @@ struct mlxsw_sp_rif { struct mlxsw_sp_mid { struct list_head list; unsigned char addr[ETH_ALEN]; - u16 vid; + u16 fid; u16 mid; unsigned int ref_count; }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 5e00c79e8133..1e2c8eca3af1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -929,12 +929,12 @@ static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid, static struct mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp *mlxsw_sp, const unsigned char *addr, - u16 vid) + u16 fid) { struct mlxsw_sp_mid *mid; list_for_each_entry(mid, &mlxsw_sp->br_mids.list, list) { - if (ether_addr_equal(mid->addr, addr) && mid->vid == vid) + if (ether_addr_equal(mid->addr, addr) && mid->fid == fid) return mid; } return NULL; @@ -942,7 +942,7 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, const unsigned char *addr, - u16 vid) + u16 fid) { struct mlxsw_sp_mid *mid; u16 mid_idx; @@ -958,7 +958,7 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, set_bit(mid_idx, mlxsw_sp->br_mids.mapped); ether_addr_copy(mid->addr, addr); - mid->vid = vid; + mid->fid = fid; mid->mid = mid_idx; mid->ref_count = 0; list_add_tail(&mid->list, &mlxsw_sp->br_mids.list); @@ -991,9 +991,9 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, if (switchdev_trans_ph_prepare(trans)) return 0; - mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, mdb->vid); + mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid); if (!mid) { - mid = __mlxsw_sp_mc_alloc(mlxsw_sp, mdb->addr, mdb->vid); + mid = __mlxsw_sp_mc_alloc(mlxsw_sp, mdb->addr, fid); if (!mid) { netdev_err(dev, "Unable to allocate MC group\n"); return -ENOMEM; @@ -1137,7 +1137,7 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid_idx; int err = 0; - mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, mdb->vid); + mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid); if (!mid) { netdev_err(dev, "Unable to remove port from MC DB\n"); return -EINVAL; From 460d2830b00db407be2b72ed792eb3596f245192 Mon Sep 17 00:00:00 2001 From: Lukas Resch Date: Mon, 10 Oct 2016 08:07:32 +0000 Subject: [PATCH 042/305] can: sja1000: plx_pci: Add support for Moxa CAN devices This patch adds support for Moxa CAN devices. Signed-off-by: Lukas Resch Signed-off-by: Christoph Zehentner Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sja1000/plx_pci.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/can/sja1000/plx_pci.c b/drivers/net/can/sja1000/plx_pci.c index 3eb7430dffbf..f8ff25c8ee2e 100644 --- a/drivers/net/can/sja1000/plx_pci.c +++ b/drivers/net/can/sja1000/plx_pci.c @@ -142,6 +142,9 @@ struct plx_pci_card { #define CTI_PCI_VENDOR_ID 0x12c4 #define CTI_PCI_DEVICE_ID_CRG001 0x0900 +#define MOXA_PCI_VENDOR_ID 0x1393 +#define MOXA_PCI_DEVICE_ID 0x0100 + static void plx_pci_reset_common(struct pci_dev *pdev); static void plx9056_pci_reset_common(struct pci_dev *pdev); static void plx_pci_reset_marathon_pci(struct pci_dev *pdev); @@ -258,6 +261,14 @@ static struct plx_pci_card_info plx_pci_card_info_elcus = { /* based on PLX9030 */ }; +static struct plx_pci_card_info plx_pci_card_info_moxa = { + "MOXA", 2, + PLX_PCI_CAN_CLOCK, PLX_PCI_OCR, PLX_PCI_CDR, + {0, 0x00, 0x00}, { {0, 0x00, 0x80}, {1, 0x00, 0x80} }, + &plx_pci_reset_common + /* based on PLX9052 */ +}; + static const struct pci_device_id plx_pci_tbl[] = { { /* Adlink PCI-7841/cPCI-7841 */ @@ -357,6 +368,13 @@ static const struct pci_device_id plx_pci_tbl[] = { 0, 0, (kernel_ulong_t)&plx_pci_card_info_elcus }, + { + /* moxa */ + MOXA_PCI_VENDOR_ID, MOXA_PCI_DEVICE_ID, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + (kernel_ulong_t)&plx_pci_card_info_moxa + }, { 0,} }; MODULE_DEVICE_TABLE(pci, plx_pci_tbl); From deb507f91f1adbf64317ad24ac46c56eeccfb754 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 24 Oct 2016 21:11:26 +0200 Subject: [PATCH 043/305] can: bcm: fix warning in bcm_connect/proc_register Andrey Konovalov reported an issue with proc_register in bcm.c. As suggested by Cong Wang this patch adds a lock_sock() protection and a check for unsuccessful proc_create_data() in bcm_connect(). Reference: http://marc.info/?l=linux-netdev&m=147732648731237 Reported-by: Andrey Konovalov Suggested-by: Cong Wang Signed-off-by: Oliver Hartkopp Acked-by: Cong Wang Tested-by: Andrey Konovalov Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- net/can/bcm.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index 8e999ffdf28b..8af9d25ff988 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1549,24 +1549,31 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct sock *sk = sock->sk; struct bcm_sock *bo = bcm_sk(sk); + int ret = 0; if (len < sizeof(*addr)) return -EINVAL; - if (bo->bound) - return -EISCONN; + lock_sock(sk); + + if (bo->bound) { + ret = -EISCONN; + goto fail; + } /* bind a device to this socket */ if (addr->can_ifindex) { struct net_device *dev; dev = dev_get_by_index(&init_net, addr->can_ifindex); - if (!dev) - return -ENODEV; - + if (!dev) { + ret = -ENODEV; + goto fail; + } if (dev->type != ARPHRD_CAN) { dev_put(dev); - return -ENODEV; + ret = -ENODEV; + goto fail; } bo->ifindex = dev->ifindex; @@ -1577,17 +1584,24 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, bo->ifindex = 0; } - bo->bound = 1; - if (proc_dir) { /* unique socket address as filename */ sprintf(bo->procname, "%lu", sock_i_ino(sk)); bo->bcm_proc_read = proc_create_data(bo->procname, 0644, proc_dir, &bcm_proc_fops, sk); + if (!bo->bcm_proc_read) { + ret = -ENOMEM; + goto fail; + } } - return 0; + bo->bound = 1; + +fail: + release_sock(sk); + + return ret; } static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, From 87557efc27f6a50140fb20df06a917f368ce3c66 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Mon, 31 Oct 2016 13:38:29 +0800 Subject: [PATCH 044/305] xen-netfront: do not cast grant table reference to signed short While grant reference is of type uint32_t, xen-netfront erroneously casts it to signed short in BUG_ON(). This would lead to the xen domU panic during boot-up or migration when it is attached with lots of paravirtual devices. Signed-off-by: Dongli Zhang Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index e17879dd5d5a..189a28dcd80d 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -304,7 +304,7 @@ static void xennet_alloc_rx_buffers(struct netfront_queue *queue) queue->rx_skbs[id] = skb; ref = gnttab_claim_grant_reference(&queue->gref_rx_head); - BUG_ON((signed short)ref < 0); + WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)ref)); queue->grant_rx_ref[id] = ref; page = skb_frag_page(&skb_shinfo(skb)->frags[0]); @@ -428,7 +428,7 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset, id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs); tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++); ref = gnttab_claim_grant_reference(&queue->gref_tx_head); - BUG_ON((signed short)ref < 0); + WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)ref)); gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id, gfn, GNTMAP_readonly); From cd26da4ff4eb7189921d4e7ad87e8adebb7b416b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 31 Oct 2016 20:32:31 +0800 Subject: [PATCH 045/305] sctp: hold transport instead of assoc in sctp_diag In sctp_transport_lookup_process(), Commit 1cceda784980 ("sctp: fix the issue sctp_diag uses lock_sock in rcu_read_lock") moved cb() out of rcu lock, but it put transport and hold assoc instead, and ignore that cb() still uses transport. It may cause a use-after-free issue. This patch is to hold transport instead of assoc there. Fixes: 1cceda784980 ("sctp: fix the issue sctp_diag uses lock_sock in rcu_read_lock") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9fbb6feb8c27..71b75f9d9c1b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4480,12 +4480,9 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), if (!transport || !sctp_transport_hold(transport)) goto out; - sctp_association_hold(transport->asoc); - sctp_transport_put(transport); - rcu_read_unlock(); err = cb(transport, p); - sctp_association_put(transport->asoc); + sctp_transport_put(transport); out: return err; From 7c17fcc726903ffed1716351efdc617e752533ed Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 31 Oct 2016 20:32:32 +0800 Subject: [PATCH 046/305] sctp: return back transport in __sctp_rcv_init_lookup Prior to this patch, it used a local variable to save the transport that is looked up by __sctp_lookup_association(), and didn't return it back. But in sctp_rcv, it is used to initialize chunk->transport. So when hitting this, even if it found the transport, it was still initializing chunk->transport with null instead. This patch is to return the transport back through transport pointer that is from __sctp_rcv_lookup_harder(). Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/input.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index a2ea1d1cc06a..8e0bc58eec20 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -1021,7 +1021,6 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, struct sctphdr *sh = sctp_hdr(skb); union sctp_params params; sctp_init_chunk_t *init; - struct sctp_transport *transport; struct sctp_af *af; /* @@ -1052,7 +1051,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, af->from_addr_param(paddr, params.addr, sh->source, 0); - asoc = __sctp_lookup_association(net, laddr, paddr, &transport); + asoc = __sctp_lookup_association(net, laddr, paddr, transportp); if (asoc) return asoc; } From dae399d7fdee84d8f5227a9711d95bb4e9a05d4e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 31 Oct 2016 20:32:33 +0800 Subject: [PATCH 047/305] sctp: hold transport instead of assoc when lookup assoc in rx path Prior to this patch, in rx path, before calling lock_sock, it needed to hold assoc when got it by __sctp_lookup_association, in case other place would free/put assoc. But in __sctp_lookup_association, it lookup and hold transport, then got assoc by transport->assoc, then hold assoc and put transport. It means it didn't hold transport, yet it was returned and later on directly assigned to chunk->transport. Without the protection of sock lock, the transport may be freed/put by other places, which would cause a use-after-free issue. This patch is to fix this issue by holding transport instead of assoc. As holding transport can make sure to access assoc is also safe, and actually it looks up assoc by searching transport rhashtable, to hold transport here makes more sense. Note that the function will be renamed later on on another patch. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 2 +- net/sctp/input.c | 32 ++++++++++++++++---------------- net/sctp/ipv6.c | 2 +- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 87a7f42e7639..31acc3f4f132 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -152,7 +152,7 @@ void sctp_unhash_endpoint(struct sctp_endpoint *); struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *, struct sctphdr *, struct sctp_association **, struct sctp_transport **); -void sctp_err_finish(struct sock *, struct sctp_association *); +void sctp_err_finish(struct sock *, struct sctp_transport *); void sctp_icmp_frag_needed(struct sock *, struct sctp_association *, struct sctp_transport *t, __u32 pmtu); void sctp_icmp_redirect(struct sock *, struct sctp_transport *, diff --git a/net/sctp/input.c b/net/sctp/input.c index 8e0bc58eec20..a01a56ec8b8c 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -181,9 +181,10 @@ int sctp_rcv(struct sk_buff *skb) * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB */ if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) { - if (asoc) { - sctp_association_put(asoc); + if (transport) { + sctp_transport_put(transport); asoc = NULL; + transport = NULL; } else { sctp_endpoint_put(ep); ep = NULL; @@ -269,8 +270,8 @@ int sctp_rcv(struct sk_buff *skb) bh_unlock_sock(sk); /* Release the asoc/ep ref we took in the lookup calls. */ - if (asoc) - sctp_association_put(asoc); + if (transport) + sctp_transport_put(transport); else sctp_endpoint_put(ep); @@ -283,8 +284,8 @@ discard_it: discard_release: /* Release the asoc/ep ref we took in the lookup calls. */ - if (asoc) - sctp_association_put(asoc); + if (transport) + sctp_transport_put(transport); else sctp_endpoint_put(ep); @@ -300,6 +301,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) { struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; struct sctp_inq *inqueue = &chunk->rcvr->inqueue; + struct sctp_transport *t = chunk->transport; struct sctp_ep_common *rcvr = NULL; int backloged = 0; @@ -351,7 +353,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) done: /* Release the refs we took in sctp_add_backlog */ if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) - sctp_association_put(sctp_assoc(rcvr)); + sctp_transport_put(t); else if (SCTP_EP_TYPE_SOCKET == rcvr->type) sctp_endpoint_put(sctp_ep(rcvr)); else @@ -363,6 +365,7 @@ done: static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) { struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; + struct sctp_transport *t = chunk->transport; struct sctp_ep_common *rcvr = chunk->rcvr; int ret; @@ -373,7 +376,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) * from us */ if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) - sctp_association_hold(sctp_assoc(rcvr)); + sctp_transport_hold(t); else if (SCTP_EP_TYPE_SOCKET == rcvr->type) sctp_endpoint_hold(sctp_ep(rcvr)); else @@ -537,15 +540,15 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, return sk; out: - sctp_association_put(asoc); + sctp_transport_put(transport); return NULL; } /* Common cleanup code for icmp/icmpv6 error handler. */ -void sctp_err_finish(struct sock *sk, struct sctp_association *asoc) +void sctp_err_finish(struct sock *sk, struct sctp_transport *t) { bh_unlock_sock(sk); - sctp_association_put(asoc); + sctp_transport_put(t); } /* @@ -641,7 +644,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) } out_unlock: - sctp_err_finish(sk, asoc); + sctp_err_finish(sk, transport); } /* @@ -952,11 +955,8 @@ static struct sctp_association *__sctp_lookup_association( goto out; asoc = t->asoc; - sctp_association_hold(asoc); *pt = t; - sctp_transport_put(t); - out: return asoc; } @@ -986,7 +986,7 @@ int sctp_has_association(struct net *net, struct sctp_transport *transport; if ((asoc = sctp_lookup_association(net, laddr, paddr, &transport))) { - sctp_association_put(asoc); + sctp_transport_put(transport); return 1; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index f473779e8b1c..176af3080a2b 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -198,7 +198,7 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } out_unlock: - sctp_err_finish(sk, asoc); + sctp_err_finish(sk, transport); out: if (likely(idev != NULL)) in6_dev_put(idev); From fcdefccac976ee51dd6071832b842d8fb41c479c Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Mon, 31 Oct 2016 13:32:03 -0400 Subject: [PATCH 048/305] bgmac: stop clearing DMA receive control register right after it is set Current bgmac code initializes some DMA settings in the receive control register for some hardware and then immediately clears those settings. Not clearing those settings results in ~420Mbps *improvement* in throughput; this system can now receive frames at line-rate on Broadcom 5871x hardware compared to ~520Mbps today. I also tested a few other values but found there to be no discernible difference in CPU utilization even if burst size and prefetching values are different. On the hardware tested there was no need to keep the code that cleared all but bits 16-17, but since there is a wide variety of hardware that used this driver (I did not look at all hardware docs for hardware using this IP block), I find it wise to move this call up and clear bits just after reading the default value from the hardware rather than completely removing it. This is a good candidate for -stable >=3.14 since that is when the code that was supposed to improve performance (but did not) was introduced. Signed-off-by: Andy Gospodarek Fixes: 56ceecde1f29 ("bgmac: initialize the DMA controller of core...") Cc: Hauke Mehrtens Acked-by: Hauke Mehrtens Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 31ca204b38d2..91cbf92de971 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -307,6 +307,10 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac, u32 ctl; ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL); + + /* preserve ONLY bits 16-17 from current hardware value */ + ctl &= BGMAC_DMA_RX_ADDREXT_MASK; + if (bgmac->feature_flags & BGMAC_FEAT_RX_MASK_SETUP) { ctl &= ~BGMAC_DMA_RX_BL_MASK; ctl |= BGMAC_DMA_RX_BL_128 << BGMAC_DMA_RX_BL_SHIFT; @@ -317,7 +321,6 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac, ctl &= ~BGMAC_DMA_RX_PT_MASK; ctl |= BGMAC_DMA_RX_PT_1 << BGMAC_DMA_RX_PT_SHIFT; } - ctl &= BGMAC_DMA_RX_ADDREXT_MASK; ctl |= BGMAC_DMA_RX_ENABLE; ctl |= BGMAC_DMA_RX_PARITY_DISABLE; ctl |= BGMAC_DMA_RX_OVERFLOW_CONT; From 4efca4ed05cbdfd13ec3e8cb623fb77d6e4ab187 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 Nov 2016 12:46:19 +1100 Subject: [PATCH 049/305] kbuild: modversions for EXPORT_SYMBOL() for asm Allow architectures to create asm/asm-prototypes.h file that provides C prototypes for exported asm functions, which enables proper CRC versions to be generated for them. Signed-off-by: Nicholas Piggin Signed-off-by: Michal Marek --- scripts/Makefile.build | 78 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 72 insertions(+), 6 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index e1f25d6d132e..3e223c264469 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -159,7 +159,8 @@ cmd_cpp_i_c = $(CPP) $(c_flags) -o $@ $< $(obj)/%.i: $(src)/%.c FORCE $(call if_changed_dep,cpp_i_c) -cmd_gensymtypes = \ +# These mirror gensymtypes_S and co below, keep them in synch. +cmd_gensymtypes_c = \ $(CPP) -D__GENKSYMS__ $(c_flags) $< | \ $(GENKSYMS) $(if $(1), -T $(2)) \ $(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX)) \ @@ -169,7 +170,7 @@ cmd_gensymtypes = \ quiet_cmd_cc_symtypes_c = SYM $(quiet_modtag) $@ cmd_cc_symtypes_c = \ set -e; \ - $(call cmd_gensymtypes,true,$@) >/dev/null; \ + $(call cmd_gensymtypes_c,true,$@) >/dev/null; \ test -s $@ || rm -f $@ $(obj)/%.symtypes : $(src)/%.c FORCE @@ -198,9 +199,10 @@ else # the actual value of the checksum generated by genksyms cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $< -cmd_modversions = \ + +cmd_modversions_c = \ if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ - $(call cmd_gensymtypes,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ + $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ > $(@D)/.tmp_$(@F:.o=.ver); \ \ $(LD) $(LDFLAGS) -r -o $@ $(@D)/.tmp_$(@F) \ @@ -268,13 +270,14 @@ endif # CONFIG_STACK_VALIDATION define rule_cc_o_c $(call echo-cmd,checksrc) $(cmd_checksrc) \ $(call cmd_and_fixdep,cc_o_c) \ - $(cmd_modversions) \ + $(cmd_modversions_c) \ $(cmd_objtool) \ $(call echo-cmd,record_mcount) $(cmd_record_mcount) endef define rule_as_o_S $(call cmd_and_fixdep,as_o_S) \ + $(cmd_modversions_S) \ $(cmd_objtool) endef @@ -314,6 +317,39 @@ modkern_aflags := $(KBUILD_AFLAGS_KERNEL) $(AFLAGS_KERNEL) $(real-objs-m) : modkern_aflags := $(KBUILD_AFLAGS_MODULE) $(AFLAGS_MODULE) $(real-objs-m:.o=.s): modkern_aflags := $(KBUILD_AFLAGS_MODULE) $(AFLAGS_MODULE) +# .S file exports must have their C prototypes defined in asm/asm-prototypes.h +# or a file that it includes, in order to get versioned symbols. We build a +# dummy C file that includes asm-prototypes and the EXPORT_SYMBOL lines from +# the .S file (with trailing ';'), and run genksyms on that, to extract vers. +# +# This is convoluted. The .S file must first be preprocessed to run guards and +# expand names, then the resulting exports must be constructed into plain +# EXPORT_SYMBOL(symbol); to build our dummy C file, and that gets preprocessed +# to make the genksyms input. +# +# These mirror gensymtypes_c and co above, keep them in synch. +cmd_gensymtypes_S = \ + (echo "\#include " ; \ + echo "\#include " ; \ + $(CPP) $(a_flags) $< | \ + grep ^___EXPORT_SYMBOL | \ + sed 's/___EXPORT_SYMBOL \([a-zA-Z0-9_]*\),.*/EXPORT_SYMBOL(\1);/' ) | \ + $(CPP) -D__GENKSYMS__ $(c_flags) -xc - | \ + $(GENKSYMS) $(if $(1), -T $(2)) \ + $(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX)) \ + $(if $(KBUILD_PRESERVE),-p) \ + -r $(firstword $(wildcard $(2:.symtypes=.symref) /dev/null)) + +quiet_cmd_cc_symtypes_S = SYM $(quiet_modtag) $@ +cmd_cc_symtypes_S = \ + set -e; \ + $(call cmd_gensymtypes_S,true,$@) >/dev/null; \ + test -s $@ || rm -f $@ + +$(obj)/%.symtypes : $(src)/%.S FORCE + $(call cmd,cc_symtypes_S) + + quiet_cmd_cpp_s_S = CPP $(quiet_modtag) $@ cmd_cpp_s_S = $(CPP) $(a_flags) -o $@ $< @@ -321,7 +357,37 @@ $(obj)/%.s: $(src)/%.S FORCE $(call if_changed_dep,cpp_s_S) quiet_cmd_as_o_S = AS $(quiet_modtag) $@ -cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $< + +ifndef CONFIG_MODVERSIONS +cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $< + +else + +ASM_PROTOTYPES := $(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/asm-prototypes.h) + +ifeq ($(ASM_PROTOTYPES),) +cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $< + +else + +# versioning matches the C process described above, with difference that +# we parse asm-prototypes.h C header to get function definitions. + +cmd_as_o_S = $(CC) $(a_flags) -c -o $(@D)/.tmp_$(@F) $< + +cmd_modversions_S = \ + if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ + $(call cmd_gensymtypes_S,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ + > $(@D)/.tmp_$(@F:.o=.ver); \ + \ + $(LD) $(LDFLAGS) -r -o $@ $(@D)/.tmp_$(@F) \ + -T $(@D)/.tmp_$(@F:.o=.ver); \ + rm -f $(@D)/.tmp_$(@F) $(@D)/.tmp_$(@F:.o=.ver); \ + else \ + mv -f $(@D)/.tmp_$(@F) $@; \ + fi; +endif +endif $(obj)/%.o: $(src)/%.S $(objtool_obj) FORCE $(call if_changed_rule,as_o_S) From b5a4a3eb4ec7382780aa153224780b9ecdc76ceb Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Mon, 31 Oct 2016 16:00:26 -0700 Subject: [PATCH 050/305] drivers: net: xgene: fix: Disable coalescing on v1 hardware Since ethernet v1 hardware has a bug related to coalescing, disabling this feature. Signed-off-by: Iyappan Subramanian Signed-off-by: Toan Le Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_hw.c | 12 ------------ drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 3 ++- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index c481f104a8fe..5390ae89136c 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -204,17 +204,6 @@ static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring) return num_msgs; } -static void xgene_enet_setup_coalescing(struct xgene_enet_desc_ring *ring) -{ - u32 data = 0x7777; - - xgene_enet_ring_wr32(ring, CSR_PBM_COAL, 0x8e); - xgene_enet_ring_wr32(ring, CSR_PBM_CTICK1, data); - xgene_enet_ring_wr32(ring, CSR_PBM_CTICK2, data << 16); - xgene_enet_ring_wr32(ring, CSR_THRESHOLD0_SET1, 0x40); - xgene_enet_ring_wr32(ring, CSR_THRESHOLD1_SET1, 0x80); -} - void xgene_enet_parse_error(struct xgene_enet_desc_ring *ring, struct xgene_enet_pdata *pdata, enum xgene_enet_err_code status) @@ -929,5 +918,4 @@ struct xgene_ring_ops xgene_ring1_ops = { .clear = xgene_enet_clear_ring, .wr_cmd = xgene_enet_wr_cmd, .len = xgene_enet_ring_len, - .coalesce = xgene_enet_setup_coalescing, }; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 429f18fc5503..8158d4698734 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1188,7 +1188,8 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev) tx_ring->dst_ring_num = xgene_enet_dst_ring_num(cp_ring); } - pdata->ring_ops->coalesce(pdata->tx_ring[0]); + if (pdata->ring_ops->coalesce) + pdata->ring_ops->coalesce(pdata->tx_ring[0]); pdata->tx_qcnt_hi = pdata->tx_ring[0]->slots - 128; return 0; From f126df8503275facc96dd05a818086afbb89b77d Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Mon, 31 Oct 2016 16:00:27 -0700 Subject: [PATCH 051/305] drivers: net: xgene: fix: Coalescing values for v2 hardware Changing the interrupt trigger region id to 2 and the corresponding threshold set0/set1 values to 8/16. Signed-off-by: Iyappan Subramanian Signed-off-by: Toan Le Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_hw.h | 2 ++ drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c | 12 +++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h index 8456337a237d..06e598c8bc16 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h @@ -55,8 +55,10 @@ enum xgene_enet_rm { #define PREFETCH_BUF_EN BIT(21) #define CSR_RING_ID_BUF 0x000c #define CSR_PBM_COAL 0x0014 +#define CSR_PBM_CTICK0 0x0018 #define CSR_PBM_CTICK1 0x001c #define CSR_PBM_CTICK2 0x0020 +#define CSR_PBM_CTICK3 0x0024 #define CSR_THRESHOLD0_SET1 0x0030 #define CSR_THRESHOLD1_SET1 0x0034 #define CSR_RING_NE_INT_MODE 0x017c diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c index 2b76732add5d..af51dd5844ce 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c @@ -30,7 +30,7 @@ static void xgene_enet_ring_init(struct xgene_enet_desc_ring *ring) ring_cfg[0] |= SET_VAL(X2_INTLINE, ring->id & RING_BUFNUM_MASK); ring_cfg[3] |= SET_BIT(X2_DEQINTEN); } - ring_cfg[0] |= SET_VAL(X2_CFGCRID, 1); + ring_cfg[0] |= SET_VAL(X2_CFGCRID, 2); addr >>= 8; ring_cfg[2] |= QCOHERENT | SET_VAL(RINGADDRL, addr); @@ -192,13 +192,15 @@ static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring) static void xgene_enet_setup_coalescing(struct xgene_enet_desc_ring *ring) { - u32 data = 0x7777; + u32 data = 0x77777777; xgene_enet_ring_wr32(ring, CSR_PBM_COAL, 0x8e); + xgene_enet_ring_wr32(ring, CSR_PBM_CTICK0, data); xgene_enet_ring_wr32(ring, CSR_PBM_CTICK1, data); - xgene_enet_ring_wr32(ring, CSR_PBM_CTICK2, data << 16); - xgene_enet_ring_wr32(ring, CSR_THRESHOLD0_SET1, 0x40); - xgene_enet_ring_wr32(ring, CSR_THRESHOLD1_SET1, 0x80); + xgene_enet_ring_wr32(ring, CSR_PBM_CTICK2, data); + xgene_enet_ring_wr32(ring, CSR_PBM_CTICK3, data); + xgene_enet_ring_wr32(ring, CSR_THRESHOLD0_SET1, 0x08); + xgene_enet_ring_wr32(ring, CSR_THRESHOLD1_SET1, 0x10); } struct xgene_ring_ops xgene_ring2_ops = { From 7bb9f731d1026bd48b84cee7853cba7f5678193c Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Mon, 31 Oct 2016 18:18:42 -0500 Subject: [PATCH 052/305] net: qcom/emac: use correct value for SGMII_LN_UCDR_SO_GAIN_MODE0 The documentation says that SGMII_LN_UCDR_SO_GAIN_MODE0 should be set to 0, not 6, on the Qualcomm Technologies QDF2432. Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-sgmii.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c index 75c1b530e39e..72fe343c7a36 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c @@ -421,7 +421,7 @@ static const struct emac_reg_write sgmii_v2_laned[] = { /* CDR Settings */ {EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0, UCDR_STEP_BY_TWO_MODE0 | UCDR_xO_GAIN_MODE(10)}, - {EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(6)}, + {EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(0)}, {EMAC_SGMII_LN_UCDR_SO_CONFIG, UCDR_ENABLE | UCDR_SO_SATURATION(12)}, /* TX/RX Settings */ From e7947ea770d0de434d38a0f823e660d3fd4bebb5 Mon Sep 17 00:00:00 2001 From: Isaac Boukris Date: Tue, 1 Nov 2016 02:41:35 +0200 Subject: [PATCH 053/305] unix: escape all null bytes in abstract unix domain socket Abstract unix domain socket may embed null characters, these should be translated to '@' when printed out to proc the same way the null prefix is currently being translated. This helps for tools such as netstat, lsof and the proc based implementation in ss to show all the significant bytes of the name (instead of getting cut at the first null occurrence). Signed-off-by: Isaac Boukris Signed-off-by: David S. Miller --- net/unix/af_unix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 145082e2ba36..5d1c14a2f268 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2812,7 +2812,8 @@ static int unix_seq_show(struct seq_file *seq, void *v) i++; } for ( ; i < len; i++) - seq_putc(seq, u->addr->name->sun_path[i]); + seq_putc(seq, u->addr->name->sun_path[i] ?: + '@'); } unix_state_unlock(s); seq_putc(seq, '\n'); From 42fb18fd5852dba9c43ac008558e4bc8062bda57 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Tue, 1 Nov 2016 08:10:53 +0100 Subject: [PATCH 054/305] net/mlx5: Simplify a test 'create_root_ns()' does not return an error pointer, so the test can be simplified to be more consistent. Signed-off-by: Christophe JAILLET Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 89696048b045..914e5466f729 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1690,7 +1690,7 @@ static int init_root_ns(struct mlx5_flow_steering *steering) { steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX); - if (IS_ERR_OR_NULL(steering->root_ns)) + if (!steering->root_ns) goto cleanup; if (init_root_tree(steering, &root_fs, &steering->root_ns->ns.node)) From b9b84fc07dbc6ac74d85f1b1a401b41c403fecb1 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 1 Nov 2016 10:50:01 +0000 Subject: [PATCH 055/305] net: mv643xx_eth: ensure coalesce settings survive read-modify-write The coalesce settings behave badly when changing just one value: ... # ethtool -c eth0 rx-usecs: 249 ... # ethtool -C eth0 tx-usecs 250 ... # ethtool -c eth0 rx-usecs: 248 This occurs due to rounding errors when calculating the microseconds value - the divisons round down. This causes (eg) the rx-usecs to decrease by one every time the tx-usecs value is set as per the above. Fix this by making the divison round-to-nearest. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mv643xx_eth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index bf5cc55ba24c..5b12022adf1f 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -1381,6 +1381,7 @@ static unsigned int get_rx_coal(struct mv643xx_eth_private *mp) temp = (val & 0x003fff00) >> 8; temp *= 64000000; + temp += mp->t_clk / 2; do_div(temp, mp->t_clk); return (unsigned int)temp; @@ -1417,6 +1418,7 @@ static unsigned int get_tx_coal(struct mv643xx_eth_private *mp) temp = (rdlp(mp, TX_FIFO_URGENT_THRESHOLD) & 0x3fff0) >> 4; temp *= 64000000; + temp += mp->t_clk / 2; do_div(temp, mp->t_clk); return (unsigned int)temp; From 6c08d7ab23dd07c046e8de1520073053bdc76ae2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 30 Oct 2016 09:49:26 +0100 Subject: [PATCH 056/305] drm/sun4i: Fix error handling 'sun4i_layers_init()' returns an error pointer in case of error, not NULL. So test it with IS_ERR. Signed-off-by: Christophe JAILLET Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 0da9862ad8ed..077f3785439e 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -142,7 +142,7 @@ static int sun4i_drv_bind(struct device *dev) /* Create our layers */ drv->layers = sun4i_layers_init(drm); - if (!drv->layers) { + if (IS_ERR(drv->layers)) { dev_err(drm->dev, "Couldn't create the planes\n"); ret = -EINVAL; goto free_drm; From 45788f1f5534fb02063ca077719592c2c3ba621e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Nov 2016 15:09:58 +0200 Subject: [PATCH 057/305] MAINTAINERS: Update MELLANOX MLX5 core VPI driver maintainers Add myself as a maintainer for mlx5 core driver as well. Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 4012c2f98617..53964ad4f2de 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8053,6 +8053,7 @@ F: drivers/infiniband/hw/mlx4/ F: include/linux/mlx4/ MELLANOX MLX5 core VPI driver +M: Saeed Mahameed M: Matan Barak M: Leon Romanovsky L: netdev@vger.kernel.org From 23f4ffedb7d751c7e298732ba91ca75d224bc1a6 Mon Sep 17 00:00:00 2001 From: Eli Cooper Date: Tue, 1 Nov 2016 23:45:12 +0800 Subject: [PATCH 058/305] ip6_tunnel: Clear IP6CB in ip6tunnel_xmit() skb->cb may contain data from previous layers. In the observed scenario, the garbage data were misinterpreted as IP6CB(skb)->frag_max_size, so that small packets sent through the tunnel are mistakenly fragmented. This patch unconditionally clears the control buffer in ip6tunnel_xmit(), which affects ip6_tunnel, ip6_udp_tunnel and ip6_gre. Currently none of these tunnels set IP6CB(skb)->flags, otherwise it needs to be done earlier. Cc: stable@vger.kernel.org Signed-off-by: Eli Cooper Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 20ed9699fcd4..1b1cf33cbfb0 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -146,6 +146,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, { int pkt_len, err; + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); pkt_len = skb->len - skb_inner_network_offset(skb); err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb); if (unlikely(net_xmit_eval(err))) From 4fd19c15decedd06d707e2691c24fce08700e2b1 Mon Sep 17 00:00:00 2001 From: Eli Cooper Date: Tue, 1 Nov 2016 23:45:13 +0800 Subject: [PATCH 059/305] ip6_udp_tunnel: remove unused IPCB related codes Some IPCB fields are currently set in udp_tunnel6_xmit_skb(), which are never used before it reaches ip6tunnel_xmit(), and past that point the control buffer is no longer interpreted as IPCB. This clears these unused IPCB related codes. Currently there is no skb scrubbing in ip6_udp_tunnel, otherwise IPCB(skb)->opt might need to be cleared for IPv4 packets, as shown in 5146d1f1511 ("tunnel: Clear IPCB(skb)->opt before dst_link_failure called"). Signed-off-by: Eli Cooper Signed-off-by: David S. Miller --- net/ipv6/ip6_udp_tunnel.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index a7520528ecd2..b283f293ee4a 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -88,9 +88,6 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, uh->len = htons(skb->len); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED - | IPSKB_REROUTED); skb_dst_set(skb, dst); udp6_set_csum(nocheck, skb, saddr, daddr, skb->len); From 269ebce4531b8edc4224259a02143181a1c1d77c Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Wed, 2 Nov 2016 09:04:33 +0800 Subject: [PATCH 060/305] xen-netfront: cast grant table reference first to type int IS_ERR_VALUE() in commit 87557efc27f6a50140fb20df06a917f368ce3c66 ("xen-netfront: do not cast grant table reference to signed short") would not return true for error code unless we cast ref first to type int. Signed-off-by: Dongli Zhang Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 189a28dcd80d..bf2744e1e3db 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -304,7 +304,7 @@ static void xennet_alloc_rx_buffers(struct netfront_queue *queue) queue->rx_skbs[id] = skb; ref = gnttab_claim_grant_reference(&queue->gref_rx_head); - WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)ref)); + WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref)); queue->grant_rx_ref[id] = ref; page = skb_frag_page(&skb_shinfo(skb)->frags[0]); @@ -428,7 +428,7 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset, id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs); tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++); ref = gnttab_claim_grant_reference(&queue->gref_tx_head); - WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)ref)); + WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref)); gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id, gfn, GNTMAP_readonly); From cfbd950d5e6e649c6c1a88925feada64f890c894 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 20 Oct 2016 08:46:32 +0200 Subject: [PATCH 061/305] video: ARM CLCD: fix Vexpress regression The CLCD does not come up on Versatile Express as it does not (currently) have a syscon node for controlling the block apart from the CLCD itself. Make sure the .init() function can bail out without an error making it probe again. Reported-by: Amit Pundir Signed-off-by: Linus Walleij Tested-by: Amit Pundir Tested-by: Nicolae Rosia Signed-off-by: Tomi Valkeinen --- drivers/video/fbdev/amba-clcd-versatile.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/amba-clcd-versatile.c b/drivers/video/fbdev/amba-clcd-versatile.c index 19ad8645d93c..e5d9bfc1703a 100644 --- a/drivers/video/fbdev/amba-clcd-versatile.c +++ b/drivers/video/fbdev/amba-clcd-versatile.c @@ -526,8 +526,8 @@ int versatile_clcd_init_panel(struct clcd_fb *fb, np = of_find_matching_node_and_match(NULL, versatile_clcd_of_match, &clcd_id); if (!np) { - dev_err(dev, "no Versatile syscon node\n"); - return -ENODEV; + /* Vexpress does not have this */ + return 0; } versatile_clcd_type = (enum versatile_clcd)clcd_id->data; From 14135f30e33ce37b22529f73660d7369cf424375 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 1 Nov 2016 16:04:36 -0700 Subject: [PATCH 062/305] inet: fix sleeping inside inet_wait_for_connect() Andrey reported this kernel warning: WARNING: CPU: 0 PID: 4608 at kernel/sched/core.c:7724 __might_sleep+0x14c/0x1a0 kernel/sched/core.c:7719 do not call blocking ops when !TASK_RUNNING; state=1 set at [] prepare_to_wait+0xbc/0x210 kernel/sched/wait.c:178 Modules linked in: CPU: 0 PID: 4608 Comm: syz-executor Not tainted 4.9.0-rc2+ #320 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffff88006625f7a0 ffffffff81b46914 ffff88006625f818 0000000000000000 ffffffff84052960 0000000000000000 ffff88006625f7e8 ffffffff81111237 ffff88006aceac00 ffffffff00001e2c ffffed000cc4beff ffffffff84052960 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0xb3/0x10f lib/dump_stack.c:51 [] __warn+0x1a7/0x1f0 kernel/panic.c:550 [] warn_slowpath_fmt+0xac/0xd0 kernel/panic.c:565 [] __might_sleep+0x14c/0x1a0 kernel/sched/core.c:7719 [< inline >] slab_pre_alloc_hook mm/slab.h:393 [< inline >] slab_alloc_node mm/slub.c:2634 [< inline >] slab_alloc mm/slub.c:2716 [] __kmalloc_track_caller+0x150/0x2a0 mm/slub.c:4240 [] kmemdup+0x24/0x50 mm/util.c:113 [] dccp_feat_clone_sp_val.part.5+0x4f/0xe0 net/dccp/feat.c:374 [< inline >] dccp_feat_clone_sp_val net/dccp/feat.c:1141 [< inline >] dccp_feat_change_recv net/dccp/feat.c:1141 [] dccp_feat_parse_options+0xaa1/0x13d0 net/dccp/feat.c:1411 [] dccp_parse_options+0x721/0x1010 net/dccp/options.c:128 [] dccp_rcv_state_process+0x200/0x15b0 net/dccp/input.c:644 [] dccp_v4_do_rcv+0xf4/0x1a0 net/dccp/ipv4.c:681 [< inline >] sk_backlog_rcv ./include/net/sock.h:872 [] __release_sock+0x126/0x3a0 net/core/sock.c:2044 [] release_sock+0x59/0x1c0 net/core/sock.c:2502 [< inline >] inet_wait_for_connect net/ipv4/af_inet.c:547 [] __inet_stream_connect+0x5d2/0xbb0 net/ipv4/af_inet.c:617 [] inet_stream_connect+0x55/0xa0 net/ipv4/af_inet.c:656 [] SYSC_connect+0x244/0x2f0 net/socket.c:1533 [] SyS_connect+0x24/0x30 net/socket.c:1514 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:209 Unlike commit 26cabd31259ba43f68026ce3f62b78094124333f ("sched, net: Clean up sk_wait_event() vs. might_sleep()"), the sleeping function is called before schedule_timeout(), this is indeed a bug. Fix this by moving the wait logic to the new API, it is similar to commit ff960a731788a7408b6f66ec4fd772ff18833211 ("netdev, sched/wait: Fix sleeping inside wait event"). Reported-by: Andrey Konovalov Cc: Andrey Konovalov Cc: Eric Dumazet Cc: Peter Zijlstra Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9648c97e541f..5ddf5cda07f4 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -533,9 +533,9 @@ EXPORT_SYMBOL(inet_dgram_connect); static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending += writebias; /* Basic assumption: if someone sets sk->sk_err, he _must_ @@ -545,13 +545,12 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) */ while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { release_sock(sk); - timeo = schedule_timeout(timeo); + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock(sk); if (signal_pending(current) || !timeo) break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending -= writebias; return timeo; } From 0b53df1e9e07984a93ad3454686740fc2f4d6b4b Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Wed, 2 Nov 2016 10:52:53 +0530 Subject: [PATCH 063/305] cxgb4: correct device ID of T6 adapter Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index 50812a1d67bd..df1573c4a659 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -178,9 +178,9 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x6005), CH_PCI_ID_TABLE_FENTRY(0x6006), CH_PCI_ID_TABLE_FENTRY(0x6007), + CH_PCI_ID_TABLE_FENTRY(0x6008), CH_PCI_ID_TABLE_FENTRY(0x6009), CH_PCI_ID_TABLE_FENTRY(0x600d), - CH_PCI_ID_TABLE_FENTRY(0x6010), CH_PCI_ID_TABLE_FENTRY(0x6011), CH_PCI_ID_TABLE_FENTRY(0x6014), CH_PCI_ID_TABLE_FENTRY(0x6015), From 9512925a2cc2b1cd0206bb93bad200a69716f998 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Wed, 2 Nov 2016 16:36:46 +0200 Subject: [PATCH 064/305] qede: Correctly map aggregation replacement pages Driver allocates replacement buffers before-hand to make sure whenever an aggregation begins there would be a replacement for the Rx buffers, as we can't release the buffer until aggregation is terminated and driver logic assumes the Rx rings are always full. For every other Rx page that's being allocated [I.e., regular] the page is being completely mapped while for the replacement buffers only the first portion of the page is being mapped. This means that: a. Once replacement buffer replenishes the regular Rx ring, assuming there's more than a single packet on page we'd post unmapped memory toward HW [assuming mapping is actually done in granularity smaller than page]. b. Unmaps are being done for the entire page, which is incorrect. Fixes: 55482edc25f06 ("qede: Add slowpath/fastpath support and enable hardware GRO") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 7def29aaf65c..85f46dbecd5b 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -2839,7 +2839,7 @@ static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq) } mapping = dma_map_page(&edev->pdev->dev, replace_buf->data, 0, - rxq->rx_buf_size, DMA_FROM_DEVICE); + PAGE_SIZE, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) { DP_NOTICE(edev, "Failed to map TPA replacement buffer\n"); From ac9e70b17ecd7c6e933ff2eaf7ab37429e71bf4d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 07:53:17 -0700 Subject: [PATCH 065/305] tcp: fix potential memory corruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Imagine initial value of max_skb_frags is 17, and last skb in write queue has 15 frags. Then max_skb_frags is lowered to 14 or smaller value. tcp_sendmsg() will then be allowed to add additional page frags and eventually go past MAX_SKB_FRAGS, overflowing struct skb_shared_info. Fixes: 5f74f82ea34c ("net:Add sysctl_max_skb_frags") Signed-off-by: Eric Dumazet Cc: Hans Westgaard Ry Cc: Håkon Bugge Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3251fe71f39f..18238ef8135a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1241,7 +1241,7 @@ new_segment: if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { - if (i == sysctl_max_skb_frags || !sg) { + if (i >= sysctl_max_skb_frags || !sg) { tcp_mark_push(tp, skb); goto new_segment; } From da96786e26c3ae47316db2b92046b11268c4379c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 2 Nov 2016 12:08:25 -0700 Subject: [PATCH 066/305] net: tcp: check skb is non-NULL for exact match on lookups Andrey reported the following error report while running the syzkaller fuzzer: general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 648 Comm: syz-executor Not tainted 4.9.0-rc3+ #333 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff8800398c4480 task.stack: ffff88003b468000 RIP: 0010:[] [< inline >] inet_exact_dif_match include/net/tcp.h:808 RIP: 0010:[] [] __inet_lookup_listener+0xb6/0x500 net/ipv4/inet_hashtables.c:219 RSP: 0018:ffff88003b46f270 EFLAGS: 00010202 RAX: 0000000000000004 RBX: 0000000000004242 RCX: 0000000000000001 RDX: 0000000000000000 RSI: ffffc90000e3c000 RDI: 0000000000000054 RBP: ffff88003b46f2d8 R08: 0000000000004000 R09: ffffffff830910e7 R10: 0000000000000000 R11: 000000000000000a R12: ffffffff867fa0c0 R13: 0000000000004242 R14: 0000000000000003 R15: dffffc0000000000 FS: 00007fb135881700(0000) GS:ffff88003ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020cc3000 CR3: 000000006d56a000 CR4: 00000000000006f0 Stack: 0000000000000000 000000000601a8c0 0000000000000000 ffffffff00004242 424200003b9083c2 ffff88003def4041 ffffffff84e7e040 0000000000000246 ffff88003a0911c0 0000000000000000 ffff88003a091298 ffff88003b9083ae Call Trace: [] tcp_v4_send_reset+0x584/0x1700 net/ipv4/tcp_ipv4.c:643 [] tcp_v4_rcv+0x198b/0x2e50 net/ipv4/tcp_ipv4.c:1718 [] ip_local_deliver_finish+0x332/0xad0 net/ipv4/ip_input.c:216 ... MD5 has a code path that calls __inet_lookup_listener with a null skb, so inet{6}_exact_dif_match needs to check skb against null before pulling the flag. Fixes: a04a480d4392 ("net: Require exact match for TCP socket lookups if dif is l3mdev") Reported-by: Andrey Konovalov Signed-off-by: David Ahern Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- include/net/tcp.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ca1ad9ebbc92..a0649973ee5b 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -149,7 +149,7 @@ static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb) { #if defined(CONFIG_NET_L3_MASTER_DEV) if (!net->ipv4.sysctl_tcp_l3mdev_accept && - ipv6_l3mdev_skb(IP6CB(skb)->flags)) + skb && ipv6_l3mdev_skb(IP6CB(skb)->flags)) return true; #endif return false; diff --git a/include/net/tcp.h b/include/net/tcp.h index 5b82d4d94834..304a8e17bc87 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -805,7 +805,7 @@ static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (!net->ipv4.sysctl_tcp_l3mdev_accept && - ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) + skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) return true; #endif return false; From 9ee6c5dc816aa8256257f2cd4008a9291ec7e985 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Wed, 2 Nov 2016 16:36:17 -0400 Subject: [PATCH 067/305] ipv4: allow local fragmentation in ip_finish_output_gso() Some configurations (e.g. geneve interface with default MTU of 1500 over an ethernet interface with 1500 MTU) result in the transmission of packets that exceed the configured MTU. While this should be considered to be a "bad" configuration, it is still allowed and should not result in the sending of packets that exceed the configured MTU. Fix by dropping the assumption in ip_finish_output_gso() that locally originated gso packets will never need fragmentation. Basic testing using iperf (observing CPU usage and bandwidth) have shown no measurable performance impact for traffic not requiring fragmentation. Fixes: c7ba65d7b649 ("net: ip: push gso skb forwarding handling down the stack") Reported-by: Jan Tluka Signed-off-by: Lance Richardson Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip.h | 3 +-- net/ipv4/ip_forward.c | 2 +- net/ipv4/ip_output.c | 6 ++---- net/ipv4/ip_tunnel_core.c | 11 ----------- net/ipv4/ipmr.c | 2 +- 5 files changed, 5 insertions(+), 19 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 5413883ac47f..d3a107850a41 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -47,8 +47,7 @@ struct inet_skb_parm { #define IPSKB_REROUTED BIT(4) #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) -#define IPSKB_FRAG_SEGS BIT(7) -#define IPSKB_L3SLAVE BIT(8) +#define IPSKB_L3SLAVE BIT(7) u16 frag_max_size; }; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 8b4ffd216839..9f0a7b96646f 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -117,7 +117,7 @@ int ip_forward(struct sk_buff *skb) if (opt->is_strictroute && rt->rt_uses_gateway) goto sr_failed; - IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS; + IPCB(skb)->flags |= IPSKB_FORWARDED; mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); if (ip_exceeds_mtu(skb, mtu)) { IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 03e7f7310423..49714010ac2e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -239,11 +239,9 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk, struct sk_buff *segs; int ret = 0; - /* common case: fragmentation of segments is not allowed, - * or seglen is <= mtu + /* common case: seglen is <= mtu */ - if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) || - skb_gso_validate_mtu(skb, mtu)) + if (skb_gso_validate_mtu(skb, mtu)) return ip_finish_output2(net, sk, skb); /* Slowpath - GSO segment length is exceeding the dst MTU. diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 777bc1883870..fed3d29f9eb3 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -63,7 +63,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, int pkt_len = skb->len - skb_inner_network_offset(skb); struct net *net = dev_net(rt->dst.dev); struct net_device *dev = skb->dev; - int skb_iif = skb->skb_iif; struct iphdr *iph; int err; @@ -73,16 +72,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, skb_dst_set(skb, &rt->dst); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - if (skb_iif && !(df & htons(IP_DF))) { - /* Arrived from an ingress interface, got encapsulated, with - * fragmentation of encapulating frames allowed. - * If skb is gso, the resulting encapsulated network segments - * may exceed dst mtu. - * Allow IP Fragmentation of segments. - */ - IPCB(skb)->flags |= IPSKB_FRAG_SEGS; - } - /* Push down and install the IP header. */ skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5f006e13de56..27089f5ebbb1 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1749,7 +1749,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, vif->dev->stats.tx_bytes += skb->len; } - IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS; + IPCB(skb)->flags |= IPSKB_FORWARDED; /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally * not only before forwarding, but after forwarding on all output From 79d8665b9545e128637c51cf7febde9c493b6481 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 14:41:50 -0700 Subject: [PATCH 068/305] tcp: fix return value for partial writes After my commit, tcp_sendmsg() might restart its loop after processing socket backlog. If sk_err is set, we blindly return an error, even though we copied data to user space before. We should instead return number of bytes that could be copied, otherwise user space might resend data and corrupt the stream. This might happen if another thread is using recvmsg(MSG_ERRQUEUE) to process timestamps. Issue was diagnosed by Soheil and Willem, big kudos to them ! Fixes: d41a69f1d390f ("tcp: make tcp_sendmsg() aware of socket backlog") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: Soheil Hassas Yeganeh Cc: Yuchung Cheng Cc: Neal Cardwell Tested-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 18238ef8135a..814af89c1bd3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1164,7 +1164,7 @@ restart: err = -EPIPE; if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) - goto out_err; + goto do_error; sg = !!(sk->sk_route_caps & NETIF_F_SG); From c3f24cfb3e508c70c26ee8569d537c8ca67a36c6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 17:14:41 -0700 Subject: [PATCH 069/305] dccp: do not release listeners too soon Andrey Konovalov reported following error while fuzzing with syzkaller : IPv4: Attempt to release alive inet socket ffff880068e98940 kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Modules linked in: CPU: 1 PID: 3905 Comm: a.out Not tainted 4.9.0-rc3+ #333 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88006b9e0000 task.stack: ffff880068770000 RIP: 0010:[] [] selinux_socket_sock_rcv_skb+0xff/0x6a0 security/selinux/hooks.c:4639 RSP: 0018:ffff8800687771c8 EFLAGS: 00010202 RAX: ffff88006b9e0000 RBX: 1ffff1000d0eee3f RCX: 1ffff1000d1d312a RDX: 1ffff1000d1d31a6 RSI: dffffc0000000000 RDI: 0000000000000010 RBP: ffff880068777360 R08: 0000000000000000 R09: 0000000000000002 R10: dffffc0000000000 R11: 0000000000000006 R12: ffff880068e98940 R13: 0000000000000002 R14: ffff880068777338 R15: 0000000000000000 FS: 00007f00ff760700(0000) GS:ffff88006cd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020008000 CR3: 000000006a308000 CR4: 00000000000006e0 Stack: ffff8800687771e0 ffffffff812508a5 ffff8800686f3168 0000000000000007 ffff88006ac8cdfc ffff8800665ea500 0000000041b58ab3 ffffffff847b5480 ffffffff819eac60 ffff88006b9e0860 ffff88006b9e0868 ffff88006b9e07f0 Call Trace: [] security_sock_rcv_skb+0x75/0xb0 security/security.c:1317 [] sk_filter_trim_cap+0x67/0x10e0 net/core/filter.c:81 [] __sk_receive_skb+0x30/0xa00 net/core/sock.c:460 [] dccp_v4_rcv+0xdb2/0x1910 net/dccp/ipv4.c:873 [] ip_local_deliver_finish+0x332/0xad0 net/ipv4/ip_input.c:216 [< inline >] NF_HOOK_THRESH ./include/linux/netfilter.h:232 [< inline >] NF_HOOK ./include/linux/netfilter.h:255 [] ip_local_deliver+0x1c2/0x4b0 net/ipv4/ip_input.c:257 [< inline >] dst_input ./include/net/dst.h:507 [] ip_rcv_finish+0x750/0x1c40 net/ipv4/ip_input.c:396 [< inline >] NF_HOOK_THRESH ./include/linux/netfilter.h:232 [< inline >] NF_HOOK ./include/linux/netfilter.h:255 [] ip_rcv+0x96f/0x12f0 net/ipv4/ip_input.c:487 [] __netif_receive_skb_core+0x1897/0x2a50 net/core/dev.c:4213 [] __netif_receive_skb+0x2a/0x170 net/core/dev.c:4251 [] netif_receive_skb_internal+0x1b3/0x390 net/core/dev.c:4279 [] netif_receive_skb+0x48/0x250 net/core/dev.c:4303 [] tun_get_user+0xbd5/0x28a0 drivers/net/tun.c:1308 [] tun_chr_write_iter+0xda/0x190 drivers/net/tun.c:1332 [< inline >] new_sync_write fs/read_write.c:499 [] __vfs_write+0x334/0x570 fs/read_write.c:512 [] vfs_write+0x17b/0x500 fs/read_write.c:560 [< inline >] SYSC_write fs/read_write.c:607 [] SyS_write+0xd4/0x1a0 fs/read_write.c:599 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 It turns out DCCP calls __sk_receive_skb(), and this broke when lookups no longer took a reference on listeners. Fix this issue by adding a @refcounted parameter to __sk_receive_skb(), so that sock_put() is used only when needed. Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- include/net/sock.h | 4 ++-- net/core/sock.c | 5 +++-- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 3 ++- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 73c6b008f1b7..92b269709b9a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1596,11 +1596,11 @@ static inline void sock_put(struct sock *sk) void sock_gen_put(struct sock *sk); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, - unsigned int trim_cap); + unsigned int trim_cap, bool refcounted); static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) { - return __sk_receive_skb(sk, skb, nested, 1); + return __sk_receive_skb(sk, skb, nested, 1, true); } static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) diff --git a/net/core/sock.c b/net/core/sock.c index df171acfe232..5e3ca414357e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -453,7 +453,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) EXPORT_SYMBOL(sock_queue_rcv_skb); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, - const int nested, unsigned int trim_cap) + const int nested, unsigned int trim_cap, bool refcounted) { int rc = NET_RX_SUCCESS; @@ -487,7 +487,8 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, bh_unlock_sock(sk); out: - sock_put(sk); + if (refcounted) + sock_put(sk); return rc; discard_and_relse: kfree_skb(skb); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 345a3aeb8c7e..dff7cfab1da4 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -868,7 +868,7 @@ lookup: goto discard_and_relse; nf_reset(skb); - return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4); + return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted); no_dccp_socket: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 3828f94b234c..09c4e19aa285 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -738,7 +738,8 @@ lookup: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4) ? -1 : 0; + return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, + refcounted) ? -1 : 0; no_dccp_socket: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) From 346da62cc186c4b4b1ac59f87f4482b47a047388 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 18:04:24 -0700 Subject: [PATCH 070/305] dccp: do not send reset to already closed sockets Andrey reported following warning while fuzzing with syzkaller WARNING: CPU: 1 PID: 21072 at net/dccp/proto.c:83 dccp_set_state+0x229/0x290 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 21072 Comm: syz-executor Not tainted 4.9.0-rc1+ #293 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffff88003d4c7738 ffffffff81b474f4 0000000000000003 dffffc0000000000 ffffffff844f8b00 ffff88003d4c7804 ffff88003d4c7800 ffffffff8140c06a 0000000041b58ab3 ffffffff8479ab7d ffffffff8140beae ffffffff8140cd00 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0xb3/0x10f lib/dump_stack.c:51 [] panic+0x1bc/0x39d kernel/panic.c:179 [] __warn+0x1cc/0x1f0 kernel/panic.c:542 [] warn_slowpath_null+0x2c/0x40 kernel/panic.c:585 [] dccp_set_state+0x229/0x290 net/dccp/proto.c:83 [] dccp_close+0x612/0xc10 net/dccp/proto.c:1016 [] inet_release+0xef/0x1c0 net/ipv4/af_inet.c:415 [] sock_release+0x8e/0x1d0 net/socket.c:570 [] sock_close+0x16/0x20 net/socket.c:1017 [] __fput+0x29d/0x720 fs/file_table.c:208 [] ____fput+0x15/0x20 fs/file_table.c:244 [] task_work_run+0xf8/0x170 kernel/task_work.c:116 [< inline >] exit_task_work include/linux/task_work.h:21 [] do_exit+0x883/0x2ac0 kernel/exit.c:828 [] do_group_exit+0x10e/0x340 kernel/exit.c:931 [] get_signal+0x634/0x15a0 kernel/signal.c:2307 [] do_signal+0x8d/0x1a30 arch/x86/kernel/signal.c:807 [] exit_to_usermode_loop+0xe5/0x130 arch/x86/entry/common.c:156 [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:190 [] syscall_return_slowpath+0x1a8/0x1e0 arch/x86/entry/common.c:259 [] entry_SYSCALL_64_fastpath+0xc0/0xc2 Dumping ftrace buffer: (ftrace buffer empty) Kernel Offset: disabled Fix this the same way we did for TCP in commit 565b7b2d2e63 ("tcp: do not send reset to already closed sockets") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- net/dccp/proto.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 41e65804ddf5..9fe25bf63296 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long timeout) __kfree_skb(skb); } + /* If socket has been already reset kill it. */ + if (sk->sk_state == DCCP_CLOSED) + goto adjudge_to_death; + if (data_was_unread) { /* Unread data was tossed, send an appropriate Reset Code */ DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread); From 6706a97fec963d6cb3f7fc2978ec1427b4651214 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 19:00:40 -0700 Subject: [PATCH 071/305] dccp: fix out of bound access in dccp_v4_err() dccp_v4_err() does not use pskb_may_pull() and might access garbage. We only need 4 bytes at the beginning of the DCCP header, like TCP, so the 8 bytes pulled in icmp_socket_deliver() are more than enough. This patch might allow to process more ICMP messages, as some routers are still limiting the size of reflected bytes to 28 (RFC 792), instead of extended lengths (RFC 1812 4.3.2.3) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index dff7cfab1da4..b567c8725aea 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -235,7 +235,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (struct iphdr *)skb->data; const u8 offset = iph->ihl << 2; - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); + const struct dccp_hdr *dh; struct dccp_sock *dp; struct inet_sock *inet; const int type = icmp_hdr(skb)->type; @@ -245,11 +245,13 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) int err; struct net *net = dev_net(skb->dev); - if (skb->len < offset + sizeof(*dh) || - skb->len < offset + __dccp_basic_hdr_len(dh)) { - __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); - return; - } + /* Only need dccph_dport & dccph_sport which are the first + * 4 bytes in dccp header. + * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us. + */ + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); + dh = (struct dccp_hdr *)(skb->data + offset); sk = __inet_lookup_established(net, &dccp_hashinfo, iph->daddr, dh->dccph_dport, From 93636d1f1f162ae89ae4f2a22a83bf4fd960724e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 20:21:20 -0700 Subject: [PATCH 072/305] netlink: netlink_diag_dump() runs without locks A recent commit removed locking from netlink_diag_dump() but forgot one error case. ===================================== [ BUG: bad unlock balance detected! ] 4.9.0-rc3+ #336 Not tainted ------------------------------------- syz-executor/4018 is trying to release lock ([ 36.220068] nl_table_lock ) at: [] netlink_diag_dump+0x1a3/0x250 net/netlink/diag.c:182 but there are no more locks to release! other info that might help us debug this: 3 locks held by syz-executor/4018: #0: [ 36.220068] ( sock_diag_mutex[ 36.220068] ){+.+.+.} , at: [ 36.220068] [] sock_diag_rcv+0x1b/0x40 #1: [ 36.220068] ( sock_diag_table_mutex[ 36.220068] ){+.+.+.} , at: [ 36.220068] [] sock_diag_rcv_msg+0x140/0x3a0 #2: [ 36.220068] ( nlk->cb_mutex[ 36.220068] ){+.+.+.} , at: [ 36.220068] [] netlink_dump+0x50/0xac0 stack backtrace: CPU: 1 PID: 4018 Comm: syz-executor Not tainted 4.9.0-rc3+ #336 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffff8800645df688 ffffffff81b46934 ffffffff84eb3e78 ffff88006ad85800 ffffffff82dc8683 ffffffff84eb3e78 ffff8800645df6b8 ffffffff812043ca dffffc0000000000 ffff88006ad85ff8 ffff88006ad85fd0 00000000ffffffff Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0xb3/0x10f lib/dump_stack.c:51 [] print_unlock_imbalance_bug+0x17a/0x1a0 kernel/locking/lockdep.c:3388 [< inline >] __lock_release kernel/locking/lockdep.c:3512 [] lock_release+0x8e8/0xc60 kernel/locking/lockdep.c:3765 [< inline >] __raw_read_unlock ./include/linux/rwlock_api_smp.h:225 [] _raw_read_unlock+0x1a/0x30 kernel/locking/spinlock.c:255 [] netlink_diag_dump+0x1a3/0x250 net/netlink/diag.c:182 [] netlink_dump+0x397/0xac0 net/netlink/af_netlink.c:2110 Fixes: ad202074320c ("netlink: Use rhashtable walk interface in diag dump") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- net/netlink/diag.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/netlink/diag.c b/net/netlink/diag.c index b2f0e986a6f4..a5546249fb10 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -178,11 +178,8 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } cb->args[1] = i; } else { - if (req->sdiag_protocol >= MAX_LINKS) { - read_unlock(&nl_table_lock); - rcu_read_unlock(); + if (req->sdiag_protocol >= MAX_LINKS) return -ENOENT; - } err = __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num); } From 1aa9d1a0e7eefcc61696e147d123453fc0016005 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 20:30:48 -0700 Subject: [PATCH 073/305] ipv6: dccp: fix out of bound access in dccp_v6_err() dccp_v6_err() does not use pskb_may_pull() and might access garbage. We only need 4 bytes at the beginning of the DCCP header, like TCP, so the 8 bytes pulled in icmpv6_notify() are more than enough. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 09c4e19aa285..b2a43af967e5 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); + const struct dccp_hdr *dh; struct dccp_sock *dp; struct ipv6_pinfo *np; struct sock *sk; @@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, __u64 seq; struct net *net = dev_net(skb->dev); - if (skb->len < offset + sizeof(*dh) || - skb->len < offset + __dccp_basic_hdr_len(dh)) { - __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); - return; - } + /* Only need dccph_dport & dccph_sport which are the first + * 4 bytes in dccp header. + * Our caller (icmpv6_notify()) already pulled 8 bytes for us. + */ + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); + dh = (struct dccp_hdr *)(skb->data + offset); sk = __inet6_lookup_established(net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, From 29ab5a3b94c87382da06db88e96119911d557293 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Thu, 3 Nov 2016 08:16:20 -0200 Subject: [PATCH 074/305] ehea: fix operation state report Currently the ehea driver is missing a call to netif_carrier_off() before the interface bring-up; this is necessary in order to initialize the __LINK_STATE_NOCARRIER bit in the net_device state field. Otherwise, we observe state UNKNOWN on "ip address" command output. This patch adds a call to netif_carrier_off() on ehea's net device open callback. Reported-by: Xiong Zhou Reference-ID: IBM bz #137702, Red Hat bz #1089134 Signed-off-by: Guilherme G. Piccoli Signed-off-by: Douglas Miller Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ehea/ehea_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 54efa9a5167b..bd719e25dd76 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -2446,6 +2446,8 @@ static int ehea_open(struct net_device *dev) netif_info(port, ifup, dev, "enabling port\n"); + netif_carrier_off(dev); + ret = ehea_up(dev); if (!ret) { port_napi_enable(port); From 990ff4d84408fc55942ca6644f67e361737b3d8e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Nov 2016 08:59:46 -0700 Subject: [PATCH 075/305] ipv6: dccp: add missing bind_conflict to dccp_ipv6_mapped While fuzzing kernel with syzkaller, Andrey reported a nasty crash in inet6_bind() caused by DCCP lacking a required method. Fixes: ab1e0a13d7029 ("[SOCK] proto: Add hashinfo member to struct proto") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Cc: Arnaldo Carvalho de Melo Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index b2a43af967e5..715e5d1dc107 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -958,6 +958,7 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = { .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), + .bind_conflict = inet6_csk_bind_conflict, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, From 00ffc1ba02d876478c125e4305f9a02d40c6d284 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 3 Nov 2016 09:42:35 -0700 Subject: [PATCH 076/305] genetlink: fix a memory leak on error path In __genl_register_family(), when genl_validate_assign_mc_groups() fails, we forget to free the memory we possibly allocate for family->attrbuf. Note, some callers call genl_unregister_family() to clean up on error path, it doesn't work because the family is inserted to the global list in the nearly last step. Cc: Jakub Kicinski Cc: Johannes Berg Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 23cc12639ba7..49c28e8ef01b 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -404,7 +404,7 @@ int __genl_register_family(struct genl_family *family) err = genl_validate_assign_mc_groups(family); if (err) - goto errout_locked; + goto errout_free; list_add_tail(&family->family_list, genl_family_chain(family->id)); genl_unlock_all(); @@ -417,6 +417,8 @@ int __genl_register_family(struct genl_family *family) return 0; +errout_free: + kfree(family->attrbuf); errout_locked: genl_unlock_all(); errout: From 243d52126184b072a18fe2130ce0008f8aa3a340 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 3 Nov 2016 09:42:36 -0700 Subject: [PATCH 077/305] taskstats: fix the length of cgroupstats_cmd_get_policy cgroupstats_cmd_get_policy is [CGROUPSTATS_CMD_ATTR_MAX+1], taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1], but their family.maxattr is TASKSTATS_CMD_ATTR_MAX. CGROUPSTATS_CMD_ATTR_MAX is less than TASKSTATS_CMD_ATTR_MAX, so we could end up accessing out-of-bound. Change cgroupstats_cmd_get_policy to TASKSTATS_CMD_ATTR_MAX+1, this is safe because the rest are initialized to 0's. Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- kernel/taskstats.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/taskstats.c b/kernel/taskstats.c index b3f05ee20d18..cbb387a265db 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -54,7 +54,11 @@ static const struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1 [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; -static const struct nla_policy cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] = { +/* + * We have to use TASKSTATS_CMD_ATTR_MAX here, it is the maxattr in the family. + * Make sure they are always aligned. + */ +static const struct nla_policy cgroupstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = { [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 }, }; From 87dc02551c509703123a60dd7f043d75e92a6aed Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 4 Nov 2016 01:48:42 +0200 Subject: [PATCH 078/305] net/mlx5e: Fix XDP error path of mlx5e_open_channel() In case of mlx5e_open_rq fails the error handling will jump to label err_close_xdp_sq and will try to close the xdp_sq unconditionally. xdp_sq is valid only in case of XDP use cases, i.e priv->xdp_prog is not null. To fix this in this patch we test xdp_sq validity prior to closing it. In addition we now close the xdp_sq.cq as well. Fixes: b5503b994ed5 ("net/mlx5e: XDP TX forwarding support") Signed-off-by: Saeed Mahameed Reported-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f4c687ce4c59..c83619d081d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1512,7 +1512,10 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, return 0; err_close_xdp_sq: - mlx5e_close_sq(&c->xdp_sq); + if (priv->xdp_prog) { + mlx5e_close_sq(&c->xdp_sq); + mlx5e_close_cq(&c->xdp_sq.cq); + } err_close_sqs: mlx5e_close_sqs(c); From d7a0ecab380c62ccd9fbe2e08dd720f7a367d6ee Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 4 Nov 2016 01:48:43 +0200 Subject: [PATCH 079/305] net/mlx5e: Re-arrange XDP SQ/CQ creation In mlx5e_open_channel CQs must be created before napi is enabled. Here we move the XDP CQ creation to satisfy that fact. mlx5e_close_channel is already working according to the right order. Fixes: b5503b994ed5 ("net/mlx5e: XDP TX forwarding support") Signed-off-by: Saeed Mahameed Reported-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c83619d081d8..84e8b250e2af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1445,6 +1445,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->netdev = priv->netdev; c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); c->num_tc = priv->params.num_tc; + c->xdp = !!priv->xdp_prog; if (priv->params.rx_am_enabled) rx_cq_profile = mlx5e_am_get_def_profile(priv->params.rx_cq_period_mode); @@ -1468,6 +1469,12 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, if (err) goto err_close_tx_cqs; + /* XDP SQ CQ params are same as normal TXQ sq CQ params */ + err = c->xdp ? mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq, + priv->params.tx_cq_moderation) : 0; + if (err) + goto err_close_rx_cq; + napi_enable(&c->napi); err = mlx5e_open_sq(c, 0, &cparam->icosq, &c->icosq); @@ -1488,21 +1495,10 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, } } - if (priv->xdp_prog) { - /* XDP SQ CQ params are same as normal TXQ sq CQ params */ - err = mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq, - priv->params.tx_cq_moderation); - if (err) - goto err_close_sqs; + err = c->xdp ? mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq) : 0; + if (err) + goto err_close_sqs; - err = mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq); - if (err) { - mlx5e_close_cq(&c->xdp_sq.cq); - goto err_close_sqs; - } - } - - c->xdp = !!priv->xdp_prog; err = mlx5e_open_rq(c, &cparam->rq, &c->rq); if (err) goto err_close_xdp_sq; @@ -1512,10 +1508,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, return 0; err_close_xdp_sq: - if (priv->xdp_prog) { + if (c->xdp) mlx5e_close_sq(&c->xdp_sq); - mlx5e_close_cq(&c->xdp_sq.cq); - } err_close_sqs: mlx5e_close_sqs(c); @@ -1525,6 +1519,10 @@ err_close_icosq: err_disable_napi: napi_disable(&c->napi); + if (c->xdp) + mlx5e_close_cq(&c->xdp_sq.cq); + +err_close_rx_cq: mlx5e_close_cq(&c->rq.cq); err_close_tx_cqs: From abd3277287c7743d3999b801c6769e8ad1b381dd Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 4 Nov 2016 01:48:44 +0200 Subject: [PATCH 080/305] net/mlx5e: Disallow changing name-space for VF representors VF reps should be altogether on the same NS as they were created. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 7fe6559e4ab3..bf1c09ca73c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -308,7 +308,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->switchdev_ops = &mlx5e_rep_switchdev_ops; #endif - netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC; + netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL; netdev->hw_features |= NETIF_F_HW_TC; eth_hw_addr_random(netdev); From 358d79a47e5a8db83925241629252cfe64f225f7 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 4 Nov 2016 01:48:45 +0200 Subject: [PATCH 081/305] net/mlx5e: Handle matching on vlan priority for offloaded TC rules We ignored the vlan priority in offloaded TC rules matching part, fix that. Fixes: 095b6cfd69ce ('net/mlx5e: Add TC vlan match parsing') Signed-off-by: Or Gerlitz Reported-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index ce8c54d18906..6bb21b31cfeb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -237,12 +237,15 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec skb_flow_dissector_target(f->dissector, FLOW_DISSECTOR_KEY_VLAN, f->mask); - if (mask->vlan_id) { + if (mask->vlan_id || mask->vlan_priority) { MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id); MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority); } } From ee39fbc4447d5c42640963b559bf68490cb45308 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 4 Nov 2016 01:48:46 +0200 Subject: [PATCH 082/305] net/mlx5: E-Switch, Set the actions for offloaded rules properly As for the current generation of the mlx5 HW (CX4/CX4-Lx) per flow vlan push/pop actions are emulated, we must not program them to the firmware. Fixes: f5f82476090f ('net/mlx5: E-Switch, Support VLAN actions in the offloads mode') Signed-off-by: Or Gerlitz Reported-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index c55ad8d00c05..d239f5d0ea36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -57,7 +57,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (esw->mode != SRIOV_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); - action = attr->action; + /* per flow vlan pop/push is emulated, don't set that into the firmware */ + action = attr->action & ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; From 0e97a34083a03c931d4e9b34ba7899c29a09dce6 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Fri, 4 Nov 2016 01:48:47 +0200 Subject: [PATCH 083/305] net/mlx5: Fix invalid pointer reference when prof_sel parameter is invalid When prof_sel is invalid, mlx5_core_warn is called but the mlx5_core_dev is not initialized yet. Solution is moving the prof_sel code after dev->pdev assignment Fixes: 2974ab6e8bd8 ('net/mlx5: Improve driver log messages') Signed-off-by: Huy Nguyen Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index d5433c49b2b0..3eb931585b3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1226,6 +1226,9 @@ static int init_one(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); + dev->pdev = pdev; + dev->event = mlx5_core_event; + if (prof_sel < 0 || prof_sel >= ARRAY_SIZE(profile)) { mlx5_core_warn(dev, "selected profile out of range, selecting default (%d)\n", @@ -1233,8 +1236,6 @@ static int init_one(struct pci_dev *pdev, prof_sel = MLX5_DEFAULT_PROF; } dev->profile = &profile[prof_sel]; - dev->pdev = pdev; - dev->event = mlx5_core_event; INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); From 3984903a2e3906d3def220e688040ce93368200a Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 27 Oct 2016 11:27:25 +0530 Subject: [PATCH 084/305] rtc: omap: Fix selecting external osc RTC can be clocked from an external 32KHz oscillator, or from the Peripheral PLL. The RTC has an internal oscillator buffer to support direct operation with a crystal. ---------------------------------------- | Device --------- | | | | | | | RTCSS | | | --------- | | | OSC |<------| RTC | | | | |------>| OSC |--- | | | | -------- | | | | | ----|clk | | | -------- | | | | | | PRCM |--- | | | | -------- -------- | ---------------------------------------- The RTC functional clock is sourced by default from the clock derived from the Peripheral PLL. In order to select source as external osc clk the following changes needs to be done: - Enable the RTC OSC (RTC_OSC_REG[4]OSC32K_GZ = 0) - Enable the clock mux(RTC_OSC_REG[6]K32CLK_EN = 1) - Select the external clock source (RTC_OSC_REG[3]32KCLK_SEL = 1) Fixes: 399cf0f63f6f2 ("rtc: omap: Add external clock enabling support") Signed-off-by: Keerthy Signed-off-by: Lokesh Vutla Signed-off-by: Dave Gerlach Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-omap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index b04ea9b5ae67..dddaa60871b9 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -113,6 +113,7 @@ /* OMAP_RTC_OSC_REG bit fields: */ #define OMAP_RTC_OSC_32KCLK_EN BIT(6) #define OMAP_RTC_OSC_SEL_32KCLK_SRC BIT(3) +#define OMAP_RTC_OSC_OSC32K_GZ_DISABLE BIT(4) /* OMAP_RTC_IRQWAKEEN bit fields: */ #define OMAP_RTC_IRQWAKEEN_ALARM_WAKEEN BIT(1) @@ -786,8 +787,9 @@ static int omap_rtc_probe(struct platform_device *pdev) */ if (rtc->has_ext_clk) { reg = rtc_read(rtc, OMAP_RTC_OSC_REG); - rtc_write(rtc, OMAP_RTC_OSC_REG, - reg | OMAP_RTC_OSC_SEL_32KCLK_SRC); + reg &= ~OMAP_RTC_OSC_OSC32K_GZ_DISABLE; + reg |= OMAP_RTC_OSC_32KCLK_EN | OMAP_RTC_OSC_SEL_32KCLK_SRC; + rtc_writel(rtc, OMAP_RTC_OSC_REG, reg); } rtc->type->lock(rtc); From efce21fc43e00a76aee7b0a1eda73730ed2d5d3a Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Thu, 27 Oct 2016 11:27:26 +0530 Subject: [PATCH 085/305] rtc: omap: prevent disabling of clock/module during suspend If RTC is running from an internal clock source, the RTC module can't be disabled; otherwise it stops ticking completely. Current suspend handler implementation disables the clock/module unconditionally, instead fix this by disabling the clock only if we are running on external clock source, which is not affected by suspend. The prevention of disabling the clock must be done via implementing the runtime_pm handlers for the device, and returning an error code from the runtime suspend handler; otherwise OMAP core PM will disable the clocks for the driver. Signed-off-by: Tero Kristo Signed-off-by: Keerthy Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-omap.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index dddaa60871b9..51e52446eacb 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -147,6 +147,7 @@ struct omap_rtc { u8 interrupts_reg; bool is_pmic_controller; bool has_ext_clk; + bool is_suspending; const struct omap_rtc_device_type *type; struct pinctrl_dev *pctldev; }; @@ -900,8 +901,7 @@ static int omap_rtc_suspend(struct device *dev) rtc_write(rtc, OMAP_RTC_INTERRUPTS_REG, 0); rtc->type->lock(rtc); - /* Disable the clock/module */ - pm_runtime_put_sync(dev); + rtc->is_suspending = true; return 0; } @@ -910,9 +910,6 @@ static int omap_rtc_resume(struct device *dev) { struct omap_rtc *rtc = dev_get_drvdata(dev); - /* Enable the clock/module so that we can access the registers */ - pm_runtime_get_sync(dev); - rtc->type->unlock(rtc); if (device_may_wakeup(dev)) disable_irq_wake(rtc->irq_alarm); @@ -920,11 +917,34 @@ static int omap_rtc_resume(struct device *dev) rtc_write(rtc, OMAP_RTC_INTERRUPTS_REG, rtc->interrupts_reg); rtc->type->lock(rtc); + rtc->is_suspending = false; + return 0; } #endif -static SIMPLE_DEV_PM_OPS(omap_rtc_pm_ops, omap_rtc_suspend, omap_rtc_resume); +#ifdef CONFIG_PM +static int omap_rtc_runtime_suspend(struct device *dev) +{ + struct omap_rtc *rtc = dev_get_drvdata(dev); + + if (rtc->is_suspending && !rtc->has_ext_clk) + return -EBUSY; + + return 0; +} + +static int omap_rtc_runtime_resume(struct device *dev) +{ + return 0; +} +#endif + +static const struct dev_pm_ops omap_rtc_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(omap_rtc_suspend, omap_rtc_resume) + SET_RUNTIME_PM_OPS(omap_rtc_runtime_suspend, + omap_rtc_runtime_resume, NULL) +}; static void omap_rtc_shutdown(struct platform_device *pdev) { From e3c9d9d6ebfeeeee29c6240e1b5978d40d31d21f Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 27 Oct 2016 13:06:44 -0200 Subject: [PATCH 086/305] ARM: dts: imx53-qsb: Fix regulator constraints Since commit fa93fd4ecc9c ("regulator: core: Ensure we are at least in bounds for our constraints") the imx53-qsb board populated with a Dialog DA9053 PMIC fails to boot: LDO3: Bringing 3300000uV into 1800000-1800000uV The LDO3 voltage constraints passed in the device tree do not match the valid range according to the datasheet, so fix this accordingly to allow the board booting again. While at it, fix the other voltage constraints as well. Cc: # 4.7.x Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx53-qsb.dts | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/boot/dts/imx53-qsb.dts b/arch/arm/boot/dts/imx53-qsb.dts index dec4b073ceb1..379939699164 100644 --- a/arch/arm/boot/dts/imx53-qsb.dts +++ b/arch/arm/boot/dts/imx53-qsb.dts @@ -64,8 +64,8 @@ }; ldo3_reg: ldo3 { - regulator-min-microvolt = <600000>; - regulator-max-microvolt = <1800000>; + regulator-min-microvolt = <1725000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; @@ -76,8 +76,8 @@ }; ldo5_reg: ldo5 { - regulator-min-microvolt = <1725000>; - regulator-max-microvolt = <3300000>; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3600000>; regulator-always-on; }; @@ -100,14 +100,14 @@ }; ldo9_reg: ldo9 { - regulator-min-microvolt = <1200000>; + regulator-min-microvolt = <1250000>; regulator-max-microvolt = <3600000>; regulator-always-on; }; ldo10_reg: ldo10 { - regulator-min-microvolt = <1250000>; - regulator-max-microvolt = <3650000>; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3600000>; regulator-always-on; }; }; From d4eccafcaf339de77ec562e96e6b223d447f924a Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 4 Nov 2016 14:45:08 -0700 Subject: [PATCH 087/305] xtensa: clean up printk usage for boot/crash logging Convert printk(KERN_* to pr_* and printk's without level to pr_cont. This fixes torn register dumps, stack dumps, stack traces and timestamps in the middle of 'Calibrating CPU frequency' message. Also drop unused show_code and drop false comment about show_stack. Signed-off-by: Max Filippov --- arch/xtensa/kernel/time.c | 14 ++++---- arch/xtensa/kernel/traps.c | 74 ++++++++++++-------------------------- 2 files changed, 29 insertions(+), 59 deletions(-) diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 9a5bcd0381a7..be81e69b25bc 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -172,10 +172,11 @@ void __init time_init(void) { of_clk_init(NULL); #ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT - printk("Calibrating CPU frequency "); + pr_info("Calibrating CPU frequency "); calibrate_ccount(); - printk("%d.%02d MHz\n", (int)ccount_freq/1000000, - (int)(ccount_freq/10000)%100); + pr_cont("%d.%02d MHz\n", + (int)ccount_freq / 1000000, + (int)(ccount_freq / 10000) % 100); #else ccount_freq = CONFIG_XTENSA_CPU_CLOCK*1000000UL; #endif @@ -210,9 +211,8 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) void calibrate_delay(void) { loops_per_jiffy = ccount_freq / HZ; - printk("Calibrating delay loop (skipped)... " - "%lu.%02lu BogoMIPS preset\n", - loops_per_jiffy/(1000000/HZ), - (loops_per_jiffy/(10000/HZ)) % 100); + pr_info("Calibrating delay loop (skipped)... %lu.%02lu BogoMIPS preset\n", + loops_per_jiffy / (1000000 / HZ), + (loops_per_jiffy / (10000 / HZ)) % 100); } #endif diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index d02fc304b31c..ce37d5b899fe 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -465,26 +465,25 @@ void show_regs(struct pt_regs * regs) for (i = 0; i < 16; i++) { if ((i % 8) == 0) - printk(KERN_INFO "a%02d:", i); - printk(KERN_CONT " %08lx", regs->areg[i]); + pr_info("a%02d:", i); + pr_cont(" %08lx", regs->areg[i]); } - printk(KERN_CONT "\n"); - - printk("pc: %08lx, ps: %08lx, depc: %08lx, excvaddr: %08lx\n", - regs->pc, regs->ps, regs->depc, regs->excvaddr); - printk("lbeg: %08lx, lend: %08lx lcount: %08lx, sar: %08lx\n", - regs->lbeg, regs->lend, regs->lcount, regs->sar); + pr_cont("\n"); + pr_info("pc: %08lx, ps: %08lx, depc: %08lx, excvaddr: %08lx\n", + regs->pc, regs->ps, regs->depc, regs->excvaddr); + pr_info("lbeg: %08lx, lend: %08lx lcount: %08lx, sar: %08lx\n", + regs->lbeg, regs->lend, regs->lcount, regs->sar); if (user_mode(regs)) - printk("wb: %08lx, ws: %08lx, wmask: %08lx, syscall: %ld\n", - regs->windowbase, regs->windowstart, regs->wmask, - regs->syscall); + pr_cont("wb: %08lx, ws: %08lx, wmask: %08lx, syscall: %ld\n", + regs->windowbase, regs->windowstart, regs->wmask, + regs->syscall); } static int show_trace_cb(struct stackframe *frame, void *data) { if (kernel_text_address(frame->pc)) { - printk(" [<%08lx>] ", frame->pc); - print_symbol("%s\n", frame->pc); + pr_cont(" [<%08lx>]", frame->pc); + print_symbol(" %s\n", frame->pc); } return 0; } @@ -494,19 +493,13 @@ void show_trace(struct task_struct *task, unsigned long *sp) if (!sp) sp = stack_pointer(task); - printk("Call Trace:"); -#ifdef CONFIG_KALLSYMS - printk("\n"); -#endif + pr_info("Call Trace:\n"); walk_stackframe(sp, show_trace_cb, NULL); - printk("\n"); +#ifndef CONFIG_KALLSYMS + pr_cont("\n"); +#endif } -/* - * This routine abuses get_user()/put_user() to reference pointers - * with at least a bit of error checking ... - */ - static int kstack_depth_to_print = 24; void show_stack(struct task_struct *task, unsigned long *sp) @@ -518,52 +511,29 @@ void show_stack(struct task_struct *task, unsigned long *sp) sp = stack_pointer(task); stack = sp; - printk("\nStack: "); + pr_info("Stack:\n"); for (i = 0; i < kstack_depth_to_print; i++) { if (kstack_end(sp)) break; - if (i && ((i % 8) == 0)) - printk("\n "); - printk("%08lx ", *sp++); + pr_cont(" %08lx", *sp++); + if (i % 8 == 7) + pr_cont("\n"); } - printk("\n"); show_trace(task, stack); } -void show_code(unsigned int *pc) -{ - long i; - - printk("\nCode:"); - - for(i = -3 ; i < 6 ; i++) { - unsigned long insn; - if (__get_user(insn, pc + i)) { - printk(" (Bad address in pc)\n"); - break; - } - printk("%c%08lx%c",(i?' ':'<'),insn,(i?' ':'>')); - } -} - DEFINE_SPINLOCK(die_lock); void die(const char * str, struct pt_regs * regs, long err) { static int die_counter; - int nl = 0; console_verbose(); spin_lock_irq(&die_lock); - printk("%s: sig: %ld [#%d]\n", str, err, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); - nl = 1; -#endif - if (nl) - printk("\n"); + pr_info("%s: sig: %ld [#%d]%s\n", str, err, ++die_counter, + IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : ""); show_regs(regs); if (!user_mode(regs)) show_stack(NULL, (unsigned long*)regs->areg[1]); From e8a6123e9ead1b0d40349809e51de9341312fe08 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 23 Oct 2016 13:55:34 +0200 Subject: [PATCH 088/305] x86/platform/intel-mid: Retrofit pci_platform_pm_ops ->get_state hook Commit cc7cc02bada8 ("PCI: Query platform firmware for device power state") augmented struct pci_platform_pm_ops with a ->get_state hook and implemented it for acpi_pci_platform_pm, the only pci_platform_pm_ops existing till v4.7. However v4.8 introduced another pci_platform_pm_ops for Intel Mobile Internet Devices with commit 5823d0893ec2 ("x86/platform/intel-mid: Add Power Management Unit driver"). It is missing the ->get_state hook, which is fatal since pci_set_platform_pm() enforces its presence. Andy Shevchenko reports that without the present commit, such a device "crashes without even a character printed out on serial console and reboots (since watchdog)". Retrofit mid_pci_platform_pm with the missing callback to fix the breakage. Acked-and-tested-by: Andy Shevchenko Fixes: cc7cc02bada8 ("PCI: Query platform firmware for device power state") Signed-off-by: Lukas Wunner Acked-by: Bjorn Helgaas Cc: linux-pci@vger.kernel.org Cc: Andy Shevchenko Link: http://lkml.kernel.org/r/7c1567d4c49303a4aada94ba16275cbf56b8976b.1477221514.git.lukas@wunner.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/intel-mid.h | 1 + arch/x86/platform/intel-mid/pwr.c | 19 +++++++++++++++++++ drivers/pci/pci-mid.c | 6 ++++++ 3 files changed, 26 insertions(+) diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 5b6753d1f7f4..49da9f497b90 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -17,6 +17,7 @@ extern int intel_mid_pci_init(void); extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state); +extern pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev); extern void intel_mid_pwr_power_off(void); diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c index 5d3b45ad1c03..67375dda451c 100644 --- a/arch/x86/platform/intel-mid/pwr.c +++ b/arch/x86/platform/intel-mid/pwr.c @@ -272,6 +272,25 @@ int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state) } EXPORT_SYMBOL_GPL(intel_mid_pci_set_power_state); +pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev) +{ + struct mid_pwr *pwr = midpwr; + int id, reg, bit; + u32 power; + + if (!pwr || !pwr->available) + return PCI_UNKNOWN; + + id = intel_mid_pwr_get_lss_id(pdev); + if (id < 0) + return PCI_UNKNOWN; + + reg = (id * LSS_PWS_BITS) / 32; + bit = (id * LSS_PWS_BITS) % 32; + power = mid_pwr_get_state(pwr, reg); + return (__force pci_power_t)((power >> bit) & 3); +} + void intel_mid_pwr_power_off(void) { struct mid_pwr *pwr = midpwr; diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c index 55f453de562e..c7f3408e3148 100644 --- a/drivers/pci/pci-mid.c +++ b/drivers/pci/pci-mid.c @@ -29,6 +29,11 @@ static int mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state) return intel_mid_pci_set_power_state(pdev, state); } +static pci_power_t mid_pci_get_power_state(struct pci_dev *pdev) +{ + return intel_mid_pci_get_power_state(pdev); +} + static pci_power_t mid_pci_choose_state(struct pci_dev *pdev) { return PCI_D3hot; @@ -52,6 +57,7 @@ static bool mid_pci_need_resume(struct pci_dev *dev) static struct pci_platform_pm_ops mid_pci_platform_pm = { .is_manageable = mid_pci_power_manageable, .set_state = mid_pci_set_power_state, + .get_state = mid_pci_get_power_state, .choose_state = mid_pci_choose_state, .sleep_wake = mid_pci_sleep_wake, .run_wake = mid_pci_run_wake, From 4db069a2bf990e278ea57ff615dcaa89b85376bd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 4 Nov 2016 07:13:52 +0100 Subject: [PATCH 089/305] drm/sun4i: Propagate error to the caller If 'sun4i_layers_init()' returns an error, propagate it instead of returning -EINVAL unconditionally. Signed-off-by: Christophe JAILLET Reviewed-by: Gustavo Padovan Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 077f3785439e..70e9fd59c5a2 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -144,7 +144,7 @@ static int sun4i_drv_bind(struct device *dev) drv->layers = sun4i_layers_init(drm); if (IS_ERR(drv->layers)) { dev_err(drm->dev, "Couldn't create the planes\n"); - ret = -EINVAL; + ret = PTR_ERR(drv->layers); goto free_drm; } From 17ae1c650c1ecf8dc8e16d54b0f68a345965f43f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 3 Nov 2016 18:40:19 +0100 Subject: [PATCH 090/305] phy: fix device reference leaks Make sure to drop the reference taken by bus_find_device_by_name() before returning from phy_connect() and phy_attach(). Note that both function still take a reference to the phy device through phy_attach_direct(). Fixes: e13934563db0 ("[PATCH] PHY Layer fixup") Cc: Florian Fainelli Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index e977ba931878..1a4bf8acad78 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -723,6 +723,7 @@ struct phy_device *phy_connect(struct net_device *dev, const char *bus_id, phydev = to_phy_device(d); rc = phy_connect_direct(dev, phydev, handler, interface); + put_device(d); if (rc) return ERR_PTR(rc); @@ -953,6 +954,7 @@ struct phy_device *phy_attach(struct net_device *dev, const char *bus_id, phydev = to_phy_device(d); rc = phy_attach_direct(dev, phydev, phydev->dev_flags, interface); + put_device(d); if (rc) return ERR_PTR(rc); From c7262aaace1b17a650598063e3b9ee1785fde377 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 3 Nov 2016 18:40:20 +0100 Subject: [PATCH 091/305] net: ethernet: ti: cpsw: fix device and of_node leaks Make sure to drop the references taken by of_get_child_by_name() and bus_find_device() before returning from cpsw_phy_sel(). Note that holding a reference to the cpsw-phy-sel device does not prevent the devres-managed private data from going away. Fixes: 5892cd135e16 ("drivers: net: cpsw-phy-sel: Add new driver...") Cc: Mugunthan V N Cc: Grygorii Strashko Cc: linux-omap@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw-phy-sel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c index 054a8dd23dae..ba1e45ff6aae 100644 --- a/drivers/net/ethernet/ti/cpsw-phy-sel.c +++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c @@ -176,9 +176,12 @@ void cpsw_phy_sel(struct device *dev, phy_interface_t phy_mode, int slave) } dev = bus_find_device(&platform_bus_type, NULL, node, match); + of_node_put(node); priv = dev_get_drvdata(dev); priv->cpsw_phy_sel(priv, phy_mode, slave); + + put_device(dev); } EXPORT_SYMBOL_GPL(cpsw_phy_sel); From 6bed0118012ea350acbe606ab3ae0ed3d60ed5f3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 3 Nov 2016 18:40:21 +0100 Subject: [PATCH 092/305] net: ethernet: ti: davinci_emac: fix device reference leak Make sure to drop the references taken by bus_find_device() before returning from emac_dev_open(). Note that phy_connect still takes a reference to the phy device. Fixes: 5d69e0076a72 ("net: davinci_emac: switch to new mdio") Cc: Mugunthan V N Cc: Grygorii Strashko Cc: linux-omap@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/davinci_emac.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 2fd94a5bc1f3..84fbe5714f8b 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1410,6 +1410,7 @@ static int emac_dev_open(struct net_device *ndev) int i = 0; struct emac_priv *priv = netdev_priv(ndev); struct phy_device *phydev = NULL; + struct device *phy = NULL; ret = pm_runtime_get_sync(&priv->pdev->dev); if (ret < 0) { @@ -1488,19 +1489,20 @@ static int emac_dev_open(struct net_device *ndev) /* use the first phy on the bus if pdata did not give us a phy id */ if (!phydev && !priv->phy_id) { - struct device *phy; - phy = bus_find_device(&mdio_bus_type, NULL, NULL, match_first_device); - if (phy) + if (phy) { priv->phy_id = dev_name(phy); + if (!priv->phy_id || !*priv->phy_id) + put_device(phy); + } } if (!phydev && priv->phy_id && *priv->phy_id) { phydev = phy_connect(ndev, priv->phy_id, &emac_adjust_link, PHY_INTERFACE_MODE_MII); - + put_device(phy); /* reference taken by bus_find_device */ if (IS_ERR(phydev)) { dev_err(emac_dev, "could not connect to phy %s\n", priv->phy_id); From 2271150bfb814b72ec57ae2fdf66e39da2eafafd Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 3 Nov 2016 18:40:22 +0100 Subject: [PATCH 093/305] net: hns: fix device reference leaks Make sure to drop the reference taken by class_find_device() in hnae_get_handle() on errors and when later releasing the handle. Fixes: 6fe6611ff275 ("net: add Hisilicon Network Subsystem...") Cc: Yisen Zhuang Cc: Salil Mehta Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hnae.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c index c54c6fac0d1d..b6ed818f78ff 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.c +++ b/drivers/net/ethernet/hisilicon/hns/hnae.c @@ -332,8 +332,10 @@ struct hnae_handle *hnae_get_handle(struct device *owner_dev, return ERR_PTR(-ENODEV); handle = dev->ops->get_handle(dev, port_id); - if (IS_ERR(handle)) + if (IS_ERR(handle)) { + put_device(&dev->cls_dev); return handle; + } handle->dev = dev; handle->owner_dev = owner_dev; @@ -356,6 +358,8 @@ out_when_init_queue: for (j = i - 1; j >= 0; j--) hnae_fini_queue(handle->qs[j]); + put_device(&dev->cls_dev); + return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL(hnae_get_handle); @@ -377,6 +381,8 @@ void hnae_put_handle(struct hnae_handle *h) dev->ops->put_handle(h); module_put(dev->owner); + + put_device(&dev->cls_dev); } EXPORT_SYMBOL(hnae_put_handle); From 7233bc84a3aeda835d334499dc00448373caf5c0 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 3 Nov 2016 17:03:41 -0200 Subject: [PATCH 094/305] sctp: assign assoc_id earlier in __sctp_connect sctp_wait_for_connect() currently already holds the asoc to keep it alive during the sleep, in case another thread release it. But Andrey Konovalov and Dmitry Vyukov reported an use-after-free in such situation. Problem is that __sctp_connect() doesn't get a ref on the asoc and will do a read on the asoc after calling sctp_wait_for_connect(), but by then another thread may have closed it and the _put on sctp_wait_for_connect will actually release it, causing the use-after-free. Fix is, instead of doing the read after waiting for the connect, do it before so, and avoid this issue as the socket is still locked by then. There should be no issue on returning the asoc id in case of failure as the application shouldn't trust on that number in such situations anyway. This issue doesn't exist in sctp_sendmsg() path. Reported-by: Dmitry Vyukov Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Marcelo Ricardo Leitner Reviewed-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 71b75f9d9c1b..faa48ff5cf4b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1214,9 +1214,12 @@ static int __sctp_connect(struct sock *sk, timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK); - err = sctp_wait_for_connect(asoc, &timeo); - if ((err == 0 || err == -EINPROGRESS) && assoc_id) + if (assoc_id) *assoc_id = asoc->assoc_id; + err = sctp_wait_for_connect(asoc, &timeo); + /* Note: the asoc may be freed after the return of + * sctp_wait_for_connect. + */ /* Don't free association on exit. */ asoc = NULL; From 483bed2b0ddd12ec33fc9407e0c6e1088e77a97c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 4 Nov 2016 00:01:19 +0100 Subject: [PATCH 095/305] bpf: fix htab map destruction when extra reserve is in use Commit a6ed3ea65d98 ("bpf: restore behavior of bpf_map_update_elem") added an extra per-cpu reserve to the hash table map to restore old behaviour from pre prealloc times. When non-prealloc is in use for a map, then problem is that once a hash table extra element has been linked into the hash-table, and the hash table is destroyed due to refcount dropping to zero, then htab_map_free() -> delete_all_elements() will walk the whole hash table and drop all elements via htab_elem_free(). The problem is that the element from the extra reserve is first fed to the wrong backend allocator and eventually freed twice. Fixes: a6ed3ea65d98 ("bpf: restore behavior of bpf_map_update_elem") Reported-by: Dmitry Vyukov Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/hashtab.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 570eeca7bdfa..ad1bc67aff1b 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -687,7 +687,8 @@ static void delete_all_elements(struct bpf_htab *htab) hlist_for_each_entry_safe(l, n, head, hash_node) { hlist_del_rcu(&l->hash_node); - htab_elem_free(htab, l); + if (l->state != HTAB_EXTRA_ELEM_USED) + htab_elem_free(htab, l); } } } From 20b2b24f91f70e7d3f0918c077546cb21bd73a87 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 4 Nov 2016 00:56:31 +0100 Subject: [PATCH 096/305] bpf: fix map not being uncharged during map creation failure In map_create(), we first find and create the map, then once that suceeded, we charge it to the user's RLIMIT_MEMLOCK, and then fetch a new anon fd through anon_inode_getfd(). The problem is, once the latter fails f.e. due to RLIMIT_NOFILE limit, then we only destruct the map via map->ops->map_free(), but without uncharging the previously locked memory first. That means that the user_struct allocation is leaked as well as the accounted RLIMIT_MEMLOCK memory not released. Make the label names in the fix consistent with bpf_prog_load(). Fixes: aaac3ba95e4c ("bpf: charge user for creation of BPF maps and programs") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/syscall.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 228f962447a5..237f3d6a7ddc 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -194,7 +194,7 @@ static int map_create(union bpf_attr *attr) err = bpf_map_charge_memlock(map); if (err) - goto free_map; + goto free_map_nouncharge; err = bpf_map_new_fd(map); if (err < 0) @@ -204,6 +204,8 @@ static int map_create(union bpf_attr *attr) return err; free_map: + bpf_map_uncharge_memlock(map); +free_map_nouncharge: map->ops->map_free(map); return err; } From 85566ca6b6d7e131837cd197a441d98e83146fae Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 7 Nov 2016 14:52:39 -0700 Subject: [PATCH 097/305] ARM: OMAP3: Fix formatting of features printed With the printk cleanups merged into v4.9-rc1, we now get the omap revision printed on multiple lines. Let's fix that and also remove the extra empty space at the end of the features. And let's update things to use scnprintf as suggested by Ivaylo Dimitrov . Reported-by: Adam Ford Cc: Ivaylo Dimitrov Reviewed-by: Sebastian Reichel Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/id.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index 2abd53ae3e7a..cc6d9fa60924 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -205,11 +205,15 @@ void __init omap2xxx_check_revision(void) #define OMAP3_SHOW_FEATURE(feat) \ if (omap3_has_ ##feat()) \ - printk(#feat" "); + n += scnprintf(buf + n, sizeof(buf) - n, #feat " "); static void __init omap3_cpuinfo(void) { const char *cpu_name; + char buf[64]; + int n = 0; + + memset(buf, 0, sizeof(buf)); /* * OMAP3430 and OMAP3530 are assumed to be same. @@ -241,10 +245,10 @@ static void __init omap3_cpuinfo(void) cpu_name = "OMAP3503"; } - sprintf(soc_name, "%s", cpu_name); + scnprintf(soc_name, sizeof(soc_name), "%s", cpu_name); /* Print verbose information */ - pr_info("%s %s (", soc_name, soc_rev); + n += scnprintf(buf, sizeof(buf) - n, "%s %s (", soc_name, soc_rev); OMAP3_SHOW_FEATURE(l2cache); OMAP3_SHOW_FEATURE(iva); @@ -252,8 +256,10 @@ static void __init omap3_cpuinfo(void) OMAP3_SHOW_FEATURE(neon); OMAP3_SHOW_FEATURE(isp); OMAP3_SHOW_FEATURE(192mhz_clk); - - printk(")\n"); + if (*(buf + n - 1) == ' ') + n--; + n += scnprintf(buf + n, sizeof(buf) - n, ")\n"); + pr_info("%s", buf); } #define OMAP3_CHECK_FEATURE(status,feat) \ From 72bb40b8b7620f1390c84c10309a40e886bf449e Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Wed, 19 Oct 2016 15:44:12 -0500 Subject: [PATCH 098/305] ARM: AM43XX: Select OMAP_INTERCONNECT in Kconfig AM437x makes use of the omap_l3_noc driver so explicitly select OMAP_INTERCONNECT in the Kconfig for SOC_AM43XX to ensure it gets enabled for AM43XX only builds. Signed-off-by: Dave Gerlach Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index a9afeebd59f2..0465338183c7 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -71,6 +71,7 @@ config SOC_AM43XX select HAVE_ARM_TWD select ARM_ERRATA_754322 select ARM_ERRATA_775420 + select OMAP_INTERCONNECT config SOC_DRA7XX bool "TI DRA7XX" From 271a3024db1f32ca34f504178fade6ef95cd6c9b Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Fri, 21 Oct 2016 09:12:31 -0500 Subject: [PATCH 099/305] ARM: dts: omap3: Fix memory node in Torpedo board Commit ("766a1fe78fc3 ARM: omap3: Add missing memory node") added the memory node, but the patch didn't have the correct starting address. This patch fixes the correct starting address. Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/logicpd-torpedo-som.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi index 731ec37aed5b..8f9a69ca818c 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi @@ -13,9 +13,9 @@ }; }; - memory@0 { + memory@80000000 { device_type = "memory"; - reg = <0 0>; + reg = <0x80000000 0>; }; leds { From 4ae46efcff19445afbf49fe7038de6020f37fefe Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 24 Oct 2016 12:00:21 +0100 Subject: [PATCH 100/305] ARM: OMAP2+: PRM: initialize en_uart4_mask and grpsel_uart4_mask In the case where has_uart4 is false, en_uart4_mask and grpsel_uart4_mask are not initialized and so any garbage value is being logically or'd into the write of PM_WKEN and OMAP3430_PM_MPUGRPSEL. Fix this by initializing these masks to zero. Signed-off-by: Colin Ian King Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/prm3xxx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-omap2/prm3xxx.c b/arch/arm/mach-omap2/prm3xxx.c index 62680aad2126..718981bb80cd 100644 --- a/arch/arm/mach-omap2/prm3xxx.c +++ b/arch/arm/mach-omap2/prm3xxx.c @@ -319,6 +319,9 @@ void __init omap3_prm_init_pm(bool has_uart4, bool has_iva) if (has_uart4) { en_uart4_mask = OMAP3630_EN_UART4_MASK; grpsel_uart4_mask = OMAP3630_GRPSEL_UART4_MASK; + } else { + en_uart4_mask = 0; + grpsel_uart4_mask = 0; } /* Enable wakeups in PER */ From 0ab11d8ea46fd6faf67df4461c795091429a1496 Mon Sep 17 00:00:00 2001 From: Nicolae Rosia Date: Tue, 1 Nov 2016 11:49:25 +0200 Subject: [PATCH 101/305] ARM: OMAP2+: avoid NULL pointer dereference For OMAP4, volt_data is set in omap44xx_voltagedomains_init. If the SoC is neither OMAP443X or OMAP446X, we end up with a NULL in volt_data which causes a kernel oops. This is the case when booting OMAP4470. Signed-off-by: Nicolae Rosia Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/voltage.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/mach-omap2/voltage.c b/arch/arm/mach-omap2/voltage.c index cba8cada8c81..cd15dbd62671 100644 --- a/arch/arm/mach-omap2/voltage.c +++ b/arch/arm/mach-omap2/voltage.c @@ -87,6 +87,12 @@ int voltdm_scale(struct voltagedomain *voltdm, return -ENODATA; } + if (!voltdm->volt_data) { + pr_err("%s: No voltage data defined for vdd_%s\n", + __func__, voltdm->name); + return -ENODATA; + } + /* Adjust voltage to the exact voltage from the OPP table */ for (i = 0; voltdm->volt_data[i].volt_nominal != 0; i++) { if (voltdm->volt_data[i].volt_nominal >= target_volt) { From 725ed2238cdb3807c19e7edcb20fde8d0f91597f Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Tue, 25 Oct 2016 19:38:09 +0200 Subject: [PATCH 102/305] dts: omap5: board-common: add phandle to reference Palmas gpadc Will be needed for iio based drivers. Signed-off-by: H. Nikolaus Schaller Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-board-common.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 6365635fea5c..7cd1c674ceec 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -415,7 +415,7 @@ ti,backup-battery-charge-high-current; }; - gpadc { + gpadc: gpadc { compatible = "ti,palmas-gpadc"; interrupts = <18 0 16 0 From 0b68f1beea9ed2b31ff7873d5ed0cfbd087da0eb Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Tue, 25 Oct 2016 19:38:10 +0200 Subject: [PATCH 103/305] dts: omap5: board-common: enable twl6040 headset jack detection Signed-off-by: H. Nikolaus Schaller Reviewed-by: Peter Ujfalusi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-board-common.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 7cd1c674ceec..60a33c4b7b82 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -124,6 +124,7 @@ compatible = "ti,abe-twl6040"; ti,model = "omap5-uevm"; + ti,jack-detection; ti,mclk-freq = <19200000>; ti,mcpdm = <&mcpdm>; From 1219e3db7ecb59ab269e9c8b1a199d82b8d088bb Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Tue, 25 Oct 2016 19:38:11 +0200 Subject: [PATCH 104/305] ASoC: omap-abe-twl6040: fix typo in bindings documentation Signed-off-by: H. Nikolaus Schaller Acked-by: Peter Ujfalusi Acked-by: Rob Herring Signed-off-by: Tony Lindgren --- Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt b/Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt index fd40c852d7c7..462b04e8209f 100644 --- a/Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt +++ b/Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt @@ -12,7 +12,7 @@ Required properties: Optional properties: - ti,dmic: phandle for the OMAP dmic node if the machine have it connected -- ti,jack_detection: Need to be present if the board capable to detect jack +- ti,jack-detection: Need to be present if the board capable to detect jack insertion, removal. Available audio endpoints for the audio-routing table: From 5d41ce29e3b91ef305f88d23f72b3359de329cec Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 3 Nov 2016 16:17:26 -0700 Subject: [PATCH 105/305] net: icmp6_send should use dst dev to determine L3 domain icmp6_send is called in response to some event. The skb may not have the device set (skb->dev is NULL), but it is expected to have a dst set. Update icmp6_send to use the dst on the skb to determine L3 domain. Fixes: ca254490c8dfd ("net: Add VRF support to IPv6 stack") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index bd59c343d35f..7370ad2e693a 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -448,7 +448,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, if (__ipv6_addr_needs_scope_id(addr_type)) iif = skb->dev->ifindex; else - iif = l3mdev_master_ifindex(skb->dev); + iif = l3mdev_master_ifindex(skb_dst(skb)->dev); /* * Must not send error if the source does not uniquely From f3358507c11999c91abf54744658bccd49b5879c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 4 Nov 2016 12:55:36 +0200 Subject: [PATCH 106/305] virtio-net: drop legacy features in virtio 1 mode Virtio 1.0 spec says VIRTIO_F_ANY_LAYOUT and VIRTIO_NET_F_GSO are legacy-only feature bits. Do not negotiate them in virtio 1 mode. Note this is a spec violation so we need to backport it to stable/downstream kernels. Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index fad84f3f4109..fd8b1e62301f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2038,23 +2038,33 @@ static struct virtio_device_id id_table[] = { { 0 }, }; +#define VIRTNET_FEATURES \ + VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ + VIRTIO_NET_F_MAC, \ + VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ + VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ + VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ + VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ + VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ + VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ + VIRTIO_NET_F_CTRL_MAC_ADDR, \ + VIRTIO_NET_F_MTU + static unsigned int features[] = { - VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, - VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC, - VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, - VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, - VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, - VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, - VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, - VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, - VIRTIO_NET_F_CTRL_MAC_ADDR, + VIRTNET_FEATURES, +}; + +static unsigned int features_legacy[] = { + VIRTNET_FEATURES, + VIRTIO_NET_F_GSO, VIRTIO_F_ANY_LAYOUT, - VIRTIO_NET_F_MTU, }; static struct virtio_driver virtio_net_driver = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), + .feature_table_legacy = features_legacy, + .feature_table_size_legacy = ARRAY_SIZE(features_legacy), .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, From 8e0140a2d7c9f55b794a5fce22e05350a435b965 Mon Sep 17 00:00:00 2001 From: Fabian Mewes Date: Fri, 4 Nov 2016 13:16:14 +0100 Subject: [PATCH 107/305] Documentation: networking: dsa: Update tagging protocols Add Qualcomm QCA tagging introduced in cafdc45c9 to the list of supported protocols. Signed-off-by: Fabian Mewes Reviewed-by: Andrew Lunn Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/networking/dsa/dsa.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt index 6d6c07cf1a9a..63912ef34606 100644 --- a/Documentation/networking/dsa/dsa.txt +++ b/Documentation/networking/dsa/dsa.txt @@ -67,13 +67,14 @@ Note that DSA does not currently create network interfaces for the "cpu" and Switch tagging protocols ------------------------ -DSA currently supports 4 different tagging protocols, and a tag-less mode as +DSA currently supports 5 different tagging protocols, and a tag-less mode as well. The different protocols are implemented in: net/dsa/tag_trailer.c: Marvell's 4 trailer tag mode (legacy) net/dsa/tag_dsa.c: Marvell's original DSA tag net/dsa/tag_edsa.c: Marvell's enhanced DSA tag net/dsa/tag_brcm.c: Broadcom's 4 bytes tag +net/dsa/tag_qca.c: Qualcomm's 2 bytes tag The exact format of the tag protocol is vendor specific, but in general, they all contain something which: From fd0285a39b1cb496f60210a9a00ad33a815603e7 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 4 Nov 2016 15:11:57 -0400 Subject: [PATCH 108/305] fib_trie: Correct /proc/net/route off by one error The display of /proc/net/route has had a couple issues due to the fact that when I originally rewrote most of fib_trie I made it so that the iterator was tracking the next value to use instead of the current. In addition it had an off by 1 error where I was tracking the first piece of data as position 0, even though in reality that belonged to the SEQ_START_TOKEN. This patch updates the code so the iterator tracks the last reported position and key instead of the next expected position and key. In addition it shifts things so that all of the leaves start at 1 instead of trying to report leaves starting with offset 0 as being valid. With these two issues addressed this should resolve any off by one errors that were present in the display of /proc/net/route. Fixes: 25b97c016b26 ("ipv4: off-by-one in continuation handling in /proc/net/route") Cc: Andy Whitcroft Reported-by: Jason Baron Tested-by: Jason Baron Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 31cef3602585..4cff74d4133f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2413,22 +2413,19 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, struct key_vector *l, **tp = &iter->tnode; t_key key; - /* use cache location of next-to-find key */ + /* use cached location of previously found key */ if (iter->pos > 0 && pos >= iter->pos) { - pos -= iter->pos; key = iter->key; } else { - iter->pos = 0; + iter->pos = 1; key = 0; } - while ((l = leaf_walk_rcu(tp, key)) != NULL) { + pos -= iter->pos; + + while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) { key = l->key + 1; iter->pos++; - - if (--pos <= 0) - break; - l = NULL; /* handle unlikely case of a key wrap */ @@ -2437,7 +2434,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, } if (l) - iter->key = key; /* remember it */ + iter->key = l->key; /* remember it */ else iter->pos = 0; /* forget it */ @@ -2465,7 +2462,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) return fib_route_get_idx(iter, *pos); iter->pos = 0; - iter->key = 0; + iter->key = KEY_MAX; return SEQ_START_TOKEN; } @@ -2474,7 +2471,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct fib_route_iter *iter = seq->private; struct key_vector *l = NULL; - t_key key = iter->key; + t_key key = iter->key + 1; ++*pos; @@ -2483,7 +2480,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) l = leaf_walk_rcu(&iter->tnode, key); if (l) { - iter->key = l->key + 1; + iter->key = l->key; iter->pos++; } else { iter->pos = 0; From 53f8d322234649b4d6f1515b20c127a577efd164 Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Tue, 8 Nov 2016 14:00:45 +0800 Subject: [PATCH 109/305] gpio: pca953x: Fix corruption of other gpios in set_multiple. gpiod_set_array_value_complex does not clear the bits field. Therefore when the drivers set_multiple funciton is called bits outside the mask are undefined and can be either set or not. So bank_val needs to be masked with bank_mask before or with the reg_val cache. Cc: stable@vger.kernel.org Fixes: b4818afeacbd ("gpio: pca953x: Add set_multiple to allow multiple") Signed-off-by: Phil Reid Signed-off-by: Linus Walleij --- drivers/gpio/gpio-pca953x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index e422568e14ad..4a8d0fe60e0c 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -380,6 +380,7 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc, if (bank_mask) { bank_val = bits[bank / sizeof(*bits)] >> ((bank % sizeof(*bits)) * 8); + bank_val &= bank_mask; reg_val[bank] = (reg_val[bank] & ~bank_mask) | bank_val; } } From 386377b5473043c09b2de40bfe5abfb0fc87e1b4 Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Tue, 8 Nov 2016 13:18:11 +0800 Subject: [PATCH 110/305] gpio: pca953x: Move memcpy into mutex lock for set multiple Need to ensure that reg_output is not updated while setting multiple bits. This makes the mutex locking behaviour for the set_multiple call consistent with that of the set_value call. Cc: stable@vger.kernel.org Fixes: b4818afeacbd ("gpio: pca953x: Add set_multiple to allow multiple") Signed-off-by: Phil Reid Signed-off-by: Linus Walleij --- drivers/gpio/gpio-pca953x.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 4a8d0fe60e0c..fe731f094257 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -372,8 +372,8 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc, bank_shift = fls((chip->gpio_chip.ngpio - 1) / BANK_SZ); - memcpy(reg_val, chip->reg_output, NBANK(chip)); mutex_lock(&chip->i2c_lock); + memcpy(reg_val, chip->reg_output, NBANK(chip)); for (bank = 0; bank < NBANK(chip); bank++) { bank_mask = mask[bank / sizeof(*mask)] >> ((bank % sizeof(*mask)) * 8); @@ -608,7 +608,6 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, if (client->irq && irq_base != -1 && (chip->driver_data & PCA_INT)) { - ret = pca953x_read_regs(chip, chip->regs->input, chip->irq_stat); if (ret) From 7ee7e87dfb158e79019ea1d5ea1b0e6f2bc93ee4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 7 Nov 2016 19:57:00 +0100 Subject: [PATCH 111/305] genirq: Use irq type from irqdata instead of irqdesc The type flags in the irq descriptor are there for historical reasons and only updated via irq_modify_status() or irq_set_type(). Both functions also update the type flags in irqdata. __setup_irq() is the only left over user of the type flags in the irq descriptor. If __setup_irq() is called with empty irq type flags, then the type flags are retrieved from irqdata. If an interrupt is shared, then the type flags are compared with the type flags stored in the irq descriptor. On x86 the ioapic does not have a irq_set_type() callback because the type is defined in the BIOS tables and cannot be changed. The type is stored in irqdata at setup time without updating the type data in the irq descriptor. As a result the comparison described above fails. There is no point in updating the irq descriptor flags because the only relevant storage is irqdata. Use the type flags from irqdata for both retrieval and comparison in __setup_irq() instead. Aside of that the print out in case of non matching type flags has the old and new type flags arguments flipped. Fix that as well. For correctness sake the flags stored in the irq descriptor should be removed, but this is beyond the scope of this bugfix and will be done in a later patch. Fixes: 4b357daed698 ("genirq: Look-up trigger type if not specified by caller") Reported-and-tested-by: Mika Westerberg Signed-off-by: Thomas Gleixner Cc: Marc Zyngier Cc: Jon Hunter Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1611072020360.3501@nanos Signed-off-by: Thomas Gleixner --- kernel/irq/manage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 9c4d30483264..6b669593e7eb 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1341,12 +1341,12 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } else if (new->flags & IRQF_TRIGGER_MASK) { unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK; - unsigned int omsk = irq_settings_get_trigger_mask(desc); + unsigned int omsk = irqd_get_trigger_type(&desc->irq_data); if (nmsk != omsk) /* hope the handler works with current trigger mode */ pr_warn("irq %d uses trigger mode %u; requested %u\n", - irq, nmsk, omsk); + irq, omsk, nmsk); } *old_ptr = new; From 8ae94224c9d72fc4d9aaac93b2d7833cf46d7141 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 4 Nov 2016 19:39:38 +0100 Subject: [PATCH 112/305] kbuild: add -fno-PIE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Debian started to build the gcc with -fPIE by default so the kernel build ends before it starts properly with: |kernel/bounds.c:1:0: error: code model kernel does not support PIC mode Also add to KBUILD_AFLAGS due to: |gcc -Wp,-MD,arch/x86/entry/vdso/vdso32/.note.o.d … -mfentry -DCC_USING_FENTRY … vdso/vdso32/note.S |arch/x86/entry/vdso/vdso32/note.S:1:0: sorry, unimplemented: -mfentry isn’t supported for 32-bit in combination with -fpic Tagging it stable so it is possible to compile recent stable kernels as well. Cc: stable@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Michal Marek --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 512e47a53e9a..58fc5d935ce6 100644 --- a/Makefile +++ b/Makefile @@ -622,6 +622,8 @@ include arch/$(SRCARCH)/Makefile KBUILD_CFLAGS += $(call cc-option,-fno-delete-null-pointer-checks,) KBUILD_CFLAGS += $(call cc-disable-warning,maybe-uninitialized,) KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) +KBUILD_CFLAGS += $(call cc-option,-fno-PIE) +KBUILD_AFLAGS += $(call cc-option,-fno-PIE) ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION KBUILD_CFLAGS += $(call cc-option,-ffunction-sections,) From 8fbfef7f505bba60fb57078b7621270ee57cd1c4 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 3 Nov 2016 17:14:03 -0700 Subject: [PATCH 113/305] ipvs: use IPVS_CMD_ATTR_MAX for family.maxattr family.maxattr is the max index for policy[], the size of ops[] is determined with ARRAY_SIZE(). Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Cc: Pablo Neira Ayuso Signed-off-by: Cong Wang Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c3c809b2e712..a6e44ef2ec9a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2845,7 +2845,7 @@ static struct genl_family ip_vs_genl_family = { .hdrsize = 0, .name = IPVS_GENL_NAME, .version = IPVS_GENL_VERSION, - .maxattr = IPVS_CMD_MAX, + .maxattr = IPVS_CMD_ATTR_MAX, .netnsok = true, /* Make ipvsadm to work on netns */ }; From fb9c9649a1d0a65a8f94f784aa18252a0dd584c1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 29 Oct 2016 03:01:50 +0200 Subject: [PATCH 114/305] netfilter: connmark: ignore skbs with magic untracked conntrack objects The (percpu) untracked conntrack entries can end up with nonzero connmarks. The 'untracked' conntrack objects are merely a way to distinguish INVALID (i.e. protocol connection tracker says payload doesn't meet some requirements or packet was never seen by the connection tracking code) from packets that are intentionally not tracked (some icmpv6 types such as neigh solicitation, or by using 'iptables -j CT --notrack' option). Untracked conntrack objects are implementation detail, we might as well use invalid magic address instead to tell INVALID and UNTRACKED apart. Check skb->nfct for untracked dummy and behave as if skb->nfct is NULL. Reported-by: XU Tianwen Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_connmark.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 69f78e96fdb4..b83e158e116a 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -44,7 +44,7 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par) u_int32_t newmark; ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL) + if (ct == NULL || nf_ct_is_untracked(ct)) return XT_CONTINUE; switch (info->mode) { @@ -97,7 +97,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL) + if (ct == NULL || nf_ct_is_untracked(ct)) return false; return ((ct->mark & info->mask) == info->mark) ^ info->invert; From 6114cc516dcc0d311badb83ad7db5aa4b611bea6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 3 Nov 2016 14:44:42 +0100 Subject: [PATCH 115/305] netfilter: conntrack: fix CT target for UNSPEC helpers Thomas reports its not possible to attach the H.245 helper: iptables -t raw -A PREROUTING -p udp -j CT --helper H.245 iptables: No chain/target/match by that name. xt_CT: No such helper "H.245" This is because H.245 registers as NFPROTO_UNSPEC, but the CT target passes NFPROTO_IPV4/IPV6 to nf_conntrack_helper_try_module_get. We should treat UNSPEC as wildcard and ignore the l3num instead. Reported-by: Thomas Woerner Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 336e21559e01..7341adf7059d 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -138,9 +138,14 @@ __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) for (i = 0; i < nf_ct_helper_hsize; i++) { hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) { - if (!strcmp(h->name, name) && - h->tuple.src.l3num == l3num && - h->tuple.dst.protonum == protonum) + if (strcmp(h->name, name)) + continue; + + if (h->tuple.src.l3num != NFPROTO_UNSPEC && + h->tuple.src.l3num != l3num) + continue; + + if (h->tuple.dst.protonum == protonum) return h; } } From e0df8cae6c16b9ba66a005079aa754b9eedc6efa Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 4 Nov 2016 16:54:58 +0100 Subject: [PATCH 116/305] netfilter: conntrack: refine gc worker heuristics Nicolas Dichtel says: After commit b87a2f9199ea ("netfilter: conntrack: add gc worker to remove timed-out entries"), netlink conntrack deletion events may be sent with a huge delay. Nicolas further points at this line: goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS); and indeed, this isn't optimal at all. Rationale here was to ensure that we don't block other work items for too long, even if nf_conntrack_htable_size is huge. But in order to have some guarantee about maximum time period where a scan of the full conntrack table completes we should always use a fixed slice size, so that once every N scans the full table has been examined at least once. We also need to balance this vs. the case where the system is either idle (i.e., conntrack table (almost) empty) or very busy (i.e. eviction happens from packet path). So, after some discussion with Nicolas: 1. want hard guarantee that we scan entire table at least once every X s -> need to scan fraction of table (get rid of upper bound) 2. don't want to eat cycles on idle or very busy system -> increase interval if we did not evict any entries 3. don't want to block other worker items for too long -> make fraction really small, and prefer small scan interval instead 4. Want reasonable short time where we detect timed-out entry when system went idle after a burst of traffic, while not doing scans all the time. -> Store next gc scan in worker, increasing delays when no eviction happened and shrinking delay when we see timed out entries. The old gc interval is turned into a max number, scans can now happen every jiffy if stale entries are present. Longest possible time period until an entry is evicted is now 2 minutes in worst case (entry expires right after it was deemed 'not expired'). Reported-by: Nicolas Dichtel Signed-off-by: Florian Westphal Acked-by: Nicolas Dichtel Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 49 ++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index df2f5a3901df..0f87e5d21be7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -76,6 +76,7 @@ struct conntrack_gc_work { struct delayed_work dwork; u32 last_bucket; bool exiting; + long next_gc_run; }; static __read_mostly struct kmem_cache *nf_conntrack_cachep; @@ -83,9 +84,11 @@ static __read_mostly spinlock_t nf_conntrack_locks_all_lock; static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; +/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */ #define GC_MAX_BUCKETS_DIV 64u -#define GC_MAX_BUCKETS 8192u -#define GC_INTERVAL (5 * HZ) +/* upper bound of scan intervals */ +#define GC_INTERVAL_MAX (2 * HZ) +/* maximum conntracks to evict per gc run */ #define GC_MAX_EVICTS 256u static struct conntrack_gc_work conntrack_gc_work; @@ -936,13 +939,13 @@ static noinline int early_drop(struct net *net, unsigned int _hash) static void gc_worker(struct work_struct *work) { unsigned int i, goal, buckets = 0, expired_count = 0; - unsigned long next_run = GC_INTERVAL; - unsigned int ratio, scanned = 0; struct conntrack_gc_work *gc_work; + unsigned int ratio, scanned = 0; + unsigned long next_run; gc_work = container_of(work, struct conntrack_gc_work, dwork.work); - goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS); + goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV; i = gc_work->last_bucket; do { @@ -982,17 +985,47 @@ static void gc_worker(struct work_struct *work) if (gc_work->exiting) return; + /* + * Eviction will normally happen from the packet path, and not + * from this gc worker. + * + * This worker is only here to reap expired entries when system went + * idle after a busy period. + * + * The heuristics below are supposed to balance conflicting goals: + * + * 1. Minimize time until we notice a stale entry + * 2. Maximize scan intervals to not waste cycles + * + * Normally, expired_count will be 0, this increases the next_run time + * to priorize 2) above. + * + * As soon as a timed-out entry is found, move towards 1) and increase + * the scan frequency. + * In case we have lots of evictions next scan is done immediately. + */ ratio = scanned ? expired_count * 100 / scanned : 0; - if (ratio >= 90 || expired_count == GC_MAX_EVICTS) + if (ratio >= 90 || expired_count == GC_MAX_EVICTS) { + gc_work->next_gc_run = 0; next_run = 0; + } else if (expired_count) { + gc_work->next_gc_run /= 2U; + next_run = msecs_to_jiffies(1); + } else { + if (gc_work->next_gc_run < GC_INTERVAL_MAX) + gc_work->next_gc_run += msecs_to_jiffies(1); + + next_run = gc_work->next_gc_run; + } gc_work->last_bucket = i; - schedule_delayed_work(&gc_work->dwork, next_run); + queue_delayed_work(system_long_wq, &gc_work->dwork, next_run); } static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) { INIT_DELAYED_WORK(&gc_work->dwork, gc_worker); + gc_work->next_gc_run = GC_INTERVAL_MAX; gc_work->exiting = false; } @@ -1885,7 +1918,7 @@ int nf_conntrack_init_start(void) nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); conntrack_gc_work_init(&conntrack_gc_work); - schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL); + queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX); return 0; From 58c78e104d937c1f560fb10ed9bb2dcde0db4fcf Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 6 Nov 2016 14:40:01 +0800 Subject: [PATCH 117/305] netfilter: nf_tables: fix oops when inserting an element into a verdict map Dalegaard says: The following ruleset, when loaded with 'nft -f bad.txt' ----snip---- flush ruleset table ip inlinenat { map sourcemap { type ipv4_addr : verdict; } chain postrouting { ip saddr vmap @sourcemap accept } } add chain inlinenat test add element inlinenat sourcemap { 100.123.10.2 : jump test } ----snip---- results in a kernel oops: BUG: unable to handle kernel paging request at 0000000000001344 IP: [] nf_tables_check_loops+0x114/0x1f0 [nf_tables] [...] Call Trace: [] ? nft_data_init+0x13e/0x1a0 [nf_tables] [] nft_validate_register_store+0x60/0xb0 [nf_tables] [] nft_add_set_elem+0x545/0x5e0 [nf_tables] [] ? nft_table_lookup+0x30/0x60 [nf_tables] [] ? nla_strcmp+0x40/0x50 [] nf_tables_newsetelem+0x11e/0x210 [nf_tables] [] ? nla_validate+0x60/0x80 [] nfnetlink_rcv+0x354/0x5a7 [nfnetlink] Because we forget to fill the net pointer in bind_ctx, so dereferencing it may cause kernel crash. Reported-by: Dalegaard Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7d6a626b08f1..026581b04ea8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3568,6 +3568,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, dreg = nft_type_to_reg(set->dtype); list_for_each_entry(binding, &set->bindings, list) { struct nft_ctx bind_ctx = { + .net = ctx->net, .afi = ctx->afi, .table = ctx->table, .chain = (struct nft_chain *)binding->chain, From 34a515d27c6573b6f550877b30dd5e0f440c3d8f Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 4 Jan 2016 16:34:22 -0800 Subject: [PATCH 118/305] drm/fsl-dcu: do not update when modifying irq registers The IRQ status and mask registers are not "double buffered" according to the reference manual. Hence, there is no extra transfer/update write needed when modifying these registers. Signed-off-by: Stefan Agner --- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c index e04efbed1a54..cc2fde2ae5ef 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c @@ -59,8 +59,6 @@ static int fsl_dcu_drm_irq_init(struct drm_device *dev) regmap_write(fsl_dev->regmap, DCU_INT_STATUS, 0); regmap_write(fsl_dev->regmap, DCU_INT_MASK, ~0); - regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE, - DCU_UPDATE_MODE_READREG); return ret; } @@ -139,8 +137,6 @@ static irqreturn_t fsl_dcu_drm_irq(int irq, void *arg) drm_handle_vblank(dev, 0); regmap_write(fsl_dev->regmap, DCU_INT_STATUS, int_status); - regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE, - DCU_UPDATE_MODE_READREG); return IRQ_HANDLED; } From 93daeeca2c9472a47d419884a64f6ca2b7f006e4 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 5 Sep 2016 19:05:12 -0700 Subject: [PATCH 119/305] drm/fsl-dcu: update all registers on flush Use the UPDATE_MODE READREG bit to initiate a register transfer on flush. This makes sure that we flush all registers only once for all planes. Signed-off-by: Stefan Agner --- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c | 5 +++++ drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c index b2d5e188b1b8..2ea9dbd9be30 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c @@ -25,8 +25,13 @@ static void fsl_dcu_drm_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state) { + struct drm_device *dev = crtc->dev; + struct fsl_dcu_drm_device *fsl_dev = dev->dev_private; struct drm_pending_vblank_event *event = crtc->state->event; + regmap_write(fsl_dev->regmap, + DCU_UPDATE_MODE, DCU_UPDATE_MODE_READREG); + if (event) { crtc->state->event = NULL; diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c index 9e6f7d8112b3..a99f48847420 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c @@ -160,11 +160,6 @@ static void fsl_dcu_drm_plane_atomic_update(struct drm_plane *plane, DCU_LAYER_POST_SKIP(0) | DCU_LAYER_PRE_SKIP(0)); } - regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE, - DCU_MODE_DCU_MODE_MASK, - DCU_MODE_DCU_MODE(DCU_MODE_NORMAL)); - regmap_write(fsl_dev->regmap, - DCU_UPDATE_MODE, DCU_UPDATE_MODE_READREG); return; } From 3d6f37102bd6e4b55a7f336d44974c0bd1c22a15 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 31 Oct 2016 09:51:19 -0700 Subject: [PATCH 120/305] drm/fsl-dcu: disable planes before disabling CRTC After disabling and reenabling the CRTC the DCU sometimes got stuck displaying the whole screen with a solid color. Disabling and reenabling the CRTC did not recover from the situation. This was often reproducable by just restarting the X-Server. The disabling sequence is not explicitly documented. But it turns out that disabling the planes before disabling the CRTC seems to prevent the above situation from happening. Use the callback ->atomic_disable instead of ->disable which allows to use the drm_atomic_helper_disable_planes_on_crtc() helper to disable planes before disabling the controller. Signed-off-by: Stefan Agner --- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c index 2ea9dbd9be30..deb57435cc89 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c @@ -44,11 +44,15 @@ static void fsl_dcu_drm_crtc_atomic_flush(struct drm_crtc *crtc, } } -static void fsl_dcu_drm_disable_crtc(struct drm_crtc *crtc) +static void fsl_dcu_drm_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_crtc_state *old_crtc_state) { struct drm_device *dev = crtc->dev; struct fsl_dcu_drm_device *fsl_dev = dev->dev_private; + /* always disable planes on the CRTC */ + drm_atomic_helper_disable_planes_on_crtc(old_crtc_state, true); + drm_crtc_vblank_off(crtc); regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE, @@ -127,8 +131,8 @@ static void fsl_dcu_drm_crtc_mode_set_nofb(struct drm_crtc *crtc) } static const struct drm_crtc_helper_funcs fsl_dcu_drm_crtc_helper_funcs = { + .atomic_disable = fsl_dcu_drm_crtc_atomic_disable, .atomic_flush = fsl_dcu_drm_crtc_atomic_flush, - .disable = fsl_dcu_drm_disable_crtc, .enable = fsl_dcu_drm_crtc_enable, .mode_set_nofb = fsl_dcu_drm_crtc_mode_set_nofb, }; From 29f0c9edbdd98a977a4c629f411260f6e0356c67 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Tue, 8 Nov 2016 17:28:02 +0100 Subject: [PATCH 121/305] arm64: dts: marvell: Fix typo in label name on Armada 37xx The label names of the peripheral clocks have a typo. Fix it before it is more widely used. Reported-by: Thomas Petazzoni Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-37xx.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi index c4762538ec01..e9bd58793464 100644 --- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi @@ -105,7 +105,7 @@ status = "disabled"; }; - nb_perih_clk: nb-periph-clk@13000{ + nb_periph_clk: nb-periph-clk@13000 { compatible = "marvell,armada-3700-periph-clock-nb"; reg = <0x13000 0x100>; clocks = <&tbg 0>, <&tbg 1>, <&tbg 2>, @@ -113,7 +113,7 @@ #clock-cells = <1>; }; - sb_perih_clk: sb-periph-clk@18000{ + sb_periph_clk: sb-periph-clk@18000 { compatible = "marvell,armada-3700-periph-clock-sb"; reg = <0x18000 0x100>; clocks = <&tbg 0>, <&tbg 1>, <&tbg 2>, From 2ec27be33898effa47fcb2cd45abb552a97fac89 Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Tue, 6 Sep 2016 19:41:12 +0200 Subject: [PATCH 122/305] arm64: dts: marvell: fix clocksource for CP110 slave SPI0 I2C and SPI interfaces share common clock trees within the CP110 HW block. It occurred that SPI0 interface has wrong clock assignment in the device tree, which is fixed in this commit to a proper value. Fixes: c749b8d9de32 ("arm64: dts: marvell: add description for the ...") Signed-off-by: Marcin Wojtas Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi index 842fb333285c..565b3cb3d7ff 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi @@ -131,7 +131,7 @@ #address-cells = <0x1>; #size-cells = <0x0>; cell-index = <1>; - clocks = <&cps_syscon0 0 3>; + clocks = <&cps_syscon0 1 21>; status = "disabled"; }; From 8d897006fe9206d64cbe353310be26d7c911e69d Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Tue, 8 Nov 2016 17:31:32 +0100 Subject: [PATCH 123/305] arm64: dts: marvell: add unique identifiers for Armada A8k SPI controllers Enabling SPI controllers, which are attached to different busses inside an SoC, may result in overlapping enumeration and cause sysfs registration failure. Example log after enabling two controllers on Armada 8040 SoC with same identifiers: [ 3.740415] sysfs: cannot create duplicate filename '/class/spi_master/spi0' [ 3.747510] ------------[ cut here ]------------ [ 3.752145] WARNING: at fs/sysfs/dir.c:31 [...] [ 4.002299] orion_spi: probe of f4700600.spi failed with error -17 spi-orion driver offers dedicated DT property ('cell-index'), that allow setting unique identifiers. Recently added support for CP110-slave HW block introduced two new SPI controllers' nodes with same ID as ones from CP110-master. This commit fixes the issue by assigning different 'cell-index' values for CP110-slave SPI controllers. Fixes: 4eef78a0091b ("arm64: dts: marvell: add description for the slave CP110 in Armada 8K") Signed-off-by: Marcin Wojtas Acked-by: Thomas Petazzoni Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi index 565b3cb3d7ff..6bf9e241179b 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi @@ -130,7 +130,7 @@ reg = <0x700600 0x50>; #address-cells = <0x1>; #size-cells = <0x0>; - cell-index = <1>; + cell-index = <3>; clocks = <&cps_syscon0 1 21>; status = "disabled"; }; @@ -140,7 +140,7 @@ reg = <0x700680 0x50>; #address-cells = <1>; #size-cells = <0>; - cell-index = <2>; + cell-index = <4>; clocks = <&cps_syscon0 1 21>; status = "disabled"; }; From 9cba9844547731d2f14d79485c43192ffaa37b76 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 25 Oct 2016 01:21:10 +0900 Subject: [PATCH 124/305] perf hist browser: Fix hierarchy column counts The perf report/top on TUI supports horizontal scrolling using LEFT and RIGHT keys. But it calculate the number of columns incorrectly when hierarchy mode is enabled so that keep pressing RIGHT key can make the output disappeared. In the hierarchy mode, all sort keys are collapsed into a single column, so it needs to be applied when calculating column numbers. Reported-and-Tested-by: Markus Trippelsdorf Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161024162110.17918-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 4ffff7be9299..5adedc1a09d3 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2076,8 +2076,21 @@ void hist_browser__init(struct hist_browser *browser, browser->b.use_navkeypressed = true; browser->show_headers = symbol_conf.show_hist_headers; - hists__for_each_format(hists, fmt) + if (symbol_conf.report_hierarchy) { + struct perf_hpp_list_node *fmt_node; + + /* count overhead columns (in the first node) */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) + ++browser->b.columns; + + /* add a single column for whole hierarchy sort keys*/ ++browser->b.columns; + } else { + hists__for_each_format(hists, fmt) + ++browser->b.columns; + } hists__reset_column_width(hists); } From 3d9f4683929a968dc9b9493f4e608b109ad292a2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 8 Nov 2016 22:08:30 +0900 Subject: [PATCH 125/305] perf hists browser: Fix indentation of folded sign on --hierarchy It should indent 2 spaces for folded sign and a whitespace. Signed-off-by: Namhyung Kim Tested-by: Markus Trippelsdorf Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161108130833.9263-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 5adedc1a09d3..225ef2a15a13 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1337,8 +1337,8 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, } if (first) { - ui_browser__printf(&browser->b, "%c", folded_sign); - width--; + ui_browser__printf(&browser->b, "%c ", folded_sign); + width -= 2; first = false; } else { ui_browser__printf(&browser->b, " "); @@ -1555,7 +1555,7 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows int indent = hists->nr_hpp_node - 2; bool first_node, first_col; - ret = scnprintf(buf, size, " "); + ret = scnprintf(buf, size, " "); if (advance_hpp_check(&dummy_hpp, ret)) return ret; From 131d51eb1d17aac3a604cf929fd99ff4dd34f495 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 8 Nov 2016 22:08:31 +0900 Subject: [PATCH 126/305] perf hists browser: Show folded sign properly on --hierarchy When horizontal scrolling is used in hierarchy mode, the folded signed disappears at the right most column. Committer note: To test it, run 'perf top --hierarchy, see the '+' symbol at the first column, then press the right arrow key, the '+' symbol will disappear, this patch fixes that. Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Tested-by: Markus Trippelsdorf Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161108130833.9263-3-namhyung@kernel.org [ Move 'width -= 2' invariant to right after the if/else ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 225ef2a15a13..e767fbd17ad2 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1381,7 +1381,13 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, } perf_hpp_list__for_each_format(entry->hpp_list, fmt) { - ui_browser__write_nstring(&browser->b, "", 2); + if (first) { + ui_browser__printf(&browser->b, "%c ", folded_sign); + first = false; + } else { + ui_browser__write_nstring(&browser->b, "", 2); + } + width -= 2; /* From b9bf911e990a189f89147ee6b66660a153ed0125 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 8 Nov 2016 22:08:32 +0900 Subject: [PATCH 127/305] perf hists browser: Fix column indentation on --hierarchy When horizontall scrolling is used in hierarchy mode, the the right most column has unnecessary indentation. Actually it's needed only if some of left (overhead) columns were shown. Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Tested-by: Markus Trippelsdorf Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161108130833.9263-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index e767fbd17ad2..a53fef0c673b 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1361,8 +1361,10 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, width -= hpp.buf - s; } - ui_browser__write_nstring(&browser->b, "", hierarchy_indent); - width -= hierarchy_indent; + if (!first) { + ui_browser__write_nstring(&browser->b, "", hierarchy_indent); + width -= hierarchy_indent; + } if (column >= browser->b.horiz_scroll) { char s[2048]; @@ -1565,6 +1567,7 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows if (advance_hpp_check(&dummy_hpp, ret)) return ret; + first_node = true; /* the first hpp_list_node is for overhead columns */ fmt_node = list_first_entry(&hists->hpp_formats, struct perf_hpp_list_node, list); @@ -1579,12 +1582,16 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " "); if (advance_hpp_check(&dummy_hpp, ret)) break; + + first_node = false; } - ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s", - indent * HIERARCHY_INDENT, ""); - if (advance_hpp_check(&dummy_hpp, ret)) - return ret; + if (!first_node) { + ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s", + indent * HIERARCHY_INDENT, ""); + if (advance_hpp_check(&dummy_hpp, ret)) + return ret; + } first_node = true; list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) { From c72ab446cac1d6c9551fd26c4cfef1b2fc5041fd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 8 Nov 2016 22:08:33 +0900 Subject: [PATCH 128/305] perf hists: Fix column length on --hierarchy Markus reported that there's a weird behavior on perf top --hierarchy regarding the column length. Looking at the code, I found a dubious code which affects the symptoms. When --hierarchy option is used, the last column length might be inaccurate since it skips to update the length on leaf entries. I cannot remember why it did and looks like a leftover from previous version during the development. Anyway, updating the column length often is not harmful. So let's move the code out. Reported-and-Tested-by: Markus Trippelsdorf Signed-off-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Fixes: 1a3906a7e6b9 ("perf hists: Resort hist entries with hierarchy") Link: http://lkml.kernel.org/r/20161108130833.9263-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b02992efb513..a69f027368ef 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1600,18 +1600,18 @@ static void hists__hierarchy_output_resort(struct hists *hists, if (prog) ui_progress__update(prog, 1); + hists->nr_entries++; + if (!he->filtered) { + hists->nr_non_filtered_entries++; + hists__calc_col_len(hists, he); + } + if (!he->leaf) { hists__hierarchy_output_resort(hists, prog, &he->hroot_in, &he->hroot_out, min_callchain_hits, use_callchain); - hists->nr_entries++; - if (!he->filtered) { - hists->nr_non_filtered_entries++; - hists__calc_col_len(hists, he); - } - continue; } From b0b6e86846093c5f8820386bc01515f857dd8faa Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 8 Nov 2016 09:35:06 +0100 Subject: [PATCH 129/305] x86/cpu/AMD: Fix cpu_llc_id for AMD Fam17h systems cpu_llc_id (Last Level Cache ID) derivation on AMD Fam17h has an underflow bug when extracting the socket_id value. It starts from 0 so subtracting 1 from it will result in an invalid value. This breaks scheduling topology later on since the cpu_llc_id will be incorrect. For example, the the cpu_llc_id of the *other* CPU in the loops in set_cpu_sibling_map() underflows and we're generating the funniest thread_siblings masks and then when I run 8 threads of nbench, they get spread around the LLC domains in a very strange pattern which doesn't give you the normal scheduling spread one would expect for performance. Other things like EDAC use cpu_llc_id so they will be b0rked too. So, the APIC ID is preset in APICx020 for bits 3 and above: they contain the core complex, node and socket IDs. The LLC is at the core complex level so we can find a unique cpu_llc_id by right shifting the APICID by 3 because then the least significant bit will be the Core Complex ID. Tested-by: Borislav Petkov Signed-off-by: Yazen Ghannam [ Cleaned up and extended the commit message. ] Signed-off-by: Borislav Petkov Acked-by: Thomas Gleixner Cc: # v4.4.. Cc: Aravind Gopalakrishnan Cc: Linus Torvalds Cc: Peter Zijlstra Fixes: 3849e91f571d ("x86/AMD: Fix last level cache topology for AMD Fam17h systems") Link: http://lkml.kernel.org/r/20161108083506.rvqb5h4chrcptj7d@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index b81fe2d63e15..1e81a37c034e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -347,7 +347,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP unsigned bits; int cpu = smp_processor_id(); - unsigned int socket_id, core_complex_id; bits = c->x86_coreid_bits; /* Low order bits define the core id (index of core in socket) */ @@ -365,10 +364,7 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) if (c->x86 != 0x17 || !cpuid_edx(0x80000006)) return; - socket_id = (c->apicid >> bits) - 1; - core_complex_id = (c->apicid & ((1 << bits) - 1)) >> 3; - - per_cpu(cpu_llc_id, cpu) = (socket_id << 3) | core_complex_id; + per_cpu(cpu_llc_id, cpu) = c->apicid >> 3; #endif } From aa5fd0fb77486b8a6764ead8627baa14790e4280 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 4 Nov 2016 10:28:49 +0800 Subject: [PATCH 130/305] driver: macvlan: Destroy new macvlan port if macvlan_common_newlink failed. When there is no existing macvlan port in lowdev, one new macvlan port would be created. But it doesn't be destoried when something failed later. It casues some memleak. Now add one flag to indicate if new macvlan port is created. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 3234fcdea317..d2d6f12a112f 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1278,6 +1278,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, struct net_device *lowerdev; int err; int macmode; + bool create = false; if (!tb[IFLA_LINK]) return -EINVAL; @@ -1304,12 +1305,18 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, err = macvlan_port_create(lowerdev); if (err < 0) return err; + create = true; } port = macvlan_port_get_rtnl(lowerdev); /* Only 1 macvlan device can be created in passthru mode */ - if (port->passthru) - return -EINVAL; + if (port->passthru) { + /* The macvlan port must be not created this time, + * still goto destroy_macvlan_port for readability. + */ + err = -EINVAL; + goto destroy_macvlan_port; + } vlan->lowerdev = lowerdev; vlan->dev = dev; @@ -1325,24 +1332,28 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, vlan->flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]); if (vlan->mode == MACVLAN_MODE_PASSTHRU) { - if (port->count) - return -EINVAL; + if (port->count) { + err = -EINVAL; + goto destroy_macvlan_port; + } port->passthru = true; eth_hw_addr_inherit(dev, lowerdev); } if (data && data[IFLA_MACVLAN_MACADDR_MODE]) { - if (vlan->mode != MACVLAN_MODE_SOURCE) - return -EINVAL; + if (vlan->mode != MACVLAN_MODE_SOURCE) { + err = -EINVAL; + goto destroy_macvlan_port; + } macmode = nla_get_u32(data[IFLA_MACVLAN_MACADDR_MODE]); err = macvlan_changelink_sources(vlan, macmode, data); if (err) - return err; + goto destroy_macvlan_port; } err = register_netdevice(dev); if (err < 0) - return err; + goto destroy_macvlan_port; dev->priv_flags |= IFF_MACVLAN; err = netdev_upper_dev_link(lowerdev, dev); @@ -1357,7 +1368,9 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, unregister_netdev: unregister_netdevice(dev); - +destroy_macvlan_port: + if (create) + macvlan_port_destroy(port->dev); return err; } EXPORT_SYMBOL_GPL(macvlan_common_newlink); From 3023898b7d4aac65987bd2f485cc22390aae6f78 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Fri, 4 Nov 2016 15:36:49 -0400 Subject: [PATCH 131/305] sock: fix sendmmsg for partial sendmsg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not send the next message in sendmmsg for partial sendmsg invocations. sendmmsg assumes that it can continue sending the next message when the return value of the individual sendmsg invocations is positive. It results in corrupting the data for TCP, SCTP, and UNIX streams. For example, sendmmsg([["abcd"], ["efgh"]]) can result in a stream of "aefgh" if the first sendmsg invocation sends only the first byte while the second sendmsg goes through. Datagram sockets either send the entire datagram or fail, so this patch affects only sockets of type SOCK_STREAM and SOCK_SEQPACKET. Fixes: 228e548e6020 ("net: Add sendmmsg socket system call") Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Eric Dumazet Signed-off-by: Willem de Bruijn Signed-off-by: Neal Cardwell Acked-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- net/socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/socket.c b/net/socket.c index 5a9bf5ee2464..272518b087c8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2038,6 +2038,8 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, if (err) break; ++datagrams; + if (msg_data_left(&msg_sys)) + break; cond_resched(); } From fb56be83e43d0bb0cc9e8c35a6a9cac853231ba2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Fri, 4 Nov 2016 14:51:54 -0700 Subject: [PATCH 132/305] net-ipv6: on device mtu change do not add mtu to mtu-less routes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Routes can specify an mtu explicitly or inherit the mtu from the underlying device - this inheritance is implemented in dst->ops->mtu handlers ip6_mtu() and ip6_blackhole_mtu(). Currently changing the mtu of a device adds mtu explicitly to routes using that device. ie. # ip link set dev lo mtu 65536 # ip -6 route add local 2000::1 dev lo # ip -6 route get 2000::1 local 2000::1 dev lo table local src ... metric 1024 pref medium # ip link set dev lo mtu 65535 # ip -6 route get 2000::1 local 2000::1 dev lo table local src ... metric 1024 mtu 65535 pref medium # ip link set dev lo mtu 65536 # ip -6 route get 2000::1 local 2000::1 dev lo table local src ... metric 1024 mtu 65536 pref medium # ip -6 route del local 2000::1 After this patch the route entry no longer changes unless it already has an mtu. There is no need: this inheritance is already done in ip6_mtu() # ip link set dev lo mtu 65536 # ip -6 route add local 2000::1 dev lo # ip -6 route add local 2000::2 dev lo mtu 2000 # ip -6 route get 2000::1; ip -6 route get 2000::2 local 2000::1 dev lo table local src ... metric 1024 pref medium local 2000::2 dev lo table local src ... metric 1024 mtu 2000 pref medium # ip link set dev lo mtu 65535 # ip -6 route get 2000::1; ip -6 route get 2000::2 local 2000::1 dev lo table local src ... metric 1024 pref medium local 2000::2 dev lo table local src ... metric 1024 mtu 2000 pref medium # ip link set dev lo mtu 1501 # ip -6 route get 2000::1; ip -6 route get 2000::2 local 2000::1 dev lo table local src ... metric 1024 pref medium local 2000::2 dev lo table local src ... metric 1024 mtu 1501 pref medium # ip link set dev lo mtu 65536 # ip -6 route get 2000::1; ip -6 route get 2000::2 local 2000::1 dev lo table local src ... metric 1024 pref medium local 2000::2 dev lo table local src ... metric 1024 mtu 65536 pref medium # ip -6 route del local 2000::1 # ip -6 route del local 2000::2 This is desirable because changing device mtu and then resetting it to the previous value shouldn't change the user visible routing table. Signed-off-by: Maciej Żenczykowski CC: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7403d90dcb38..1b57e11e6e0d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2761,6 +2761,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) PMTU discouvery. */ if (rt->dst.dev == arg->dev && + dst_metric_raw(&rt->dst, RTAX_MTU) && !dst_metric_locked(&rt->dst, RTAX_MTU)) { if (rt->rt6i_flags & RTF_CACHE) { /* For RTF_CACHE with rt6i_pmtu == 0 From f91d718156fe93d0cf684cacf5f247c35a825d79 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 6 Nov 2016 18:05:06 +0200 Subject: [PATCH 133/305] Revert "net/mlx4_en: Fix panic during reboot" This reverts commit 9d2afba058722d40cc02f430229c91611c0e8d16. The original issue would possibly exist if an external module tried calling our "ethtool_ops" without checking if it still exists. The right way of solving it is by simply doing the check in the caller side. Currently, no action is required as there's no such use case. Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 12c99a2655f2..3a47e83d3e07 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2202,7 +2202,6 @@ void mlx4_en_destroy_netdev(struct net_device *dev) if (!shutdown) free_netdev(dev); - dev->ethtool_ops = NULL; } static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) From d667f78514c656a6a8bf0b3d6134a7fe5cd4d317 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 7 Nov 2016 17:57:56 +0800 Subject: [PATCH 134/305] bna: Add synchronization for tx ring. We received two reports of BUG_ON in bnad_txcmpl_process() where hw_consumer_index appeared to be ahead of producer_index. Out of order write/read of these variables could explain these reports. bnad_start_xmit(), as a producer of tx descriptors, has a few memory barriers sprinkled around writes to producer_index and the device's doorbell but they're not paired with anything in bnad_txcmpl_process(), a consumer. Since we are synchronizing with a device, we must use mandatory barriers, not smp_*. Also, I didn't see the purpose of the last smp_mb() in bnad_start_xmit(). Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bnad.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index f9df4b5ae90e..f42f672b0e7e 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -177,6 +177,7 @@ bnad_txcmpl_process(struct bnad *bnad, struct bna_tcb *tcb) return 0; hw_cons = *(tcb->hw_consumer_index); + rmb(); cons = tcb->consumer_index; q_depth = tcb->q_depth; @@ -3094,7 +3095,7 @@ bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev) BNA_QE_INDX_INC(prod, q_depth); tcb->producer_index = prod; - smp_mb(); + wmb(); if (unlikely(!test_bit(BNAD_TXQ_TX_STARTED, &tcb->flags))) return NETDEV_TX_OK; @@ -3102,7 +3103,6 @@ bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev) skb_tx_timestamp(skb); bna_txq_prod_indx_doorbell(tcb); - smp_mb(); return NETDEV_TX_OK; } From cdb26d3387f0cdf7b2a2eea581385173547ef21f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 7 Nov 2016 13:53:27 +0100 Subject: [PATCH 135/305] net: bgmac: fix reversed checks for clock control flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes regression introduced by patch adding feature flags. It was already reported and patch followed (it got accepted) but it appears it was incorrect. Instead of fixing reversed condition it broke a good one. This patch was verified to actually fix SoC hanges caused by bgmac on BCM47186B0. Fixes: db791eb2970b ("net: ethernet: bgmac: convert to feature flags") Fixes: 4af1474e6198 ("net: bgmac: Fix errant feature flag check") Cc: Jon Mason Signed-off-by: Rafał Miłecki Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 91cbf92de971..49f4cafe5438 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1049,9 +1049,9 @@ static void bgmac_enable(struct bgmac *bgmac) mode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) & BGMAC_DS_MM_MASK) >> BGMAC_DS_MM_SHIFT; - if (!(bgmac->feature_flags & BGMAC_FEAT_CLKCTLST) || mode != 0) + if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST || mode != 0) bgmac_set(bgmac, BCMA_CLKCTLST, BCMA_CLKCTLST_FORCEHT); - if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST && mode == 2) + if (!(bgmac->feature_flags & BGMAC_FEAT_CLKCTLST) && mode == 2) bgmac_cco_ctl_maskset(bgmac, 1, ~0, BGMAC_CHIPCTL_1_RXC_DLL_BYPASS); From d49597fd3bc7d9534de55e9256767f073be1b33a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 9 Nov 2016 16:35:51 +0100 Subject: [PATCH 136/305] x86/cpu: Deal with broken firmware (VMWare/XEN) Both ACPI and MP specifications require that the APIC id in the respective tables must be the same as the APIC id in CPUID. The kernel retrieves the physical package id from the APIC id during the ACPI/MP table scan and builds the physical to logical package map. The physical package id which is used after a CPU comes up is retrieved from CPUID. So we rely on ACPI/MP tables and CPUID agreeing in that respect. There exist VMware and XEN implementations which violate the spec. As a result the physical to logical package map, which relies on the ACPI/MP tables does not work on those systems, because the CPUID initialized physical package id does not match the firmware id. This causes system crashes and malfunction due to invalid package mappings. The only way to cure this is to sanitize the physical package id after the CPUID enumeration and yell when the APIC ids are different. Fix up the initial APIC id, which is fine as it is only used printout purposes. If the physical package IDs differ yell and use the package information from the ACPI/MP tables so the existing logical package map just works. Chas provided the resulting dmesg output for his affected 4 virtual sockets, 1 core per socket VM: [Firmware Bug]: CPU1: APIC id mismatch. Firmware: 1 CPUID: 2 [Firmware Bug]: CPU1: Using firmware package id 1 instead of 2 .... Reported-and-tested-by: "Charles (Chas) Williams" , Reported-by: M. Vefa Bicakci Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Borislav Petkov Cc: Alok Kataria Cc: Boris Ostrovsky Cc: #4.6+ Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1611091613540.3501@nanos Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/common.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9bd910a7dd0a..cc9e980c68ec 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -978,6 +978,35 @@ static void x86_init_cache_qos(struct cpuinfo_x86 *c) } } +/* + * The physical to logical package id mapping is initialized from the + * acpi/mptables information. Make sure that CPUID actually agrees with + * that. + */ +static void sanitize_package_id(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned int pkg, apicid, cpu = smp_processor_id(); + + apicid = apic->cpu_present_to_apicid(cpu); + pkg = apicid >> boot_cpu_data.x86_coreid_bits; + + if (apicid != c->initial_apicid) { + pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x CPUID: %x\n", + cpu, apicid, c->initial_apicid); + c->initial_apicid = apicid; + } + if (pkg != c->phys_proc_id) { + pr_err(FW_BUG "CPU%u: Using firmware package id %u instead of %u\n", + cpu, pkg, c->phys_proc_id); + c->phys_proc_id = pkg; + } + c->logical_proc_id = topology_phys_to_logical_pkg(pkg); +#else + c->logical_proc_id = 0; +#endif +} + /* * This does the hard work of actually picking apart the CPU stuff... */ @@ -1103,8 +1132,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_NUMA numa_add_cpu(smp_processor_id()); #endif - /* The boot/hotplug time assigment got cleared, restore it */ - c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id); + sanitize_package_id(c); } /* From 82031ea29e454b574bc6f49a33683a693ca5d907 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 4 Nov 2016 19:39:39 +0100 Subject: [PATCH 137/305] scripts/has-stack-protector: add -fno-PIE Adding -no-PIE to the fstack protector check. -no-PIE was introduced before -fstack-protector so there is no need for a runtime check. Without it the build stops: |Cannot use CONFIG_CC_STACKPROTECTOR_STRONG: -fstack-protector-strong available but compiler is broken due to -mcmodel=kernel + -fPIE if -fPIE is enabled by default. Tagging it stable so it is possible to compile recent stable kernels as well. Cc: stable@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Michal Marek --- scripts/gcc-x86_64-has-stack-protector.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh index 973e8c141567..17867e723a51 100755 --- a/scripts/gcc-x86_64-has-stack-protector.sh +++ b/scripts/gcc-x86_64-has-stack-protector.sh @@ -1,6 +1,6 @@ #!/bin/sh -echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs" +echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -O0 -mcmodel=kernel -fno-PIE -fstack-protector - -o - 2> /dev/null | grep -q "%gs" if [ "$?" -eq "0" ] ; then echo y else From 90944e40ba1838de4b2a9290cf273f9d76bd3bdd Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 4 Nov 2016 19:39:40 +0100 Subject: [PATCH 138/305] x86/kexec: add -fno-PIE If the gcc is configured to do -fPIE by default then the build aborts later with: | Unsupported relocation type: unknown type rel type name (29) Tagging it stable so it is possible to compile recent stable kernels as well. Cc: stable@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Michal Marek --- arch/x86/purgatory/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index ac58c1616408..555b9fa0ad43 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -16,6 +16,7 @@ KCOV_INSTRUMENT := n KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large KBUILD_CFLAGS += -m$(BITS) +KBUILD_CFLAGS += $(call cc-option,-fno-PIE) $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) From cc6acc11cad1eb1ae39707a3a6e4a97fafbeeabd Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 9 Nov 2016 15:34:05 +1100 Subject: [PATCH 139/305] kbuild: be more careful about matching preprocessed asm ___EXPORT_SYMBOL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CRC code for asm exports grabs the preprocessed asm, finds the ___EXPORT_SYMBOL and turns those into EXPORT_SYMBOL in a C program that can be preprocessed and parsed to create the CRC signatures from the type. The existing regex matching and replacement is too strict, and doesn't deal well with whitespace among other things. The line " EXPORT_SYMBOL(sym)" in a .S file would not match due to initial whitespace, for example, which resulted in x86's ___preempt_schedule failing to get CRCs. Reported-by: Philip Müller Signed-off-by: Nicholas Piggin Signed-off-by: Michal Marek --- scripts/Makefile.build | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 3e223c264469..7675d11ee65e 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -332,8 +332,8 @@ cmd_gensymtypes_S = \ (echo "\#include " ; \ echo "\#include " ; \ $(CPP) $(a_flags) $< | \ - grep ^___EXPORT_SYMBOL | \ - sed 's/___EXPORT_SYMBOL \([a-zA-Z0-9_]*\),.*/EXPORT_SYMBOL(\1);/' ) | \ + grep "\<___EXPORT_SYMBOL\>" | \ + sed 's/.*___EXPORT_SYMBOL[[:space:]]*\([a-zA-Z0-9_]*\)[[:space:]]*,.*/EXPORT_SYMBOL(\1);/' ) | \ $(CPP) -D__GENKSYMS__ $(c_flags) -xc - | \ $(GENKSYMS) $(if $(1), -T $(2)) \ $(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX)) \ From 80513a2b9f0448eadd10ae81a42229b33ef451fb Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Fri, 21 Oct 2016 08:34:59 -0500 Subject: [PATCH 140/305] ARM: omap3: Add missing memory node in SOM-LV The skeleton.dtsi file was removed in ARM64 for different reasons as explained in commit ("3ebee5a2e141 arm64: dts: kill skeleton.dtsi"). commit ("766a1fe78fc3 ARM: omap3: Add missing memory node") had fixes for Torpedo and Overo boards, but this SOM-LV was missed. This should help prevent the DTC warning: "Node /memory has a reg or ranges property, but no unit name" Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/logicpd-som-lv.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index 0ff1c2de95bf..26cce4d18405 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -13,6 +13,11 @@ }; }; + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0>; + }; + wl12xx_vmmc: wl12xx_vmmc { compatible = "regulator-fixed"; regulator-name = "vwl1271"; From 3e884493448131179a5b7cae1ddca1028ffaecc8 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Mon, 7 Nov 2016 10:51:40 -0600 Subject: [PATCH 141/305] net: qcom/emac: configure the external phy to allow pause frames Pause frames are used to enable flow control. A MAC can send and receive pause frames in order to throttle traffic. However, the PHY must be configured to allow those frames to pass through. Reviewed-by: Florian Fainelli Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-mac.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 6fb3bee904d3..70a55dcc431d 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -1003,6 +1003,12 @@ int emac_mac_up(struct emac_adapter *adpt) writel((u32)~DIS_INT, adpt->base + EMAC_INT_STATUS); writel(adpt->irq.mask, adpt->base + EMAC_INT_MASK); + /* Enable pause frames. Without this feature, the EMAC has been shown + * to receive (and drop) frames with FCS errors at gigabit connections. + */ + adpt->phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + adpt->phydev->advertising |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + adpt->phydev->irq = PHY_IGNORE_INTERRUPT; phy_start(adpt->phydev); From df63022e182de4041b65ae22df1950d3416b577e Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Mon, 7 Nov 2016 10:51:41 -0600 Subject: [PATCH 142/305] net: qcom/emac: enable flow control if requested If the PHY has been configured to allow pause frames, then the MAC should be configured to generate and/or accept those frames. Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-mac.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 70a55dcc431d..0b4deb31e742 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -575,10 +575,11 @@ void emac_mac_start(struct emac_adapter *adpt) mac |= TXEN | RXEN; /* enable RX/TX */ - /* We don't have ethtool support yet, so force flow-control mode - * to 'full' always. - */ - mac |= TXFC | RXFC; + /* Configure MAC flow control to match the PHY's settings. */ + if (phydev->pause) + mac |= RXFC; + if (phydev->pause != phydev->asym_pause) + mac |= TXFC; /* setup link speed */ mac &= ~SPEED_MASK; From 9d1a6c4ea43e48c7880c85971c17939b56832d8a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Nov 2016 12:03:09 -0800 Subject: [PATCH 143/305] net: icmp_route_lookup should use rt dev to determine L3 domain icmp_send is called in response to some event. The skb may not have the device set (skb->dev is NULL), but it is expected to have an rt. Update icmp_route_lookup to use the rt on the skb to determine L3 domain. Fixes: 613d09b30f8b ("net: Use VRF device index for lookups on TX") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 38abe70e595f..48734ee6293f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -477,7 +477,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; fl4->fl4_icmp_code = code; - fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev); + fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev); security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); rt = __ip_route_output_key_hash(net, fl4, @@ -502,7 +502,7 @@ static struct rtable *icmp_route_lookup(struct net *net, if (err) goto relookup_failed; - if (inet_addr_type_dev_table(net, skb_in->dev, + if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev, fl4_dec.saddr) == RTN_LOCAL) { rt2 = __ip_route_output_key(net, &fl4_dec); if (IS_ERR(rt2)) From 6dbcd8fb5968fda3a5fba019dfb0c80c3139627b Mon Sep 17 00:00:00 2001 From: John Allen Date: Mon, 7 Nov 2016 14:27:28 -0600 Subject: [PATCH 144/305] ibmvnic: Start completion queue negotiation at server-provided optimum values Use the opt_* fields to determine the starting point for negotiating the number of tx/rx completion queues with the vnic server. These contain the number of queues that the vnic server estimates that it will be able to allocate. While renegotiation may still occur, using the opt_* fields will reduce the number of times this needs to happen and will prevent driver probe timeout on systems using large numbers of ibmvnic client devices per vnic port. Signed-off-by: John Allen Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 5f44c5520fbc..f6c9b6d38ac7 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1505,9 +1505,8 @@ static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry) adapter->max_rx_add_entries_per_subcrq > entries_page ? entries_page : adapter->max_rx_add_entries_per_subcrq; - /* Choosing the maximum number of queues supported by firmware*/ - adapter->req_tx_queues = adapter->max_tx_queues; - adapter->req_rx_queues = adapter->max_rx_queues; + adapter->req_tx_queues = adapter->opt_tx_comp_sub_queues; + adapter->req_rx_queues = adapter->opt_rx_comp_queues; adapter->req_rx_add_queues = adapter->max_rx_add_queues; adapter->req_mtu = adapter->max_mtu; From 4053ab1bf98dd128344b9e67ef139f931a967ae1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 7 Nov 2016 22:09:07 +0100 Subject: [PATCH 145/305] vxlan: hide unused local variable A bugfix introduced a harmless warning in v4.9-rc4: drivers/net/vxlan.c: In function 'vxlan_group_used': drivers/net/vxlan.c:947:21: error: unused variable 'sock6' [-Werror=unused-variable] This hides the variable inside of the same #ifdef that is around its user. The extraneous initialization is removed at the same time, it was accidentally introduced in the same commit. Fixes: c6fcc4fc5f8b ("vxlan: avoid using stale vxlan socket.") Signed-off-by: Arnd Bergmann Acked-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index f3c2fa3ab0d5..24532cdebb00 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -944,7 +944,9 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev) { struct vxlan_dev *vxlan; struct vxlan_sock *sock4; - struct vxlan_sock *sock6 = NULL; +#if IS_ENABLED(CONFIG_IPV6) + struct vxlan_sock *sock6; +#endif unsigned short family = dev->default_dst.remote_ip.sa.sa_family; sock4 = rtnl_dereference(dev->vn4_sock); From f567e950bf51290755a2539ff2aaef4c26f735d3 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 7 Nov 2016 23:22:19 +0100 Subject: [PATCH 146/305] rtnl: reset calcit fptr in rtnl_unregister() To avoid having dangling function pointers left behind, reset calcit in rtnl_unregister(), too. This is no issue so far, as only the rtnl core registers a netlink handler with a calcit hook which won't be unregistered, but may become one if new code makes use of the calcit hook. Fixes: c7ac8679bec9 ("rtnetlink: Compute and store minimum ifinfo...") Cc: Jeff Kirsher Cc: Greg Rose Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fb7348f13501..db313ec7af32 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -275,6 +275,7 @@ int rtnl_unregister(int protocol, int msgtype) rtnl_msg_handlers[protocol][msgindex].doit = NULL; rtnl_msg_handlers[protocol][msgindex].dumpit = NULL; + rtnl_msg_handlers[protocol][msgindex].calcit = NULL; return 0; } From 8da3cf2a49a6d0ca5e620c6a5eee49b99a3f0880 Mon Sep 17 00:00:00 2001 From: Allan Chou Date: Tue, 8 Nov 2016 16:08:01 -0600 Subject: [PATCH 147/305] Net Driver: Add Cypress GX3 VID=04b4 PID=3610. Add support for Cypress GX3 SuperSpeed to Gigabit Ethernet Bridge Controller (Vendor=04b4 ProdID=3610). Patch verified on x64 linux kernel 4.7.4, 4.8.6, 4.9-rc4 systems with the Kensington SD4600P USB-C Universal Dock with Power, which uses the Cypress GX3 SuperSpeed to Gigabit Ethernet Bridge Controller. A similar patch was signed-off and tested-by Allan Chou on 2015-12-01. Allan verified his similar patch on x86 Linux kernel 4.1.6 system with Cypress GX3 SuperSpeed to Gigabit Ethernet Bridge Controller. Tested-by: Allan Chou Tested-by: Chris Roth Tested-by: Artjom Simon Signed-off-by: Allan Chou Signed-off-by: Chris Roth Signed-off-by: David S. Miller --- drivers/net/usb/ax88179_178a.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index e6338c16081a..8a6675d92b98 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1656,6 +1656,19 @@ static const struct driver_info ax88178a_info = { .tx_fixup = ax88179_tx_fixup, }; +static const struct driver_info cypress_GX3_info = { + .description = "Cypress GX3 SuperSpeed to Gigabit Ethernet Controller", + .bind = ax88179_bind, + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, +}; + static const struct driver_info dlink_dub1312_info = { .description = "D-Link DUB-1312 USB 3.0 to Gigabit Ethernet Adapter", .bind = ax88179_bind, @@ -1717,6 +1730,10 @@ static const struct usb_device_id products[] = { /* ASIX AX88178A 10/100/1000 */ USB_DEVICE(0x0b95, 0x178a), .driver_info = (unsigned long)&ax88178a_info, +}, { + /* Cypress GX3 SuperSpeed to Gigabit Ethernet Bridge Controller */ + USB_DEVICE(0x04b4, 0x3610), + .driver_info = (unsigned long)&cypress_GX3_info, }, { /* D-Link DUB-1312 USB 3.0 to Gigabit Ethernet Adapter */ USB_DEVICE(0x2001, 0x4a00), From 9b6c14d51bd2304b92f842e96172a9cc822fc77c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 9 Nov 2016 09:07:26 -0800 Subject: [PATCH 148/305] net: tcp response should set oif only if it is L3 master Lorenzo noted an Android unit test failed due to e0d56fdd7342: "The expectation in the test was that the RST replying to a SYN sent to a closed port should be generated with oif=0. In other words it should not prefer the interface where the SYN came in on, but instead should follow whatever the routing table says it should do." Revert the change to ip_send_unicast_reply and tcp_v6_send_response such that the oif in the flow is set to the skb_iif only if skb_iif is an L3 master. Fixes: e0d56fdd7342 ("net: l3mdev: remove redundant calls") Reported-by: Lorenzo Colitti Signed-off-by: David Ahern Tested-by: Lorenzo Colitti Acked-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 3 ++- net/ipv6/tcp_ipv6.c | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 49714010ac2e..9403fa3850be 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1577,7 +1577,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, } oif = arg->bound_dev_if; - oif = oif ? : skb->skb_iif; + if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) + oif = skb->skb_iif; flowi4_init_output(&fl4, oif, IP4_REPLY_MARK(net, skb->mark), diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5a27ab4eab39..6ca23c2e76f7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -818,8 +818,12 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_proto = IPPROTO_TCP; if (rt6_need_strict(&fl6.daddr) && !oif) fl6.flowi6_oif = tcp_v6_iif(skb); - else - fl6.flowi6_oif = oif ? : skb->skb_iif; + else { + if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) + oif = skb->skb_iif; + + fl6.flowi6_oif = oif; + } fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; From 2ecb704a1290edb5e3d53a75529192e7ed2a1a28 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 10 Nov 2016 13:20:05 +0800 Subject: [PATCH 149/305] ALSA: hda - add a new condition to check if it is thinkpad Latest Thinkpad laptops use the HKEY_HID LEN0268 instead of the LEN0068, as a result neither audio mute led nor mic mute led can work any more. After adding the new HKEY_HID into the is_thinkpad(), both of them works well as before. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/thinkpad_helper.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/thinkpad_helper.c b/sound/pci/hda/thinkpad_helper.c index 6a23302297c9..4d9d320a7971 100644 --- a/sound/pci/hda/thinkpad_helper.c +++ b/sound/pci/hda/thinkpad_helper.c @@ -13,7 +13,8 @@ static void (*old_vmaster_hook)(void *, int); static bool is_thinkpad(struct hda_codec *codec) { return (codec->core.subsystem_id >> 16 == 0x17aa) && - (acpi_dev_found("LEN0068") || acpi_dev_found("IBM0068")); + (acpi_dev_found("LEN0068") || acpi_dev_found("LEN0268") || + acpi_dev_found("IBM0068")); } static void update_tpacpi_mute_led(void *private_data, int enabled) From 0ace81ec7192201af48528c309ee0b4103021f55 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Wed, 9 Nov 2016 15:04:39 -0500 Subject: [PATCH 150/305] ipv4: update comment to document GSO fragmentation cases. This is a follow-up to commit 9ee6c5dc816a ("ipv4: allow local fragmentation in ip_finish_output_gso()"), updating the comment documenting cases in which fragmentation is needed for egress GSO packets. Suggested-by: Shmulik Ladkani Reviewed-by: Shmulik Ladkani Signed-off-by: Lance Richardson Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 9403fa3850be..105908d841a3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -244,12 +244,18 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk, if (skb_gso_validate_mtu(skb, mtu)) return ip_finish_output2(net, sk, skb); - /* Slowpath - GSO segment length is exceeding the dst MTU. + /* Slowpath - GSO segment length exceeds the egress MTU. * - * This can happen in two cases: - * 1) TCP GRO packet, DF bit not set - * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly - * from host network stack. + * This can happen in several cases: + * - Forwarding of a TCP GRO skb, when DF flag is not set. + * - Forwarding of an skb that arrived on a virtualization interface + * (virtio-net/vhost/tap) with TSO/GSO size set by other network + * stack. + * - Local GSO skb transmitted on an NETIF_F_TSO tunnel stacked over an + * interface with a smaller MTU. + * - Arriving GRO skb (or GSO skb in a virtualized environment) that is + * bridged to a NETIF_F_TSO tunnel stacked over an interface with an + * insufficent MTU. */ features = netif_skb_features(skb); BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET); From 8d1d8fcb21cfc4a65731760c3100920f929e8f3d Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Wed, 9 Nov 2016 22:48:43 +0200 Subject: [PATCH 151/305] qed: configure ll2 RoCE v1/v2 flavor correctly Currently RoCE v2 won't operate with RDMA CM due to missing setting of the roce-flavour in the ll2 configuration. This patch properly sets the flavour, and deletes incorrect HSI that doesn't [yet] exist. Fixes: abd49676c707 ("qed: Add RoCE ll2 & GSI support") Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 3 --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 72eee29c677f..2777d5bb4380 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -727,9 +727,6 @@ struct core_tx_bd_flags { #define CORE_TX_BD_FLAGS_L4_PROTOCOL_SHIFT 6 #define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_MASK 0x1 #define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_SHIFT 7 -#define CORE_TX_BD_FLAGS_ROCE_FLAV_MASK 0x1 -#define CORE_TX_BD_FLAGS_ROCE_FLAV_SHIFT 12 - }; struct core_tx_bd { diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 63e1a1b0ef8e..f95385cbbd40 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -1119,6 +1119,7 @@ static void qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn, start_bd->bd_flags.as_bitfield |= CORE_TX_BD_FLAGS_START_BD_MASK << CORE_TX_BD_FLAGS_START_BD_SHIFT; SET_FIELD(start_bd->bitfield0, CORE_TX_BD_NBDS, num_of_bds); + SET_FIELD(start_bd->bitfield0, CORE_TX_BD_ROCE_FLAV, type); DMA_REGPAIR_LE(start_bd->addr, first_frag); start_bd->nbytes = cpu_to_le16(first_frag_len); From 5c5f26090840951b4102d9a1e6db9aac41101e5a Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Wed, 9 Nov 2016 22:48:44 +0200 Subject: [PATCH 152/305] qed: Correct rdma params configuration Previous fix has broken RoCE support as the rdma_pf_params are now being set into the parameters only after the params are alrady assigned into the hw-function. Fixes: 0189efb8f4f8 ("qed*: Fix Kconfig dependencies with INFINIBAND_QEDR") Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_main.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index c418360ba02a..333c7442e48a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -839,20 +839,19 @@ static void qed_update_pf_params(struct qed_dev *cdev, { int i; + if (IS_ENABLED(CONFIG_QED_RDMA)) { + params->rdma_pf_params.num_qps = QED_ROCE_QPS; + params->rdma_pf_params.min_dpis = QED_ROCE_DPIS; + /* divide by 3 the MRs to avoid MF ILT overflow */ + params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS; + params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX; + } + for (i = 0; i < cdev->num_hwfns; i++) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; p_hwfn->pf_params = *params; } - - if (!IS_ENABLED(CONFIG_QED_RDMA)) - return; - - params->rdma_pf_params.num_qps = QED_ROCE_QPS; - params->rdma_pf_params.min_dpis = QED_ROCE_DPIS; - /* divide by 3 the MRs to avoid MF ILT overflow */ - params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS; - params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX; } static int qed_slowpath_start(struct qed_dev *cdev, From 33b1341cd1bf5c89e7ef332aa8ac3ed614a3d942 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 10 Nov 2016 12:31:04 +0100 Subject: [PATCH 153/305] mlxsw: spectrum_router: Fix handling of neighbour structure __neigh_create function works in a different way than assumed. It passes "n" as a parameter to ndo_neigh_construct. But this "n" might be destroyed right away before __neigh_create() returns in case there is already another neighbour struct in the hashtable with the same dev and primary key. That is not expected by mlxsw_sp_router_neigh_construct() and the stored "n" points to freed memory, eventually leading to crash. Fix this by doing tight 1:1 coupling between neighbour struct and internal driver neigh_entry. That allows to narrow down the key in internal driver hashtable to do lookups by "n" only. Fixes: 6cf3c971dc84 ("mlxsw: spectrum_router: Add private neigh table") Signed-off-by: Jiri Pirko Acked-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 95 +++++++------------ 1 file changed, 34 insertions(+), 61 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 4573da2c5560..28630129065d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -600,15 +600,13 @@ static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp) } struct mlxsw_sp_neigh_key { - unsigned char addr[sizeof(struct in6_addr)]; - struct net_device *dev; + struct neighbour *n; }; struct mlxsw_sp_neigh_entry { struct rhash_head ht_node; struct mlxsw_sp_neigh_key key; u16 rif; - struct neighbour *n; bool offloaded; struct delayed_work dw; struct mlxsw_sp_port *mlxsw_sp_port; @@ -646,19 +644,15 @@ mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work); static struct mlxsw_sp_neigh_entry * -mlxsw_sp_neigh_entry_create(const void *addr, size_t addr_len, - struct net_device *dev, u16 rif, - struct neighbour *n) +mlxsw_sp_neigh_entry_create(struct neighbour *n, u16 rif) { struct mlxsw_sp_neigh_entry *neigh_entry; neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_ATOMIC); if (!neigh_entry) return NULL; - memcpy(neigh_entry->key.addr, addr, addr_len); - neigh_entry->key.dev = dev; + neigh_entry->key.n = n; neigh_entry->rif = rif; - neigh_entry->n = n; INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw); INIT_LIST_HEAD(&neigh_entry->nexthop_list); return neigh_entry; @@ -671,13 +665,11 @@ mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry *neigh_entry) } static struct mlxsw_sp_neigh_entry * -mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, const void *addr, - size_t addr_len, struct net_device *dev) +mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) { - struct mlxsw_sp_neigh_key key = {{ 0 } }; + struct mlxsw_sp_neigh_key key; - memcpy(key.addr, addr, addr_len); - key.dev = dev; + key.n = n; return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht, &key, mlxsw_sp_neigh_ht_params); } @@ -689,26 +681,20 @@ int mlxsw_sp_router_neigh_construct(struct net_device *dev, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_neigh_entry *neigh_entry; struct mlxsw_sp_rif *r; - u32 dip; int err; if (n->tbl != &arp_tbl) return 0; - dip = ntohl(*((__be32 *) n->primary_key)); - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip), - n->dev); - if (neigh_entry) { - WARN_ON(neigh_entry->n != n); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); + if (neigh_entry) return 0; - } r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev); if (WARN_ON(!r)) return -EINVAL; - neigh_entry = mlxsw_sp_neigh_entry_create(&dip, sizeof(dip), n->dev, - r->rif, n); + neigh_entry = mlxsw_sp_neigh_entry_create(n, r->rif); if (!neigh_entry) return -ENOMEM; err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); @@ -727,14 +713,11 @@ void mlxsw_sp_router_neigh_destroy(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_neigh_entry *neigh_entry; - u32 dip; if (n->tbl != &arp_tbl) return; - dip = ntohl(*((__be32 *) n->primary_key)); - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip), - n->dev); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); if (!neigh_entry) return; mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); @@ -862,7 +845,7 @@ static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp) * is active regardless of the traffic. */ if (!list_empty(&neigh_entry->nexthop_list)) - neigh_event_send(neigh_entry->n, NULL); + neigh_event_send(neigh_entry->key.n, NULL); } rtnl_unlock(); } @@ -908,9 +891,9 @@ static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) rtnl_lock(); list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list, nexthop_neighs_list_node) { - if (!(neigh_entry->n->nud_state & NUD_VALID) && + if (!(neigh_entry->key.n->nud_state & NUD_VALID) && !list_empty(&neigh_entry->nexthop_list)) - neigh_event_send(neigh_entry->n, NULL); + neigh_event_send(neigh_entry->key.n, NULL); } rtnl_unlock(); @@ -927,7 +910,7 @@ static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work) { struct mlxsw_sp_neigh_entry *neigh_entry = container_of(work, struct mlxsw_sp_neigh_entry, dw.work); - struct neighbour *n = neigh_entry->n; + struct neighbour *n = neigh_entry->key.n; struct mlxsw_sp_port *mlxsw_sp_port = neigh_entry->mlxsw_sp_port; struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char rauht_pl[MLXSW_REG_RAUHT_LEN]; @@ -1030,11 +1013,8 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, mlxsw_sp = mlxsw_sp_port->mlxsw_sp; dip = ntohl(*((__be32 *) n->primary_key)); - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, - &dip, - sizeof(__be32), - dev); - if (WARN_ON(!neigh_entry) || WARN_ON(neigh_entry->n != n)) { + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); + if (WARN_ON(!neigh_entry)) { mlxsw_sp_port_dev_put(mlxsw_sp_port); return NOTIFY_DONE; } @@ -1343,33 +1323,26 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, struct fib_nh *fib_nh) { struct mlxsw_sp_neigh_entry *neigh_entry; - u32 gwip = ntohl(fib_nh->nh_gw); struct net_device *dev = fib_nh->nh_dev; struct neighbour *n; u8 nud_state; - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip, - sizeof(gwip), dev); - if (!neigh_entry) { - __be32 gwipn = htonl(gwip); - - n = neigh_create(&arp_tbl, &gwipn, dev); + /* Take a reference of neigh here ensuring that neigh would + * not be detructed before the nexthop entry is finished. + * The reference is taken either in neigh_lookup() or + * in neith_create() in case n is not found. + */ + n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, dev); + if (!n) { + n = neigh_create(&arp_tbl, &fib_nh->nh_gw, dev); if (IS_ERR(n)) return PTR_ERR(n); neigh_event_send(n, NULL); - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip, - sizeof(gwip), dev); - if (!neigh_entry) { - neigh_release(n); - return -EINVAL; - } - } else { - /* Take a reference of neigh here ensuring that neigh would - * not be detructed before the nexthop entry is finished. - * The second branch takes the reference in neith_create() - */ - n = neigh_entry->n; - neigh_clone(n); + } + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); + if (!neigh_entry) { + neigh_release(n); + return -EINVAL; } /* If that is the first nexthop connected to that neigh, add to @@ -1403,7 +1376,7 @@ static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp, if (list_empty(&nh->neigh_entry->nexthop_list)) list_del(&nh->neigh_entry->nexthop_neighs_list_node); - neigh_release(neigh_entry->n); + neigh_release(neigh_entry->key.n); } static struct mlxsw_sp_nexthop_group * @@ -1463,11 +1436,11 @@ static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh, for (i = 0; i < fi->fib_nhs; i++) { struct fib_nh *fib_nh = &fi->fib_nh[i]; - u32 gwip = ntohl(fib_nh->nh_gw); + struct neighbour *n = nh->neigh_entry->key.n; - if (memcmp(nh->neigh_entry->key.addr, - &gwip, sizeof(u32)) == 0 && - nh->neigh_entry->key.dev == fib_nh->nh_dev) + if (memcmp(n->primary_key, &fib_nh->nh_gw, + sizeof(fib_nh->nh_gw)) == 0 && + n->dev == fib_nh->nh_dev) return true; } return false; From 0e3715c9c250747280b1757ea267c577e7591e31 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 10 Nov 2016 12:31:05 +0100 Subject: [PATCH 154/305] mlxsw: spectrum_router: Ignore FIB notification events for non-init namespaces Since now, the table with same id in multiple netnamespaces were squashed to a single virtual router. That is not only incorrect, it also causes error messages when trying to use RALUE register to do double remove of FIB entries, like this one: mlxsw_spectrum 0000:03:00.0: EMAD reg access failed (tid=facb831c00007b20,reg_id=8013(ralue),type=write,status=7(bad parameter)) Since we don't allow ports to change namespaces (NETIF_F_NETNS_LOCAL), and the infrastructure is not yet prepared to handle netnamespaces, just ignore FIB notification events for non-init namespaces. That is clear to do since we don't need to offload them. Fixes: b45f64d16d45 ("mlxsw: spectrum_router: Use FIB notifications instead of switchdev calls") Signed-off-by: Jiri Pirko Acked-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 28630129065d..040737e14a3f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1931,6 +1931,9 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, struct fib_entry_notifier_info *fen_info = ptr; int err; + if (!net_eq(fen_info->info.net, &init_net)) + return NOTIFY_DONE; + switch (event) { case FIB_EVENT_ENTRY_ADD: err = mlxsw_sp_router_fib4_add(mlxsw_sp, fen_info); From d052db11c153cfb469f13a4121966f30ecb57c66 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 6 Nov 2016 21:20:32 +0100 Subject: [PATCH 155/305] i2c: mux: demux-pinctrl: make drivers with no pinctrl work again Some drivers like i2c-gpio do not have dedicated pinctrl states. They broke when error checking for pinctrl was added. Detect them now, and in their case, simply skip over pinctrl configuration. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-demux-pinctrl.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c index b3893f6282ba..3e6fe1760d82 100644 --- a/drivers/i2c/muxes/i2c-demux-pinctrl.c +++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c @@ -69,10 +69,28 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne goto err_with_revert; } - p = devm_pinctrl_get_select(adap->dev.parent, priv->bus_name); + /* + * Check if there are pinctrl states at all. Note: we cant' use + * devm_pinctrl_get_select() because we need to distinguish between + * the -ENODEV from devm_pinctrl_get() and pinctrl_lookup_state(). + */ + p = devm_pinctrl_get(adap->dev.parent); if (IS_ERR(p)) { ret = PTR_ERR(p); - goto err_with_put; + /* continue if just no pinctrl states (e.g. i2c-gpio), otherwise exit */ + if (ret != -ENODEV) + goto err_with_put; + } else { + /* there are states. check and use them */ + struct pinctrl_state *s = pinctrl_lookup_state(p, priv->bus_name); + + if (IS_ERR(s)) { + ret = PTR_ERR(s); + goto err_with_put; + } + ret = pinctrl_select_state(p, s); + if (ret < 0) + goto err_with_put; } priv->chan[new_chan].parent_adap = adap; From f10a59eb8c1087f0ce03cf0392cd483922187066 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Thu, 10 Nov 2016 15:03:21 +0100 Subject: [PATCH 156/305] i2c: Documentation: i2c-topology: fix minor whitespace nit Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang --- Documentation/i2c/i2c-topology | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/i2c/i2c-topology b/Documentation/i2c/i2c-topology index e0aefeece551..1a014fede0b7 100644 --- a/Documentation/i2c/i2c-topology +++ b/Documentation/i2c/i2c-topology @@ -326,7 +326,7 @@ Two parent-locked sibling muxes This is a good topology. - .--------. + .--------. .----------. .--| dev D1 | | parent- |--' '--------' .--| locked | .--------. @@ -350,7 +350,7 @@ Mux-locked and parent-locked sibling muxes This is a good topology. - .--------. + .--------. .----------. .--| dev D1 | | mux- |--' '--------' .--| locked | .--------. From 7bc61cc5df808008b77a3b72cf814960c675518b Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 19 Oct 2016 10:46:18 +0300 Subject: [PATCH 157/305] drm/arcpgu: Accommodate adv7511 switch to DRM bridge ARC PGU driver starts crashing on initialization after 'commit e12c2f645557 ("drm/i2c: adv7511: Convert to drm_bridge")' This happenes because in "arcpgu_drm_hdmi_init" function we get pointer of "drm_i2c_encoder_driver" structure, which doesn't exist after adv7511 hdmi encoder interface changed from slave encoder to drm bridge. So, when we call "encoder_init" function from this structure driver crashes. Bootlog: ------------------------------------->8-------------------------------- [drm] Initialized drm 1.1.0 20060810 arcpgu e0017000.pgu: arc_pgu ID: 0xabbabaab arcpgu e0017000.pgu: assigned reserved memory node frame_buffer@9e000000 Path: (null) CPU: 0 PID: 1 Comm: swapper Not tainted 4.8.0-00001-gb5642252fa01-dirty #8 task: 9a058000 task.stack: 9a032000 [ECR ]: 0x00220100 => Invalid Read @ 0x00000004 by insn @ 0x803934e8 [EFA ]: 0x00000004 [BLINK ]: drm_atomic_helper_connector_dpms+0xa6/0x230 [ERET ]: drm_atomic_helper_connector_dpms+0xa4/0x230 [STAT32]: 0x00000846 : K DE E2 E1 BTA: 0x8016d949 SP: 0x9a033e34 FP: 0x00000000 LPS: 0x8036f6fc LPE: 0x8036f700 LPC: 0x00000000 r00: 0x8063c118 r01: 0x805b98ac r02: 0x00000b11 r03: 0x00000000 r04: 0x9a010f54 r05: 0x00000000 r06: 0x00000001 r07: 0x00000000 r08: 0x00000028 r09: 0x00000001 r10: 0x00000007 r11: 0x00000054 r12: 0x720a3033 Stack Trace: drm_atomic_helper_connector_dpms+0xa4/0x230 arcpgu_drm_hdmi_init+0xbc/0x228 arcpgu_probe+0x168/0x244 platform_drv_probe+0x26/0x64 really_probe+0x1f0/0x32c __driver_attach+0xa8/0xd0 bus_for_each_dev+0x3c/0x74 bus_add_driver+0xc2/0x184 driver_register+0x50/0xec do_one_initcall+0x3a/0x120 kernel_init_freeable+0x108/0x1a0 ------------------------------------->8-------------------------------- Fix ARC PGU driver to be able work with drm bridge hdmi encoder interface. The hdmi connector code isn't needed anymore as we expect the adv7511 bridge driver to create/manage the connector. Signed-off-by: Eugeniy Paltsev Reviewed-by: Archit Taneja Signed-off-by: Alexey Brodkin --- drivers/gpu/drm/arc/arcpgu_hdmi.c | 159 ++++-------------------------- 1 file changed, 17 insertions(+), 142 deletions(-) diff --git a/drivers/gpu/drm/arc/arcpgu_hdmi.c b/drivers/gpu/drm/arc/arcpgu_hdmi.c index b7a8b2ac4055..b69c66b4897e 100644 --- a/drivers/gpu/drm/arc/arcpgu_hdmi.c +++ b/drivers/gpu/drm/arc/arcpgu_hdmi.c @@ -14,170 +14,45 @@ * */ -#include +#include #include -#include #include "arcpgu.h" -struct arcpgu_drm_connector { - struct drm_connector connector; - struct drm_encoder_slave *encoder_slave; -}; - -static int arcpgu_drm_connector_get_modes(struct drm_connector *connector) -{ - const struct drm_encoder_slave_funcs *sfuncs; - struct drm_encoder_slave *slave; - struct arcpgu_drm_connector *con = - container_of(connector, struct arcpgu_drm_connector, connector); - - slave = con->encoder_slave; - if (slave == NULL) { - dev_err(connector->dev->dev, - "connector_get_modes: cannot find slave encoder for connector\n"); - return 0; - } - - sfuncs = slave->slave_funcs; - if (sfuncs->get_modes == NULL) - return 0; - - return sfuncs->get_modes(&slave->base, connector); -} - -static enum drm_connector_status -arcpgu_drm_connector_detect(struct drm_connector *connector, bool force) -{ - enum drm_connector_status status = connector_status_unknown; - const struct drm_encoder_slave_funcs *sfuncs; - struct drm_encoder_slave *slave; - - struct arcpgu_drm_connector *con = - container_of(connector, struct arcpgu_drm_connector, connector); - - slave = con->encoder_slave; - if (slave == NULL) { - dev_err(connector->dev->dev, - "connector_detect: cannot find slave encoder for connector\n"); - return status; - } - - sfuncs = slave->slave_funcs; - if (sfuncs && sfuncs->detect) - return sfuncs->detect(&slave->base, connector); - - dev_err(connector->dev->dev, "connector_detect: could not detect slave funcs\n"); - return status; -} - -static void arcpgu_drm_connector_destroy(struct drm_connector *connector) -{ - drm_connector_unregister(connector); - drm_connector_cleanup(connector); -} - -static const struct drm_connector_helper_funcs -arcpgu_drm_connector_helper_funcs = { - .get_modes = arcpgu_drm_connector_get_modes, -}; - -static const struct drm_connector_funcs arcpgu_drm_connector_funcs = { - .dpms = drm_helper_connector_dpms, - .reset = drm_atomic_helper_connector_reset, - .detect = arcpgu_drm_connector_detect, - .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = arcpgu_drm_connector_destroy, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, -}; - -static struct drm_encoder_helper_funcs arcpgu_drm_encoder_helper_funcs = { - .dpms = drm_i2c_encoder_dpms, - .mode_fixup = drm_i2c_encoder_mode_fixup, - .mode_set = drm_i2c_encoder_mode_set, - .prepare = drm_i2c_encoder_prepare, - .commit = drm_i2c_encoder_commit, - .detect = drm_i2c_encoder_detect, -}; - static struct drm_encoder_funcs arcpgu_drm_encoder_funcs = { .destroy = drm_encoder_cleanup, }; int arcpgu_drm_hdmi_init(struct drm_device *drm, struct device_node *np) { - struct arcpgu_drm_connector *arcpgu_connector; - struct drm_i2c_encoder_driver *driver; - struct drm_encoder_slave *encoder; - struct drm_connector *connector; - struct i2c_client *i2c_slave; - int ret; + struct drm_encoder *encoder; + struct drm_bridge *bridge; + + int ret = 0; encoder = devm_kzalloc(drm->dev, sizeof(*encoder), GFP_KERNEL); if (encoder == NULL) return -ENOMEM; - i2c_slave = of_find_i2c_device_by_node(np); - if (!i2c_slave || !i2c_get_clientdata(i2c_slave)) { - dev_err(drm->dev, "failed to find i2c slave encoder\n"); + /* Locate drm bridge from the hdmi encoder DT node */ + bridge = of_drm_find_bridge(np); + if (!bridge) return -EPROBE_DEFER; - } - if (i2c_slave->dev.driver == NULL) { - dev_err(drm->dev, "failed to find i2c slave driver\n"); - return -EPROBE_DEFER; - } - - driver = - to_drm_i2c_encoder_driver(to_i2c_driver(i2c_slave->dev.driver)); - ret = driver->encoder_init(i2c_slave, drm, encoder); - if (ret) { - dev_err(drm->dev, "failed to initialize i2c encoder slave\n"); - return ret; - } - - encoder->base.possible_crtcs = 1; - encoder->base.possible_clones = 0; - ret = drm_encoder_init(drm, &encoder->base, &arcpgu_drm_encoder_funcs, + encoder->possible_crtcs = 1; + encoder->possible_clones = 0; + ret = drm_encoder_init(drm, encoder, &arcpgu_drm_encoder_funcs, DRM_MODE_ENCODER_TMDS, NULL); if (ret) return ret; - drm_encoder_helper_add(&encoder->base, - &arcpgu_drm_encoder_helper_funcs); + /* Link drm_bridge to encoder */ + bridge->encoder = encoder; + encoder->bridge = bridge; - arcpgu_connector = devm_kzalloc(drm->dev, sizeof(*arcpgu_connector), - GFP_KERNEL); - if (!arcpgu_connector) { - ret = -ENOMEM; - goto error_encoder_cleanup; - } + ret = drm_bridge_attach(drm, bridge); + if (ret) + drm_encoder_cleanup(encoder); - connector = &arcpgu_connector->connector; - drm_connector_helper_add(connector, &arcpgu_drm_connector_helper_funcs); - ret = drm_connector_init(drm, connector, &arcpgu_drm_connector_funcs, - DRM_MODE_CONNECTOR_HDMIA); - if (ret < 0) { - dev_err(drm->dev, "failed to initialize drm connector\n"); - goto error_encoder_cleanup; - } - - ret = drm_mode_connector_attach_encoder(connector, &encoder->base); - if (ret < 0) { - dev_err(drm->dev, "could not attach connector to encoder\n"); - drm_connector_unregister(connector); - goto error_connector_cleanup; - } - - arcpgu_connector->encoder_slave = encoder; - - return 0; - -error_connector_cleanup: - drm_connector_cleanup(connector); - -error_encoder_cleanup: - drm_encoder_cleanup(&encoder->base); return ret; } From d786810b2f896854506e7b698a137f074942e410 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 8 Nov 2016 13:54:41 -0500 Subject: [PATCH 158/305] perf/x86/intel/uncore: Add more Intel uncore IMC PCI IDs for SkyLake Several uncore IMC PCI IDs are missed for Intel SkyLake. Add the PCI IDs for SkyLake Y, U, H and S platforms. Rename the ID macros for 0x191f and 0x190c. The corresponding bug: https://bugzilla.kernel.org/show_bug.cgi?id=187301 The related datasheets are also attached in the bug entry for permanent reference. Reported-by: Ben Widawsky Tested-by: Ben Widawsky Signed-off-by: Kan Liang Reviewed-by: Ben Widawsky Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1478631281-5061-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snb.c | 32 ++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 5f845eef9a4d..81195cca7eae 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -8,8 +8,12 @@ #define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 #define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 -#define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f -#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x190c +#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x1904 +#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC 0x190c +#define PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900 +#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 +#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f +#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -616,13 +620,29 @@ static const struct pci_device_id bdw_uncore_pci_ids[] = { static const struct pci_device_id skl_uncore_pci_ids[] = { { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_Y_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, { /* IMC */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -666,8 +686,12 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */ IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */ - IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */ + IMC_DEV(SKL_Y_IMC, &skl_uncore_pci_driver), /* 6th Gen Core Y */ IMC_DEV(SKL_U_IMC, &skl_uncore_pci_driver), /* 6th Gen Core U */ + IMC_DEV(SKL_HD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Dual Core */ + IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */ + IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */ + IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */ { /* end marker */ } }; From 48004881f6935704e5e4ffaf9e0ec921a25db243 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 7 Nov 2016 16:52:04 +0000 Subject: [PATCH 159/305] drm/i915: Mark CPU cache as dirty when used for rendering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On LLC, or even snooped, machines rendering via the GPU ends up in the CPU cache. This cacheline dirt also needs to be flushed to main memory when moving to an incoherent domain, such as the display's scanout engine. Mostly, this happens because either the object is marked as dirty from its first use or is avoided by setting the object into the display domain from the start. v2: Treat WT as not requiring a clflush prior to use on the display engine as well. Fixes: 0f71979ab7fb ("drm/i915: Performed deferred clflush inside set-cache-level") References: https://bugs.freedesktop.org/show_bug.cgi?id=95414 Signed-off-by: Chris Wilson Cc: Jani Nikula Cc: Ville Syrjälä Cc: # v4.0+ Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20161107165204.7008-1-chris@chris-wilson.co.uk (cherry picked from commit 7aa6ca61ee5546d74b76610894924cdb0d4a1af0) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 7adb4c77cc7f..a218c2e395e7 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1281,6 +1281,12 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, return ctx; } +static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) +{ + return !(obj->cache_level == I915_CACHE_NONE || + obj->cache_level == I915_CACHE_WT); +} + void i915_vma_move_to_active(struct i915_vma *vma, struct drm_i915_gem_request *req, unsigned int flags) @@ -1311,6 +1317,8 @@ void i915_vma_move_to_active(struct i915_vma *vma, /* update for the implicit flush after a batch */ obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; + if (!obj->cache_dirty && gpu_write_needs_clflush(obj)) + obj->cache_dirty = true; } if (flags & EXEC_OBJECT_NEEDS_FENCE) From 9f1a7ab260300c670608a9db861187069f8b179a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 7 Nov 2016 22:20:55 +0200 Subject: [PATCH 160/305] drm/i915: Grab the rotation from the passed plane state for VLV sprites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the passed in plane_state instead of plane->state in vlv_update_plane(). Currently the two are one and the same, but if we start queuing up multiple plane updates they might not be. Looks like this was rebase fail on my part. Cc: Daniel Vetter Fixes: 8d0deca8c6e0 ("drm/i915: Pass 90/270 vs. 0/180 rotation info for intel_gen4_compute_page_offset()") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1478550057-24864-4-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit 11df4d95b3ad9e6a6a6e0907bb200610a4d24887) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_sprite.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 73a521fdf1bd..dbed12c484c9 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -358,7 +358,7 @@ vlv_update_plane(struct drm_plane *dplane, int plane = intel_plane->plane; u32 sprctl; u32 sprsurf_offset, linear_offset; - unsigned int rotation = dplane->state->rotation; + unsigned int rotation = plane_state->base.rotation; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; From fc22b787890f9f9067fd130feec42297a4ee62ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Oct 2016 16:44:38 +0300 Subject: [PATCH 161/305] drm/i915: Refresh that status of MST capable connectors in ->detect() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once we've determined that the sink is MST capable we never end up running through the full detect cycle again, despite getting HPDs. Fix tht by ripping out the incorrect piece of code responsible. This got broken when I moved the long HPD handling to the ->detect() hook, but failed to remove the leftover code. Cc: Ander Conselvan de Oliveira Cc: drm-intel-fixes@lists.freedesktop.org Cc: Rui Tiago Matos Tested-by: Rui Tiago Matos Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98323 Cc: Kirill A. Shutemov Tested-by: Kirill A. Shutemov References: https://bugs.freedesktop.org/show_bug.cgi?id=98306 Fixes: 1015811609c0 ("drm/i915: Move long hpd handling into the hotplug work") Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477057478-29328-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit 1aab956c7b8872fb6976328316bfad62c6e67cf8) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dp.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 3581b5a7f716..bf344d08356a 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4463,21 +4463,11 @@ static enum drm_connector_status intel_dp_detect(struct drm_connector *connector, bool force) { struct intel_dp *intel_dp = intel_attached_dp(connector); - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; enum drm_connector_status status = connector->status; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); - if (intel_dp->is_mst) { - /* MST devices are disconnected from a monitor POV */ - intel_dp_unset_edid(intel_dp); - if (intel_encoder->type != INTEL_OUTPUT_EDP) - intel_encoder->type = INTEL_OUTPUT_DP; - return connector_status_disconnected; - } - /* If full detect is not performed yet, do a full detect */ if (!intel_dp->detect_done) status = intel_dp_long_pulse(intel_dp->attached_connector); From 9a2541910dc7eaaa6859eea8a0ffda673059a623 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 11 Nov 2016 12:33:20 +0100 Subject: [PATCH 162/305] ALSA: hda - Fix mic regression by ASRock mobo fixup The commit [1a3f099101b8: ALSA: hda - Fix surround output pins for ASRock B150M mobo] introduced a fixup of pin configs for ASRock mobos to fix the surround outputs. However, this overrides the pin configs of the mic pins as if they are outputs-only, effectively disabling the mic inputs. Of course, it's a regression wrt mic functionality. Actually the pins 0x18 and 0x1a don't need to be changed; we just need to disable the bogus pins 0x14 and 0x15. Then the auto-parser will pick up mic pins as switchable and assign the surround outputs there. This patch removes the incorrect pin overrides of NID 0x18 and 0x1a from the ASRock fixup. Fixes: 1a3f099101b8 ('ALSA: hda - Fix surround output pins for ASRock...') Reported-and-tested-by: Vitor Antunes Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=187431 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2f909dd8b7b8..ea81c08ddc7a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6907,8 +6907,6 @@ static const struct hda_fixup alc662_fixups[] = { .v.pins = (const struct hda_pintbl[]) { { 0x15, 0x40f000f0 }, /* disabled */ { 0x16, 0x40f000f0 }, /* disabled */ - { 0x18, 0x01014011 }, /* LO */ - { 0x1a, 0x01014012 }, /* LO */ { } } }, From 8e94a46c1770884166b31adc99eba7da65a446a7 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Wed, 9 Nov 2016 02:25:15 +0100 Subject: [PATCH 163/305] drm/amdgpu: Attach exclusive fence to prime exported bo's. (v5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit External clients which import our bo's wait only for exclusive dmabuf-fences, not on shared ones, ditto for bo's which we import from external providers and write to. Therefore attach exclusive fences on prime shared buffers if our exported buffer gets imported by an external client, or if we import a buffer from an external exporter. See discussion in thread: https://lists.freedesktop.org/archives/dri-devel/2016-October/122370.html Prime export tested on Intel iGPU + AMD Tonga dGPU as DRI3/Present Prime render offload, and with the Tonga standalone as primary gpu. v2: Add a wait for all shared fences before prime export, as suggested by Christian Koenig. v3: - Mark buffer prime_exported in amdgpu_gem_prime_pin, so we only use the exclusive fence when exporting a bo to external clients like a separate iGPU, but not when exporting/importing from/to ourselves as part of regular DRI3 fd passing. - Propagate failure of reservation_object_wait_rcu back to caller. v4: - Switch to a prime_shared_count counter instead of a flag, which gets in/decremented on prime_pin/unpin, so we can switch back to shared fences if all clients detach from our exported bo. - Also switch to exclusive fence for prime imported bo's. v5: - Drop lret, instead use int ret -> long ret, as proposed by Christian. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95472 Tested-by: Mike Lothian (v1) Signed-off-by: Mario Kleiner Reviewed-by: Christian König . Cc: Christian König Cc: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 20 +++++++++++++++++++- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 039b57e4644c..496f72b134eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -459,6 +459,7 @@ struct amdgpu_bo { u64 metadata_flags; void *metadata; u32 metadata_size; + unsigned prime_shared_count; /* list of all virtual address to which this bo * is associated to */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 651115dcce12..c02db01f6583 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -132,7 +132,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, entry->priority = min(info[i].bo_priority, AMDGPU_BO_LIST_MAX_PRIORITY); entry->tv.bo = &entry->robj->tbo; - entry->tv.shared = true; + entry->tv.shared = !entry->robj->prime_shared_count; if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GDS) gds_obj = entry->robj; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 7700dc22f243..3826d5aea0a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -74,20 +74,36 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, if (ret) return ERR_PTR(ret); + bo->prime_shared_count = 1; return &bo->gem_base; } int amdgpu_gem_prime_pin(struct drm_gem_object *obj) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - int ret = 0; + long ret = 0; ret = amdgpu_bo_reserve(bo, false); if (unlikely(ret != 0)) return ret; + /* + * Wait for all shared fences to complete before we switch to future + * use of exclusive fence on this prime shared bo. + */ + ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false, + MAX_SCHEDULE_TIMEOUT); + if (unlikely(ret < 0)) { + DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret); + amdgpu_bo_unreserve(bo); + return ret; + } + /* pin buffer into GTT */ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL); + if (likely(ret == 0)) + bo->prime_shared_count++; + amdgpu_bo_unreserve(bo); return ret; } @@ -102,6 +118,8 @@ void amdgpu_gem_prime_unpin(struct drm_gem_object *obj) return; amdgpu_bo_unpin(bo); + if (bo->prime_shared_count) + bo->prime_shared_count--; amdgpu_bo_unreserve(bo); } From f23ed166f283b1a6f0a1f0b0c889e8df9a10ff85 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 2 Nov 2016 17:57:01 +1100 Subject: [PATCH 164/305] powerpc/64s: Fix system reset interrupt winkle wakeups Wakeups from winkle set the low bit of the HSPRG0 register, to distinguish it from other sleep states. This is also the PACA pointer. The system reset exception handler fails to mask this bit away before using this value before using it as the PACA pointer. Fix this by adding a new type of exception prolog macro where we already have the PACA set in r13, and have the system reset vector mask it out. The winkle wakeup handler will store the masked value back into HSPRG0. Fixes: fb479e44a9e2 ("powerpc/64s: relocation, register save fixes for system reset interrupt") Cc: stable@vger.kernel.org # v3.0+ Signed-off-by: Nicholas Piggin Reviewed-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 13 +++++++++++-- arch/powerpc/kernel/exceptions-64s.S | 11 ++++++++--- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 84d49b197c32..3ce43664eadf 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -158,14 +158,17 @@ BEGIN_FTR_SECTION_NESTED(943) \ std ra,offset(r13); \ END_FTR_SECTION_NESTED(ftr,ftr,943) -#define EXCEPTION_PROLOG_0(area) \ - GET_PACA(r13); \ +#define EXCEPTION_PROLOG_0_PACA(area) \ std r9,area+EX_R9(r13); /* save r9 */ \ OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \ HMT_MEDIUM; \ std r10,area+EX_R10(r13); /* save r10 - r12 */ \ OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR) +#define EXCEPTION_PROLOG_0(area) \ + GET_PACA(r13); \ + EXCEPTION_PROLOG_0_PACA(area) + #define __EXCEPTION_PROLOG_1(area, extra, vec) \ OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \ OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \ @@ -196,6 +199,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) EXCEPTION_PROLOG_1(area, extra, vec); \ EXCEPTION_PROLOG_PSERIES_1(label, h); +/* Have the PACA in r13 already */ +#define EXCEPTION_PROLOG_PSERIES_PACA(area, label, h, extra, vec) \ + EXCEPTION_PROLOG_0_PACA(area); \ + EXCEPTION_PROLOG_1(area, extra, vec); \ + EXCEPTION_PROLOG_PSERIES_1(label, h); + #define __KVMTEST(h, n) \ lbz r10,HSTATE_IN_GUEST(r13); \ cmpwi r10,0; \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 08ba447a4b3d..1ba82ea90230 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -116,7 +116,9 @@ EXC_VIRT_NONE(0x4000, 0x4100) EXC_REAL_BEGIN(system_reset, 0x100, 0x200) SET_SCRATCH0(r13) - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, + GET_PACA(r13) + clrrdi r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */ + EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD, IDLETEST, 0x100) EXC_REAL_END(system_reset, 0x100, 0x200) @@ -124,6 +126,9 @@ EXC_VIRT_NONE(0x4100, 0x4200) #ifdef CONFIG_PPC_P7_NAP EXC_COMMON_BEGIN(system_reset_idle_common) +BEGIN_FTR_SECTION + GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */ +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) bl pnv_restore_hyp_resource li r0,PNV_THREAD_RUNNING @@ -169,7 +174,7 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x300) SET_SCRATCH0(r13) /* save r13 */ /* * Running native on arch 2.06 or later, we may wakeup from winkle - * inside machine check. If yes, then last bit of HSPGR0 would be set + * inside machine check. If yes, then last bit of HSPRG0 would be set * to 1. Hence clear it unconditionally. */ GET_PACA(r13) @@ -388,7 +393,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) /* * Go back to winkle. Please note that this thread was woken up in * machine check from winkle and have not restored the per-subcore - * state. Hence before going back to winkle, set last bit of HSPGR0 + * state. Hence before going back to winkle, set last bit of HSPRG0 * to 1. This will make sure that if this thread gets woken up * again at reset vector 0x100 then it will get chance to restore * the subcore state. From e6740ae631db02e4f3a6742e2a38ea63718d8d17 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 7 Nov 2016 22:28:21 -0800 Subject: [PATCH 165/305] powerpc: Fix exception vector build with 2.23 era binutils The changes to use gas sections for constructing the exception vectors causes a build break when using binutils 2.23: arch/powerpc/kernel/exceptions-64s.S:770: Error: operand out of range (0xffffffffffff8100 is not between 0x0000000000000000 and 0x000000000000ffff) And so on. Reported by Hugh with binutils-2.23.2-8.1.4.ppc64 from openSUSE 13.1 and also Naveen & Denis using 2.23.52.0.1-26.el7 from RHEL 7. Strangely binutils 2.22 (what I test with) is not affected. This is caused by the use of @l in LOAD_HANDLER(). The @l was only recently added in commit a24553dd02dc ("powerpc/pseries: Remove unnecessary syscall trampoline"). Luckily the gas section changes split out the LOAD_SYSCALL_HANDLER() macro, which means we actually *don't* need to use @l in LOAD_HANDLER() any more, only in LOAD_SYSCALL_HANDLER(). So drop the @l from LOAD_HANDLER(). Fixes: 57f266497d81 ("powerpc: Use gas sections for arranging exception vectors") Signed-off-by: Hugh Dickins [mpe: Add gory details to change log] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 3ce43664eadf..9a3eee661297 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -91,7 +91,7 @@ */ #define LOAD_HANDLER(reg, label) \ ld reg,PACAKBASE(r13); /* get high part of &label */ \ - ori reg,reg,(FIXED_SYMBOL_ABS_ADDR(label))@l; + ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label); #define __LOAD_HANDLER(reg, label) \ ld reg,PACAKBASE(r13); \ From 9a1f490f358e44a1cf463ba8124ca39fcc042992 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 2 Nov 2016 22:20:46 +1100 Subject: [PATCH 166/305] powerpc/oops: Fix missing pr_cont()s in show_stack() Previously we got away with printing the stack trace in multiple pieces and it usually looked right. But since commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines"), KERN_CONT is now required when printing continuation lines. Use pr_cont() as appropriate. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ce6dc61b15b2..621d9b23df72 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1900,14 +1900,14 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if ((ip == rth) && curr_frame >= 0) { - printk(" (%pS)", + pr_cont(" (%pS)", (void *)current->ret_stack[curr_frame].ret); curr_frame--; } #endif if (firstframe) - printk(" (unreliable)"); - printk("\n"); + pr_cont(" (unreliable)"); + pr_cont("\n"); } firstframe = 0; From db5ba5ae6e8d5374429212de8e20933a8a0ce52e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 2 Nov 2016 22:20:47 +1100 Subject: [PATCH 167/305] powerpc/oops: Fix missing pr_cont()s in print_msr_bits() et. al. Since the KERN_CONT changes these are being horribly split across lines, for example: MSR: 8000000000009033 < SF,EE ,ME,IR ,DR,RI ,LE> So fix it by using pr_cont() where appropriate. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 621d9b23df72..38f85d7a1e06 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1282,7 +1282,7 @@ static void print_bits(unsigned long val, struct regbit *bits, const char *sep) for (; bits->bit; ++bits) if (val & bits->bit) { - printk("%s%s", s, bits->name); + pr_cont("%s%s", s, bits->name); s = sep; } } @@ -1305,9 +1305,9 @@ static void print_tm_bits(unsigned long val) * T: Transactional (bit 34) */ if (val & (MSR_TM | MSR_TS_S | MSR_TS_T)) { - printk(",TM["); + pr_cont(",TM["); print_bits(val, msr_tm_bits, ""); - printk("]"); + pr_cont("]"); } } #else @@ -1316,10 +1316,10 @@ static void print_tm_bits(unsigned long val) {} static void print_msr_bits(unsigned long val) { - printk("<"); + pr_cont("<"); print_bits(val, msr_bits, ","); print_tm_bits(val); - printk(">"); + pr_cont(">"); } #ifdef CONFIG_PPC64 From 7dae865f5878fc0c2edfb3b9165712ef33ce03df Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 3 Nov 2016 20:45:26 +1100 Subject: [PATCH 168/305] powerpc/oops: Fix missing pr_cont()s in show_regs() Fix up our oops output by converting continuation lines to use pr_cont(). Some of these are dubious, eg. printing a continuation line which starts with a newline, but seem to work OK for now. This whole function needs a rewrite in the next release. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 38f85d7a1e06..6fe8fa481f8a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1347,29 +1347,29 @@ void show_regs(struct pt_regs * regs) printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); trap = TRAP(regs); if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) - printk("CFAR: "REG" ", regs->orig_gpr3); + pr_cont("CFAR: "REG" ", regs->orig_gpr3); if (trap == 0x200 || trap == 0x300 || trap == 0x600) #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - printk("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); + pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); #else - printk("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); + pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); #endif #ifdef CONFIG_PPC64 - printk("SOFTE: %ld ", regs->softe); + pr_cont("SOFTE: %ld ", regs->softe); #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(regs->msr)) - printk("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch); + pr_cont("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch); #endif for (i = 0; i < 32; i++) { if ((i % REGS_PER_LINE) == 0) - printk("\nGPR%02d: ", i); - printk(REG " ", regs->gpr[i]); + pr_cont("\nGPR%02d: ", i); + pr_cont(REG " ", regs->gpr[i]); if (i == LAST_VOLATILE && !FULL_REGS(regs)) break; } - printk("\n"); + pr_cont("\n"); #ifdef CONFIG_KALLSYMS /* * Lookup NIP late so we have the best change of getting the From 2ffd04dee0dacff36c03a02434965a96da032bcd Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 4 Nov 2016 17:20:40 +1100 Subject: [PATCH 169/305] powerpc/oops: Fix missing pr_cont()s in instruction dump Since the KERN_CONT changes, the current code in show_instructions() prints out a whole bunch of unnecessary newlines. Change occurrences of printk("\n") to pr_cont("\n"). While we're here, change all the other cases of printk(KERN_CONT ...) to pr_cont() as well. Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6fe8fa481f8a..49a680d5ae37 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1215,7 +1215,7 @@ static void show_instructions(struct pt_regs *regs) int instr; if (!(i % 8)) - printk("\n"); + pr_cont("\n"); #if !defined(CONFIG_BOOKE) /* If executing with the IMMU off, adjust pc rather @@ -1227,18 +1227,18 @@ static void show_instructions(struct pt_regs *regs) if (!__kernel_text_address(pc) || probe_kernel_address((unsigned int __user *)pc, instr)) { - printk(KERN_CONT "XXXXXXXX "); + pr_cont("XXXXXXXX "); } else { if (regs->nip == pc) - printk(KERN_CONT "<%08x> ", instr); + pr_cont("<%08x> ", instr); else - printk(KERN_CONT "%08x ", instr); + pr_cont("%08x ", instr); } pc += sizeof(int); } - printk("\n"); + pr_cont("\n"); } struct regbit { From 4e3264d21b90984c2165e8fe5a7b64cf25bc2c2d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 9 Nov 2016 15:36:33 -0800 Subject: [PATCH 170/305] bpf: Fix bpf_redirect to an ipip/ip6tnl dev If the bpf program calls bpf_redirect(dev, 0) and dev is an ipip/ip6tnl, it currently includes the mac header. e.g. If dev is ipip, the end result is IP-EthHdr-IP instead of IP-IP. The fix is to pull the mac header. At ingress, skb_postpull_rcsum() is not needed because the ethhdr should have been pulled once already and then got pushed back just before calling the bpf_prog. At egress, this patch calls skb_postpull_rcsum(). If bpf_redirect(dev, BPF_F_INGRESS) is called, it also fails now because it calls dev_forward_skb() which eventually calls eth_type_trans(skb, dev). The eth_type_trans() will set skb->type = PACKET_OTHERHOST because the mac address does not match the redirecting dev->dev_addr. The PACKET_OTHERHOST will eventually cause the ip_rcv() errors out. To fix this, ____dev_forward_skb() is added. Joint work with Daniel Borkmann. Fixes: cfc7381b3002 ("ip_tunnel: add collect_md mode to IPIP tunnel") Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels") Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/linux/netdevice.h | 15 +++++++++ net/core/dev.c | 17 ++++------ net/core/filter.c | 68 ++++++++++++++++++++++++++++++++++----- 3 files changed, 81 insertions(+), 19 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 91ee3643ccc8..bf04a46f6d5b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3354,6 +3354,21 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb); +static __always_inline int ____dev_forward_skb(struct net_device *dev, + struct sk_buff *skb) +{ + if (skb_orphan_frags(skb, GFP_ATOMIC) || + unlikely(!is_skb_forwardable(dev, skb))) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + + skb_scrub_packet(skb, true); + skb->priority = 0; + return 0; +} + void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); extern int netdev_budget; diff --git a/net/core/dev.c b/net/core/dev.c index eaad4c28069f..6666b28b6815 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1766,19 +1766,14 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { - if (skb_orphan_frags(skb, GFP_ATOMIC) || - unlikely(!is_skb_forwardable(dev, skb))) { - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; + int ret = ____dev_forward_skb(dev, skb); + + if (likely(!ret)) { + skb->protocol = eth_type_trans(skb, dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } - skb_scrub_packet(skb, true); - skb->priority = 0; - skb->protocol = eth_type_trans(skb, dev); - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); - - return 0; + return ret; } EXPORT_SYMBOL_GPL(__dev_forward_skb); diff --git a/net/core/filter.c b/net/core/filter.c index 00351cdf7d0c..b391209838ef 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1628,6 +1628,19 @@ static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb) return dev_forward_skb(dev, skb); } +static inline int __bpf_rx_skb_no_mac(struct net_device *dev, + struct sk_buff *skb) +{ + int ret = ____dev_forward_skb(dev, skb); + + if (likely(!ret)) { + skb->dev = dev; + ret = netif_rx(skb); + } + + return ret; +} + static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) { int ret; @@ -1647,6 +1660,51 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) return ret; } +static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev, + u32 flags) +{ + /* skb->mac_len is not set on normal egress */ + unsigned int mlen = skb->network_header - skb->mac_header; + + __skb_pull(skb, mlen); + + /* At ingress, the mac header has already been pulled once. + * At egress, skb_pospull_rcsum has to be done in case that + * the skb is originated from ingress (i.e. a forwarded skb) + * to ensure that rcsum starts at net header. + */ + if (!skb_at_tc_ingress(skb)) + skb_postpull_rcsum(skb, skb_mac_header(skb), mlen); + skb_pop_mac_header(skb); + skb_reset_mac_len(skb); + return flags & BPF_F_INGRESS ? + __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb); +} + +static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev, + u32 flags) +{ + bpf_push_mac_rcsum(skb); + return flags & BPF_F_INGRESS ? + __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); +} + +static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev, + u32 flags) +{ + switch (dev->type) { + case ARPHRD_TUNNEL: + case ARPHRD_TUNNEL6: + case ARPHRD_SIT: + case ARPHRD_IPGRE: + case ARPHRD_VOID: + case ARPHRD_NONE: + return __bpf_redirect_no_mac(skb, dev, flags); + default: + return __bpf_redirect_common(skb, dev, flags); + } +} + BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) { struct net_device *dev; @@ -1675,10 +1733,7 @@ BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) return -ENOMEM; } - bpf_push_mac_rcsum(clone); - - return flags & BPF_F_INGRESS ? - __bpf_rx_skb(dev, clone) : __bpf_tx_skb(dev, clone); + return __bpf_redirect(clone, dev, flags); } static const struct bpf_func_proto bpf_clone_redirect_proto = { @@ -1722,10 +1777,7 @@ int skb_do_redirect(struct sk_buff *skb) return -EINVAL; } - bpf_push_mac_rcsum(skb); - - return ri->flags & BPF_F_INGRESS ? - __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); + return __bpf_redirect(skb, dev, ri->flags); } static const struct bpf_func_proto bpf_redirect_proto = { From 90e02896f1a4627b14624245fbcbc19f8fd916cb Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 9 Nov 2016 15:36:34 -0800 Subject: [PATCH 171/305] bpf: Add test for bpf_redirect to ipip/ip6tnl The test creates two netns, ns1 and ns2. The host (the default netns) has an ipip or ip6tnl dev configured for tunneling traffic to the ns2. ping VIPS from ns1 <----> host <--tunnel--> ns2 (VIPs at loopback) The test is to have ns1 pinging VIPs configured at the loopback interface in ns2. The VIPs are 10.10.1.102 and 2401:face::66 (which are configured at lo@ns2). [Note: 0x66 => 102]. At ns1, the VIPs are routed _via_ the host. At the host, bpf programs are installed at the veth to redirect packets from a veth to the ipip/ip6tnl. The test is configured in a way so that both ingress and egress can be tested. At ns2, the ipip/ip6tnl dev is configured with the local and remote address specified. The return path is routed to the dev ipip/ip6tnl. During egress test, the host also locally tests pinging the VIPs to ensure that bpf_redirect at egress also works for the direct egress (i.e. not forwarding from dev ve1 to ve2). Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- samples/bpf/Makefile | 4 + samples/bpf/tc_l2_redirect.sh | 173 ++++++++++++++++++++++ samples/bpf/tc_l2_redirect_kern.c | 236 ++++++++++++++++++++++++++++++ samples/bpf/tc_l2_redirect_user.c | 73 +++++++++ 4 files changed, 486 insertions(+) create mode 100755 samples/bpf/tc_l2_redirect.sh create mode 100644 samples/bpf/tc_l2_redirect_kern.c create mode 100644 samples/bpf/tc_l2_redirect_user.c diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 12b7304d55dc..72c58675973e 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -27,6 +27,7 @@ hostprogs-y += xdp2 hostprogs-y += test_current_task_under_cgroup hostprogs-y += trace_event hostprogs-y += sampleip +hostprogs-y += tc_l2_redirect test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o @@ -56,6 +57,7 @@ test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \ test_current_task_under_cgroup_user.o trace_event-objs := bpf_load.o libbpf.o trace_event_user.o sampleip-objs := bpf_load.o libbpf.o sampleip_user.o +tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -72,6 +74,7 @@ always += test_probe_write_user_kern.o always += trace_output_kern.o always += tcbpf1_kern.o always += tcbpf2_kern.o +always += tc_l2_redirect_kern.o always += lathist_kern.o always += offwaketime_kern.o always += spintest_kern.o @@ -111,6 +114,7 @@ HOSTLOADLIBES_xdp2 += -lelf HOSTLOADLIBES_test_current_task_under_cgroup += -lelf HOSTLOADLIBES_trace_event += -lelf HOSTLOADLIBES_sampleip += -lelf +HOSTLOADLIBES_tc_l2_redirect += -l elf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/tc_l2_redirect.sh b/samples/bpf/tc_l2_redirect.sh new file mode 100755 index 000000000000..80a05591a140 --- /dev/null +++ b/samples/bpf/tc_l2_redirect.sh @@ -0,0 +1,173 @@ +#!/bin/bash + +[[ -z $TC ]] && TC='tc' +[[ -z $IP ]] && IP='ip' + +REDIRECT_USER='./tc_l2_redirect' +REDIRECT_BPF='./tc_l2_redirect_kern.o' + +RP_FILTER=$(< /proc/sys/net/ipv4/conf/all/rp_filter) +IPV6_FORWARDING=$(< /proc/sys/net/ipv6/conf/all/forwarding) + +function config_common { + local tun_type=$1 + + $IP netns add ns1 + $IP netns add ns2 + $IP link add ve1 type veth peer name vens1 + $IP link add ve2 type veth peer name vens2 + $IP link set dev ve1 up + $IP link set dev ve2 up + $IP link set dev ve1 mtu 1500 + $IP link set dev ve2 mtu 1500 + $IP link set dev vens1 netns ns1 + $IP link set dev vens2 netns ns2 + + $IP -n ns1 link set dev lo up + $IP -n ns1 link set dev vens1 up + $IP -n ns1 addr add 10.1.1.101/24 dev vens1 + $IP -n ns1 addr add 2401:db01::65/64 dev vens1 nodad + $IP -n ns1 route add default via 10.1.1.1 dev vens1 + $IP -n ns1 route add default via 2401:db01::1 dev vens1 + + $IP -n ns2 link set dev lo up + $IP -n ns2 link set dev vens2 up + $IP -n ns2 addr add 10.2.1.102/24 dev vens2 + $IP -n ns2 addr add 2401:db02::66/64 dev vens2 nodad + $IP -n ns2 addr add 10.10.1.102 dev lo + $IP -n ns2 addr add 2401:face::66/64 dev lo nodad + $IP -n ns2 link add ipt2 type ipip local 10.2.1.102 remote 10.2.1.1 + $IP -n ns2 link add ip6t2 type ip6tnl mode any local 2401:db02::66 remote 2401:db02::1 + $IP -n ns2 link set dev ipt2 up + $IP -n ns2 link set dev ip6t2 up + $IP netns exec ns2 $TC qdisc add dev vens2 clsact + $IP netns exec ns2 $TC filter add dev vens2 ingress bpf da obj $REDIRECT_BPF sec drop_non_tun_vip + if [[ $tun_type == "ipip" ]]; then + $IP -n ns2 route add 10.1.1.0/24 dev ipt2 + $IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0 + $IP netns exec ns2 sysctl -q -w net.ipv4.conf.ipt2.rp_filter=0 + else + $IP -n ns2 route add 10.1.1.0/24 dev ip6t2 + $IP -n ns2 route add 2401:db01::/64 dev ip6t2 + $IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0 + $IP netns exec ns2 sysctl -q -w net.ipv4.conf.ip6t2.rp_filter=0 + fi + + $IP addr add 10.1.1.1/24 dev ve1 + $IP addr add 2401:db01::1/64 dev ve1 nodad + $IP addr add 10.2.1.1/24 dev ve2 + $IP addr add 2401:db02::1/64 dev ve2 nodad + + $TC qdisc add dev ve2 clsact + $TC filter add dev ve2 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_forward + + sysctl -q -w net.ipv4.conf.all.rp_filter=0 + sysctl -q -w net.ipv6.conf.all.forwarding=1 +} + +function cleanup { + set +e + [[ -z $DEBUG ]] || set +x + $IP netns delete ns1 >& /dev/null + $IP netns delete ns2 >& /dev/null + $IP link del ve1 >& /dev/null + $IP link del ve2 >& /dev/null + $IP link del ipt >& /dev/null + $IP link del ip6t >& /dev/null + sysctl -q -w net.ipv4.conf.all.rp_filter=$RP_FILTER + sysctl -q -w net.ipv6.conf.all.forwarding=$IPV6_FORWARDING + rm -f /sys/fs/bpf/tc/globals/tun_iface + [[ -z $DEBUG ]] || set -x + set -e +} + +function l2_to_ipip { + echo -n "l2_to_ipip $1: " + + local dir=$1 + + config_common ipip + + $IP link add ipt type ipip external + $IP link set dev ipt up + sysctl -q -w net.ipv4.conf.ipt.rp_filter=0 + sysctl -q -w net.ipv4.conf.ipt.forwarding=1 + + if [[ $dir == "egress" ]]; then + $IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2 + $TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect + sysctl -q -w net.ipv4.conf.ve1.forwarding=1 + else + $TC qdisc add dev ve1 clsact + $TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect + fi + + $REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ipt/ifindex) + + $IP netns exec ns1 ping -c1 10.10.1.102 >& /dev/null + + if [[ $dir == "egress" ]]; then + # test direct egress to ve2 (i.e. not forwarding from + # ve1 to ve2). + ping -c1 10.10.1.102 >& /dev/null + fi + + cleanup + + echo "OK" +} + +function l2_to_ip6tnl { + echo -n "l2_to_ip6tnl $1: " + + local dir=$1 + + config_common ip6tnl + + $IP link add ip6t type ip6tnl mode any external + $IP link set dev ip6t up + sysctl -q -w net.ipv4.conf.ip6t.rp_filter=0 + sysctl -q -w net.ipv4.conf.ip6t.forwarding=1 + + if [[ $dir == "egress" ]]; then + $IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2 + $IP route add 2401:face::/64 via 2401:db02::66 dev ve2 + $TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect + sysctl -q -w net.ipv4.conf.ve1.forwarding=1 + else + $TC qdisc add dev ve1 clsact + $TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect + fi + + $REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ip6t/ifindex) + + $IP netns exec ns1 ping -c1 10.10.1.102 >& /dev/null + $IP netns exec ns1 ping -6 -c1 2401:face::66 >& /dev/null + + if [[ $dir == "egress" ]]; then + # test direct egress to ve2 (i.e. not forwarding from + # ve1 to ve2). + ping -c1 10.10.1.102 >& /dev/null + ping -6 -c1 2401:face::66 >& /dev/null + fi + + cleanup + + echo "OK" +} + +cleanup +test_names="l2_to_ipip l2_to_ip6tnl" +test_dirs="ingress egress" +if [[ $# -ge 2 ]]; then + test_names=$1 + test_dirs=$2 +elif [[ $# -ge 1 ]]; then + test_names=$1 +fi + +for t in $test_names; do + for d in $test_dirs; do + $t $d + done +done diff --git a/samples/bpf/tc_l2_redirect_kern.c b/samples/bpf/tc_l2_redirect_kern.c new file mode 100644 index 000000000000..92a44729dbe4 --- /dev/null +++ b/samples/bpf/tc_l2_redirect_kern.c @@ -0,0 +1,236 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" + +#define _htonl __builtin_bswap32 + +#define PIN_GLOBAL_NS 2 +struct bpf_elf_map { + __u32 type; + __u32 size_key; + __u32 size_value; + __u32 max_elem; + __u32 flags; + __u32 id; + __u32 pinning; +}; + +/* copy of 'struct ethhdr' without __packed */ +struct eth_hdr { + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + unsigned short h_proto; +}; + +struct bpf_elf_map SEC("maps") tun_iface = { + .type = BPF_MAP_TYPE_ARRAY, + .size_key = sizeof(int), + .size_value = sizeof(int), + .pinning = PIN_GLOBAL_NS, + .max_elem = 1, +}; + +static __always_inline bool is_vip_addr(__be16 eth_proto, __be32 daddr) +{ + if (eth_proto == htons(ETH_P_IP)) + return (_htonl(0xffffff00) & daddr) == _htonl(0x0a0a0100); + else if (eth_proto == htons(ETH_P_IPV6)) + return (daddr == _htonl(0x2401face)); + + return false; +} + +SEC("l2_to_iptun_ingress_forward") +int _l2_to_iptun_ingress_forward(struct __sk_buff *skb) +{ + struct bpf_tunnel_key tkey = {}; + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + void *data_end = (void *)(long)skb->data_end; + int key = 0, *ifindex; + + int ret; + + if (data + sizeof(*eth) > data_end) + return TC_ACT_OK; + + ifindex = bpf_map_lookup_elem(&tun_iface, &key); + if (!ifindex) + return TC_ACT_OK; + + if (eth->h_proto == htons(ETH_P_IP)) { + char fmt4[] = "ingress forward to ifindex:%d daddr4:%x\n"; + struct iphdr *iph = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*iph) > data_end) + return TC_ACT_OK; + + if (iph->protocol != IPPROTO_IPIP) + return TC_ACT_OK; + + bpf_trace_printk(fmt4, sizeof(fmt4), *ifindex, + _htonl(iph->daddr)); + return bpf_redirect(*ifindex, BPF_F_INGRESS); + } else if (eth->h_proto == htons(ETH_P_IPV6)) { + char fmt6[] = "ingress forward to ifindex:%d daddr6:%x::%x\n"; + struct ipv6hdr *ip6h = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) + return TC_ACT_OK; + + if (ip6h->nexthdr != IPPROTO_IPIP && + ip6h->nexthdr != IPPROTO_IPV6) + return TC_ACT_OK; + + bpf_trace_printk(fmt6, sizeof(fmt6), *ifindex, + _htonl(ip6h->daddr.s6_addr32[0]), + _htonl(ip6h->daddr.s6_addr32[3])); + return bpf_redirect(*ifindex, BPF_F_INGRESS); + } + + return TC_ACT_OK; +} + +SEC("l2_to_iptun_ingress_redirect") +int _l2_to_iptun_ingress_redirect(struct __sk_buff *skb) +{ + struct bpf_tunnel_key tkey = {}; + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + void *data_end = (void *)(long)skb->data_end; + int key = 0, *ifindex; + + int ret; + + if (data + sizeof(*eth) > data_end) + return TC_ACT_OK; + + ifindex = bpf_map_lookup_elem(&tun_iface, &key); + if (!ifindex) + return TC_ACT_OK; + + if (eth->h_proto == htons(ETH_P_IP)) { + char fmt4[] = "e/ingress redirect daddr4:%x to ifindex:%d\n"; + struct iphdr *iph = data + sizeof(*eth); + __be32 daddr = iph->daddr; + + if (data + sizeof(*eth) + sizeof(*iph) > data_end) + return TC_ACT_OK; + + if (!is_vip_addr(eth->h_proto, daddr)) + return TC_ACT_OK; + + bpf_trace_printk(fmt4, sizeof(fmt4), _htonl(daddr), *ifindex); + } else { + return TC_ACT_OK; + } + + tkey.tunnel_id = 10000; + tkey.tunnel_ttl = 64; + tkey.remote_ipv4 = 0x0a020166; /* 10.2.1.102 */ + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), 0); + return bpf_redirect(*ifindex, 0); +} + +SEC("l2_to_ip6tun_ingress_redirect") +int _l2_to_ip6tun_ingress_redirect(struct __sk_buff *skb) +{ + struct bpf_tunnel_key tkey = {}; + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + void *data_end = (void *)(long)skb->data_end; + int key = 0, *ifindex; + + if (data + sizeof(*eth) > data_end) + return TC_ACT_OK; + + ifindex = bpf_map_lookup_elem(&tun_iface, &key); + if (!ifindex) + return TC_ACT_OK; + + if (eth->h_proto == htons(ETH_P_IP)) { + char fmt4[] = "e/ingress redirect daddr4:%x to ifindex:%d\n"; + struct iphdr *iph = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*iph) > data_end) + return TC_ACT_OK; + + if (!is_vip_addr(eth->h_proto, iph->daddr)) + return TC_ACT_OK; + + bpf_trace_printk(fmt4, sizeof(fmt4), _htonl(iph->daddr), + *ifindex); + } else if (eth->h_proto == htons(ETH_P_IPV6)) { + char fmt6[] = "e/ingress redirect daddr6:%x to ifindex:%d\n"; + struct ipv6hdr *ip6h = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) + return TC_ACT_OK; + + if (!is_vip_addr(eth->h_proto, ip6h->daddr.s6_addr32[0])) + return TC_ACT_OK; + + bpf_trace_printk(fmt6, sizeof(fmt6), + _htonl(ip6h->daddr.s6_addr32[0]), *ifindex); + } else { + return TC_ACT_OK; + } + + tkey.tunnel_id = 10000; + tkey.tunnel_ttl = 64; + /* 2401:db02:0:0:0:0:0:66 */ + tkey.remote_ipv6[0] = _htonl(0x2401db02); + tkey.remote_ipv6[1] = 0; + tkey.remote_ipv6[2] = 0; + tkey.remote_ipv6[3] = _htonl(0x00000066); + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), BPF_F_TUNINFO_IPV6); + return bpf_redirect(*ifindex, 0); +} + +SEC("drop_non_tun_vip") +int _drop_non_tun_vip(struct __sk_buff *skb) +{ + struct bpf_tunnel_key tkey = {}; + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + void *data_end = (void *)(long)skb->data_end; + + if (data + sizeof(*eth) > data_end) + return TC_ACT_OK; + + if (eth->h_proto == htons(ETH_P_IP)) { + struct iphdr *iph = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*iph) > data_end) + return TC_ACT_OK; + + if (is_vip_addr(eth->h_proto, iph->daddr)) + return TC_ACT_SHOT; + } else if (eth->h_proto == htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) + return TC_ACT_OK; + + if (is_vip_addr(eth->h_proto, ip6h->daddr.s6_addr32[0])) + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tc_l2_redirect_user.c b/samples/bpf/tc_l2_redirect_user.c new file mode 100644 index 000000000000..4013c5337b91 --- /dev/null +++ b/samples/bpf/tc_l2_redirect_user.c @@ -0,0 +1,73 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include + +#include +#include +#include +#include +#include + +#include "libbpf.h" + +static void usage(void) +{ + printf("Usage: tc_l2_ipip_redirect [...]\n"); + printf(" -U Update an already pinned BPF array\n"); + printf(" -i Interface index\n"); + printf(" -h Display this help\n"); +} + +int main(int argc, char **argv) +{ + const char *pinned_file = NULL; + int ifindex = -1; + int array_key = 0; + int array_fd = -1; + int ret = -1; + int opt; + + while ((opt = getopt(argc, argv, "F:U:i:")) != -1) { + switch (opt) { + /* General args */ + case 'U': + pinned_file = optarg; + break; + case 'i': + ifindex = atoi(optarg); + break; + default: + usage(); + goto out; + } + } + + if (ifindex < 0 || !pinned_file) { + usage(); + goto out; + } + + array_fd = bpf_obj_get(pinned_file); + if (array_fd < 0) { + fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n", + pinned_file, strerror(errno), errno); + goto out; + } + + /* bpf_tunnel_key.remote_ipv4 expects host byte orders */ + ret = bpf_update_elem(array_fd, &array_key, &ifindex, 0); + if (ret) { + perror("bpf_update_elem"); + goto out; + } + +out: + if (array_fd != -1) + close(array_fd); + return ret; +} From 34fad54c2537f7c99d07375e50cb30aa3c23bd83 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Nov 2016 16:04:46 -0800 Subject: [PATCH 172/305] net: __skb_flow_dissect() must cap its return value After Tom patch, thoff field could point past the end of the buffer, this could fool some callers. If an skb was provided, skb->len should be the upper limit. If not, hlen is supposed to be the upper limit. Fixes: a6e544b0a88b ("flow_dissector: Jump to exit code in __skb_flow_dissect") Signed-off-by: Eric Dumazet Reported-by: Yibin Yang Acked-by: Willem de Bruijn Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index ab193e5def07..69e4463a4b1b 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -122,7 +122,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_keyid *key_keyid; bool skip_vlan = false; u8 ip_proto = 0; - bool ret = false; + bool ret; if (!data) { data = skb->data; @@ -549,12 +549,17 @@ ip_proto_again: out_good: ret = true; -out_bad: + key_control->thoff = (u16)nhoff; +out: key_basic->n_proto = proto; key_basic->ip_proto = ip_proto; - key_control->thoff = (u16)nhoff; return ret; + +out_bad: + ret = false; + key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); + goto out; } EXPORT_SYMBOL(__skb_flow_dissect); From 10b217681ddec4fa3ddb375bb188fec504523da4 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Thu, 10 Nov 2016 13:21:42 +0200 Subject: [PATCH 173/305] net: bpqether.h: remove if_ether.h guard __LINUX_IF_ETHER_H is not defined anywhere, and if_ether.h can keep itself from double inclusion, though it uses a single underscore prefix. Signed-off-by: Baruch Siach Signed-off-by: David S. Miller --- include/uapi/linux/bpqether.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/uapi/linux/bpqether.h b/include/uapi/linux/bpqether.h index a6c35e1a89ad..05865edaefda 100644 --- a/include/uapi/linux/bpqether.h +++ b/include/uapi/linux/bpqether.h @@ -5,9 +5,7 @@ * Defines for the BPQETHER pseudo device driver */ -#ifndef __LINUX_IF_ETHER_H #include -#endif #define SIOCSBPQETHOPT (SIOCDEVPRIVATE+0) /* reserved */ #define SIOCSBPQETHADDR (SIOCDEVPRIVATE+1) From 02e56902e40e4c1ff57590c717e46377b72d5966 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 12 Nov 2016 21:04:23 +0000 Subject: [PATCH 174/305] x86/efi: Fix EFI memmap pointer size warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix this when building on 32-bit: arch/x86/platform/efi/efi.c: In function ‘__efi_enter_virtual_mode’: arch/x86/platform/efi/efi.c:911:5: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] (efi_memory_desc_t *)pa); ^ arch/x86/platform/efi/efi.c:918:5: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] (efi_memory_desc_t *)pa); ^ The @pa local variable is declared as phys_addr_t and that is a u64 when CONFIG_PHYS_ADDR_T_64BIT=y. (The last is enabled on 32-bit on a PAE build.) However, its value comes from __pa() which is basically doing pointer arithmetic and checking, and returns unsigned long as it is the native pointer width. So let's use an unsigned long too. It should be fine to do so because the later users cast it to a pointer too. Signed-off-by: Borislav Petkov Signed-off-by: Matt Fleming Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20161112210424.5157-2-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index bf99aa7005eb..936a488d6cf6 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -861,7 +861,7 @@ static void __init __efi_enter_virtual_mode(void) int count = 0, pg_shift = 0; void *new_memmap = NULL; efi_status_t status; - phys_addr_t pa; + unsigned long pa; efi.systab = NULL; From f6697df36bdf0bf7fce984605c2918d4a7b4269f Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sat, 12 Nov 2016 21:04:24 +0000 Subject: [PATCH 175/305] x86/efi: Prevent mixed mode boot corruption with CONFIG_VMAP_STACK=y Booting an EFI mixed mode kernel has been crashing since commit: e37e43a497d5 ("x86/mm/64: Enable vmapped stacks (CONFIG_HAVE_ARCH_VMAP_STACK=y)") The user-visible effect in my test setup was the kernel being unable to find the root file system ramdisk. This was likely caused by silent memory or page table corruption. Enabling CONFIG_DEBUG_VIRTUAL=y immediately flagged the thunking code as abusing virt_to_phys() because it was passing addresses that were not part of the kernel direct mapping. Use the slow version instead, which correctly handles all memory regions by performing a page table walk. Suggested-by: Andy Lutomirski Signed-off-by: Matt Fleming Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20161112210424.5157-3-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi_64.c | 80 ++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 23 deletions(-) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 58b0f801f66f..319148bd4b05 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -211,6 +212,35 @@ void efi_sync_low_kernel_mappings(void) memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); } +/* + * Wrapper for slow_virt_to_phys() that handles NULL addresses. + */ +static inline phys_addr_t +virt_to_phys_or_null_size(void *va, unsigned long size) +{ + bool bad_size; + + if (!va) + return 0; + + if (virt_addr_valid(va)) + return virt_to_phys(va); + + /* + * A fully aligned variable on the stack is guaranteed not to + * cross a page bounary. Try to catch strings on the stack by + * checking that 'size' is a power of two. + */ + bad_size = size > PAGE_SIZE || !is_power_of_2(size); + + WARN_ON(!IS_ALIGNED((unsigned long)va, size) || bad_size); + + return slow_virt_to_phys(va); +} + +#define virt_to_phys_or_null(addr) \ + virt_to_phys_or_null_size((addr), sizeof(*(addr))) + int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { unsigned long pfn, text; @@ -494,8 +524,8 @@ static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc) spin_lock(&rtc_lock); - phys_tm = virt_to_phys(tm); - phys_tc = virt_to_phys(tc); + phys_tm = virt_to_phys_or_null(tm); + phys_tc = virt_to_phys_or_null(tc); status = efi_thunk(get_time, phys_tm, phys_tc); @@ -511,7 +541,7 @@ static efi_status_t efi_thunk_set_time(efi_time_t *tm) spin_lock(&rtc_lock); - phys_tm = virt_to_phys(tm); + phys_tm = virt_to_phys_or_null(tm); status = efi_thunk(set_time, phys_tm); @@ -529,9 +559,9 @@ efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, spin_lock(&rtc_lock); - phys_enabled = virt_to_phys(enabled); - phys_pending = virt_to_phys(pending); - phys_tm = virt_to_phys(tm); + phys_enabled = virt_to_phys_or_null(enabled); + phys_pending = virt_to_phys_or_null(pending); + phys_tm = virt_to_phys_or_null(tm); status = efi_thunk(get_wakeup_time, phys_enabled, phys_pending, phys_tm); @@ -549,7 +579,7 @@ efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) spin_lock(&rtc_lock); - phys_tm = virt_to_phys(tm); + phys_tm = virt_to_phys_or_null(tm); status = efi_thunk(set_wakeup_time, enabled, phys_tm); @@ -558,6 +588,10 @@ efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) return status; } +static unsigned long efi_name_size(efi_char16_t *name) +{ + return ucs2_strsize(name, EFI_VAR_NAME_LEN) + 1; +} static efi_status_t efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, @@ -567,11 +601,11 @@ efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, u32 phys_name, phys_vendor, phys_attr; u32 phys_data_size, phys_data; - phys_data_size = virt_to_phys(data_size); - phys_vendor = virt_to_phys(vendor); - phys_name = virt_to_phys(name); - phys_attr = virt_to_phys(attr); - phys_data = virt_to_phys(data); + phys_data_size = virt_to_phys_or_null(data_size); + phys_vendor = virt_to_phys_or_null(vendor); + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); + phys_attr = virt_to_phys_or_null(attr); + phys_data = virt_to_phys_or_null_size(data, *data_size); status = efi_thunk(get_variable, phys_name, phys_vendor, phys_attr, phys_data_size, phys_data); @@ -586,9 +620,9 @@ efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor, u32 phys_name, phys_vendor, phys_data; efi_status_t status; - phys_name = virt_to_phys(name); - phys_vendor = virt_to_phys(vendor); - phys_data = virt_to_phys(data); + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); + phys_vendor = virt_to_phys_or_null(vendor); + phys_data = virt_to_phys_or_null_size(data, data_size); /* If data_size is > sizeof(u32) we've got problems */ status = efi_thunk(set_variable, phys_name, phys_vendor, @@ -605,9 +639,9 @@ efi_thunk_get_next_variable(unsigned long *name_size, efi_status_t status; u32 phys_name_size, phys_name, phys_vendor; - phys_name_size = virt_to_phys(name_size); - phys_vendor = virt_to_phys(vendor); - phys_name = virt_to_phys(name); + phys_name_size = virt_to_phys_or_null(name_size); + phys_vendor = virt_to_phys_or_null(vendor); + phys_name = virt_to_phys_or_null_size(name, *name_size); status = efi_thunk(get_next_variable, phys_name_size, phys_name, phys_vendor); @@ -621,7 +655,7 @@ efi_thunk_get_next_high_mono_count(u32 *count) efi_status_t status; u32 phys_count; - phys_count = virt_to_phys(count); + phys_count = virt_to_phys_or_null(count); status = efi_thunk(get_next_high_mono_count, phys_count); return status; @@ -633,7 +667,7 @@ efi_thunk_reset_system(int reset_type, efi_status_t status, { u32 phys_data; - phys_data = virt_to_phys(data); + phys_data = virt_to_phys_or_null_size(data, data_size); efi_thunk(reset_system, reset_type, status, data_size, phys_data); } @@ -661,9 +695,9 @@ efi_thunk_query_variable_info(u32 attr, u64 *storage_space, if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - phys_storage = virt_to_phys(storage_space); - phys_remaining = virt_to_phys(remaining_space); - phys_max = virt_to_phys(max_variable_size); + phys_storage = virt_to_phys_or_null(storage_space); + phys_remaining = virt_to_phys_or_null(remaining_space); + phys_max = virt_to_phys_or_null(max_variable_size); status = efi_thunk(query_variable_info, attr, phys_storage, phys_remaining, phys_max); From 83d2c9a9c17b1e9f23a3a0c24c03cd18e4b02520 Mon Sep 17 00:00:00 2001 From: Sven Ebenfeld Date: Mon, 7 Nov 2016 18:51:34 +0100 Subject: [PATCH 176/305] crypto: caam - do not register AES-XTS mode on LP units MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using AES-XTS on a Wandboard, we receive a Mode error: caam_jr 2102000.jr1: 20001311: CCB: desc idx 19: AES: Mode error. According to the Security Reference Manual, the Low Power AES units of the i.MX6 do not support the XTS mode. Therefore we must not register XTS implementations in the Crypto API. Signed-off-by: Sven Ebenfeld Reviewed-by: Horia Geantă Cc: # 4.4+ Fixes: c6415a6016bf "crypto: caam - add support for acipher xts(aes)" Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 156aad167cd6..f5a63ba97023 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -4583,6 +4583,15 @@ static int __init caam_algapi_init(void) if (!aes_inst && (alg_sel == OP_ALG_ALGSEL_AES)) continue; + /* + * Check support for AES modes not available + * on LP devices. + */ + if ((cha_vid & CHA_ID_LS_AES_MASK) == CHA_ID_LS_AES_LP) + if ((alg->class1_alg_type & OP_ALG_AAI_MASK) == + OP_ALG_AAI_XTS) + continue; + t_alg = caam_alg_alloc(alg); if (IS_ERR(t_alg)) { err = PTR_ERR(t_alg); From ca0a75316dc62af92943761b1cc049e15c92eb09 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 9 Nov 2016 19:51:25 -0800 Subject: [PATCH 177/305] r8152: Fix error path in open function If usb_submit_urb() called from the open function fails, the following crash may be observed. r8152 8-1:1.0 eth0: intr_urb submit failed: -19 ... r8152 8-1:1.0 eth0: v1.08.3 Unable to handle kernel paging request at virtual address 6b6b6b6b6b6b6b7b pgd = ffffffc0e7305000 [6b6b6b6b6b6b6b7b] *pgd=0000000000000000, *pud=0000000000000000 Internal error: Oops: 96000004 [#1] PREEMPT SMP ... PC is at notifier_chain_register+0x2c/0x58 LR is at blocking_notifier_chain_register+0x54/0x70 ... Call trace: [] notifier_chain_register+0x2c/0x58 [] blocking_notifier_chain_register+0x54/0x70 [] register_pm_notifier+0x24/0x2c [] rtl8152_open+0x3dc/0x3f8 [r8152] [] __dev_open+0xac/0x104 [] __dev_change_flags+0xb0/0x148 [] dev_change_flags+0x34/0x70 [] do_setlink+0x2c8/0x888 [] rtnl_newlink+0x328/0x644 [] rtnetlink_rcv_msg+0x1a8/0x1d4 [] netlink_rcv_skb+0x68/0xd0 [] rtnetlink_rcv+0x2c/0x3c [] netlink_unicast+0x16c/0x234 [] netlink_sendmsg+0x340/0x364 [] sock_sendmsg+0x48/0x60 [] SyS_sendto+0xe0/0x120 [] SyS_send+0x40/0x4c [] el0_svc_naked+0x24/0x28 Clean up error handling to avoid registering the notifier if the open function is going to fail. Signed-off-by: Guenter Roeck Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 75c516889645..efb84f092492 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3266,10 +3266,8 @@ static int rtl8152_open(struct net_device *netdev) goto out; res = usb_autopm_get_interface(tp->intf); - if (res < 0) { - free_all_mem(tp); - goto out; - } + if (res < 0) + goto out_free; mutex_lock(&tp->control); @@ -3285,10 +3283,9 @@ static int rtl8152_open(struct net_device *netdev) netif_device_detach(tp->netdev); netif_warn(tp, ifup, netdev, "intr_urb submit failed: %d\n", res); - free_all_mem(tp); - } else { - napi_enable(&tp->napi); + goto out_unlock; } + napi_enable(&tp->napi); mutex_unlock(&tp->control); @@ -3297,7 +3294,13 @@ static int rtl8152_open(struct net_device *netdev) tp->pm_notifier.notifier_call = rtl_notifier; register_pm_notifier(&tp->pm_notifier); #endif + return 0; +out_unlock: + mutex_unlock(&tp->control); + usb_autopm_put_interface(tp->intf); +out_free: + free_all_mem(tp); out: return res; } From 969447f226b451c453ddc83cac6144eaeac6f2e3 Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Lin Date: Thu, 10 Nov 2016 11:16:15 -0500 Subject: [PATCH 178/305] ipv4: use new_gw for redirect neigh lookup In v2.6, ip_rt_redirect() calls arp_bind_neighbour() which returns 0 and then the state of the neigh for the new_gw is checked. If the state isn't valid then the redirected route is deleted. This behavior is maintained up to v3.5.7 by check_peer_redirect() because rt->rt_gateway is assigned to peer->redirect_learned.a4 before calling ipv4_neigh_lookup(). After commit 5943634fc559 ("ipv4: Maintain redirect and PMTU info in struct rtable again."), ipv4_neigh_lookup() is performed without the rt_gateway assigned to the new_gw. In the case when rt_gateway (old_gw) isn't zero, the function uses it as the key. The neigh is most likely valid since the old_gw is the one that sends the ICMP redirect message. Then the new_gw is assigned to fib_nh_exception. The problem is: the new_gw ARP may never gets resolved and the traffic is blackholed. So, use the new_gw for neigh lookup. Changes from v1: - use __ipv4_neigh_lookup instead (per Eric Dumazet). Fixes: 5943634fc559 ("ipv4: Maintain redirect and PMTU info in struct rtable again.") Signed-off-by: Stephen Suryaputra Lin Signed-off-by: David S. Miller --- net/ipv4/route.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 62d4d90c1389..2a57566e6e91 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -753,7 +753,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow goto reject_redirect; } - n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); + n = __ipv4_neigh_lookup(rt->dst.dev, new_gw); + if (!n) + n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); if (!IS_ERR(n)) { if (!(n->nud_state & NUD_VALID)) { neigh_event_send(n, NULL); From ac6e780070e30e4c35bd395acfe9191e6268bdd3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Nov 2016 13:12:35 -0800 Subject: [PATCH 179/305] tcp: take care of truncations done by sk_filter() With syzkaller help, Marco Grassi found a bug in TCP stack, crashing in tcp_collapse() Root cause is that sk_filter() can truncate the incoming skb, but TCP stack was not really expecting this to happen. It probably was expecting a simple DROP or ACCEPT behavior. We first need to make sure no part of TCP header could be removed. Then we need to adjust TCP_SKB_CB(skb)->end_seq Many thanks to syzkaller team and Marco for giving us a reproducer. Signed-off-by: Eric Dumazet Reported-by: Marco Grassi Reported-by: Vladis Dronov Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 19 ++++++++++++++++++- net/ipv6/tcp_ipv6.c | 6 ++++-- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 304a8e17bc87..123979fe12bf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1220,6 +1220,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp) bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); +int tcp_filter(struct sock *sk, struct sk_buff *skb); #undef STATE_TRACE diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 61b7be303eec..2259114c7242 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1564,6 +1564,21 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_add_backlog); +int tcp_filter(struct sock *sk, struct sk_buff *skb) +{ + struct tcphdr *th = (struct tcphdr *)skb->data; + unsigned int eaten = skb->len; + int err; + + err = sk_filter_trim_cap(sk, skb, th->doff * 4); + if (!err) { + eaten -= skb->len; + TCP_SKB_CB(skb)->end_seq -= eaten; + } + return err; +} +EXPORT_SYMBOL(tcp_filter); + /* * From tcp_input.c */ @@ -1676,8 +1691,10 @@ process: nf_reset(skb); - if (sk_filter(sk, skb)) + if (tcp_filter(sk, skb)) goto discard_and_relse; + th = (const struct tcphdr *)skb->data; + iph = ip_hdr(skb); skb->dev = NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6ca23c2e76f7..b9f1fee9a886 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1229,7 +1229,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); - if (sk_filter(sk, skb)) + if (tcp_filter(sk, skb)) goto discard; /* @@ -1457,8 +1457,10 @@ process: if (tcp_v6_inbound_md5_hash(sk, skb)) goto discard_and_relse; - if (sk_filter(sk, skb)) + if (tcp_filter(sk, skb)) goto discard_and_relse; + th = (const struct tcphdr *)skb->data; + hdr = ipv6_hdr(skb); skb->dev = NULL; From 7b5b74efcca00f15c2aec1dc7175bfe34b6ec643 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Thu, 10 Nov 2016 19:08:39 -0500 Subject: [PATCH 180/305] Revert "include/uapi/linux/atm_zatm.h: include linux/time.h" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit cf00713a655d ("include/uapi/linux/atm_zatm.h: include linux/time.h"). This attempted to fix userspace breakage that no longer existed when the patch was merged. Almost one year earlier, commit 70ba07b675b5 ("atm: remove 'struct zatm_t_hist'") deleted the struct in question. After this patch was merged, we now have to deal with people being unable to include this header in conjunction with standard C library headers like stdlib.h (which linux-atm does). Example breakage: x86_64-pc-linux-gnu-gcc -DHAVE_CONFIG_H -I. -I../.. -I./../q2931 -I./../saal \ -I. -DCPPFLAGS_TEST -I../../src/include -O2 -march=native -pipe -g \ -frecord-gcc-switches -freport-bug -Wimplicit-function-declaration \ -Wnonnull -Wstrict-aliasing -Wparentheses -Warray-bounds \ -Wfree-nonheap-object -Wreturn-local-addr -fno-strict-aliasing -Wall \ -Wshadow -Wpointer-arith -Wwrite-strings -Wstrict-prototypes -c zntune.c In file included from /usr/include/linux/atm_zatm.h:17:0, from zntune.c:17: /usr/include/linux/time.h:9:8: error: redefinition of ‘struct timespec’ struct timespec { ^ In file included from /usr/include/sys/select.h:43:0, from /usr/include/sys/types.h:219, from /usr/include/stdlib.h:314, from zntune.c:9: /usr/include/time.h:120:8: note: originally defined here struct timespec ^ Signed-off-by: Mike Frysinger Acked-by: Mikko Rapeli Signed-off-by: David S. Miller --- include/uapi/linux/atm_zatm.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/uapi/linux/atm_zatm.h b/include/uapi/linux/atm_zatm.h index 5cd4d4d2dd1d..9c9c6ad55f14 100644 --- a/include/uapi/linux/atm_zatm.h +++ b/include/uapi/linux/atm_zatm.h @@ -14,7 +14,6 @@ #include #include -#include #define ZATM_GETPOOL _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc) /* get pool statistics */ From 3ffb6a39b751b635a0c50b650064c38b8d371ef2 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 11 Nov 2016 00:11:42 -0500 Subject: [PATCH 181/305] bnxt_en: Fix ring arithmetic in bnxt_setup_tc(). The logic is missing the check on whether the tx and rx rings are sharing completion rings or not. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index a9f9f3738022..c6909660e097 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6309,6 +6309,7 @@ static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto, struct tc_to_netdev *ntc) { struct bnxt *bp = netdev_priv(dev); + bool sh = false; u8 tc; if (ntc->type != TC_SETUP_MQPRIO) @@ -6325,12 +6326,11 @@ static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto, if (netdev_get_num_tc(dev) == tc) return 0; + if (bp->flags & BNXT_FLAG_SHARED_RINGS) + sh = true; + if (tc) { int max_rx_rings, max_tx_rings, rc; - bool sh = false; - - if (bp->flags & BNXT_FLAG_SHARED_RINGS) - sh = true; rc = bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, sh); if (rc || bp->tx_nr_rings_per_tc * tc > max_tx_rings) @@ -6348,7 +6348,8 @@ static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto, bp->tx_nr_rings = bp->tx_nr_rings_per_tc; netdev_reset_tc(dev); } - bp->cp_nr_rings = max_t(int, bp->tx_nr_rings, bp->rx_nr_rings); + bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) : + bp->tx_nr_rings + bp->rx_nr_rings; bp->num_stat_ctxs = bp->cp_nr_rings; if (netif_running(bp->dev)) From 73b9bad63ae3c902ce64221d10a0d371d059748d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 11 Nov 2016 00:11:43 -0500 Subject: [PATCH 182/305] bnxt_en: Fix VF virtual link state. If the physical link is down and the VF virtual link is set to "enable", the current code does not always work. If the link is down but the cable is attached, the firmware returns LINK_SIGNAL instead of NO_LINK. The current code is treating LINK_SIGNAL as link up. The fix is to treat link as down when the link_status != LINK. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index ec6cd18842c3..60e2af8678bd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -774,8 +774,8 @@ static int bnxt_vf_set_link(struct bnxt *bp, struct bnxt_vf_info *vf) if (vf->flags & BNXT_VF_LINK_UP) { /* if physical link is down, force link up on VF */ - if (phy_qcfg_resp.link == - PORT_PHY_QCFG_RESP_LINK_NO_LINK) { + if (phy_qcfg_resp.link != + PORT_PHY_QCFG_RESP_LINK_LINK) { phy_qcfg_resp.link = PORT_PHY_QCFG_RESP_LINK_LINK; phy_qcfg_resp.link_speed = cpu_to_le16( From 2d644d4c7506646f9c4a2afceb7fd5f030bc0c9f Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Fri, 11 Nov 2016 16:34:25 +0100 Subject: [PATCH 183/305] mlxsw: spectrum: Fix refcount bug on span entries When binding port to a newly created span entry, its refcount is initialized to zero even though it has a bound port. That leads to unexpected behaviour when the user tries to delete that port from the span entry. Fix this by initializing the reference count to 1. Also add a warning to put function. Fixes: 763b4b70afcd ("mlxsw: spectrum: Add support in matchall mirror TC offloading") Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 1ec0a4ce3c46..dda5761e91bc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -231,7 +231,7 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port) span_entry->used = true; span_entry->id = index; - span_entry->ref_count = 0; + span_entry->ref_count = 1; span_entry->local_port = local_port; return span_entry; } @@ -270,6 +270,7 @@ static struct mlxsw_sp_span_entry span_entry = mlxsw_sp_span_entry_find(port); if (span_entry) { + /* Already exists, just take a reference */ span_entry->ref_count++; return span_entry; } @@ -280,6 +281,7 @@ static struct mlxsw_sp_span_entry static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_span_entry *span_entry) { + WARN_ON(!span_entry->ref_count); if (--span_entry->ref_count == 0) mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry); return 0; From 42cdb338f40a98e6558bae35456fe86b6e90e1ef Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Fri, 11 Nov 2016 16:34:26 +0100 Subject: [PATCH 184/305] mlxsw: spectrum_router: Correctly dump neighbour activity The device's neighbour table is periodically dumped in order to update the kernel about active neighbours. A single dump session may span multiple queries, until the response carries less records than requested or when a record (can contain up to four neighbour entries) is not full. Current code stops the session when the number of returned records is zero, which can result in infinite loop in case of high packet rate. Fix this by stopping the session according to the above logic. Fixes: c723c735fa6b ("mlxsw: spectrum_router: Periodically update the kernel's neigh table") Signed-off-by: Arkadi Sharshevsky Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 040737e14a3f..cbeeddd70c5a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -800,6 +800,26 @@ static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp, } } +static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl) +{ + u8 num_rec, last_rec_index, num_entries; + + num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl); + last_rec_index = num_rec - 1; + + if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM) + return false; + if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) == + MLXSW_REG_RAUHTD_TYPE_IPV6) + return true; + + num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl, + last_rec_index); + if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC) + return true; + return false; +} + static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) { char *rauhtd_pl; @@ -826,7 +846,7 @@ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) for (i = 0; i < num_rec; i++) mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl, i); - } while (num_rec); + } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl)); rtnl_unlock(); kfree(rauhtd_pl); From 46d054f8f540612f09987a53154aa39ae15f2e4c Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Fri, 11 Nov 2016 15:56:51 +0000 Subject: [PATCH 185/305] sfc: clear napi_hash state when copying channels efx_copy_channel() doesn't correctly clear the napi_hash related state. This means that when napi_hash_add is called for that channel nothing is done, and we are left with a copy of the napi_hash_node from the old channel. When we later call napi_hash_del() on this channel we have a stale napi_hash_node. Corruption is only seen when there are multiple entries in one of the napi_hash lists. This is made more likely by having a very large number of channels. Testing was carried out with 512 channels - 32 channels on each of 16 ports. This failure typically appears as protection faults within napi_by_id() or napi_hash_add(). efx_copy_channel() is only used when tx or rx ring sizes are changed (ethtool -G). Fixes: 36763266bbe8 ("sfc: Add support for busy polling") Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 3cf3557106c2..6b89e4a7b164 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -485,6 +485,9 @@ efx_copy_channel(const struct efx_channel *old_channel) *channel = *old_channel; channel->napi_dev = NULL; + INIT_HLIST_NODE(&channel->napi_str.napi_hash_node); + channel->napi_str.napi_id = 0; + channel->napi_str.state = 0; memset(&channel->eventq, 0, sizeof(channel->eventq)); for (j = 0; j < EFX_TXQ_TYPES; j++) { From b7f193da17fb18b752bef77ce52eb49723299bd8 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Fri, 11 Nov 2016 11:00:45 -0600 Subject: [PATCH 186/305] ibmvnic: Unmap ibmvnic_statistics structure This structure was mapped but never subsequently unmapped. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index f6c9b6d38ac7..921c40fad1c3 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3844,6 +3844,9 @@ static int ibmvnic_remove(struct vio_dev *dev) if (adapter->debugfs_dir && !IS_ERR(adapter->debugfs_dir)) debugfs_remove_recursive(adapter->debugfs_dir); + dma_unmap_single(&dev->dev, adapter->stats_token, + sizeof(struct ibmvnic_statistics), DMA_FROM_DEVICE); + if (adapter->ras_comps) dma_free_coherent(&dev->dev, adapter->ras_comp_num * From e1fac0adf0f9b2c1eb49e658e6ed070a744bbaef Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Fri, 11 Nov 2016 11:00:46 -0600 Subject: [PATCH 187/305] ibmvnic: Fix size of debugfs name buffer This mistake was causing debugfs directory creation failures when multiple ibmvnic devices were probed. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 921c40fad1c3..4f3281a03e7e 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3705,7 +3705,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) struct net_device *netdev; unsigned char *mac_addr_p; struct dentry *ent; - char buf[16]; /* debugfs name buf */ + char buf[17]; /* debugfs name buf */ int rc; dev_dbg(&dev->dev, "entering ibmvnic_probe for UA 0x%x\n", From 7774d46b2037b98d3f7e414bffb1d53082dc139b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 12 Nov 2016 17:44:06 +0000 Subject: [PATCH 188/305] net: ethernet: ixp4xx_eth: fix spelling mistake in debug message Trivial fix to spelling mistake "successed" to "succeeded" in debug message. Also unwrap multi-line literal string. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/xscale/ixp4xx_eth.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 7f127dc1b7ba..fa32391720fe 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -708,8 +708,7 @@ static int eth_poll(struct napi_struct *napi, int budget) if (!qmgr_stat_below_low_watermark(rxq) && napi_reschedule(napi)) { /* not empty again */ #if DEBUG_RX - printk(KERN_DEBUG "%s: eth_poll" - " napi_reschedule successed\n", + printk(KERN_DEBUG "%s: eth_poll napi_reschedule succeeded\n", dev->name); #endif qmgr_disable_irq(rxq); From cedecbc5e0f39d2987b8e1004908e90459a82e78 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 8 Aug 2016 09:48:00 +0000 Subject: [PATCH 189/305] ntb_pingpong: Fix db_init parameter description Fix 'db_init' parameter description. Signed-off-by: Wei Yongjun Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_pingpong.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c index 7d311799fca1..435861189d97 100644 --- a/drivers/ntb/test/ntb_pingpong.c +++ b/drivers/ntb/test/ntb_pingpong.c @@ -88,7 +88,7 @@ MODULE_PARM_DESC(delay_ms, "Milliseconds to delay the response to peer"); static unsigned long db_init = 0x7; module_param(db_init, ulong, 0644); -MODULE_PARM_DESC(delay_ms, "Initial doorbell bits to ring on the peer"); +MODULE_PARM_DESC(db_init, "Initial doorbell bits to ring on the peer"); struct pp_ctx { struct ntb_dev *ntb; From 49b89de41f8d97eb13a60c1865ed61fbebed0d15 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 8 Aug 2016 09:48:42 +0000 Subject: [PATCH 190/305] NTB: ntb_hw_intel: Fix typo in module parameter descriptions Fix typo in module parameter descriptions. Signed-off-by: Wei Yongjun Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 0d5c29ae51de..1ee61d92c54b 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -112,17 +112,17 @@ MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, module_param_named(xeon_b2b_usd_bar4_addr64, xeon_b2b_usd_addr.bar4_addr64, ullong, 0644); -MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, +MODULE_PARM_DESC(xeon_b2b_usd_bar4_addr64, "XEON B2B USD BAR 4 64-bit address"); module_param_named(xeon_b2b_usd_bar4_addr32, xeon_b2b_usd_addr.bar4_addr32, ullong, 0644); -MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, +MODULE_PARM_DESC(xeon_b2b_usd_bar4_addr32, "XEON B2B USD split-BAR 4 32-bit address"); module_param_named(xeon_b2b_usd_bar5_addr32, xeon_b2b_usd_addr.bar5_addr32, ullong, 0644); -MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, +MODULE_PARM_DESC(xeon_b2b_usd_bar5_addr32, "XEON B2B USD split-BAR 5 32-bit address"); module_param_named(xeon_b2b_dsd_bar2_addr64, @@ -132,17 +132,17 @@ MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, module_param_named(xeon_b2b_dsd_bar4_addr64, xeon_b2b_dsd_addr.bar4_addr64, ullong, 0644); -MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, +MODULE_PARM_DESC(xeon_b2b_dsd_bar4_addr64, "XEON B2B DSD BAR 4 64-bit address"); module_param_named(xeon_b2b_dsd_bar4_addr32, xeon_b2b_dsd_addr.bar4_addr32, ullong, 0644); -MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, +MODULE_PARM_DESC(xeon_b2b_dsd_bar4_addr32, "XEON B2B DSD split-BAR 4 32-bit address"); module_param_named(xeon_b2b_dsd_bar5_addr32, xeon_b2b_dsd_addr.bar5_addr32, ullong, 0644); -MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, +MODULE_PARM_DESC(xeon_b2b_dsd_bar5_addr32, "XEON B2B DSD split-BAR 5 32-bit address"); #ifndef ioread64 From c0a88032ef8e6814d4dd84551e5f333c1de639b3 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Mon, 22 Aug 2016 18:51:35 +0200 Subject: [PATCH 191/305] ntb_transport: make DMA_OUT_RESOURCE_TO HZ independent schedule_timeout_* takes a timeout in jiffies but the code currently is passing in a constant which makes this timeout HZ dependent, so pass it through msecs_to_jiffies() to fix this up. Signed-off-by: Nicholas Mc Guire Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 8601c10acf74..4eb8adb34508 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -257,7 +257,7 @@ enum { #define NTB_QP_DEF_NUM_ENTRIES 100 #define NTB_LINK_DOWN_TIMEOUT 10 #define DMA_RETRIES 20 -#define DMA_OUT_RESOURCE_TO 50 +#define DMA_OUT_RESOURCE_TO msecs_to_jiffies(50) static void ntb_transport_rxc_db(unsigned long data); static const struct ntb_ctx_ops ntb_transport_ops; From cdc08982a5f334cecc15d802464588115512cc36 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Mon, 22 Aug 2016 18:51:36 +0200 Subject: [PATCH 192/305] ntb: make DMA_OUT_RESOURCE_TO HZ independent schedule_timeout_* takes a timeout in jiffies but the code currently is passing in a constant which makes this timeout HZ dependent, so pass it through msecs_to_jiffies() to fix this up. Signed-off-by: Nicholas Mc Guire Acked-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 6a50f20bf1cd..e065b695200d 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -72,7 +72,7 @@ #define MAX_THREADS 32 #define MAX_TEST_SIZE SZ_1M #define MAX_SRCS 32 -#define DMA_OUT_RESOURCE_TO 50 +#define DMA_OUT_RESOURCE_TO msecs_to_jiffies(50) #define DMA_RETRIES 20 #define SZ_4G (1ULL << 32) #define MAX_SEG_ORDER 20 /* no larger than 1M for kmalloc buffer */ From 25ea9f2bf5f76082da919f2a91ea8d920932c1da Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 27 Oct 2016 11:06:44 -0700 Subject: [PATCH 193/305] ntb: ntb_hw_intel: init peer_addr in struct intel_ntb_dev The peer_addr member of intel_ntb_dev is not set, therefore when acquiring ntb_peer_db and ntb_peer_spad we only get the offset rather than the actual physical address. Adding fix to correct that. Signed-off-by: Dave Jiang Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 1ee61d92c54b..7310a261c858 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -1755,6 +1755,8 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev, XEON_B2B_MIN_SIZE); if (!ndev->peer_mmio) return -EIO; + + ndev->peer_addr = pci_resource_start(pdev, b2b_bar); } return 0; @@ -2019,6 +2021,7 @@ static int intel_ntb_init_pci(struct intel_ntb_dev *ndev, struct pci_dev *pdev) goto err_mmio; } ndev->peer_mmio = ndev->self_mmio; + ndev->peer_addr = pci_resource_start(pdev, 0); return 0; From 819baf885953b588b63bef28e5598daf9ed4ddf9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Oct 2016 10:34:18 +0300 Subject: [PATCH 194/305] ntb_perf: potential info leak in debugfs This is a static checker warning, not something I'm desperately concerned about. But snprintf() returns the number of bytes that would have been copied if there were space. We really care about the number of bytes that actually were copied so we should use scnprintf() instead. It probably won't overrun, and in that case we may as well just use sprintf() but these sorts of things make static checkers and code reviewers happier. Signed-off-by: Dan Carpenter Acked-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index e065b695200d..e75d4fdc0866 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -589,7 +589,7 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf, return -ENOMEM; if (mutex_is_locked(&perf->run_mutex)) { - out_off = snprintf(buf, 64, "running\n"); + out_off = scnprintf(buf, 64, "running\n"); goto read_from_buf; } @@ -600,14 +600,14 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf, break; if (pctx->status) { - out_off += snprintf(buf + out_off, 1024 - out_off, + out_off += scnprintf(buf + out_off, 1024 - out_off, "%d: error %d\n", i, pctx->status); continue; } rate = div64_u64(pctx->copied, pctx->diff_us); - out_off += snprintf(buf + out_off, 1024 - out_off, + out_off += scnprintf(buf + out_off, 1024 - out_off, "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n", i, pctx->copied, pctx->diff_us, rate); } From fa14a0acea1ffe67913ba384a2897130a36dfe03 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 1 Nov 2016 18:36:46 +0200 Subject: [PATCH 195/305] nvmet-rdma: Fix possible NULL deref when handling rdma cm events When we initiate queue teardown sequence we call rdma_destroy_qp which clears cm_id->qp, afterwards we call rdma_destroy_id, but we might see a rdma_cm event in between with a cleared cm_id->qp so watch out for that and silently ignore the event because this means that the queue teardown sequence is in progress. Signed-off-by: Bart Van Assche Signed-off-by: Sagi Grimberg --- drivers/nvme/target/rdma.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index f8d23999e0f2..cf60759cc169 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1352,7 +1352,13 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, case RDMA_CM_EVENT_ADDR_CHANGE: case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_TIMEWAIT_EXIT: - nvmet_rdma_queue_disconnect(queue); + /* + * We might end up here when we already freed the qp + * which means queue release sequence is in progress, + * so don't get in the way... + */ + if (queue) + nvmet_rdma_queue_disconnect(queue); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: ret = nvmet_rdma_device_removal(cm_id, queue); From 553cd9ef82edd811948782a8f73ae73c4bfeedd3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Nov 2016 08:49:18 -0600 Subject: [PATCH 196/305] nvme-rdma: reject non-connect commands before the queue is live If we reconncect we might have command queue up that get resent as soon as the queue is restarted. But until the connect command succeeded we can't send other command. Add a new flag that marks a queue as live when connect finishes, and delay any non-connect command until the queue is live based on it. Signed-off-by: Christoph Hellwig Reported-by: Steve Wise Tested-by: Steve Wise [sagig: fixes admin queue LIVE setting] Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 5a8388177959..438d6948895f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -83,6 +83,7 @@ enum nvme_rdma_queue_flags { NVME_RDMA_Q_CONNECTED = (1 << 0), NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1), NVME_RDMA_Q_DELETING = (1 << 2), + NVME_RDMA_Q_LIVE = (1 << 3), }; struct nvme_rdma_queue { @@ -626,6 +627,7 @@ static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl) ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) break; + set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); } return ret; @@ -712,6 +714,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) if (ret) goto stop_admin_q; + set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags); + ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); if (ret) goto stop_admin_q; @@ -761,8 +765,10 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) nvme_stop_keep_alive(&ctrl->ctrl); - for (i = 0; i < ctrl->queue_count; i++) + for (i = 0; i < ctrl->queue_count; i++) { clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags); + clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); + } if (ctrl->queue_count > 1) nvme_stop_queues(&ctrl->ctrl); @@ -1378,6 +1384,24 @@ nvme_rdma_timeout(struct request *rq, bool reserved) return BLK_EH_HANDLED; } +/* + * We cannot accept any other command until the Connect command has completed. + */ +static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, + struct request *rq) +{ + if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) { + struct nvme_command *cmd = (struct nvme_command *)rq->cmd; + + if (rq->cmd_type != REQ_TYPE_DRV_PRIV || + cmd->common.opcode != nvme_fabrics_command || + cmd->fabrics.fctype != nvme_fabrics_type_connect) + return false; + } + + return true; +} + static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -1394,6 +1418,9 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, WARN_ON_ONCE(rq->tag < 0); + if (!nvme_rdma_queue_is_ready(queue, rq)) + return BLK_MQ_RQ_QUEUE_BUSY; + dev = queue->device->dev; ib_dma_sync_single_for_cpu(dev, sqe->dma, sizeof(struct nvme_command), DMA_TO_DEVICE); @@ -1544,6 +1571,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) if (error) goto out_cleanup_queue; + set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags); + error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); if (error) { dev_err(ctrl->ctrl.device, From 8242ddac1bfcf6eb8873b4d0a4e7a172c2b5b625 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 6 Nov 2016 11:03:30 +0200 Subject: [PATCH 197/305] nvmet: Don't queue fatal error work if csts.cfs is set In the transport, in case of an interal queue error like error completion in rdma we trigger a fatal error. However, multiple queues in the same controller can serr error completions and we don't want to trigger fatal error work more than once. Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/target/core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b4cacb6f0258..a21437a33adb 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -838,9 +838,13 @@ static void nvmet_fatal_error_handler(struct work_struct *work) void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl) { - ctrl->csts |= NVME_CSTS_CFS; - INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler); - schedule_work(&ctrl->fatal_err_work); + mutex_lock(&ctrl->lock); + if (!(ctrl->csts & NVME_CSTS_CFS)) { + ctrl->csts |= NVME_CSTS_CFS; + INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler); + schedule_work(&ctrl->fatal_err_work); + } + mutex_unlock(&ctrl->lock); } EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error); From 766dbb179d41d6337fed2b3ca00caa5845d298ce Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 6 Nov 2016 11:09:49 +0200 Subject: [PATCH 198/305] nvmet-rdma: don't forget to delete a queue from the list of connection failed In case we accepted a queue connection and it failed, we might not remove the queue from the list until we unload and clean it up. We should delete it from the queue list on the relevant handler. Signed-off-by: Sagi Grimberg --- drivers/nvme/target/rdma.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index cf60759cc169..8c06675c2305 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1066,6 +1066,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, spin_lock_init(&queue->rsp_wr_wait_lock); INIT_LIST_HEAD(&queue->free_rsps); spin_lock_init(&queue->rsps_lock); + INIT_LIST_HEAD(&queue->queue_list); queue->idx = ida_simple_get(&nvmet_rdma_queue_ida, 0, 0, GFP_KERNEL); if (queue->idx < 0) { @@ -1269,7 +1270,12 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, { WARN_ON_ONCE(queue->state != NVMET_RDMA_Q_CONNECTING); - pr_err("failed to connect queue\n"); + mutex_lock(&nvmet_rdma_queue_mutex); + if (!list_empty(&queue->queue_list)) + list_del_init(&queue->queue_list); + mutex_unlock(&nvmet_rdma_queue_mutex); + + pr_err("failed to connect queue %d\n", queue->idx); schedule_work(&queue->release_work); } From c8dbc37cd81d4705fce51123f5d81ea3267a5b88 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 8 Nov 2016 09:16:02 -0800 Subject: [PATCH 199/305] nvme-rdma: stop and free io queues on connect failure While testing nvme-rdma with the spdk nvmf target over iw_cxgb4, I configured the target (mistakenly) to generate an error creating the NVMF IO queues. This resulted a "Invalid SQE Parameter" error sent back to the host on the first IO queue connect: [ 9610.928182] nvme nvme1: queue_size 128 > ctrl maxcmd 120, clamping down [ 9610.938745] nvme nvme1: creating 32 I/O queues. So nvmf_connect_io_queue() returns an error to nvmf_connect_io_queue() / nvmf_connect_io_queues(), and that is returned to nvme_rdma_create_io_queues(). In the error path, nvmf_rdma_create_io_queues() frees the queue tagset memory _before_ stopping and freeing the IB queues, which causes yet another touch-after-free crash due to SQ CQEs being flushed after the ib_cqe structs pointed-to by the flushed WRs have been freed (since they are part of the nvme_rdma_request struct). The fix is to stop and free the queues in nvmf_connect_io_queues() if there is an error connecting any of the queues. Signed-off-by: Steve Wise Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 438d6948895f..3d25add36d91 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -625,11 +625,18 @@ static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl) for (i = 1; i < ctrl->queue_count; i++) { ret = nvmf_connect_io_queue(&ctrl->ctrl, i); - if (ret) - break; + if (ret) { + dev_info(ctrl->ctrl.device, + "failed to connect i/o queue: %d\n", ret); + goto out_free_queues; + } set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); } + return 0; + +out_free_queues: + nvme_rdma_free_io_queues(ctrl); return ret; } From 14c862dbb0a0e0a9baec20480d441e32cb54b2b9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 6 Nov 2016 11:03:59 +0200 Subject: [PATCH 200/305] nvmet-rdma: drain the queue-pair just before freeing it draining the qp right after disconnect might not suffice because the nvmet sq is not fully drained (in nvmet_sq_destroy) and we might see completions after the drain. Instead, drain right before the qp destroy which comes after the sq destruction and we can be sure that no posts come after the drain. Tested-by: Steve Wise Signed-off-by: Sagi Grimberg --- drivers/nvme/target/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 8c06675c2305..005ef5d17a19 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -951,6 +951,7 @@ err_destroy_cq: static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue) { + ib_drain_qp(queue->cm_id->qp); rdma_destroy_qp(queue->cm_id); ib_free_cq(queue->cq); } @@ -1245,7 +1246,6 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) if (disconnect) { rdma_disconnect(queue->cm_id); - ib_drain_qp(queue->cm_id->qp); schedule_work(&queue->release_work); } } From f732c5b7c734cfc2c563c918fe2842175c7eb073 Mon Sep 17 00:00:00 2001 From: Axl-zhang Date: Wed, 2 Nov 2016 13:31:12 +0800 Subject: [PATCH 201/305] dmaengine: sun6i: fix the uninitialized value for v_lli dma_pool_alloc does not initialize the value of the newly allocated block for the v_lli, and the uninitilize value make the tests failed which is on pine64 with dmatest. we can fix it just change the "|=" to "=" for the v_lli->cfg. Signed-off-by: Hao Zhang Acked-by: Maxime Ripard Signed-off-by: Vinod Koul --- drivers/dma/sun6i-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c index 83461994e418..a2358780ab2c 100644 --- a/drivers/dma/sun6i-dma.c +++ b/drivers/dma/sun6i-dma.c @@ -578,7 +578,7 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy( burst = convert_burst(8); width = convert_buswidth(DMA_SLAVE_BUSWIDTH_4_BYTES); - v_lli->cfg |= DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) | + v_lli->cfg = DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) | DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) | DMA_CHAN_CFG_DST_LINEAR_MODE | DMA_CHAN_CFG_SRC_LINEAR_MODE | From 12f5908080bdccca2cb2f7ad850cb360c92f481a Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 9 Nov 2016 09:47:58 -0700 Subject: [PATCH 202/305] dmaengine: cppi41: Fix list not empty warning on module removal If musb controller is configured with USB peripherals and we have enumerated with a USB host, we can get warnings on removal of the modules: WARNING: CPU: 0 PID: 1269 at drivers/dma/cppi41.c:391 cppi41_dma_free_chan_resources Fix the issue by adding the missing pm_runtime_get to cppi41_dma_free_chan_resources to make sure the pending work list is cleared on removal. Fixes: fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support") Signed-off-by: Tony Lindgren Signed-off-by: Vinod Koul --- drivers/dma/cppi41.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index bac5f023013b..6ed99d926358 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -1072,7 +1072,12 @@ err_get_sync: static int cppi41_dma_remove(struct platform_device *pdev) { struct cppi41_dd *cdd = platform_get_drvdata(pdev); + int error; + error = pm_runtime_get_sync(&pdev->dev); + if (error < 0) + dev_err(&pdev->dev, "%s could not pm_runtime_get: %i\n", + __func__, error); of_dma_controller_free(pdev->dev.of_node); dma_async_device_unregister(&cdd->ddev); From 098de42ad6708866501a00155ba85350bc0b29e5 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 9 Nov 2016 09:47:59 -0700 Subject: [PATCH 203/305] dmaengine: cppi41: Fix unpaired pm runtime when only a USB hub is connected On am335x with musb host we can end up with unpaired pm runtime calls if a hub with no devices is connected and disconnected. This is because of the conditional pm runtime calls which are always a bad idea. Let's fix the issue by making them unconditional and paired in each function. Fixes: fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support") Signed-off-by: Tony Lindgren Signed-off-by: Vinod Koul --- drivers/dma/cppi41.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index 6ed99d926358..3d2d8b5a6c91 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -318,6 +318,11 @@ static irqreturn_t cppi41_irq(int irq, void *data) while (val) { u32 desc, len; + status = pm_runtime_get(cdd->ddev.dev); + if (status < 0) + dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n", + __func__, status); + q_num = __fls(val); val &= ~(1 << q_num); q_num += 32 * i; @@ -338,7 +343,6 @@ static irqreturn_t cppi41_irq(int irq, void *data) dma_cookie_complete(&c->txd); dmaengine_desc_get_callback_invoke(&c->txd, NULL); - /* Paired with cppi41_dma_issue_pending */ pm_runtime_mark_last_busy(cdd->ddev.dev); pm_runtime_put_autosuspend(cdd->ddev.dev); } @@ -460,7 +464,6 @@ static void cppi41_dma_issue_pending(struct dma_chan *chan) struct cppi41_dd *cdd = c->cdd; int error; - /* PM runtime paired with dmaengine_desc_get_callback_invoke */ error = pm_runtime_get(cdd->ddev.dev); if ((error != -EINPROGRESS) && error < 0) { dev_err(cdd->ddev.dev, "Failed to pm_runtime_get: %i\n", @@ -473,6 +476,9 @@ static void cppi41_dma_issue_pending(struct dma_chan *chan) push_desc_queue(c); else pending_desc(c); + + pm_runtime_mark_last_busy(cdd->ddev.dev); + pm_runtime_put_autosuspend(cdd->ddev.dev); } static u32 get_host_pd0(u32 length) From 740b4be3f742100ea66f0f9ee9715b10ee304a90 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 11 Nov 2016 11:28:52 -0800 Subject: [PATCH 204/305] dmaengine: cpp41: Fix handling of error path If we return early on pm_runtime_get() error, we need to also call pm_runtime_put_noidle() as pointed out in a musb related thread by Johan Hovold . This is to keep the PM runtime use counts happy. Fixes: fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support") Cc: Johan Hovold Signed-off-by: Tony Lindgren Signed-off-by: Vinod Koul --- drivers/dma/cppi41.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index 3d2d8b5a6c91..4b52126c13cf 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -366,8 +366,11 @@ static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan) int error; error = pm_runtime_get_sync(cdd->ddev.dev); - if (error < 0) + if (error < 0) { + pm_runtime_put_noidle(cdd->ddev.dev); + return error; + } dma_cookie_init(chan); dma_async_tx_descriptor_init(&c->txd, chan); @@ -389,8 +392,11 @@ static void cppi41_dma_free_chan_resources(struct dma_chan *chan) int error; error = pm_runtime_get_sync(cdd->ddev.dev); - if (error < 0) + if (error < 0) { + pm_runtime_put_noidle(cdd->ddev.dev); + return; + } WARN_ON(!list_empty(&cdd->pending)); @@ -466,6 +472,7 @@ static void cppi41_dma_issue_pending(struct dma_chan *chan) error = pm_runtime_get(cdd->ddev.dev); if ((error != -EINPROGRESS) && error < 0) { + pm_runtime_put_noidle(cdd->ddev.dev); dev_err(cdd->ddev.dev, "Failed to pm_runtime_get: %i\n", error); From ea08e39230e898844d9de5b60cdbb30067cebfe7 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 11 Nov 2016 13:16:22 -0500 Subject: [PATCH 205/305] sunrpc: svc_age_temp_xprts_now should not call setsockopt non-tcp transports This fixes the following panic that can occur with NFSoRDMA. general protection fault: 0000 [#1] SMP Modules linked in: rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm mlx5_ib ib_core intel_powerclamp coretemp kvm_intel kvm sg ioatdma ipmi_devintf ipmi_ssif dcdbas iTCO_wdt iTCO_vendor_support pcspkr irqbypass sb_edac shpchp dca crc32_pclmul ghash_clmulni_intel edac_core lpc_ich aesni_intel lrw gf128mul glue_helper ablk_helper mei_me mei ipmi_si cryptd wmi ipmi_msghandler acpi_pad acpi_power_meter nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod crc_t10dif crct10dif_generic mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt ahci fb_sys_fops ttm libahci mlx5_core tg3 crct10dif_pclmul drm crct10dif_common ptp i2c_core libata crc32c_intel pps_core fjes dm_mirror dm_region_hash dm_log dm_mod CPU: 1 PID: 120 Comm: kworker/1:1 Not tainted 3.10.0-514.el7.x86_64 #1 Hardware name: Dell Inc. PowerEdge R320/0KM5PX, BIOS 2.4.2 01/29/2015 Workqueue: events check_lifetime task: ffff88031f506dd0 ti: ffff88031f584000 task.ti: ffff88031f584000 RIP: 0010:[] [] _raw_spin_lock_bh+0x17/0x50 RSP: 0018:ffff88031f587ba8 EFLAGS: 00010206 RAX: 0000000000020000 RBX: 20041fac02080072 RCX: ffff88031f587fd8 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 20041fac02080072 RBP: ffff88031f587bb0 R08: 0000000000000008 R09: ffffffff8155be77 R10: ffff880322a59b00 R11: ffffea000bf39f00 R12: 20041fac02080072 R13: 000000000000000d R14: ffff8800c4fbd800 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff880322a40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3c52d4547e CR3: 00000000019ba000 CR4: 00000000001407e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: 20041fac02080002 ffff88031f587bd0 ffffffff81557830 20041fac02080002 ffff88031f587c78 ffff88031f587c40 ffffffff8155ae08 000000010157df32 0000000800000001 ffff88031f587c20 ffffffff81096acb ffffffff81aa37d0 Call Trace: [] lock_sock_nested+0x20/0x50 [] sock_setsockopt+0x78/0x940 [] ? lock_timer_base.isra.33+0x2b/0x50 [] kernel_setsockopt+0x4d/0x50 [] svc_age_temp_xprts_now+0x174/0x1e0 [sunrpc] [] nfsd_inetaddr_event+0x9d/0xd0 [nfsd] [] notifier_call_chain+0x4c/0x70 [] __blocking_notifier_call_chain+0x4d/0x70 [] blocking_notifier_call_chain+0x16/0x20 [] __inet_del_ifa+0x168/0x2d0 [] check_lifetime+0x25f/0x270 [] process_one_work+0x17b/0x470 [] worker_thread+0x126/0x410 [] ? rescuer_thread+0x460/0x460 [] kthread+0xcf/0xe0 [] ? kthread_create_on_node+0x140/0x140 [] ret_from_fork+0x58/0x90 [] ? kthread_create_on_node+0x140/0x140 Code: ca 75 f1 5d c3 0f 1f 80 00 00 00 00 eb d9 66 0f 1f 44 00 00 0f 1f 44 00 00 55 48 89 e5 53 48 89 fb e8 7e 04 a0 ff b8 00 00 02 00 0f c1 03 89 c2 c1 ea 10 66 39 c2 75 03 5b 5d c3 83 e2 fe 0f RIP [] _raw_spin_lock_bh+0x17/0x50 RSP Signed-off-by: Scott Mayhew Fixes: c3d4879e ("sunrpc: Add a function to close temporary transports immediately") Reviewed-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svc_xprt.c | 11 +---------- net/sunrpc/svcsock.c | 21 +++++++++++++++++++++ net/sunrpc/xprtrdma/svc_rdma_transport.c | 6 ++++++ 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index ab02a457da1f..e5d193440374 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -25,6 +25,7 @@ struct svc_xprt_ops { void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); int (*xpo_secure_port)(struct svc_rqst *); + void (*xpo_kill_temp_xprt)(struct svc_xprt *); }; struct svc_xprt_class { diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c3f652395a80..3bc1d61694cb 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1002,14 +1002,8 @@ static void svc_age_temp_xprts(unsigned long closure) void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr) { struct svc_xprt *xprt; - struct svc_sock *svsk; - struct socket *sock; struct list_head *le, *next; LIST_HEAD(to_be_closed); - struct linger no_linger = { - .l_onoff = 1, - .l_linger = 0, - }; spin_lock_bh(&serv->sv_lock); list_for_each_safe(le, next, &serv->sv_tempsocks) { @@ -1027,10 +1021,7 @@ void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr) list_del_init(le); xprt = list_entry(le, struct svc_xprt, xpt_list); dprintk("svc_age_temp_xprts_now: closing %p\n", xprt); - svsk = container_of(xprt, struct svc_sock, sk_xprt); - sock = svsk->sk_sock; - kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, - (char *)&no_linger, sizeof(no_linger)); + xprt->xpt_ops->xpo_kill_temp_xprt(xprt); svc_close_xprt(xprt); } } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 57625f64efd5..a4bc98265d88 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -438,6 +438,21 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt) return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); } +static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt) +{ + struct svc_sock *svsk; + struct socket *sock; + struct linger no_linger = { + .l_onoff = 1, + .l_linger = 0, + }; + + svsk = container_of(xprt, struct svc_sock, sk_xprt); + sock = svsk->sk_sock; + kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, + (char *)&no_linger, sizeof(no_linger)); +} + /* * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo */ @@ -648,6 +663,10 @@ static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt) return NULL; } +static void svc_udp_kill_temp_xprt(struct svc_xprt *xprt) +{ +} + static struct svc_xprt *svc_udp_create(struct svc_serv *serv, struct net *net, struct sockaddr *sa, int salen, @@ -667,6 +686,7 @@ static struct svc_xprt_ops svc_udp_ops = { .xpo_has_wspace = svc_udp_has_wspace, .xpo_accept = svc_udp_accept, .xpo_secure_port = svc_sock_secure_port, + .xpo_kill_temp_xprt = svc_udp_kill_temp_xprt, }; static struct svc_xprt_class svc_udp_class = { @@ -1242,6 +1262,7 @@ static struct svc_xprt_ops svc_tcp_ops = { .xpo_has_wspace = svc_tcp_has_wspace, .xpo_accept = svc_tcp_accept, .xpo_secure_port = svc_sock_secure_port, + .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt, }; static struct svc_xprt_class svc_tcp_class = { diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 6864fb967038..1334de2715c2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -67,6 +67,7 @@ static void svc_rdma_detach(struct svc_xprt *xprt); static void svc_rdma_free(struct svc_xprt *xprt); static int svc_rdma_has_wspace(struct svc_xprt *xprt); static int svc_rdma_secure_port(struct svc_rqst *); +static void svc_rdma_kill_temp_xprt(struct svc_xprt *); static struct svc_xprt_ops svc_rdma_ops = { .xpo_create = svc_rdma_create, @@ -79,6 +80,7 @@ static struct svc_xprt_ops svc_rdma_ops = { .xpo_has_wspace = svc_rdma_has_wspace, .xpo_accept = svc_rdma_accept, .xpo_secure_port = svc_rdma_secure_port, + .xpo_kill_temp_xprt = svc_rdma_kill_temp_xprt, }; struct svc_xprt_class svc_rdma_class = { @@ -1317,6 +1319,10 @@ static int svc_rdma_secure_port(struct svc_rqst *rqstp) return 1; } +static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt) +{ +} + int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) { struct ib_send_wr *bad_wr, *n_wr; From b15efc38626f20f3fc8b831b826b50740d90dab9 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 14 Nov 2016 11:14:37 -0200 Subject: [PATCH 206/305] gp8psk-fe: add missing MODULE_foo() macros This file was converted to a separate module at commit 7a0786c19d65 ("gp8psk: Fix DVB frontend attach"), because the DVB attach routines require it to work. However, I forgot to copy the MODULE_foo() macros from the original module, causing this warning: WARNING: modpost: missing MODULE_LICENSE() in drivers/media/dvb-frontends/gp8psk-fe.o Reported-by: Stephen Rothwell Fixes: 7a0786c19d65 ("gp8psk: Fix DVB frontend attach") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Linus Torvalds --- drivers/media/dvb-frontends/gp8psk-fe.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/media/dvb-frontends/gp8psk-fe.c b/drivers/media/dvb-frontends/gp8psk-fe.c index be19afeed7a9..93f59bfea092 100644 --- a/drivers/media/dvb-frontends/gp8psk-fe.c +++ b/drivers/media/dvb-frontends/gp8psk-fe.c @@ -1,5 +1,5 @@ -/* DVB USB compliant Linux driver for the - * - GENPIX 8pks/qpsk/DCII USB2.0 DVB-S module +/* + * Frontend driver for the GENPIX 8pks/qpsk/DCII USB2.0 DVB-S module * * Copyright (C) 2006,2007 Alan Nisota (alannisota@gmail.com) * Copyright (C) 2006,2007 Genpix Electronics (genpix@genpix-electronics.com) @@ -8,11 +8,9 @@ * * This module is based off the vp7045 and vp702x modules * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, version 2. - * - * see Documentation/dvb/README.dvb-usb for more information + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, version 2. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -395,3 +393,8 @@ static struct dvb_frontend_ops gp8psk_fe_ops = { .dishnetwork_send_legacy_command = gp8psk_fe_send_legacy_dish_cmd, .enable_high_lnb_voltage = gp8psk_fe_enable_high_lnb_voltage }; + +MODULE_AUTHOR("Alan Nisota "); +MODULE_DESCRIPTION("Frontend Driver for Genpix DVB-S"); +MODULE_VERSION("1.1"); +MODULE_LICENSE("GPL"); From 93d710a65ef02fb7fd48ae207e78f460bd7a6089 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 14 Nov 2016 15:34:17 +0100 Subject: [PATCH 207/305] i2c: mux: fix up dependencies We get the following build error from UM Linux after adding an entry to drivers/iio/gyro/Kconfig that issues "select I2C_MUX": ERROR: "devm_ioremap_resource" [drivers/i2c/muxes/i2c-mux-reg.ko] undefined! ERROR: "of_address_to_resource" [drivers/i2c/muxes/i2c-mux-reg.ko] undefined! It appears that the I2C mux core code depends on HAS_IOMEM for historical reasons, while CONFIG_I2C_MUX_REG does *not* have a direct dependency on HAS_IOMEM. This creates a situation where a allyesconfig or allmodconfig for UM Linux will select I2C_MUX, and will implicitly enable I2C_MUX_REG as well, and the compilation will fail for the register driver. Fix this up by making I2C_MUX_REG depend on HAS_IOMEM and removing the dependency from I2C_MUX. Reported-by: kbuild test robot Reported-by: Jonathan Cameron Signed-off-by: Linus Walleij Acked-by: Jonathan Cameron Acked-by: Peter Rosin Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/Kconfig | 1 - drivers/i2c/muxes/Kconfig | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig index d223650a97e4..11edabf425ae 100644 --- a/drivers/i2c/Kconfig +++ b/drivers/i2c/Kconfig @@ -59,7 +59,6 @@ config I2C_CHARDEV config I2C_MUX tristate "I2C bus multiplexing support" - depends on HAS_IOMEM help Say Y here if you want the I2C core to support the ability to handle multiplexed I2C bus topologies, by presenting each diff --git a/drivers/i2c/muxes/Kconfig b/drivers/i2c/muxes/Kconfig index e280c8ecc0b5..96de9ce5669b 100644 --- a/drivers/i2c/muxes/Kconfig +++ b/drivers/i2c/muxes/Kconfig @@ -63,6 +63,7 @@ config I2C_MUX_PINCTRL config I2C_MUX_REG tristate "Register-based I2C multiplexer" + depends on HAS_IOMEM help If you say yes to this option, support will be included for a register based I2C multiplexer. This driver provides access to From f5c9f9c72395c3291c2e35c905dedae2b98475a4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 14 Nov 2016 09:31:52 -0800 Subject: [PATCH 208/305] Revert "printk: make reading the kernel log flush pending lines" This reverts commit bfd8d3f23b51018388be0411ccbc2d56277fe294. It turns out that this flushes things much too aggressiverly, and causes lines to break up when the system logger races with new continuation lines being printed. There's a pending patch to make printk() flushing much more straightforward, but it's too invasive for 4.9, so in the meantime let's just not make the system message logging flush continuation lines. They'll be flushed by the final newline anyway. Suggested-by: Petr Mladek Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5028f4fd504a..f7a55e9ff2f7 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -783,8 +783,6 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) return ret; } -static void cont_flush(void); - static ssize_t devkmsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -800,7 +798,6 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, if (ret) return ret; raw_spin_lock_irq(&logbuf_lock); - cont_flush(); while (user->seq == log_next_seq) { if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; @@ -863,7 +860,6 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) return -ESPIPE; raw_spin_lock_irq(&logbuf_lock); - cont_flush(); switch (whence) { case SEEK_SET: /* the first record */ @@ -902,7 +898,6 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait) poll_wait(file, &log_wait, wait); raw_spin_lock_irq(&logbuf_lock); - cont_flush(); if (user->seq < log_next_seq) { /* return error when data has vanished underneath us */ if (user->seq < log_first_seq) @@ -1289,7 +1284,6 @@ static int syslog_print(char __user *buf, int size) size_t skip; raw_spin_lock_irq(&logbuf_lock); - cont_flush(); if (syslog_seq < log_first_seq) { /* messages are gone, move to first one */ syslog_seq = log_first_seq; @@ -1349,7 +1343,6 @@ static int syslog_print_all(char __user *buf, int size, bool clear) return -ENOMEM; raw_spin_lock_irq(&logbuf_lock); - cont_flush(); if (buf) { u64 next_seq; u64 seq; @@ -1511,7 +1504,6 @@ int do_syslog(int type, char __user *buf, int len, int source) /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: raw_spin_lock_irq(&logbuf_lock); - cont_flush(); if (syslog_seq < log_first_seq) { /* messages are gone, move to first one */ syslog_seq = log_first_seq; @@ -3028,7 +3020,6 @@ void kmsg_dump(enum kmsg_dump_reason reason) dumper->active = true; raw_spin_lock_irqsave(&logbuf_lock, flags); - cont_flush(); dumper->cur_seq = clear_seq; dumper->cur_idx = clear_idx; dumper->next_seq = log_next_seq; @@ -3119,7 +3110,6 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, bool ret; raw_spin_lock_irqsave(&logbuf_lock, flags); - cont_flush(); ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); raw_spin_unlock_irqrestore(&logbuf_lock, flags); @@ -3162,7 +3152,6 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, goto out; raw_spin_lock_irqsave(&logbuf_lock, flags); - cont_flush(); if (dumper->cur_seq < log_first_seq) { /* messages are gone, move to first available one */ dumper->cur_seq = log_first_seq; From ee2bd216e1fa9fa980e6ac702e5973d157c40c48 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 14 Nov 2016 09:46:08 -0800 Subject: [PATCH 209/305] ASoC: lpass-platform: fix uninitialized variable In commit 022d00ee0b55 ("ASoC: lpass-platform: Fix broken pcm data usage") the stream specific information initialization was broken, with the dma channel information not being initialized if there was no alloc_dma_channel() helper function. Before that, the DMA channel number was implicitly initialized to zero because the backing store was allocated with devm_kzalloc(). When the init code was rewritten, that implicit initialization was lost, and gcc rightfully complains about an uninitialized variable being used. Cc: Srinivas Kandagatla Cc: Mark Brown Signed-off-by: Linus Torvalds --- sound/soc/qcom/lpass-platform.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c index 07000f53db44..b392e51de94d 100644 --- a/sound/soc/qcom/lpass-platform.c +++ b/sound/soc/qcom/lpass-platform.c @@ -75,6 +75,7 @@ static int lpass_platform_pcmops_open(struct snd_pcm_substream *substream) data->i2s_port = cpu_dai->driver->id; runtime->private_data = data; + dma_ch = 0; if (v->alloc_dma_channel) dma_ch = v->alloc_dma_channel(drvdata, dir); if (dma_ch < 0) From 7020637bdf59589a403e01aca128bef643404317 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 12 Nov 2016 17:20:30 +0000 Subject: [PATCH 210/305] ps3_gelic: fix spelling mistake in debug message Trivial fix to spelling mistake "unmached" to "unmatched" in debug message. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/toshiba/ps3_gelic_wireless.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c index 446ea580ad42..928c1dca2673 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c @@ -1694,7 +1694,7 @@ struct gelic_wl_scan_info *gelic_wl_find_best_bss(struct gelic_wl_info *wl) pr_debug("%s: bssid matched\n", __func__); break; } else { - pr_debug("%s: bssid unmached\n", __func__); + pr_debug("%s: bssid unmatched\n", __func__); continue; } } From e2174b0c24caca170ca61eda2ae49c9561ff8896 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Nov 2016 20:56:17 +0100 Subject: [PATCH 211/305] Revert "ACPICA: FADT support cleanup" Pavel Machek reports that commit 6ea8c546f365 (ACPICA: FADT support cleanup) breaks thermal management on his Thinkpad X60 and T40p, so revert it. Link: https://bugzilla.kernel.org/show_bug.cgi?id=187311 Fixes: 6ea8c546f365 (ACPICA: FADT support cleanup) Reported-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/tbfadt.c | 10 +-- include/acpi/actbl.h | 164 +++++++++++++++-------------------- 2 files changed, 74 insertions(+), 100 deletions(-) diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c index 046c4d0394ee..5fb838e592dc 100644 --- a/drivers/acpi/acpica/tbfadt.c +++ b/drivers/acpi/acpica/tbfadt.c @@ -480,19 +480,17 @@ static void acpi_tb_convert_fadt(void) u32 i; /* - * For ACPI 1.0 FADTs (revision 1), ensure that reserved fields which + * For ACPI 1.0 FADTs (revision 1 or 2), ensure that reserved fields which * should be zero are indeed zero. This will workaround BIOSs that * inadvertently place values in these fields. * * The ACPI 1.0 reserved fields that will be zeroed are the bytes located * at offset 45, 55, 95, and the word located at offset 109, 110. * - * Note: The FADT revision value is unreliable because of BIOS errors. - * The table length is instead used as the final word on the version. - * - * Note: FADT revision 3 is the ACPI 2.0 version of the FADT. + * Note: The FADT revision value is unreliable. Only the length can be + * trusted. */ - if (acpi_gbl_FADT.header.length <= ACPI_FADT_V3_SIZE) { + if (acpi_gbl_FADT.header.length <= ACPI_FADT_V2_SIZE) { acpi_gbl_FADT.preferred_profile = 0; acpi_gbl_FADT.pstate_control = 0; acpi_gbl_FADT.cst_control = 0; diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 1b949e08015c..c19700e2a2fe 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -230,72 +230,62 @@ struct acpi_table_facs { /* Fields common to all versions of the FADT */ struct acpi_table_fadt { - struct acpi_table_header header; /* [V1] Common ACPI table header */ - u32 facs; /* [V1] 32-bit physical address of FACS */ - u32 dsdt; /* [V1] 32-bit physical address of DSDT */ - u8 model; /* [V1] System Interrupt Model (ACPI 1.0) - not used in ACPI 2.0+ */ - u8 preferred_profile; /* [V1] Conveys preferred power management profile to OSPM. */ - u16 sci_interrupt; /* [V1] System vector of SCI interrupt */ - u32 smi_command; /* [V1] 32-bit Port address of SMI command port */ - u8 acpi_enable; /* [V1] Value to write to SMI_CMD to enable ACPI */ - u8 acpi_disable; /* [V1] Value to write to SMI_CMD to disable ACPI */ - u8 s4_bios_request; /* [V1] Value to write to SMI_CMD to enter S4BIOS state */ - u8 pstate_control; /* [V1] Processor performance state control */ - u32 pm1a_event_block; /* [V1] 32-bit port address of Power Mgt 1a Event Reg Blk */ - u32 pm1b_event_block; /* [V1] 32-bit port address of Power Mgt 1b Event Reg Blk */ - u32 pm1a_control_block; /* [V1] 32-bit port address of Power Mgt 1a Control Reg Blk */ - u32 pm1b_control_block; /* [V1] 32-bit port address of Power Mgt 1b Control Reg Blk */ - u32 pm2_control_block; /* [V1] 32-bit port address of Power Mgt 2 Control Reg Blk */ - u32 pm_timer_block; /* [V1] 32-bit port address of Power Mgt Timer Ctrl Reg Blk */ - u32 gpe0_block; /* [V1] 32-bit port address of General Purpose Event 0 Reg Blk */ - u32 gpe1_block; /* [V1] 32-bit port address of General Purpose Event 1 Reg Blk */ - u8 pm1_event_length; /* [V1] Byte Length of ports at pm1x_event_block */ - u8 pm1_control_length; /* [V1] Byte Length of ports at pm1x_control_block */ - u8 pm2_control_length; /* [V1] Byte Length of ports at pm2_control_block */ - u8 pm_timer_length; /* [V1] Byte Length of ports at pm_timer_block */ - u8 gpe0_block_length; /* [V1] Byte Length of ports at gpe0_block */ - u8 gpe1_block_length; /* [V1] Byte Length of ports at gpe1_block */ - u8 gpe1_base; /* [V1] Offset in GPE number space where GPE1 events start */ - u8 cst_control; /* [V1] Support for the _CST object and C-States change notification */ - u16 c2_latency; /* [V1] Worst case HW latency to enter/exit C2 state */ - u16 c3_latency; /* [V1] Worst case HW latency to enter/exit C3 state */ - u16 flush_size; /* [V1] Processor memory cache line width, in bytes */ - u16 flush_stride; /* [V1] Number of flush strides that need to be read */ - u8 duty_offset; /* [V1] Processor duty cycle index in processor P_CNT reg */ - u8 duty_width; /* [V1] Processor duty cycle value bit width in P_CNT register */ - u8 day_alarm; /* [V1] Index to day-of-month alarm in RTC CMOS RAM */ - u8 month_alarm; /* [V1] Index to month-of-year alarm in RTC CMOS RAM */ - u8 century; /* [V1] Index to century in RTC CMOS RAM */ - u16 boot_flags; /* [V3] IA-PC Boot Architecture Flags (see below for individual flags) */ - u8 reserved; /* [V1] Reserved, must be zero */ - u32 flags; /* [V1] Miscellaneous flag bits (see below for individual flags) */ - /* End of Version 1 FADT fields (ACPI 1.0) */ - - struct acpi_generic_address reset_register; /* [V3] 64-bit address of the Reset register */ - u8 reset_value; /* [V3] Value to write to the reset_register port to reset the system */ - u16 arm_boot_flags; /* [V5] ARM-Specific Boot Flags (see below for individual flags) (ACPI 5.1) */ - u8 minor_revision; /* [V5] FADT Minor Revision (ACPI 5.1) */ - u64 Xfacs; /* [V3] 64-bit physical address of FACS */ - u64 Xdsdt; /* [V3] 64-bit physical address of DSDT */ - struct acpi_generic_address xpm1a_event_block; /* [V3] 64-bit Extended Power Mgt 1a Event Reg Blk address */ - struct acpi_generic_address xpm1b_event_block; /* [V3] 64-bit Extended Power Mgt 1b Event Reg Blk address */ - struct acpi_generic_address xpm1a_control_block; /* [V3] 64-bit Extended Power Mgt 1a Control Reg Blk address */ - struct acpi_generic_address xpm1b_control_block; /* [V3] 64-bit Extended Power Mgt 1b Control Reg Blk address */ - struct acpi_generic_address xpm2_control_block; /* [V3] 64-bit Extended Power Mgt 2 Control Reg Blk address */ - struct acpi_generic_address xpm_timer_block; /* [V3] 64-bit Extended Power Mgt Timer Ctrl Reg Blk address */ - struct acpi_generic_address xgpe0_block; /* [V3] 64-bit Extended General Purpose Event 0 Reg Blk address */ - struct acpi_generic_address xgpe1_block; /* [V3] 64-bit Extended General Purpose Event 1 Reg Blk address */ - /* End of Version 3 FADT fields (ACPI 2.0) */ - - struct acpi_generic_address sleep_control; /* [V4] 64-bit Sleep Control register (ACPI 5.0) */ - /* End of Version 4 FADT fields (ACPI 3.0 and ACPI 4.0) (Field was originally reserved in ACPI 3.0) */ - - struct acpi_generic_address sleep_status; /* [V5] 64-bit Sleep Status register (ACPI 5.0) */ - /* End of Version 5 FADT fields (ACPI 5.0) */ - - u64 hypervisor_id; /* [V6] Hypervisor Vendor ID (ACPI 6.0) */ - /* End of Version 6 FADT fields (ACPI 6.0) */ - + struct acpi_table_header header; /* Common ACPI table header */ + u32 facs; /* 32-bit physical address of FACS */ + u32 dsdt; /* 32-bit physical address of DSDT */ + u8 model; /* System Interrupt Model (ACPI 1.0) - not used in ACPI 2.0+ */ + u8 preferred_profile; /* Conveys preferred power management profile to OSPM. */ + u16 sci_interrupt; /* System vector of SCI interrupt */ + u32 smi_command; /* 32-bit Port address of SMI command port */ + u8 acpi_enable; /* Value to write to SMI_CMD to enable ACPI */ + u8 acpi_disable; /* Value to write to SMI_CMD to disable ACPI */ + u8 s4_bios_request; /* Value to write to SMI_CMD to enter S4BIOS state */ + u8 pstate_control; /* Processor performance state control */ + u32 pm1a_event_block; /* 32-bit port address of Power Mgt 1a Event Reg Blk */ + u32 pm1b_event_block; /* 32-bit port address of Power Mgt 1b Event Reg Blk */ + u32 pm1a_control_block; /* 32-bit port address of Power Mgt 1a Control Reg Blk */ + u32 pm1b_control_block; /* 32-bit port address of Power Mgt 1b Control Reg Blk */ + u32 pm2_control_block; /* 32-bit port address of Power Mgt 2 Control Reg Blk */ + u32 pm_timer_block; /* 32-bit port address of Power Mgt Timer Ctrl Reg Blk */ + u32 gpe0_block; /* 32-bit port address of General Purpose Event 0 Reg Blk */ + u32 gpe1_block; /* 32-bit port address of General Purpose Event 1 Reg Blk */ + u8 pm1_event_length; /* Byte Length of ports at pm1x_event_block */ + u8 pm1_control_length; /* Byte Length of ports at pm1x_control_block */ + u8 pm2_control_length; /* Byte Length of ports at pm2_control_block */ + u8 pm_timer_length; /* Byte Length of ports at pm_timer_block */ + u8 gpe0_block_length; /* Byte Length of ports at gpe0_block */ + u8 gpe1_block_length; /* Byte Length of ports at gpe1_block */ + u8 gpe1_base; /* Offset in GPE number space where GPE1 events start */ + u8 cst_control; /* Support for the _CST object and C-States change notification */ + u16 c2_latency; /* Worst case HW latency to enter/exit C2 state */ + u16 c3_latency; /* Worst case HW latency to enter/exit C3 state */ + u16 flush_size; /* Processor memory cache line width, in bytes */ + u16 flush_stride; /* Number of flush strides that need to be read */ + u8 duty_offset; /* Processor duty cycle index in processor P_CNT reg */ + u8 duty_width; /* Processor duty cycle value bit width in P_CNT register */ + u8 day_alarm; /* Index to day-of-month alarm in RTC CMOS RAM */ + u8 month_alarm; /* Index to month-of-year alarm in RTC CMOS RAM */ + u8 century; /* Index to century in RTC CMOS RAM */ + u16 boot_flags; /* IA-PC Boot Architecture Flags (see below for individual flags) */ + u8 reserved; /* Reserved, must be zero */ + u32 flags; /* Miscellaneous flag bits (see below for individual flags) */ + struct acpi_generic_address reset_register; /* 64-bit address of the Reset register */ + u8 reset_value; /* Value to write to the reset_register port to reset the system */ + u16 arm_boot_flags; /* ARM-Specific Boot Flags (see below for individual flags) (ACPI 5.1) */ + u8 minor_revision; /* FADT Minor Revision (ACPI 5.1) */ + u64 Xfacs; /* 64-bit physical address of FACS */ + u64 Xdsdt; /* 64-bit physical address of DSDT */ + struct acpi_generic_address xpm1a_event_block; /* 64-bit Extended Power Mgt 1a Event Reg Blk address */ + struct acpi_generic_address xpm1b_event_block; /* 64-bit Extended Power Mgt 1b Event Reg Blk address */ + struct acpi_generic_address xpm1a_control_block; /* 64-bit Extended Power Mgt 1a Control Reg Blk address */ + struct acpi_generic_address xpm1b_control_block; /* 64-bit Extended Power Mgt 1b Control Reg Blk address */ + struct acpi_generic_address xpm2_control_block; /* 64-bit Extended Power Mgt 2 Control Reg Blk address */ + struct acpi_generic_address xpm_timer_block; /* 64-bit Extended Power Mgt Timer Ctrl Reg Blk address */ + struct acpi_generic_address xgpe0_block; /* 64-bit Extended General Purpose Event 0 Reg Blk address */ + struct acpi_generic_address xgpe1_block; /* 64-bit Extended General Purpose Event 1 Reg Blk address */ + struct acpi_generic_address sleep_control; /* 64-bit Sleep Control register (ACPI 5.0) */ + struct acpi_generic_address sleep_status; /* 64-bit Sleep Status register (ACPI 5.0) */ + u64 hypervisor_id; /* Hypervisor Vendor ID (ACPI 6.0) */ }; /* Masks for FADT IA-PC Boot Architecture Flags (boot_flags) [Vx]=Introduced in this FADT revision */ @@ -311,8 +301,8 @@ struct acpi_table_fadt { /* Masks for FADT ARM Boot Architecture Flags (arm_boot_flags) ACPI 5.1 */ -#define ACPI_FADT_PSCI_COMPLIANT (1) /* 00: [V5] PSCI 0.2+ is implemented */ -#define ACPI_FADT_PSCI_USE_HVC (1<<1) /* 01: [V5] HVC must be used instead of SMC as the PSCI conduit */ +#define ACPI_FADT_PSCI_COMPLIANT (1) /* 00: [V5+] PSCI 0.2+ is implemented */ +#define ACPI_FADT_PSCI_USE_HVC (1<<1) /* 01: [V5+] HVC must be used instead of SMC as the PSCI conduit */ /* Masks for FADT flags */ @@ -409,34 +399,20 @@ struct acpi_table_desc { * match the expected length. In other words, the length of the * FADT is the bottom line as to what the version really is. * - * NOTE: There is no officialy released V2 of the FADT. This - * version was used only for prototyping and testing during the - * 32-bit to 64-bit transition. V3 was the first official 64-bit - * version of the FADT. - * - * Update this list of defines when a new version of the FADT is - * added to the ACPI specification. Note that the FADT version is - * only incremented when new fields are appended to the existing - * version. Therefore, the FADT version is competely independent - * from the version of the ACPI specification where it is - * defined. - * - * For reference, the various FADT lengths are as follows: - * FADT V1 size: 0x074 ACPI 1.0 - * FADT V3 size: 0x0F4 ACPI 2.0 - * FADT V4 size: 0x100 ACPI 3.0 and ACPI 4.0 - * FADT V5 size: 0x10C ACPI 5.0 - * FADT V6 size: 0x114 ACPI 6.0 + * For reference, the values below are as follows: + * FADT V1 size: 0x074 + * FADT V2 size: 0x084 + * FADT V3 size: 0x0F4 + * FADT V4 size: 0x0F4 + * FADT V5 size: 0x10C + * FADT V6 size: 0x114 */ -#define ACPI_FADT_V1_SIZE (u32) (ACPI_FADT_OFFSET (flags) + 4) /* ACPI 1.0 */ -#define ACPI_FADT_V3_SIZE (u32) (ACPI_FADT_OFFSET (sleep_control)) /* ACPI 2.0 */ -#define ACPI_FADT_V4_SIZE (u32) (ACPI_FADT_OFFSET (sleep_status)) /* ACPI 3.0 and ACPI 4.0 */ -#define ACPI_FADT_V5_SIZE (u32) (ACPI_FADT_OFFSET (hypervisor_id)) /* ACPI 5.0 */ -#define ACPI_FADT_V6_SIZE (u32) (sizeof (struct acpi_table_fadt)) /* ACPI 6.0 */ +#define ACPI_FADT_V1_SIZE (u32) (ACPI_FADT_OFFSET (flags) + 4) +#define ACPI_FADT_V2_SIZE (u32) (ACPI_FADT_OFFSET (minor_revision) + 1) +#define ACPI_FADT_V3_SIZE (u32) (ACPI_FADT_OFFSET (sleep_control)) +#define ACPI_FADT_V5_SIZE (u32) (ACPI_FADT_OFFSET (hypervisor_id)) +#define ACPI_FADT_V6_SIZE (u32) (sizeof (struct acpi_table_fadt)) -/* Update these when new FADT versions are added */ - -#define ACPI_FADT_MAX_VERSION 6 #define ACPI_FADT_CONFORMANCE "ACPI 6.1 (FADT version 6)" #endif /* __ACTBL_H__ */ From 709fb1f961ea5c287107c3f903e81c9529224c8b Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 14 Nov 2016 12:31:49 -0800 Subject: [PATCH 212/305] xtensa: wire up new pkey_{mprotect,alloc,free} syscalls Signed-off-by: Max Filippov --- arch/xtensa/include/uapi/asm/unistd.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h index de9b14b2d348..cd400af4a6b2 100644 --- a/arch/xtensa/include/uapi/asm/unistd.h +++ b/arch/xtensa/include/uapi/asm/unistd.h @@ -767,7 +767,14 @@ __SYSCALL(346, sys_preadv2, 6) #define __NR_pwritev2 347 __SYSCALL(347, sys_pwritev2, 6) -#define __NR_syscall_count 348 +#define __NR_pkey_mprotect 348 +__SYSCALL(348, sys_pkey_mprotect, 4) +#define __NR_pkey_alloc 349 +__SYSCALL(349, sys_pkey_alloc, 2) +#define __NR_pkey_free 350 +__SYSCALL(350, sys_pkey_free, 1) + +#define __NR_syscall_count 351 /* * sysxtensa syscall handler From 1bc2f5fac34535aeb3878ce32a762a221be7a851 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Mon, 14 Nov 2016 12:55:15 +0100 Subject: [PATCH 213/305] ARM: dts: omap5: board-common: fix wrong SMPS6 (VDD-DDR3) voltage DDR3L is usually specified as JEDEC standard 1.35V(1.28V~1.45V) & 1.5V(1.425V~1.575V) Therefore setting smps6 regulator to 1.2V is definitively below minimum. It appears that real world chips are more forgiving than data sheets indicate, but let's set the regulator right. Note: a board that uses other voltages (DDR with 1.5V) can overwrite by referencing &smps6_reg. Signed-off-by: H. Nikolaus Schaller Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-board-common.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 60a33c4b7b82..4caadb253249 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -476,8 +476,8 @@ smps6_reg: smps6 { /* VDD_DDR3 - over VDD_SMPS6 */ regulator-name = "smps6"; - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; + regulator-min-microvolt = <1350000>; + regulator-max-microvolt = <1350000>; regulator-always-on; regulator-boot-on; }; From 6ff1a25318ebf688ef9593fe09cd449f6fb4ad31 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 14 Nov 2016 21:46:47 +0100 Subject: [PATCH 214/305] ALSA: usb-audio: Fix use-after-free of usb_device at disconnect The usb-audio driver implements the deferred device disconnection for the device in use. In this mode, the disconnection callback returns immediately while the actual ALSA card object removal happens later when all files get closed. As Shuah reported, this code flow, however, leads to a use-after-free, detected by KASAN: BUG: KASAN: use-after-free in snd_usb_audio_free+0x134/0x160 [snd_usb_audio] at addr ffff8801c863ce10 Write of size 8 by task pulseaudio/2244 Call Trace: [] dump_stack+0x67/0x94 [] kasan_object_err+0x21/0x70 [] kasan_report_error+0x1fa/0x4e0 [] ? kasan_slab_free+0x87/0xb0 [] __asan_report_store8_noabort+0x43/0x50 [] ? snd_usb_audio_free+0x134/0x160 [snd_usb_audio] [] snd_usb_audio_free+0x134/0x160 [snd_usb_audio] [] snd_usb_audio_dev_free+0x31/0x40 [snd_usb_audio] [] __snd_device_free+0x12a/0x210 [] snd_device_free_all+0x85/0xd0 [] release_card_device+0x34/0x130 [] device_release+0x76/0x1e0 [] kobject_release+0x107/0x370 ..... Object at ffff8801c863cc80, in cache kmalloc-2048 size: 2048 Allocated: [] save_stack_trace+0x2b/0x50 [] save_stack+0x46/0xd0 [] kasan_kmalloc+0xad/0xe0 [] kmem_cache_alloc_trace+0xfa/0x240 [] usb_alloc_dev+0x57/0xc90 [] hub_event+0xf1d/0x35f0 .... Freed: [] save_stack_trace+0x2b/0x50 [] save_stack+0x46/0xd0 [] kasan_slab_free+0x71/0xb0 [] kfree+0xd9/0x280 [] usb_release_dev+0xde/0x110 [] device_release+0x76/0x1e0 .... It's the code trying to clear drvdata of the assigned usb_device where the usb_device itself was already released in usb_release_dev() after the disconnect callback. This patch fixes it by checking whether the code path is via the disconnect callback, i.e. chip->shutdown flag is set. Fixes: 79289e24194a ('ALSA: usb-audio: Refer to chip->usb_id for quirks...') Reported-and-tested-by: Shuah Khan Cc: # v4.6+ Signed-off-by: Takashi Iwai --- sound/usb/card.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/card.c b/sound/usb/card.c index 9e5276d6dda0..2ddc034673a8 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -315,7 +315,8 @@ static int snd_usb_audio_free(struct snd_usb_audio *chip) snd_usb_endpoint_free(ep); mutex_destroy(&chip->mutex); - dev_set_drvdata(&chip->dev->dev, NULL); + if (!atomic_read(&chip->shutdown)) + dev_set_drvdata(&chip->dev->dev, NULL); kfree(chip); return 0; } From 6ca595a70bc46e1a0eea3ee0681360f41555bfd9 Mon Sep 17 00:00:00 2001 From: Hoan Tran Date: Mon, 14 Nov 2016 11:19:02 -0800 Subject: [PATCH 215/305] mailbox: PCC: Fix lockdep warning when request PCC channel This patch fixes the lockdep warning below DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)) ------------[ cut here ]------------ WARNING: CPU: 1 PID: 1 at linux-next/kernel/locking/lockdep.c:2876 lockdep_trace_alloc+0xe0/0xf0 Modules linked in: CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.8.0-11756-g86c5152 #46 ... Call trace: Exception stack(0xffff8007da837890 to 0xffff8007da8379c0) 7880: ffff8007da834000 0001000000000000 78a0: ffff8007da837a70 ffff0000081111a0 00000000600000c5 000000000000003d 78c0: 9374bc6a7f3c7832 0000000000381878 ffff000009db7ab8 000000000000002f 78e0: ffff00000811aabc ffff000008be2548 ffff8007da837990 ffff00000811adf8 7900: ffff8007da834000 00000000024080c0 00000000000000c0 ffff000009021000 7920: 0000000000000000 0000000000000000 ffff000008c8f7c8 ffff8007da579810 7940: 000000000000002f ffff8007da858000 0000000000000000 0000000000000001 7960: 0000000000000001 0000000000000000 ffff00000811a468 0000000000000002 7980: 656c62617369645f 0000000000038187 00000000000000ee ffff8007da837850 79a0: ffff000009db50c0 ffff000009db569d 0000000000000006 ffff000089db568f [] lockdep_trace_alloc+0xe0/0xf0 [] __kmalloc_track_caller+0x50/0x250 [] devres_alloc_node+0x28/0x60 [] devm_request_threaded_irq+0x50/0xe0 [] pcc_mbox_request_channel+0x110/0x170 [] acpi_cppc_processor_probe+0x264/0x414 [] __acpi_processor_start+0x28/0xa0 [] acpi_processor_start+0x44/0x54 [] driver_probe_device+0x1fc/0x2b0 [] __driver_attach+0xb4/0xc0 [] bus_for_each_dev+0x5c/0xa0 [] driver_attach+0x20/0x30 [] bus_add_driver+0x110/0x230 [] driver_register+0x60/0x100 [] acpi_processor_driver_init+0x2c/0xb0 [] do_one_initcall+0x38/0x130 [] kernel_init_freeable+0x210/0x2b4 [] kernel_init+0x10/0x110 [] ret_from_fork+0x10/0x50 It's because the spinlock inside pcc_mbox_request_channel() is kept too long. This patch releases spinlock before request_irq() and free_irq() to fix this issue as spinlock is only needed to protect the channel data. Signed-off-by: Hoan Tran Reviewed-by: Prashanth Prakash Signed-off-by: Rafael J. Wysocki --- drivers/mailbox/pcc.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c index 08c87fadca8c..1f32688c312d 100644 --- a/drivers/mailbox/pcc.c +++ b/drivers/mailbox/pcc.c @@ -65,6 +65,7 @@ #include #include #include +#include #include "mailbox.h" @@ -267,6 +268,8 @@ struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl, if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone) chan->txdone_method |= TXDONE_BY_ACK; + spin_unlock_irqrestore(&chan->lock, flags); + if (pcc_doorbell_irq[subspace_id] > 0) { int rc; @@ -275,12 +278,11 @@ struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl, if (unlikely(rc)) { dev_err(dev, "failed to register PCC interrupt %d\n", pcc_doorbell_irq[subspace_id]); + pcc_mbox_free_channel(chan); chan = ERR_PTR(rc); } } - spin_unlock_irqrestore(&chan->lock, flags); - return chan; } EXPORT_SYMBOL_GPL(pcc_mbox_request_channel); @@ -304,20 +306,19 @@ void pcc_mbox_free_channel(struct mbox_chan *chan) return; } + if (pcc_doorbell_irq[id] > 0) + devm_free_irq(chan->mbox->dev, pcc_doorbell_irq[id], chan); + spin_lock_irqsave(&chan->lock, flags); chan->cl = NULL; chan->active_req = NULL; if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK)) chan->txdone_method = TXDONE_BY_POLL; - if (pcc_doorbell_irq[id] > 0) - devm_free_irq(chan->mbox->dev, pcc_doorbell_irq[id], chan); - spin_unlock_irqrestore(&chan->lock, flags); } EXPORT_SYMBOL_GPL(pcc_mbox_free_channel); - /** * pcc_send_data - Called from Mailbox Controller code. Used * here only to ring the channel doorbell. The PCC client From 5d0d4b91bf627f14f95167b738d524156c9d440b Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Sun, 13 Nov 2016 13:01:32 +0800 Subject: [PATCH 216/305] Revert "bnx2: Reset device during driver initialization" This reverts commit 3e1be7ad2d38c6bd6aeef96df9bd0a7822f4e51c. When people build bnx2 driver into kernel, it will fail to detect and load firmware because firmware is contained in initramfs and initramfs has not been uncompressed yet during do_initcalls. So revert commit 3e1be7a and work out a new way in the later patch. Signed-off-by: Baoquan He Acked-by: Paul Menzel Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index b3791b394715..c55797291b57 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -6361,6 +6361,10 @@ bnx2_open(struct net_device *dev) struct bnx2 *bp = netdev_priv(dev); int rc; + rc = bnx2_request_firmware(bp); + if (rc < 0) + goto out; + netif_carrier_off(dev); bnx2_disable_int(bp); @@ -6429,6 +6433,7 @@ open_err: bnx2_free_irq(bp); bnx2_free_mem(bp); bnx2_del_napi(bp); + bnx2_release_firmware(bp); goto out; } @@ -8575,12 +8580,6 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); - rc = bnx2_request_firmware(bp); - if (rc < 0) - goto error; - - - bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET); memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN); dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | @@ -8613,7 +8612,6 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; error: - bnx2_release_firmware(bp); pci_iounmap(pdev, bp->regview); pci_release_regions(pdev); pci_disable_device(pdev); From 6df77862f63f389df3b1ad879738e04440d7385d Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Sun, 13 Nov 2016 13:01:33 +0800 Subject: [PATCH 217/305] bnx2: Wait for in-flight DMA to complete at probe stage In-flight DMA from 1st kernel could continue going in kdump kernel. New io-page table has been created before bnx2 does reset at open stage. We have to wait for the in-flight DMA to complete to avoid it look up into the newly created io-page table at probe stage. Suggested-by: Michael Chan Signed-off-by: Baoquan He Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2.c | 38 +++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index c55797291b57..1f7034d739b0 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -49,6 +49,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_CNIC) #define BCM_CNIC 1 @@ -4764,15 +4765,16 @@ bnx2_setup_msix_tbl(struct bnx2 *bp) BNX2_WR(bp, BNX2_PCI_GRC_WINDOW3_ADDR, BNX2_MSIX_PBA_ADDR); } -static int -bnx2_reset_chip(struct bnx2 *bp, u32 reset_code) +static void +bnx2_wait_dma_complete(struct bnx2 *bp) { u32 val; - int i, rc = 0; - u8 old_port; + int i; - /* Wait for the current PCI transaction to complete before - * issuing a reset. */ + /* + * Wait for the current PCI transaction to complete before + * issuing a reset. + */ if ((BNX2_CHIP(bp) == BNX2_CHIP_5706) || (BNX2_CHIP(bp) == BNX2_CHIP_5708)) { BNX2_WR(bp, BNX2_MISC_ENABLE_CLR_BITS, @@ -4796,6 +4798,21 @@ bnx2_reset_chip(struct bnx2 *bp, u32 reset_code) } } + return; +} + + +static int +bnx2_reset_chip(struct bnx2 *bp, u32 reset_code) +{ + u32 val; + int i, rc = 0; + u8 old_port; + + /* Wait for the current PCI transaction to complete before + * issuing a reset. */ + bnx2_wait_dma_complete(bp); + /* Wait for the firmware to tell us it is ok to issue a reset. */ bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT0 | reset_code, 1, 1); @@ -8580,6 +8597,15 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); + /* + * In-flight DMA from 1st kernel could continue going in kdump kernel. + * New io-page table has been created before bnx2 does reset at open stage. + * We have to wait for the in-flight DMA to complete to avoid it look up + * into the newly created io-page table. + */ + if (is_kdump_kernel()) + bnx2_wait_dma_complete(bp); + memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN); dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | From 5bf35ddfee052d44f39ebaa395d87101c8918405 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 13 Nov 2016 21:44:37 +0800 Subject: [PATCH 218/305] sctp: change sk state only when it has assocs in sctp_shutdown Now when users shutdown a sock with SEND_SHUTDOWN in sctp, even if this sock has no connection (assoc), sk state would be changed to SCTP_SS_CLOSING, which is not as we expect. Besides, after that if users try to listen on this sock, kernel could even panic when it dereference sctp_sk(sk)->bind_hash in sctp_inet_listen, as bind_hash is null when sock has no assoc. This patch is to move sk state change after checking sk assocs is not empty, and also merge these two if() conditions and reduce indent level. Fixes: d46e416c11c8 ("sctp: sctp should change socket state when shutdown is received") Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index faa48ff5cf4b..f23ad913dc7a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4285,19 +4285,18 @@ static void sctp_shutdown(struct sock *sk, int how) { struct net *net = sock_net(sk); struct sctp_endpoint *ep; - struct sctp_association *asoc; if (!sctp_style(sk, TCP)) return; - if (how & SEND_SHUTDOWN) { + ep = sctp_sk(sk)->ep; + if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) { + struct sctp_association *asoc; + sk->sk_state = SCTP_SS_CLOSING; - ep = sctp_sk(sk)->ep; - if (!list_empty(&ep->asocs)) { - asoc = list_entry(ep->asocs.next, - struct sctp_association, asocs); - sctp_primitive_SHUTDOWN(net, asoc, NULL); - } + asoc = list_entry(ep->asocs.next, + struct sctp_association, asocs); + sctp_primitive_SHUTDOWN(net, asoc, NULL); } } From 977c1f9c8c022d0173181766b34a0db3705265a4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 7 Nov 2016 15:14:20 -0800 Subject: [PATCH 219/305] ftrace: Ignore FTRACE_FL_DISABLED while walking dyn_ftrace records ftrace_shutdown() checks for sanity of ftrace records and if dyn_ftrace->flags is not zero, it will warn. It can happen that 'flags' are set to FTRACE_FL_DISABLED at this point, since some module was loaded, but before ftrace_module_enable() cleared the flags for this module. In other words the module.c is doing: ftrace_module_init(mod); // calls ftrace_update_code() that sets flags=FTRACE_FL_DISABLED ... // here ftrace_shutdown() is called that warns, since err = prepare_coming_module(mod); // didn't have a chance to clear FTRACE_FL_DISABLED Fix it by ignoring disabled records. It's similar to what __ftrace_hash_rec_update() is already doing. Link: http://lkml.kernel.org/r/1478560460-3818619-1-git-send-email-ast@fb.com Cc: stable@vger.kernel.org Fixes: b7ffffbb46f2 "ftrace: Add infrastructure for delayed enabling of module functions" Signed-off-by: Alexei Starovoitov Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2050a7652a86..326498baab83 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2763,7 +2763,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) struct dyn_ftrace *rec; do_for_each_ftrace_rec(pg, rec) { - if (FTRACE_WARN_ON_ONCE(rec->flags)) + if (FTRACE_WARN_ON_ONCE(rec->flags & ~FTRACE_FL_DISABLED)) pr_warn(" %pS flags:%lx\n", (void *)rec->ip, rec->flags); } while_for_each_ftrace_rec(); From 546fece4eae871f033925ccf0ff2b740725ae915 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 14 Nov 2016 16:31:49 -0500 Subject: [PATCH 220/305] ftrace: Add more checks for FTRACE_FL_DISABLED in processing ip records When a module is first loaded and its function ip records are added to the ftrace list of functions to modify, they are set to DISABLED, as their text is still in a read only state. When the module is fully loaded, and can be updated, the flag is cleared, and if their's any functions that should be tracing them, it is updated at that moment. But there's several locations that do record accounting and should ignore records that are marked as disabled, or they can cause issues. Alexei already fixed one location, but others need to be addressed. Cc: stable@vger.kernel.org Fixes: b7ffffbb46f2 "ftrace: Add infrastructure for delayed enabling of module functions" Reported-by: Alexei Starovoitov Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 326498baab83..da87b3cba5b3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1862,6 +1862,10 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, /* Update rec->flags */ do_for_each_ftrace_rec(pg, rec) { + + if (rec->flags & FTRACE_FL_DISABLED) + continue; + /* We need to update only differences of filter_hash */ in_old = !!ftrace_lookup_ip(old_hash, rec->ip); in_new = !!ftrace_lookup_ip(new_hash, rec->ip); @@ -1884,6 +1888,10 @@ rollback: /* Roll back what we did above */ do_for_each_ftrace_rec(pg, rec) { + + if (rec->flags & FTRACE_FL_DISABLED) + continue; + if (rec == end) goto err_out; @@ -2397,6 +2405,10 @@ void __weak ftrace_replace_code(int enable) return; do_for_each_ftrace_rec(pg, rec) { + + if (rec->flags & FTRACE_FL_DISABLED) + continue; + failed = __ftrace_replace_code(rec, enable); if (failed) { ftrace_bug(failed, rec); @@ -3598,6 +3610,10 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod) goto out_unlock; do_for_each_ftrace_rec(pg, rec) { + + if (rec->flags & FTRACE_FL_DISABLED) + continue; + if (ftrace_match_record(rec, &func_g, mod_match, exclude_mod)) { ret = enter_record(hash, rec, clear_filter); if (ret < 0) { @@ -3793,6 +3809,9 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, do_for_each_ftrace_rec(pg, rec) { + if (rec->flags & FTRACE_FL_DISABLED) + continue; + if (!ftrace_match_record(rec, &func_g, NULL, 0)) continue; @@ -4685,6 +4704,9 @@ ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer) do_for_each_ftrace_rec(pg, rec) { + if (rec->flags & FTRACE_FL_DISABLED) + continue; + if (ftrace_match_record(rec, &func_g, NULL, 0)) { /* if it is in the array */ exists = false; From c51e424dc79e1428afc4d697cdb6a07f7af70cbf Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 13 Nov 2016 17:50:35 -0800 Subject: [PATCH 221/305] net: stmmac: Fix lack of link transition for fixed PHYs Commit 52f95bbfcf72 ("stmmac: fix adjust link call in case of a switch is attached") added some logic to avoid polling the fixed PHY and therefore invoking the adjust_link callback more than once, since this is a fixed PHY and link events won't be generated. This works fine the first time, because we start with phydev->irq = PHY_POLL, so we call adjust_link, then we set phydev->irq = PHY_IGNORE_INTERRUPT and we stop polling the PHY. Now, if we called ndo_close(), which calls both phy_stop() and does an explicit netif_carrier_off(), we end up with a link down. Upon calling ndo_open() again, despite starting the PHY state machine, we have PHY_IGNORE_INTERRUPT set, and we generate no link event at all, so the link is permanently down. Fixes: 52f95bbfcf72 ("stmmac: fix adjust link call in case of a switch is attached") Signed-off-by: Florian Fainelli Acked-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 48e71fad4210..e2c94ec4edd0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -880,6 +880,13 @@ static int stmmac_init_phy(struct net_device *dev) return -ENODEV; } + /* stmmac_adjust_link will change this to PHY_IGNORE_INTERRUPT to avoid + * subsequent PHY polling, make sure we force a link transition if + * we have a UP/DOWN/UP transition + */ + if (phydev->is_pseudo_fixed_link) + phydev->irq = PHY_POLL; + pr_debug("stmmac_init_phy: %s: attached to PHY (UID 0x%x)" " Link = %d\n", dev->name, phydev->phy_id, phydev->link); From ac571de999e14b87890cb960ad6f03fbdde6abc8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 14 Nov 2016 11:26:32 +0100 Subject: [PATCH 222/305] mlxsw: spectrum_router: Flush FIB tables during fini Since commit b45f64d16d45 ("mlxsw: spectrum_router: Use FIB notifications instead of switchdev calls") we reflect to the device the entire FIB table and not only FIBs that point to netdevs created by the driver. During module removal, FIBs of the second type are removed following NETDEV_UNREGISTER events sent. The other FIBs are still present in both the driver's cache and the device's table. Fix this by iterating over all the FIB tables in the device and flush them. There's no need to take locks, as we're the only writer. Fixes: b45f64d16d45 ("mlxsw: spectrum_router: Use FIB notifications instead of switchdev calls") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index cbeeddd70c5a..e83072da6272 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -594,8 +594,11 @@ static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) return 0; } +static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp); + static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp) { + mlxsw_sp_router_fib_flush(mlxsw_sp); kfree(mlxsw_sp->router.vrs); } @@ -1867,18 +1870,18 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } -static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_resources *resources; struct mlxsw_sp_fib_entry *fib_entry; struct mlxsw_sp_fib_entry *tmp; struct mlxsw_sp_vr *vr; int i; - int err; resources = mlxsw_core_resources_get(mlxsw_sp->core); for (i = 0; i < resources->max_virtual_routers; i++) { vr = &mlxsw_sp->router.vrs[i]; + if (!vr->used) continue; @@ -1894,6 +1897,13 @@ static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) break; } } +} + +static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + mlxsw_sp_router_fib_flush(mlxsw_sp); mlxsw_sp->router.aborted = true; err = mlxsw_sp_router_set_abort_trap(mlxsw_sp); if (err) From e123386bc31bbf467dc558f2f919de0b8b4ba58c Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 7 Nov 2016 14:32:02 -0500 Subject: [PATCH 223/305] tile: handle __ro_after_init like parisc does The tile architecture already marks RO_DATA as read-only in the kernel, so grouping RO_AFTER_INIT_DATA with RO_DATA, as is done by default, means the kernel faults in init when it tries to write to RO_AFTER_INIT_DATA. For now, just arrange that __ro_after_init is handled like __write_once, i.e. __read_mostly. Reviewed-by: Kees Cook Signed-off-by: Chris Metcalf --- arch/tile/include/asm/cache.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h index 6160761d5f61..4810e48dbbbf 100644 --- a/arch/tile/include/asm/cache.h +++ b/arch/tile/include/asm/cache.h @@ -61,4 +61,7 @@ */ #define __write_once __read_mostly +/* __ro_after_init is the generic name for the tile arch __write_once. */ +#define __ro_after_init __read_mostly + #endif /* _ASM_TILE_CACHE_H */ From 60f8339eb388df8a46f8eb4282ff0e15f08f218c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 12 Nov 2016 15:01:09 +0100 Subject: [PATCH 224/305] gpio: do not double-check direction on sleeping chips When locking a GPIO line as IRQ, we go to lengths to double-check that the line is really set as input before marking it as used for IRQ. This is not good on GPIO chips that can sleep, because this function is called in IRQ-safe context. Just skip this if it can't be checked quickly. Currently this happens on sleeping expanders such as STMPE or TC3589x: BUG: scheduling while atomic: swapper/1/0x00000002 Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 4.9.0-rc1+ #38 Hardware name: Nomadik STn8815 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (__schedule_bug+0x54/0x80) [] (__schedule_bug) from [] (__schedule+0x3a0/0x460) [] (__schedule) from [] (schedule+0x54/0xb8) (...) This patch fixes that problem and relies on the direction read from the chip when it was added. Cc: stable@vger.kernel.org Fixes: 9c10280d85c1 ("gpio: flush direction status in gpiochip_lock_as_irq()") Cc: Patrice Chotard Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 93ed0e00c578..868128a676ba 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2737,8 +2737,11 @@ int gpiochip_lock_as_irq(struct gpio_chip *chip, unsigned int offset) if (IS_ERR(desc)) return PTR_ERR(desc); - /* Flush direction if something changed behind our back */ - if (chip->get_direction) { + /* + * If it's fast: flush the direction setting if something changed + * behind our back + */ + if (!chip->can_sleep && chip->get_direction) { int dir = chip->get_direction(chip, offset); if (dir) From 220a04f0e53276eb3da666174bcf97489fd8644e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 14 Nov 2016 15:10:29 +0100 Subject: [PATCH 225/305] gpio: tc3589x: fix up .get_direction() The bit in the TC3589x direction register is 0 for input and 1 for output, but the gpiolib expects the reverse. Fix up the logic. Cc: stable@vger.kernel.org Fixes: 14063d71e5e6 ("gpio: tc3589x: add .get_direction() and small cleanup") Signed-off-by: Linus Walleij --- drivers/gpio/gpio-tc3589x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-tc3589x.c b/drivers/gpio/gpio-tc3589x.c index 5a5a6cb00eea..d6e21f1a70a9 100644 --- a/drivers/gpio/gpio-tc3589x.c +++ b/drivers/gpio/gpio-tc3589x.c @@ -97,7 +97,7 @@ static int tc3589x_gpio_get_direction(struct gpio_chip *chip, if (ret < 0) return ret; - return !!(ret & BIT(pos)); + return !(ret & BIT(pos)); } static int tc3589x_gpio_set_single_ended(struct gpio_chip *chip, From c0a36013639b06760f7c2c21a8387eac855432e1 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 15 Nov 2016 15:28:33 +1100 Subject: [PATCH 226/305] powerpc/64: Fix setting of AIL in hypervisor mode Commit d3cbff1b5 "powerpc: Put exception configuration in a common place" broke the setting of the AIL bit (which enables taking exceptions with the MMU still on) on all processors, moving it incorrectly to a function called only on the boot CPU. This was correct for the guest case but not when running in hypervisor mode. This fixes it by partially reverting that commit, putting the setting back in cpu_ready_for_interrupts() Fixes: d3cbff1b5a90 ("powerpc: Put exception configuration in a common place") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 7ac8e6eaab5b..8d586cff8a41 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -226,17 +226,25 @@ static void __init configure_exceptions(void) if (firmware_has_feature(FW_FEATURE_OPAL)) opal_configure_cores(); - /* Enable AIL if supported, and we are in hypervisor mode */ - if (early_cpu_has_feature(CPU_FTR_HVMODE) && - early_cpu_has_feature(CPU_FTR_ARCH_207S)) { - unsigned long lpcr = mfspr(SPRN_LPCR); - mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); - } + /* AIL on native is done in cpu_ready_for_interrupts() */ } } static void cpu_ready_for_interrupts(void) { + /* + * Enable AIL if supported, and we are in hypervisor mode. This + * is called once for every processor. + * + * If we are not in hypervisor mode the job is done once for + * the whole partition in configure_exceptions(). + */ + if (early_cpu_has_feature(CPU_FTR_HVMODE) && + early_cpu_has_feature(CPU_FTR_ARCH_207S)) { + unsigned long lpcr = mfspr(SPRN_LPCR); + mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); + } + /* Set IR and DR in PACA MSR */ get_paca()->kernel_msr = MSR_KERNEL; } From 5bf7b6e86f29f064979d7b3e6dd21c5dd1feb855 Mon Sep 17 00:00:00 2001 From: Loic Pallardy Date: Tue, 15 Nov 2016 09:47:00 +0100 Subject: [PATCH 227/305] ARM: dts: STiH410-b2260: Fix typo in spi0 chipselect definition Change cs-gpio to cs-gpios. Signed-off-by: Loic Pallardy Acked-by: Patrice Chotard --- arch/arm/boot/dts/stih410-b2260.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/stih410-b2260.dts b/arch/arm/boot/dts/stih410-b2260.dts index ef2ff2f518f6..7fb507fcba7e 100644 --- a/arch/arm/boot/dts/stih410-b2260.dts +++ b/arch/arm/boot/dts/stih410-b2260.dts @@ -74,7 +74,7 @@ /* Low speed expansion connector */ spi0: spi@9844000 { label = "LS-SPI0"; - cs-gpio = <&pio30 3 0>; + cs-gpios = <&pio30 3 0>; status = "okay"; }; From 59c3b76cc61d1d676f965c192cc7969aa5cb2744 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 18 Aug 2016 09:10:44 +0200 Subject: [PATCH 228/305] fuse: fix fuse_write_end() if zero bytes were copied If pos is at the beginning of a page and copied is zero then page is not zeroed but is marked uptodate. Fix by skipping everything except unlock/put of page if zero bytes were copied. Reported-by: Al Viro Fixes: 6b12c1b37e55 ("fuse: Implement write_begin/write_end callbacks") Cc: # v3.15+ Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index abc66a6237fd..2401c5dabb2a 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1985,6 +1985,10 @@ static int fuse_write_end(struct file *file, struct address_space *mapping, { struct inode *inode = page->mapping->host; + /* Haven't copied anything? Skip zeroing, size extending, dirtying. */ + if (!copied) + goto unlock; + if (!PageUptodate(page)) { /* Zero any unwritten bytes at the end of the page */ size_t endoff = (pos + copied) & ~PAGE_MASK; @@ -1995,6 +1999,8 @@ static int fuse_write_end(struct file *file, struct address_space *mapping, fuse_write_update_size(inode, pos + copied); set_page_dirty(page); + +unlock: unlock_page(page); put_page(page); From 24c66dfd569c4744fc43aea638155ad2dc1499d8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 15 Nov 2016 13:55:59 +0000 Subject: [PATCH 229/305] ARM: fix backtrace Recent kernels have changed their behaviour to be more inconsistent when handling printk continuations. With todays kernels, the output looks sane on the console, but dmesg splits individual printk()s which do not have the KERN_CONT prefix into separate lines. Since the assembly code is not trivial to add the KERN_CONT, and we ideally want to avoid using KERN_CONT (as multiple printk()s can race between different threads), convert the assembly dumping the register values to C code, and have the C code build the output a line at a time before dumping to the console. This avoids the KERN_CONT issue, and also avoids situations where the output is intermixed with other console activity. Signed-off-by: Russell King --- arch/arm/kernel/traps.c | 20 ++++++++++++++++++++ arch/arm/lib/backtrace.S | 37 +++---------------------------------- 2 files changed, 23 insertions(+), 34 deletions(-) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index bc698383e822..9688ec0c6ef4 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -74,6 +74,26 @@ void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long dump_mem("", "Exception stack", frame + 4, frame + 4 + sizeof(struct pt_regs)); } +void dump_backtrace_stm(u32 *stack, u32 instruction) +{ + char str[80], *p; + unsigned int x; + int reg; + + for (reg = 10, x = 0, p = str; reg >= 0; reg--) { + if (instruction & BIT(reg)) { + p += sprintf(p, " r%d:%08x", reg, *stack--); + if (++x == 6) { + x = 0; + p = str; + printk("%s\n", str); + } + } + } + if (p != str) + printk("%s\n", str); +} + #ifndef CONFIG_ARM_UNWIND /* * Stack pointers should always be within the kernels view of diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S index fab5a50503ae..7d7952e5a3b1 100644 --- a/arch/arm/lib/backtrace.S +++ b/arch/arm/lib/backtrace.S @@ -10,6 +10,7 @@ * 27/03/03 Ian Molton Clean up CONFIG_CPU * */ +#include #include #include .text @@ -83,13 +84,13 @@ for_each_frame: tst frame, mask @ Check for address exceptions teq r3, r1, lsr #11 ldreq r0, [frame, #-8] @ get sp subeq r0, r0, #4 @ point at the last arg - bleq .Ldumpstm @ dump saved registers + bleq dump_backtrace_stm @ dump saved registers 1004: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, ip, lr, pc} ldr r3, .Ldsi @ instruction exists, teq r3, r1, lsr #11 subeq r0, frame, #16 - bleq .Ldumpstm @ dump saved registers + bleq dump_backtrace_stm @ dump saved registers teq sv_fp, #0 @ zero saved fp means beq no_frame @ no further frames @@ -112,38 +113,6 @@ ENDPROC(c_backtrace) .long 1004b, 1006b .popsection -#define instr r4 -#define reg r5 -#define stack r6 - -.Ldumpstm: stmfd sp!, {instr, reg, stack, r7, lr} - mov stack, r0 - mov instr, r1 - mov reg, #10 - mov r7, #0 -1: mov r3, #1 - ARM( tst instr, r3, lsl reg ) - THUMB( lsl r3, reg ) - THUMB( tst instr, r3 ) - beq 2f - add r7, r7, #1 - teq r7, #6 - moveq r7, #0 - adr r3, .Lcr - addne r3, r3, #1 @ skip newline - ldr r2, [stack], #-4 - mov r1, reg - adr r0, .Lfp - bl printk -2: subs reg, reg, #1 - bpl 1b - teq r7, #0 - adrne r0, .Lcr - blne printk - ldmfd sp!, {instr, reg, stack, r7, pc} - -.Lfp: .asciz " r%d:%08x%s" -.Lcr: .asciz "\n" .Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n" .align .Ldsi: .word 0xe92dd800 >> 11 @ stmfd sp!, {... fp, ip, lr, pc} From 544457fa278216c5fcea6a16e9b2ee8aadaca0ca Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 1 Nov 2016 21:58:36 +0100 Subject: [PATCH 230/305] ARM: 8624/1: proc-v7m.S: fix init section name There is no .text.init sections. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/mm/proc-v7m.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index f6d333f09bfe..8dea61640cc1 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -96,7 +96,7 @@ ENTRY(cpu_cm7_proc_fin) ret lr ENDPROC(cpu_cm7_proc_fin) - .section ".text.init", #alloc, #execinstr + .section ".init.text", #alloc, #execinstr __v7m_cm7_setup: mov r8, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC| V7M_SCB_CCR_BP) From 256ff1cf6b44cba9c9c2059f4516259e9319a808 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 15 Nov 2016 14:00:53 +0100 Subject: [PATCH 231/305] ARM: 8628/1: dma-mapping: preallocate DMA-debug hash tables in core_initcall fs_initcall is definitely too late to initialize DMA-debug hash tables, because some drivers might get probed and use DMA mapping framework already in core_initcall. Late initialization of DMA-debug results in false warning about accessing memory, that was not allocated, like this one: ------------[ cut here ]------------ WARNING: CPU: 5 PID: 1 at lib/dma-debug.c:1104 check_unmap+0xa1c/0xe50 exynos-sysmmu 10a60000.sysmmu: DMA-API: device driver tries to free DMA memory it has not allocated [device address=0x000000006ebd0000] [size=16384 bytes] Modules linked in: CPU: 5 PID: 1 Comm: swapper/0 Not tainted 4.9.0-rc5-00028-g39dde3d-dirty #44 Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x20/0x24) [] (show_stack) from [] (dump_stack+0x84/0xa0) [] (dump_stack) from [] (__warn+0x14c/0x180) [] (__warn) from [] (warn_slowpath_fmt+0x48/0x50) [] (warn_slowpath_fmt) from [] (check_unmap+0xa1c/0xe50) [] (check_unmap) from [] (debug_dma_unmap_page+0x98/0xc8) [] (debug_dma_unmap_page) from [] (exynos_iommu_domain_free+0x158/0x380) [] (exynos_iommu_domain_free) from [] (iommu_domain_free+0x34/0x60) [] (iommu_domain_free) from [] (release_iommu_mapping+0x30/0xb8) [] (release_iommu_mapping) from [] (arm_iommu_release_mapping+0x4c/0x50) [] (arm_iommu_release_mapping) from [] (s5p_mfc_probe+0x640/0x80c) [] (s5p_mfc_probe) from [] (platform_drv_probe+0x70/0x148) [] (platform_drv_probe) from [] (driver_probe_device+0x12c/0x6b0) [] (driver_probe_device) from [] (__driver_attach+0x128/0x17c) [] (__driver_attach) from [] (bus_for_each_dev+0x88/0xc8) [] (bus_for_each_dev) from [] (driver_attach+0x34/0x58) [] (driver_attach) from [] (bus_add_driver+0x18c/0x32c) [] (bus_add_driver) from [] (driver_register+0x98/0x148) [] (driver_register) from [] (__platform_driver_register+0x58/0x74) [] (__platform_driver_register) from [] (s5p_mfc_driver_init+0x1c/0x20) [] (s5p_mfc_driver_init) from [] (do_one_initcall+0x64/0x258) [] (do_one_initcall) from [] (kernel_init_freeable+0x3d0/0x4d0) [] (kernel_init_freeable) from [] (kernel_init+0x18/0x134) [] (kernel_init) from [] (ret_from_fork+0x14/0x3c) ---[ end trace dc54c54bd3581296 ]--- This patch moves initialization of DMA-debug to core_initcall. This is safe from the initialization perspective. dma_debug_do_init() internally calls debugfs functions and debugfs also gets initialised at core_initcall(), and that is earlier than arch code in the link order, so it will get initialized just before the DMA-debug. Reported-by: Seung-Woo Kim Signed-off-by: Marek Szyprowski Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ab4f74536057..ab7710002ba6 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1167,7 +1167,7 @@ static int __init dma_debug_do_init(void) dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); return 0; } -fs_initcall(dma_debug_do_init); +core_initcall(dma_debug_do_init); #ifdef CONFIG_ARM_DMA_USE_IOMMU From c6a385539175ebc603da53aafb7753d39089f32e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 14 Nov 2016 19:41:31 +0100 Subject: [PATCH 232/305] kbuild: Steal gcc's pie from the very beginning So Sebastian turned off the PIE for kernel builds but that was too late - Kbuild.include already uses KBUILD_CFLAGS and trying to disable gcc options with, say cc-disable-warning, fails: gcc -D__KERNEL__ -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs ... -Wno-sign-compare -fno-asynchronous-unwind-tables -Wframe-address -c -x c /dev/null -o .31392.tmp /dev/null:1:0: error: code model kernel does not support PIC mode because that returns an error and we can't disable the warning. For example in this case: KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) which leads to gcc issuing all those warnings again. So let's turn off PIE/PIC at the earliest possible moment, when we declare KBUILD_CFLAGS so that cc-disable-warning picks it up too. Also, we need the $(call cc-option ...) because -fno-PIE is supported since gcc v3.4 and our lowest supported gcc version is 3.2 right now. Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Cc: Ben Hutchings Cc: Sebastian Andrzej Siewior Signed-off-by: Michal Marek --- Makefile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 58fc5d935ce6..77ac3f88ec37 100644 --- a/Makefile +++ b/Makefile @@ -399,11 +399,12 @@ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common \ -Werror-implicit-function-declaration \ -Wno-format-security \ - -std=gnu89 + -std=gnu89 $(call cc-option,-fno-PIE) + KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := -KBUILD_AFLAGS := -D__ASSEMBLY__ +KBUILD_AFLAGS := -D__ASSEMBLY__ $(call cc-option,-fno-PIE) KBUILD_AFLAGS_MODULE := -DMODULE KBUILD_CFLAGS_MODULE := -DMODULE KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds @@ -622,8 +623,6 @@ include arch/$(SRCARCH)/Makefile KBUILD_CFLAGS += $(call cc-option,-fno-delete-null-pointer-checks,) KBUILD_CFLAGS += $(call cc-disable-warning,maybe-uninitialized,) KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) -KBUILD_CFLAGS += $(call cc-option,-fno-PIE) -KBUILD_AFLAGS += $(call cc-option,-fno-PIE) ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION KBUILD_CFLAGS += $(call cc-option,-ffunction-sections,) From cb434658a8ff151c221a9ac1d44fb6788100cd0d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Nov 2016 11:39:08 -0500 Subject: [PATCH 233/305] drm/amdgpu/powerplay: drop a redundant NULL check Left over from an earlier rev of the patch. Acked-by: Colin Ian King Cc: Dan Carpenter Cc: Colin King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index b0c929dd8beb..13f2b705ea49 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -1469,8 +1469,6 @@ static int smu7_get_evv_voltages(struct pp_hwmgr *hwmgr) table_info->vddgfx_lookup_table, vv_id, &sclk)) { if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_ClockStretcher)) { - if (table_info == NULL) - return -EINVAL; sclk_table = table_info->vdd_dep_on_sclk; for (j = 1; j < sclk_table->count; j++) { From 1da2c326e43b0834105993d13610647337bbad67 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 11 Nov 2016 11:24:29 +0800 Subject: [PATCH 234/305] drm/amdgpu:fix vpost_needed routine 1,cleanup description/comments 2,for FIJI & passthrough, force post when smc fw version below 22.15 3,for other cases, follow regular rules Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 27 ++++++---------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7ca07e7b25c1..3161d77bf299 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -658,12 +658,10 @@ static bool amdgpu_vpost_needed(struct amdgpu_device *adev) return false; if (amdgpu_passthrough(adev)) { - /* for FIJI: In whole GPU pass-through virtualization case - * old smc fw won't clear some registers (e.g. MEM_SIZE, BIOS_SCRATCH) - * so amdgpu_card_posted return false and driver will incorrectly skip vPost. - * but if we force vPost do in pass-through case, the driver reload will hang. - * whether doing vPost depends on amdgpu_card_posted if smc version is above - * 00160e00 for FIJI. + /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot + * some old smc fw still need driver do vPost otherwise gpu hang, while + * those smc fw version above 22.15 doesn't have this flaw, so we force + * vpost executed for smc version below 22.15 */ if (adev->asic_type == CHIP_FIJI) { int err; @@ -674,22 +672,11 @@ static bool amdgpu_vpost_needed(struct amdgpu_device *adev) return true; fw_ver = *((uint32_t *)adev->pm.fw->data + 69); - if (fw_ver >= 0x00160e00) - return !amdgpu_card_posted(adev); + if (fw_ver < 0x00160e00) + return true; } - } else { - /* in bare-metal case, amdgpu_card_posted return false - * after system reboot/boot, and return true if driver - * reloaded. - * we shouldn't do vPost after driver reload otherwise GPU - * could hang. - */ - if (amdgpu_card_posted(adev)) - return false; } - - /* we assume vPost is neede for all other cases */ - return true; + return !amdgpu_card_posted(adev); } /** From e1fafdcbe0e3e769c6a83317dd845bc99b4fe61d Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 10 Oct 2016 06:14:45 -0700 Subject: [PATCH 235/305] IB/rdmavt: rdmavt can handle non aligned page maps The initial code for rdmavt carried with it a restriction that was a vestige from the qib driver, that to dma map a page it had to be less than a page size. This is not the case on modern hardware, both qib and hfi1 will be just fine with unaligned map requests. This fixes a 4.8 regression where by an IPoIB transfer of > PAGE_SIZE will hang because the dma map page call always fails. This was introduced after commit 5faba5469522 ("IB/ipoib: Report SG feature regardless of HW UD CSUM capability") added the capability to use SG by default. Rather than override this, the HW supports it, so allow SG. Cc: Stable # 4.8 Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/dma.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c index 01f71caa3ac4..f2cefb0d9180 100644 --- a/drivers/infiniband/sw/rdmavt/dma.c +++ b/drivers/infiniband/sw/rdmavt/dma.c @@ -90,9 +90,6 @@ static u64 rvt_dma_map_page(struct ib_device *dev, struct page *page, if (WARN_ON(!valid_dma_direction(direction))) return BAD_DMA_ADDRESS; - if (offset + size > PAGE_SIZE) - return BAD_DMA_ADDRESS; - addr = (u64)page_address(page); if (addr) addr += offset; From 39eb2795f19233330bc14a8450b4042d784b15a7 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Mon, 10 Oct 2016 06:14:50 -0700 Subject: [PATCH 236/305] IB/hfi1: Remove redundant sysfs irq affinity entry The IRQ affinity entry is not needed after the irq notifier patch has been added to the hfi1 driver. The irq affinity settings for SDMA engine should be set using the standard /proc/irq// interface. Reviewed-by: Jianxin Xiong Signed-off-by: Tadeusz Struk Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/affinity.c | 72 --------------------------- drivers/infiniband/hw/hfi1/affinity.h | 4 -- drivers/infiniband/hw/hfi1/sysfs.c | 25 ---------- 3 files changed, 101 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index a26a9a0bfc41..67ea85a56945 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -775,75 +775,3 @@ void hfi1_put_proc_affinity(int cpu) } mutex_unlock(&affinity->lock); } - -int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, - size_t count) -{ - struct hfi1_affinity_node *entry; - cpumask_var_t mask; - int ret, i; - - mutex_lock(&node_affinity.lock); - entry = node_affinity_lookup(dd->node); - - if (!entry) { - ret = -EINVAL; - goto unlock; - } - - ret = zalloc_cpumask_var(&mask, GFP_KERNEL); - if (!ret) { - ret = -ENOMEM; - goto unlock; - } - - ret = cpulist_parse(buf, mask); - if (ret) - goto out; - - if (!cpumask_subset(mask, cpu_online_mask) || cpumask_empty(mask)) { - dd_dev_warn(dd, "Invalid CPU mask\n"); - ret = -EINVAL; - goto out; - } - - /* reset the SDMA interrupt affinity details */ - init_cpu_mask_set(&entry->def_intr); - cpumask_copy(&entry->def_intr.mask, mask); - - /* Reassign the affinity for each SDMA interrupt. */ - for (i = 0; i < dd->num_msix_entries; i++) { - struct hfi1_msix_entry *msix; - - msix = &dd->msix_entries[i]; - if (msix->type != IRQ_SDMA) - continue; - - ret = get_irq_affinity(dd, msix); - - if (ret) - break; - } -out: - free_cpumask_var(mask); -unlock: - mutex_unlock(&node_affinity.lock); - return ret ? ret : strnlen(buf, PAGE_SIZE); -} - -int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf) -{ - struct hfi1_affinity_node *entry; - - mutex_lock(&node_affinity.lock); - entry = node_affinity_lookup(dd->node); - - if (!entry) { - mutex_unlock(&node_affinity.lock); - return -EINVAL; - } - - cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask); - mutex_unlock(&node_affinity.lock); - return strnlen(buf, PAGE_SIZE); -} diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index b89ea3c0ee1a..42e63316afd1 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -102,10 +102,6 @@ int hfi1_get_proc_affinity(int); /* Release a CPU used by a user process. */ void hfi1_put_proc_affinity(int); -int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf); -int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, - size_t count); - struct hfi1_affinity_node { int node; struct cpu_mask_set def_intr; diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index edba22461a9c..919a5474e651 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -49,7 +49,6 @@ #include "hfi.h" #include "mad.h" #include "trace.h" -#include "affinity.h" /* * Start of per-port congestion control structures and support code @@ -623,27 +622,6 @@ static ssize_t show_tempsense(struct device *device, return ret; } -static ssize_t show_sdma_affinity(struct device *device, - struct device_attribute *attr, char *buf) -{ - struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); - struct hfi1_devdata *dd = dd_from_dev(dev); - - return hfi1_get_sdma_affinity(dd, buf); -} - -static ssize_t store_sdma_affinity(struct device *device, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); - struct hfi1_devdata *dd = dd_from_dev(dev); - - return hfi1_set_sdma_affinity(dd, buf, count); -} - /* * end of per-unit (or driver, in some cases, but replicated * per unit) functions @@ -658,8 +636,6 @@ static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL); static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset); -static DEVICE_ATTR(sdma_affinity, S_IWUSR | S_IRUGO, show_sdma_affinity, - store_sdma_affinity); static struct device_attribute *hfi1_attributes[] = { &dev_attr_hw_rev, @@ -670,7 +646,6 @@ static struct device_attribute *hfi1_attributes[] = { &dev_attr_boardversion, &dev_attr_tempsense, &dev_attr_chip_reset, - &dev_attr_sdma_affinity, }; int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, From d9ac4555fb2bcd6b794aaa0b39acad81111d9f42 Mon Sep 17 00:00:00 2001 From: Jakub Pawlak Date: Mon, 10 Oct 2016 06:14:56 -0700 Subject: [PATCH 237/305] IB/hfi1: Fix integrity check flags default values Prevent setting up integrity check flags when module is loaded with NO_INTEGRITY capability. Reviewed-by: Dean Luick Signed-off-by: Jakub Pawlak Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 40 +++++++++++++++++++++---------- drivers/infiniband/hw/hfi1/pio.c | 13 +++------- drivers/infiniband/hw/hfi1/sdma.c | 19 ++------------- 3 files changed, 32 insertions(+), 40 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 7eef11b316ff..3c06d204bafd 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1848,7 +1848,13 @@ extern struct mutex hfi1_mutex; static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd, u16 ctxt_type) { - u64 base_sc_integrity = + u64 base_sc_integrity; + + /* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */ + if (HFI1_CAP_IS_KSET(NO_INTEGRITY)) + return 0; + + base_sc_integrity = SEND_CTXT_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK | SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK @@ -1863,7 +1869,6 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd, | SEND_CTXT_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK | SEND_CTXT_CHECK_ENABLE_CHECK_OPCODE_SMASK | SEND_CTXT_CHECK_ENABLE_CHECK_SLID_SMASK - | SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK | SEND_CTXT_CHECK_ENABLE_CHECK_VL_SMASK | SEND_CTXT_CHECK_ENABLE_CHECK_ENABLE_SMASK; @@ -1872,18 +1877,23 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd, else base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY; - if (is_ax(dd)) - /* turn off send-side job key checks - A0 */ - return base_sc_integrity & - ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; + /* turn on send-side job key checks if !A0 */ + if (!is_ax(dd)) + base_sc_integrity |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; + return base_sc_integrity; } static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd) { - u64 base_sdma_integrity = + u64 base_sdma_integrity; + + /* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */ + if (HFI1_CAP_IS_KSET(NO_INTEGRITY)) + return 0; + + base_sdma_integrity = SEND_DMA_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK - | SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK | SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK | SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK | SEND_DMA_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK @@ -1895,14 +1905,18 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd) | SEND_DMA_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK | SEND_DMA_CHECK_ENABLE_CHECK_OPCODE_SMASK | SEND_DMA_CHECK_ENABLE_CHECK_SLID_SMASK - | SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK | SEND_DMA_CHECK_ENABLE_CHECK_VL_SMASK | SEND_DMA_CHECK_ENABLE_CHECK_ENABLE_SMASK; - if (is_ax(dd)) - /* turn off send-side job key checks - A0 */ - return base_sdma_integrity & - ~SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; + if (!HFI1_CAP_IS_KSET(STATIC_RATE_CTRL)) + base_sdma_integrity |= + SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK; + + /* turn on send-side job key checks if !A0 */ + if (!is_ax(dd)) + base_sdma_integrity |= + SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; + return base_sdma_integrity; } diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 50a3a36d9363..d89b8745d4c1 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -668,19 +668,12 @@ void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold) void set_pio_integrity(struct send_context *sc) { struct hfi1_devdata *dd = sc->dd; - u64 reg = 0; u32 hw_context = sc->hw_context; int type = sc->type; - /* - * No integrity checks if HFI1_CAP_NO_INTEGRITY is set, or if - * we're snooping. - */ - if (likely(!HFI1_CAP_IS_KSET(NO_INTEGRITY)) && - dd->hfi1_snoop.mode_flag != HFI1_PORT_SNOOP_MODE) - reg = hfi1_pkt_default_send_ctxt_mask(dd, type); - - write_kctxt_csr(dd, hw_context, SC(CHECK_ENABLE), reg); + write_kctxt_csr(dd, hw_context, + SC(CHECK_ENABLE), + hfi1_pkt_default_send_ctxt_mask(dd, type)); } static u32 get_buffers_allocated(struct send_context *sc) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index fd39bcaa062d..9cbe52d21077 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -2009,11 +2009,6 @@ static void sdma_hw_start_up(struct sdma_engine *sde) write_sde_csr(sde, SD(ENG_ERR_CLEAR), reg); } -#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \ -(r &= ~SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) - -#define SET_STATIC_RATE_CONTROL_SMASK(r) \ -(r |= SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) /* * set_sdma_integrity * @@ -2022,19 +2017,9 @@ static void sdma_hw_start_up(struct sdma_engine *sde) static void set_sdma_integrity(struct sdma_engine *sde) { struct hfi1_devdata *dd = sde->dd; - u64 reg; - if (unlikely(HFI1_CAP_IS_KSET(NO_INTEGRITY))) - return; - - reg = hfi1_pkt_base_sdma_integrity(dd); - - if (HFI1_CAP_IS_KSET(STATIC_RATE_CTRL)) - CLEAR_STATIC_RATE_CONTROL_SMASK(reg); - else - SET_STATIC_RATE_CONTROL_SMASK(reg); - - write_sde_csr(sde, SD(CHECK_ENABLE), reg); + write_sde_csr(sde, SD(CHECK_ENABLE), + hfi1_pkt_base_sdma_integrity(dd)); } static void init_sdma_regs( From acd7c8fe14938a315f0ac1b92a92375f7226c2fd Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 25 Oct 2016 08:57:55 -0700 Subject: [PATCH 238/305] IB/hfi1: Fix an Oops on pci device force remove This patch fixes an Oops on device unbind, when the device is used by a PSM user process. PSM processes access device resources which are freed on device removal. Similar protection exists in uverbs in ib_core for Verbs clients, but PSM doesn't use ib_uverbs hence a separate protection is required for PSM clients. Cc: Jason Gunthorpe Reviewed-by: Ira Weiny Reviewed-by: Dean Luick Reviewed-by: Dennis Dalessandro Signed-off-by: Tadeusz Struk Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 5 +++++ drivers/infiniband/hw/hfi1/file_ops.c | 19 ++++++++++++++++--- drivers/infiniband/hw/hfi1/hfi.h | 4 ++++ drivers/infiniband/hw/hfi1/init.c | 21 +++++++++++++++++++-- 4 files changed, 44 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9bf5f23544d4..799215255b49 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14691,6 +14691,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_free_cntrs; + init_completion(&dd->user_comp); + + /* The user refcount starts with one to inidicate an active device */ + atomic_set(&dd->user_refcount, 1); + goto bail; bail_free_rcverr: diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 677efa0e8cd6..bd786b7bd30b 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -172,6 +172,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) struct hfi1_devdata, user_cdev); + if (!atomic_inc_not_zero(&dd->user_refcount)) + return -ENXIO; + /* Just take a ref now. Not all opens result in a context assign */ kobject_get(&dd->kobj); @@ -183,11 +186,17 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) fd->rec_cpu_num = -1; /* no cpu affinity by default */ fd->mm = current->mm; atomic_inc(&fd->mm->mm_count); + fp->private_data = fd; + } else { + fp->private_data = NULL; + + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); + + return -ENOMEM; } - fp->private_data = fd; - - return fd ? 0 : -ENOMEM; + return 0; } static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, @@ -798,6 +807,10 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) done: mmdrop(fdata->mm); kobject_put(&dd->kobj); + + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); + kfree(fdata); return 0; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 3c06d204bafd..368e96c109a5 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1174,6 +1174,10 @@ struct hfi1_devdata { spinlock_t aspm_lock; /* Number of verbs contexts which have disabled ASPM */ atomic_t aspm_disabled_cnt; + /* Keeps track of user space clients */ + atomic_t user_refcount; + /* Used to wait for outstanding user space clients before dev removal */ + struct completion user_comp; struct hfi1_affinity *affinity; struct rhashtable sdma_rht; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 60db61536fed..e28a6b633ea9 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1538,12 +1538,31 @@ bail: return ret; } +static void wait_for_clients(struct hfi1_devdata *dd) +{ + /* + * Remove the device init value and complete the device if there is + * no clients or wait for active clients to finish. + */ + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); + + wait_for_completion(&dd->user_comp); +} + static void remove_one(struct pci_dev *pdev) { struct hfi1_devdata *dd = pci_get_drvdata(pdev); /* close debugfs files before ib unregister */ hfi1_dbg_ibdev_exit(&dd->verbs_dev); + + /* remove the /dev hfi1 interface */ + hfi1_device_remove(dd); + + /* wait for existing user space clients to finish */ + wait_for_clients(dd); + /* unregister from IB core */ hfi1_unregister_ib_device(dd); @@ -1558,8 +1577,6 @@ static void remove_one(struct pci_dev *pdev) /* wait until all of our (qsfp) queue_work() calls complete */ flush_workqueue(ib_wq); - hfi1_device_remove(dd); - postinit_cleanup(dd); } From 83fb4af6800deb4f3d19b297df6148cda5c016de Mon Sep 17 00:00:00 2001 From: Krzysztof Blaszkowski Date: Mon, 17 Oct 2016 04:19:24 -0700 Subject: [PATCH 239/305] IB/hfi1: Return ENODEV for unsupported PCI device ids. Clean up device type checking. Reviewed-by: Dean Luick Signed-off-by: Krzysztof Blaszkowski Signed-off-by: Tymoteusz Kielan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index e28a6b633ea9..baea53f862f9 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1402,7 +1402,7 @@ static void postinit_cleanup(struct hfi1_devdata *dd) static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int ret = 0, j, pidx, initfail; - struct hfi1_devdata *dd = ERR_PTR(-EINVAL); + struct hfi1_devdata *dd; struct hfi1_pportdata *ppd; /* First, lock the non-writable module parameters */ @@ -1461,26 +1461,25 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto bail; - /* - * Do device-specific initialization, function table setup, dd - * allocation, etc. - */ - switch (ent->device) { - case PCI_DEVICE_ID_INTEL0: - case PCI_DEVICE_ID_INTEL1: - dd = hfi1_init_dd(pdev, ent); - break; - default: + if (!(ent->device == PCI_DEVICE_ID_INTEL0 || + ent->device == PCI_DEVICE_ID_INTEL1)) { hfi1_early_err(&pdev->dev, "Failing on unknown Intel deviceid 0x%x\n", ent->device); ret = -ENODEV; + goto clean_bail; } - if (IS_ERR(dd)) + /* + * Do device-specific initialization, function table setup, dd + * allocation, etc. + */ + dd = hfi1_init_dd(pdev, ent); + + if (IS_ERR(dd)) { ret = PTR_ERR(dd); - if (ret) goto clean_bail; /* error already printed */ + } ret = create_workqueues(dd); if (ret) From 4dfe7cceb2bfd98783b4966d7c881a7552932d31 Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Mon, 17 Oct 2016 04:19:41 -0700 Subject: [PATCH 240/305] IB/hfi1: Fix a potential memory leak in hfi1_create_ctxts() In the function hfi1_create_ctxts the array "dd->rcd" is allocated and then populated with allocated resources in a loop. Previously, if error happened during the loop, only resource allocated in the current iteration would be freed. The array itself would then be freed, leaving the resources that were allocated in previous iterations and referenced by the array elements in limbo. This patch makes sure all allocated resources are freed before freeing the array "dd->rcd". Also the resource allocation now takes account of the numa node the device is attached to. Reviewed-by: Tadeusz Struk Signed-off-by: Jianxin Xiong Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index baea53f862f9..e27b65dbe293 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -144,6 +144,8 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) struct hfi1_ctxtdata *rcd; ppd = dd->pport + (i % dd->num_pports); + + /* dd->rcd[i] gets assigned inside the callee */ rcd = hfi1_create_ctxtdata(ppd, i, dd->node); if (!rcd) { dd_dev_err(dd, @@ -169,8 +171,6 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) if (!rcd->sc) { dd_dev_err(dd, "Unable to allocate kernel send context, failing\n"); - dd->rcd[rcd->ctxt] = NULL; - hfi1_free_ctxtdata(dd, rcd); goto nomem; } @@ -178,9 +178,6 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) if (ret < 0) { dd_dev_err(dd, "Failed to setup kernel receive context, failing\n"); - sc_free(rcd->sc); - dd->rcd[rcd->ctxt] = NULL; - hfi1_free_ctxtdata(dd, rcd); ret = -EFAULT; goto bail; } @@ -196,6 +193,10 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) nomem: ret = -ENOMEM; bail: + if (dd->rcd) { + for (i = 0; i < dd->num_rcv_contexts; ++i) + hfi1_free_ctxtdata(dd, dd->rcd[i]); + } kfree(dd->rcd); dd->rcd = NULL; return ret; @@ -216,7 +217,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, dd->num_rcv_contexts - dd->first_user_ctxt) kctxt_ngroups = (dd->rcv_entries.nctxt_extra - (dd->num_rcv_contexts - dd->first_user_ctxt)); - rcd = kzalloc(sizeof(*rcd), GFP_KERNEL); + rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa); if (rcd) { u32 rcvtids, max_entries; From eacc830f95c0d8c5cbbda1bdba2ddc8f14bc248d Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 17 Oct 2016 04:19:52 -0700 Subject: [PATCH 241/305] IB/hfi1: Remove leftover snoop references A few snoop related variables were missed in the snoop/capture removal to get out of staging. Go back and clean those up too. Reviewed-by: Dean Luick Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 18 +------- drivers/infiniband/hw/hfi1/hfi.h | 40 +----------------- drivers/infiniband/hw/hfi1/trace_rx.h | 60 --------------------------- 3 files changed, 4 insertions(+), 114 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 799215255b49..859341a25e56 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6301,19 +6301,8 @@ void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf) /* leave shared count at zero for both global and VL15 */ write_global_credit(dd, vau, vl15buf, 0); - /* We may need some credits for another VL when sending packets - * with the snoop interface. Dividing it down the middle for VL15 - * and VL0 should suffice. - */ - if (unlikely(dd->hfi1_snoop.mode_flag == HFI1_PORT_SNOOP_MODE)) { - write_csr(dd, SEND_CM_CREDIT_VL15, (u64)(vl15buf >> 1) - << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT); - write_csr(dd, SEND_CM_CREDIT_VL, (u64)(vl15buf >> 1) - << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT); - } else { - write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf - << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT); - } + write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf + << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT); } /* @@ -9915,9 +9904,6 @@ static void set_lidlmc(struct hfi1_pportdata *ppd) u32 mask = ~((1U << ppd->lmc) - 1); u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1); - if (dd->hfi1_snoop.mode_flag) - dd_dev_info(dd, "Set lid/lmc while snooping"); - c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK | DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK); c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 368e96c109a5..93e2fcebd083 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -367,26 +367,6 @@ struct hfi1_packet { u8 etype; }; -/* - * Private data for snoop/capture support. - */ -struct hfi1_snoop_data { - int mode_flag; - struct cdev cdev; - struct device *class_dev; - /* protect snoop data */ - spinlock_t snoop_lock; - struct list_head queue; - wait_queue_head_t waitq; - void *filter_value; - int (*filter_callback)(void *hdr, void *data, void *value); - u64 dcc_cfg; /* saved value of DCC Cfg register */ -}; - -/* snoop mode_flag values */ -#define HFI1_PORT_SNOOP_MODE 1U -#define HFI1_PORT_CAPTURE_MODE 2U - struct rvt_sge_state; /* @@ -1104,8 +1084,6 @@ struct hfi1_devdata { char *portcntrnames; size_t portcntrnameslen; - struct hfi1_snoop_data hfi1_snoop; - struct err_info_rcvport err_info_rcvport; struct err_info_constraint err_info_rcv_constraint; struct err_info_constraint err_info_xmit_constraint; @@ -1141,8 +1119,8 @@ struct hfi1_devdata { rhf_rcv_function_ptr normal_rhf_rcv_functions[8]; /* - * Handlers for outgoing data so that snoop/capture does not - * have to have its hooks in the send path + * Capability to have different send engines simply by changing a + * pointer value. */ send_routine process_pio_send; send_routine process_dma_send; @@ -1225,8 +1203,6 @@ struct hfi1_devdata *hfi1_lookup(int unit); extern u32 hfi1_cpulist_count; extern unsigned long *hfi1_cpulist; -extern unsigned int snoop_drop_send; -extern unsigned int snoop_force_capture; int hfi1_init(struct hfi1_devdata *, int); int hfi1_count_units(int *npresentp, int *nupp); int hfi1_count_active_units(void); @@ -1561,13 +1537,6 @@ void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf); void reset_link_credits(struct hfi1_devdata *dd); void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu); -int snoop_recv_handler(struct hfi1_packet *packet); -int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, - u64 pbc); -int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, - u64 pbc); -void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf, - u64 pbc, const void *from, size_t count); int set_buffer_control(struct hfi1_pportdata *ppd, struct buffer_control *bc); static inline struct hfi1_devdata *dd_from_ppd(struct hfi1_pportdata *ppd) @@ -1803,8 +1772,6 @@ int kdeth_process_expected(struct hfi1_packet *packet); int kdeth_process_eager(struct hfi1_packet *packet); int process_receive_invalid(struct hfi1_packet *packet); -extern rhf_rcv_function_ptr snoop_rhf_rcv_functions[8]; - void update_sge(struct rvt_sge_state *ss, u32 length); /* global module parameter variables */ @@ -1831,9 +1798,6 @@ extern struct mutex hfi1_mutex; #define DRIVER_NAME "hfi1" #define HFI1_USER_MINOR_BASE 0 #define HFI1_TRACE_MINOR 127 -#define HFI1_DIAGPKT_MINOR 128 -#define HFI1_DIAG_MINOR_BASE 129 -#define HFI1_SNOOP_CAPTURE_BASE 200 #define HFI1_NMINORS 255 #define PCI_VENDOR_ID_INTEL 0x8086 diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 11e02b228922..f77e59fb43fe 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -253,66 +253,6 @@ TRACE_EVENT(hfi1_mmu_invalidate, ) ); -#define SNOOP_PRN \ - "slid %.4x dlid %.4x qpn 0x%.6x opcode 0x%.2x,%s " \ - "svc lvl %d pkey 0x%.4x [header = %d bytes] [data = %d bytes]" - -TRACE_EVENT(snoop_capture, - TP_PROTO(struct hfi1_devdata *dd, - int hdr_len, - struct ib_header *hdr, - int data_len, - void *data), - TP_ARGS(dd, hdr_len, hdr, data_len, data), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - __field(u16, slid) - __field(u16, dlid) - __field(u32, qpn) - __field(u8, opcode) - __field(u8, sl) - __field(u16, pkey) - __field(u32, hdr_len) - __field(u32, data_len) - __field(u8, lnh) - __dynamic_array(u8, raw_hdr, hdr_len) - __dynamic_array(u8, raw_pkt, data_len) - ), - TP_fast_assign( - struct ib_other_headers *ohdr; - - __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); - if (__entry->lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else - ohdr = &hdr->u.l.oth; - DD_DEV_ASSIGN(dd); - __entry->slid = be16_to_cpu(hdr->lrh[3]); - __entry->dlid = be16_to_cpu(hdr->lrh[1]); - __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; - __entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; - __entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; - __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff; - __entry->hdr_len = hdr_len; - __entry->data_len = data_len; - memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len); - memcpy(__get_dynamic_array(raw_pkt), data, data_len); - ), - TP_printk( - "[%s] " SNOOP_PRN, - __get_str(dev), - __entry->slid, - __entry->dlid, - __entry->qpn, - __entry->opcode, - show_ib_opcode(__entry->opcode), - __entry->sl, - __entry->pkey, - __entry->hdr_len, - __entry->data_len - ) -); - #endif /* __HFI1_TRACE_RX_H */ #undef TRACE_INCLUDE_PATH From 26ea2544ddbe8855cb251e41ff3641c61655a15f Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Mon, 17 Oct 2016 04:19:58 -0700 Subject: [PATCH 242/305] IB/hfi1: Clean up unused argument hfi1_pcie_ddinit takes the PCI device id as an argument but never uses it. Clean it up. Reviewed-by: Dennis Dalessandro Signed-off-by: Easwar Hariharan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 2 +- drivers/infiniband/hw/hfi1/hfi.h | 3 +-- drivers/infiniband/hw/hfi1/pcie.c | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 859341a25e56..156ddf8f3dca 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14449,7 +14449,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, * Any error printing is already done by the init code. * On return, we have the chip mapped. */ - ret = hfi1_pcie_ddinit(dd, pdev, ent); + ret = hfi1_pcie_ddinit(dd, pdev); if (ret < 0) goto bail_free; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 93e2fcebd083..d906cf08504f 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1736,8 +1736,7 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len); int hfi1_pcie_init(struct pci_dev *, const struct pci_device_id *); void hfi1_pcie_cleanup(struct pci_dev *); -int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *, - const struct pci_device_id *); +int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *); void hfi1_pcie_ddcleanup(struct hfi1_devdata *); void hfi1_pcie_flr(struct hfi1_devdata *); int pcie_speeds(struct hfi1_devdata *); diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 89c68da1c273..4ac8f330c5cb 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -157,8 +157,7 @@ void hfi1_pcie_cleanup(struct pci_dev *pdev) * fields required to re-initialize after a chip reset, or for * various other purposes */ -int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev, - const struct pci_device_id *ent) +int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) { unsigned long len; resource_size_t addr; From f0f98f74c91c68502e97e0d5526aa4e81b40b28a Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Mon, 17 Oct 2016 04:20:04 -0700 Subject: [PATCH 243/305] IB/hfi1: Delete unused lock The lock is an unused vestige from qib. Remove it. Reviewed-by: Mike Marciniszyn Signed-off-by: Easwar Hariharan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 2 -- drivers/infiniband/hw/hfi1/init.c | 1 - 2 files changed, 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index d906cf08504f..cc87fd4e534b 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -593,8 +593,6 @@ struct hfi1_pportdata { struct mutex hls_lock; u32 host_link_state; - spinlock_t sdma_alllock ____cacheline_aligned_in_smp; - u32 lstate; /* logical link state */ /* these are the "32 bit" regs */ diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index e27b65dbe293..0f82eebc4b9e 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -507,7 +507,6 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event); mutex_init(&ppd->hls_lock); - spin_lock_init(&ppd->sdma_alllock); spin_lock_init(&ppd->qsfp_info.qsfp_lock); ppd->qsfp_info.ppd = ppd; From 458ed666fe14a54dfb6690a1a7f541782d1342c9 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 17 Oct 2016 04:20:09 -0700 Subject: [PATCH 244/305] IB/hfi1: Fix rnr_timer addition The new s_rnr_timeout was not properly being set and the code was incorrectly setting a different timer. Found by code inspection. Cc: # 4.7.x Fixes: 08279d5c9424 ("staging/rdma/hfi1: use new RNR timer") Reviewed-by: Mike Marciniszyn Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 8bc5013f39a1..83198a8a8797 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -89,7 +89,7 @@ void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to) lockdep_assert_held(&qp->s_lock); qp->s_flags |= RVT_S_WAIT_RNR; - qp->s_timer.expires = jiffies + usecs_to_jiffies(to); + priv->s_rnr_timer.expires = jiffies + usecs_to_jiffies(to); add_timer(&priv->s_rnr_timer); } From 11501ab9df687c6f0852719a5165e16cd3eb3c10 Mon Sep 17 00:00:00 2001 From: Krzysztof Blaszkowski Date: Tue, 25 Oct 2016 13:12:11 -0700 Subject: [PATCH 245/305] IB/hfi1: Relocate rcvhdrcnt module parameter check. Validate the rcvhdrcnt module parameter in a single function at module load time. This allows proper error reporting. Reviewed-by: Dean Luick Signed-off-by: Krzysztof Blaszkowski Signed-off-by: Tymoteusz Kielan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 44 ++++++++++++++++++------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 0f82eebc4b9e..e3b5bc93bc70 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -262,13 +262,6 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, } rcd->eager_base = base * dd->rcv_entries.group_size; - /* Validate and initialize Rcv Hdr Q variables */ - if (rcvhdrcnt % HDRQ_INCREMENT) { - dd_dev_err(dd, - "ctxt%u: header queue count %d must be divisible by %lu\n", - rcd->ctxt, rcvhdrcnt, HDRQ_INCREMENT); - goto bail; - } rcd->rcvhdrq_cnt = rcvhdrcnt; rcd->rcvhdrqentsize = hfi1_hdrq_entsize; /* @@ -1399,6 +1392,29 @@ static void postinit_cleanup(struct hfi1_devdata *dd) hfi1_free_devdata(dd); } +static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt) +{ + if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) { + hfi1_early_err(dev, "Receive header queue count too small\n"); + return -EINVAL; + } + + if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) { + hfi1_early_err(dev, + "Receive header queue count cannot be greater than %u\n", + HFI1_MAX_HDRQ_EGRBUF_CNT); + return -EINVAL; + } + + if (thecnt % HDRQ_INCREMENT) { + hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n", + thecnt, HDRQ_INCREMENT); + return -EINVAL; + } + + return 0; +} + static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int ret = 0, j, pidx, initfail; @@ -1409,18 +1425,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) HFI1_CAP_LOCK(); /* Validate some global module parameters */ - if (rcvhdrcnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) { - hfi1_early_err(&pdev->dev, "Header queue count too small\n"); - ret = -EINVAL; + ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt); + if (ret) goto bail; - } - if (rcvhdrcnt > HFI1_MAX_HDRQ_EGRBUF_CNT) { - hfi1_early_err(&pdev->dev, - "Receive header queue count cannot be greater than %u\n", - HFI1_MAX_HDRQ_EGRBUF_CNT); - ret = -EINVAL; - goto bail; - } + /* use the encoding function as a sanitization check */ if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) { hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n", From 505efe3e46d5eaab726295cd023fb86d5b789d00 Mon Sep 17 00:00:00 2001 From: Jakub Pawlak Date: Tue, 25 Oct 2016 13:12:17 -0700 Subject: [PATCH 246/305] IB/hfi1: Fix status error code for unsupported packets Set the status code BAD_L2 when unsupported type of packet is received and dropped. Reviewed-by: Dennis Dalessandro Signed-off-by: Jakub Pawlak Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.h | 3 +++ drivers/infiniband/hw/hfi1/driver.c | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 92345259a8f4..043fd21dc5f3 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -320,6 +320,9 @@ /* DC_DC8051_CFG_MODE.GENERAL bits */ #define DISABLE_SELF_GUID_CHECK 0x2 +/* Bad L2 frame error code */ +#define BAD_L2_ERR 0x6 + /* * Eager buffer minimum and maximum sizes supported by the hardware. * All power-of-two sizes in between are supported as well. diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 6563e4d38b80..dadd35eedc01 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1360,12 +1360,25 @@ int process_receive_ib(struct hfi1_packet *packet) int process_receive_bypass(struct hfi1_packet *packet) { + struct hfi1_devdata *dd = packet->rcd->dd; + if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); - dd_dev_err(packet->rcd->dd, + dd_dev_err(dd, "Bypass packets are not supported in normal operation. Dropping\n"); - incr_cntr64(&packet->rcd->dd->sw_rcv_bypass_packet_errors); + incr_cntr64(&dd->sw_rcv_bypass_packet_errors); + if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) { + u64 *flits = packet->ebuf; + + if (flits && !(packet->rhf & RHF_LEN_ERR)) { + dd->err_info_rcvport.packet_flit1 = flits[0]; + dd->err_info_rcvport.packet_flit2 = + packet->tlen > sizeof(flits[0]) ? flits[1] : 0; + } + dd->err_info_rcvport.status_and_code |= + (OPA_EI_STATUS_SMASK | BAD_L2_ERR); + } return RHF_RCV_CONTINUE; } From f2d8a0b367e735ab157222ce74a5f2481216c878 Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Tue, 25 Oct 2016 13:12:23 -0700 Subject: [PATCH 247/305] IB/hfi1: Fix ECN processing in prescan_rxq When processing ECN via the prescan_rxq path, some fields in the packet structure are passed uninitialized. This can potentially cause NULL pointer exceptions during ECN handling. Reviewed-by: Ira Weiny Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index dadd35eedc01..c5efff29c147 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -599,7 +599,6 @@ static void __prescan_rxq(struct hfi1_packet *packet) dd->rhf_offset; struct rvt_qp *qp; struct ib_header *hdr; - struct ib_other_headers *ohdr; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; @@ -615,18 +614,21 @@ static void __prescan_rxq(struct hfi1_packet *packet) if (etype != RHF_RCV_TYPE_IB) goto next; - hdr = hfi1_get_msgheader(dd, rhf_addr); + packet->hdr = hfi1_get_msgheader(dd, rhf_addr); + hdr = packet->hdr; lnh = be16_to_cpu(hdr->lrh[0]) & 3; - if (lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else if (lnh == HFI1_LRH_GRH) - ohdr = &hdr->u.l.oth; - else + if (lnh == HFI1_LRH_BTH) { + packet->ohdr = &hdr->u.oth; + } else if (lnh == HFI1_LRH_GRH) { + packet->ohdr = &hdr->u.l.oth; + packet->rcv_flags |= HFI1_HAS_GRH; + } else { goto next; /* just in case */ + } - bth1 = be32_to_cpu(ohdr->bth[1]); + bth1 = be32_to_cpu(packet->ohdr->bth[1]); is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK)); if (!is_ecn) @@ -646,7 +648,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) /* turn off BECN, FECN */ bth1 &= ~(HFI1_FECN_SMASK | HFI1_BECN_SMASK); - ohdr->bth[1] = cpu_to_be32(bth1); + packet->ohdr->bth[1] = cpu_to_be32(bth1); next: update_ps_mdata(&mdata, rcd); } From 09a7908b1ba616eed349d49058ee909907ee0885 Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Tue, 25 Oct 2016 13:12:40 -0700 Subject: [PATCH 248/305] IB/hfi1: Prevent hardware counter names from being cut off Increase the size of the buffer that is used to construct per-VL and per-SDMA counter names. Reviewed-by: Dennis Dalessandro Signed-off-by: Jianxin Xiong Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 156ddf8f3dca..24d0820873cf 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12098,7 +12098,7 @@ static void update_synth_timer(unsigned long opaque) mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); } -#define C_MAX_NAME 13 /* 12 chars + one for /0 */ +#define C_MAX_NAME 16 /* 15 chars + one for /0 */ static int init_cntrs(struct hfi1_devdata *dd) { int i, rcv_ctxts, j; From 2b16056f845207967a32497f41cf92b57849f934 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 25 Oct 2016 13:12:46 -0700 Subject: [PATCH 249/305] IB/hfi1: Remove incorrect IS_ERR check Remove IS_ERR check from caching code as the function being called does not actually return error pointers. Fixes: f19bd643dbde: "IB/hfi1: Prevent NULL pointer deferences in caching code" Reported-by: Dan Carpenter Reviewed-by: Dean Luick Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index a761f804111e..77697d690f3e 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1144,7 +1144,7 @@ static int pin_vector_pages(struct user_sdma_request *req, rb_node = hfi1_mmu_rb_extract(pq->handler, (unsigned long)iovec->iov.iov_base, iovec->iov.iov_len); - if (rb_node && !IS_ERR(rb_node)) + if (rb_node) node = container_of(rb_node, struct sdma_mmu_node, rb); else rb_node = NULL; From bc9db5ad3253c8e17969bd802c47b73e63f125ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 11 Nov 2016 19:14:24 +0200 Subject: [PATCH 250/305] drm/i915: Assume non-DP++ port if dvo_port is HDMI and there's no AUX ch specified in the VBT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My heuristic for detecting type 1 DVI DP++ adaptors based on the VBT port information apparently didn't survive the reality of buggy VBTs. In this particular case we have a machine with a natice HDMI port, but the VBT tells us it's a DP++ port based on its capabilities. The dvo_port information in VBT does claim that we're dealing with a HDMI port though, but we have other machines which do the same even when they actually have DP++ ports. So that piece of information alone isn't sufficient to tell the two apart. After staring at a bunch of VBTs from various machines, I have to conclude that the only other semi-reliable clue we can use is the presence of the AUX channel in the VBT. On this particular machine AUX channel is specified as zero, whereas on some of the other machines which listed the DP++ port as HDMI have a non-zero AUX channel. I've also seen VBTs which have dvo_port a DP but have a zero AUX channel. I believe those we need to treat as DP ports, so we'll limit the AUX channel check to just the cases where dvo_port is HDMI. If we encounter any more serious failures with this heuristic I think we'll have to have to throw it out entirely. But that could mean that there is a risk of type 1 DVI dongle users getting greeted by a black screen, so I'd rather not go there unless absolutely necessary. v2: Remove the duplicate PORT_A check (Daniel) Fix some typos in the commit message Cc: Daniel Otero Cc: stable@vger.kernel.org Tested-by: Daniel Otero Fixes: d61992565bd3 ("drm/i915: Determine DP++ type 1 DVI adaptor presence based on VBT") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97994 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1478884464-14251-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter (cherry picked from commit 7a17995a3dc8613f778a9e2fd20e870f17789544) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_bios.c | 30 ++++++++++++++++++++------- drivers/gpu/drm/i915/intel_vbt_defs.h | 3 ++- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 1f8af87c6294..cf2560708e03 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1143,7 +1143,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, if (!child) return; - aux_channel = child->raw[25]; + aux_channel = child->common.aux_channel; ddc_pin = child->common.ddc_pin; is_dvi = child->common.device_type & DEVICE_TYPE_TMDS_DVI_SIGNALING; @@ -1673,7 +1673,8 @@ bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port) return false; } -bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum port port) +static bool child_dev_is_dp_dual_mode(const union child_device_config *p_child, + enum port port) { static const struct { u16 dp, hdmi; @@ -1687,22 +1688,35 @@ bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum por [PORT_D] = { DVO_PORT_DPD, DVO_PORT_HDMID, }, [PORT_E] = { DVO_PORT_DPE, DVO_PORT_HDMIE, }, }; - int i; if (port == PORT_A || port >= ARRAY_SIZE(port_mapping)) return false; - if (!dev_priv->vbt.child_dev_num) + if ((p_child->common.device_type & DEVICE_TYPE_DP_DUAL_MODE_BITS) != + (DEVICE_TYPE_DP_DUAL_MODE & DEVICE_TYPE_DP_DUAL_MODE_BITS)) return false; + if (p_child->common.dvo_port == port_mapping[port].dp) + return true; + + /* Only accept a HDMI dvo_port as DP++ if it has an AUX channel */ + if (p_child->common.dvo_port == port_mapping[port].hdmi && + p_child->common.aux_channel != 0) + return true; + + return false; +} + +bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, + enum port port) +{ + int i; + for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { const union child_device_config *p_child = &dev_priv->vbt.child_dev[i]; - if ((p_child->common.dvo_port == port_mapping[port].dp || - p_child->common.dvo_port == port_mapping[port].hdmi) && - (p_child->common.device_type & DEVICE_TYPE_DP_DUAL_MODE_BITS) == - (DEVICE_TYPE_DP_DUAL_MODE & DEVICE_TYPE_DP_DUAL_MODE_BITS)) + if (child_dev_is_dp_dual_mode(p_child, port)) return true; } diff --git a/drivers/gpu/drm/i915/intel_vbt_defs.h b/drivers/gpu/drm/i915/intel_vbt_defs.h index 68db9621f1f0..8886cab19f98 100644 --- a/drivers/gpu/drm/i915/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/intel_vbt_defs.h @@ -280,7 +280,8 @@ struct common_child_dev_config { u8 dp_support:1; u8 tmds_support:1; u8 support_reserved:5; - u8 not_common3[12]; + u8 aux_channel; + u8 not_common3[11]; u8 iboost_level; } __packed; From 2c8c34167c987e463d62a55384fcec7fa8d03a54 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 29 Sep 2016 12:59:39 +0300 Subject: [PATCH 251/305] mfd: lpss: Fix Intel Kaby Lake PCH-H properties There are a few issues on Intel Kaby Lake PCH-H properties added by commit a6a576b78e09 ("mfd: lpss: Add Intel Kaby Lake PCH-H PCI IDs"): - Input clock of I2C controller on Intel Kaby Lake PCH-H is 120 MHz not 133 MHz. This was probably copy-paste error from Intel Broxton I2C properties. - There is no default I2C SDA hold time specified which is used when ACPI doesn't provide it. I got information from Windows driver team that Kaby Lake PCH-H can use the same configuration than Intel Sunrisepoint PCH. - Common HS-UART properties are not used. Fix these by reusing the Sunrisepoint properties on Kaby Lake PCH-H. Fixes: a6a576b78e09 ("mfd: lpss: Add Intel Kaby Lake PCH-H PCI IDs") Reported-by: Xiang A Wang Signed-off-by: Jarkko Nikula Acked-by: Mika Westerberg Signed-off-by: Lee Jones --- drivers/mfd/intel-lpss-pci.c | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/drivers/mfd/intel-lpss-pci.c b/drivers/mfd/intel-lpss-pci.c index 3228fd182a99..9ff243970e93 100644 --- a/drivers/mfd/intel-lpss-pci.c +++ b/drivers/mfd/intel-lpss-pci.c @@ -123,19 +123,6 @@ static const struct intel_lpss_platform_info apl_i2c_info = { .properties = apl_i2c_properties, }; -static const struct intel_lpss_platform_info kbl_info = { - .clk_rate = 120000000, -}; - -static const struct intel_lpss_platform_info kbl_uart_info = { - .clk_rate = 120000000, - .clk_con_id = "baudclk", -}; - -static const struct intel_lpss_platform_info kbl_i2c_info = { - .clk_rate = 133000000, -}; - static const struct pci_device_id intel_lpss_pci_ids[] = { /* BXT A-Step */ { PCI_VDEVICE(INTEL, 0x0aac), (kernel_ulong_t)&bxt_i2c_info }, @@ -207,15 +194,15 @@ static const struct pci_device_id intel_lpss_pci_ids[] = { { PCI_VDEVICE(INTEL, 0xa161), (kernel_ulong_t)&spt_i2c_info }, { PCI_VDEVICE(INTEL, 0xa166), (kernel_ulong_t)&spt_uart_info }, /* KBL-H */ - { PCI_VDEVICE(INTEL, 0xa2a7), (kernel_ulong_t)&kbl_uart_info }, - { PCI_VDEVICE(INTEL, 0xa2a8), (kernel_ulong_t)&kbl_uart_info }, - { PCI_VDEVICE(INTEL, 0xa2a9), (kernel_ulong_t)&kbl_info }, - { PCI_VDEVICE(INTEL, 0xa2aa), (kernel_ulong_t)&kbl_info }, - { PCI_VDEVICE(INTEL, 0xa2e0), (kernel_ulong_t)&kbl_i2c_info }, - { PCI_VDEVICE(INTEL, 0xa2e1), (kernel_ulong_t)&kbl_i2c_info }, - { PCI_VDEVICE(INTEL, 0xa2e2), (kernel_ulong_t)&kbl_i2c_info }, - { PCI_VDEVICE(INTEL, 0xa2e3), (kernel_ulong_t)&kbl_i2c_info }, - { PCI_VDEVICE(INTEL, 0xa2e6), (kernel_ulong_t)&kbl_uart_info }, + { PCI_VDEVICE(INTEL, 0xa2a7), (kernel_ulong_t)&spt_uart_info }, + { PCI_VDEVICE(INTEL, 0xa2a8), (kernel_ulong_t)&spt_uart_info }, + { PCI_VDEVICE(INTEL, 0xa2a9), (kernel_ulong_t)&spt_info }, + { PCI_VDEVICE(INTEL, 0xa2aa), (kernel_ulong_t)&spt_info }, + { PCI_VDEVICE(INTEL, 0xa2e0), (kernel_ulong_t)&spt_i2c_info }, + { PCI_VDEVICE(INTEL, 0xa2e1), (kernel_ulong_t)&spt_i2c_info }, + { PCI_VDEVICE(INTEL, 0xa2e2), (kernel_ulong_t)&spt_i2c_info }, + { PCI_VDEVICE(INTEL, 0xa2e3), (kernel_ulong_t)&spt_i2c_info }, + { PCI_VDEVICE(INTEL, 0xa2e6), (kernel_ulong_t)&spt_uart_info }, { } }; MODULE_DEVICE_TABLE(pci, intel_lpss_pci_ids); From 274e43edcda6f709aa67e436b3123e45a6270923 Mon Sep 17 00:00:00 2001 From: Azhar Shaikh Date: Wed, 12 Oct 2016 10:12:20 -0700 Subject: [PATCH 252/305] mfd: intel-lpss: Do not put device in reset state on suspend Commit 41a3da2b8e163 ("mfd: intel-lpss: Save register context on suspend") saved the register context while going to suspend and also put the device in reset state. Due to the resetting of device, system cannot enter S3/S0ix states when no_console_suspend flag is enabled. The system and serial console both hang. The resetting of device is not needed while going to suspend. Hence remove this code. Cc: stable@vger.kernel.org Fixes: 41a3da2b8e163 ("mfd: intel-lpss: Save register context on suspend") Signed-off-by: Azhar Shaikh Acked-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Lee Jones --- drivers/mfd/intel-lpss.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/mfd/intel-lpss.c b/drivers/mfd/intel-lpss.c index 41b113875d64..70c646b0097d 100644 --- a/drivers/mfd/intel-lpss.c +++ b/drivers/mfd/intel-lpss.c @@ -502,9 +502,6 @@ int intel_lpss_suspend(struct device *dev) for (i = 0; i < LPSS_PRIV_REG_COUNT; i++) lpss->priv_ctx[i] = readl(lpss->priv + i * 4); - /* Put the device into reset state */ - writel(0, lpss->priv + LPSS_PRIV_RESETS); - return 0; } EXPORT_SYMBOL_GPL(intel_lpss_suspend); From 9600702082b29fd3f8a6d744df74ad4c48d4a432 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 17 Oct 2016 10:32:13 +0300 Subject: [PATCH 253/305] mfd: intel_soc_pmic_bxtwc: Fix usbc interrupt The wcove USB Type-C driver is currently being flooded with interrupts that are not targeted to it. The reason for that is because all CHRG first level interrupts are mapped to it. This fixes the issue by introducing separate irq for the usbc device, and mapping only USB Type-C PHY interrupts to it. Fixes: 9c6235c86332 ("mfd: intel_soc_pmic_bxtwc: Add bxt_wcove_usbc device") Signed-off-by: Heikki Krogerus Signed-off-by: Lee Jones --- drivers/mfd/intel_soc_pmic_bxtwc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/intel_soc_pmic_bxtwc.c b/drivers/mfd/intel_soc_pmic_bxtwc.c index 43e54b7e908f..f9a8c5203873 100644 --- a/drivers/mfd/intel_soc_pmic_bxtwc.c +++ b/drivers/mfd/intel_soc_pmic_bxtwc.c @@ -86,6 +86,7 @@ enum bxtwc_irqs_level2 { BXTWC_THRM2_IRQ, BXTWC_BCU_IRQ, BXTWC_ADC_IRQ, + BXTWC_USBC_IRQ, BXTWC_CHGR0_IRQ, BXTWC_CHGR1_IRQ, BXTWC_GPIO0_IRQ, @@ -111,7 +112,8 @@ static const struct regmap_irq bxtwc_regmap_irqs_level2[] = { REGMAP_IRQ_REG(BXTWC_THRM2_IRQ, 2, 0xff), REGMAP_IRQ_REG(BXTWC_BCU_IRQ, 3, 0x1f), REGMAP_IRQ_REG(BXTWC_ADC_IRQ, 4, 0xff), - REGMAP_IRQ_REG(BXTWC_CHGR0_IRQ, 5, 0x3f), + REGMAP_IRQ_REG(BXTWC_USBC_IRQ, 5, BIT(5)), + REGMAP_IRQ_REG(BXTWC_CHGR0_IRQ, 5, 0x1f), REGMAP_IRQ_REG(BXTWC_CHGR1_IRQ, 6, 0x1f), REGMAP_IRQ_REG(BXTWC_GPIO0_IRQ, 7, 0xff), REGMAP_IRQ_REG(BXTWC_GPIO1_IRQ, 8, 0x3f), @@ -146,7 +148,7 @@ static struct resource adc_resources[] = { }; static struct resource usbc_resources[] = { - DEFINE_RES_IRQ_NAMED(BXTWC_CHGR0_IRQ, "USBC"), + DEFINE_RES_IRQ(BXTWC_USBC_IRQ), }; static struct resource charger_resources[] = { From f40584200bc4af7aa4399635b9ac213c62a13ae7 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 1 Nov 2016 10:22:53 +0100 Subject: [PATCH 254/305] mfd: stmpe: Fix RESET regression on STMPE2401 Since commit c4dd1ba355aae2bc3d1213da6c66c53e3c31e028 ("mfd: stmpe: Add reset support for all STMPE variant") we're resetting the STMPE expanders before use. This caused a regression on the STMP2401 on the Nomadik NHK8815: stmpe-i2c 0-0043: stmpe2401 detected, chip id: 0x101 nmk-i2c 101f8000.i2c0: write to slave 0x43 timed out nmk-i2c 101f8000.i2c0: no ack received after address transmission stmpe-i2c 0-0044: stmpe2401 detected, chip id: 0x101 nmk-i2c 101f8000.i2c0: write to slave 0x44 timed out nmk-i2c 101f8000.i2c0: no ack received after address transmission It turns out that we start to poll for the reset bit to go low again too quickly: the STMPE2401 is not yet online and ready to be asked for the status of the RESET bit. By introducing a 10ms delay before starting to hammer the register for information, we get back to normal: stmpe-i2c 0-0043: stmpe2401 detected, chip id: 0x101 stmpe-i2c 0-0044: stmpe2401 detected, chip id: 0x101 Cc: stable@vger.kernel.org Cc: Amelie Delaunay Fixes: c4dd1ba355aa ("mfd: stmpe: Add reset support for all STMPE variant") Signed-off-by: Linus Walleij Acked-by: Patrice Chotard Signed-off-by: Lee Jones --- drivers/mfd/stmpe.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c index cfdae8a3d779..b0c7bcdaf5df 100644 --- a/drivers/mfd/stmpe.c +++ b/drivers/mfd/stmpe.c @@ -851,6 +851,8 @@ static int stmpe_reset(struct stmpe *stmpe) if (ret < 0) return ret; + msleep(10); + timeout = jiffies + msecs_to_jiffies(100); while (time_before(jiffies, timeout)) { ret = __stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_SYS_CTRL]); From 722f191080de641f023feaa7d5648caf377844f5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Nov 2016 11:38:18 +0100 Subject: [PATCH 255/305] mfd: core: Fix device reference leak in mfd_clone_cell Make sure to drop the reference taken by bus_find_device_by_name() before returning from mfd_clone_cell(). Fixes: a9bbba996302 ("mfd: add platform_device sharing support for mfd") Signed-off-by: Johan Hovold Signed-off-by: Lee Jones --- drivers/mfd/mfd-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index 3ac486a597f3..c57e407020f1 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -399,6 +399,8 @@ int mfd_clone_cell(const char *cell, const char **clones, size_t n_clones) clones[i]); } + put_device(dev); + return 0; } EXPORT_SYMBOL(mfd_clone_cell); From 19ff7fcc76e6911a955742b40f85ba1030ccba5e Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 16 Nov 2016 11:52:19 -0500 Subject: [PATCH 256/305] orangefs: add .owner to debugfs file_operations Without ".owner = THIS_MODULE" it is possible to crash the kernel by unloading the Orangefs module while someone is reading debugfs files. Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-debugfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index d484068ca716..38887cc5577f 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -114,6 +114,7 @@ static const struct seq_operations help_debug_ops = { }; const struct file_operations debug_help_fops = { + .owner = THIS_MODULE, .open = orangefs_debug_help_open, .read = seq_read, .release = seq_release, @@ -121,6 +122,7 @@ const struct file_operations debug_help_fops = { }; static const struct file_operations kernel_debug_fops = { + .owner = THIS_MODULE, .open = orangefs_debug_open, .read = orangefs_debug_read, .write = orangefs_debug_write, From d48756228ee9161ac8836b346589a43fabdc9f3c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 15 Nov 2016 15:56:26 -0500 Subject: [PATCH 257/305] nvme/pci: Don't free queues on error The nvme_remove function tears down all allocated resources in the correct order, so no need to free queues on error during initialization. This fixes possible use-after-free errors when queues are still associated with a blk-mq hctx. Reported-by: Scott Bauer Tested-by: Scott Bauer Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 0248d0e21fee..5e52034ab010 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1242,20 +1242,16 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) result = nvme_enable_ctrl(&dev->ctrl, cap); if (result) - goto free_nvmeq; + return result; nvmeq->cq_vector = 0; result = queue_request_irq(nvmeq); if (result) { nvmeq->cq_vector = -1; - goto free_nvmeq; + return result; } return result; - - free_nvmeq: - nvme_free_queues(dev, 0); - return result; } static bool nvme_should_reset(struct nvme_dev *dev, u32 csts) @@ -1317,10 +1313,8 @@ static int nvme_create_io_queues(struct nvme_dev *dev) max = min(dev->max_qid, dev->queue_count - 1); for (i = dev->online_queues; i <= max; i++) { ret = nvme_create_queue(dev->queues[i], i); - if (ret) { - nvme_free_queues(dev, i); + if (ret) break; - } } /* @@ -1460,13 +1454,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) result = queue_request_irq(adminq); if (result) { adminq->cq_vector = -1; - goto free_queues; + return result; } return nvme_create_io_queues(dev); - - free_queues: - nvme_free_queues(dev, 1); - return result; } static void nvme_del_queue_end(struct request *req, int error) From f9c22ec6c1c511285dc539b83aabdabdb6baf245 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 15 Nov 2016 17:39:02 -0500 Subject: [PATCH 258/305] gpio: Remove GPIO_DEVRES option This option was added in 6a89a314ab107a12af08c71420c19a37a30fc2d3 to allow use of the devm_gpio_* functions without CONFIG_GPIOLIB. However, only a few months later in b69ac52449c658b7ac40034dc3c5f5f4a71a723d, CONFIG_GPIOLIB was added as a dependency, defeating the original purpose of this option. Instead of that patch, the original commit could have just been reverted (and in fact was partially so in 403c1d0be5ccbd750d25c59d8358843a81e52e3b). Further, since this option has a dependency on HAS_IOMEM, even though it does not require it, it causes build failures when !HAS_IOMEM (e.g. in a uml build). Fix that by completely removing the option, in essence completing the reversion of the original commit. Signed-off-by: Keno Fischer Signed-off-by: Linus Walleij --- drivers/gpio/Kconfig | 4 ---- drivers/gpio/Makefile | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index d011cb89d25e..ed37e5908b91 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -22,10 +22,6 @@ menuconfig GPIOLIB if GPIOLIB -config GPIO_DEVRES - def_bool y - depends on HAS_IOMEM - config OF_GPIO def_bool y depends on OF diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index ab28a2daeacc..d074c2299393 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -2,7 +2,7 @@ ccflags-$(CONFIG_DEBUG_GPIO) += -DDEBUG -obj-$(CONFIG_GPIO_DEVRES) += devres.o +obj-$(CONFIG_GPIOLIB) += devres.o obj-$(CONFIG_GPIOLIB) += gpiolib.o obj-$(CONFIG_GPIOLIB) += gpiolib-legacy.o obj-$(CONFIG_OF_GPIO) += gpiolib-of.o From d5a4b1a540b8a9a44888b383472a80b84765aaa0 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 16 Nov 2016 17:27:34 +0800 Subject: [PATCH 259/305] tools/power/acpi: Remove direct kernel source include reference Avoid breaking cross-compiled ACPI tools builds by rearranging the handling of kernel header files. This patch also contains OUTPUT/srctree cleanups in order to make above fix working for various build environments. Fixes: e323c02dee59 (ACPICA: MSVC9: Fix inclusion order issue) Reported-and-tested-by: Yisheng Xie Reported-by: Andy Shevchenko Signed-off-by: Lv Zheng [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- include/acpi/platform/aclinux.h | 3 ++ tools/power/acpi/Makefile.config | 23 +++++++------- tools/power/acpi/Makefile.rules | 40 ++++++++++++++++-------- tools/power/acpi/tools/acpidbg/Makefile | 4 +-- tools/power/acpi/tools/acpidbg/acpidbg.c | 8 ++++- tools/power/acpi/tools/acpidump/Makefile | 12 +++---- 6 files changed, 56 insertions(+), 34 deletions(-) diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index a5d98d171866..e861a24f06f2 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -191,6 +191,9 @@ #ifndef __init #define __init #endif +#ifndef __iomem +#define __iomem +#endif /* Host-dependent types and defines for user-space ACPICA */ diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config index a538ff44b108..a1883bbb0144 100644 --- a/tools/power/acpi/Makefile.config +++ b/tools/power/acpi/Makefile.config @@ -8,18 +8,19 @@ # as published by the Free Software Foundation; version 2 # of the License. -include ../../../../scripts/Makefile.include +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif -OUTPUT=./ +include $(srctree)/../../scripts/Makefile.include + +OUTPUT=$(srctree)/ ifeq ("$(origin O)", "command line") - OUTPUT := $(O)/ -endif - -ifneq ($(OUTPUT),) -# check that the output directory actually exists -OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) -$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) + OUTPUT := $(O)/power/acpi/ endif +#$(info Determined 'OUTPUT' to be $(OUTPUT)) # --- CONFIGURATION BEGIN --- @@ -70,8 +71,8 @@ WARNINGS := -Wall WARNINGS += $(call cc-supports,-Wstrict-prototypes) WARNINGS += $(call cc-supports,-Wdeclaration-after-statement) -KERNEL_INCLUDE := ../../../include -ACPICA_INCLUDE := ../../../drivers/acpi/acpica +KERNEL_INCLUDE := $(OUTPUT)include +ACPICA_INCLUDE := $(srctree)/../../../drivers/acpi/acpica CFLAGS += -D_LINUX -I$(KERNEL_INCLUDE) -I$(ACPICA_INCLUDE) CFLAGS += $(WARNINGS) diff --git a/tools/power/acpi/Makefile.rules b/tools/power/acpi/Makefile.rules index ec87a9e562c0..373738338f51 100644 --- a/tools/power/acpi/Makefile.rules +++ b/tools/power/acpi/Makefile.rules @@ -8,28 +8,42 @@ # as published by the Free Software Foundation; version 2 # of the License. -$(OUTPUT)$(TOOL): $(TOOL_OBJS) FORCE - $(ECHO) " LD " $@ - $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $(TOOL_OBJS) -L$(OUTPUT) -o $@ +objdir := $(OUTPUT)tools/$(TOOL)/ +toolobjs := $(addprefix $(objdir),$(TOOL_OBJS)) +$(OUTPUT)$(TOOL): $(toolobjs) FORCE + $(ECHO) " LD " $(subst $(OUTPUT),,$@) + $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $(toolobjs) -L$(OUTPUT) -o $@ + $(ECHO) " STRIP " $(subst $(OUTPUT),,$@) $(QUIET) $(STRIPCMD) $@ -$(OUTPUT)%.o: %.c - $(ECHO) " CC " $@ +$(KERNEL_INCLUDE): + $(ECHO) " MKDIR " $(subst $(OUTPUT),,$@) + $(QUIET) mkdir -p $(KERNEL_INCLUDE) + $(ECHO) " CP " $(subst $(OUTPUT),,$@) + $(QUIET) cp -rf $(srctree)/../../../include/acpi $(KERNEL_INCLUDE)/ + +$(objdir)%.o: %.c $(KERNEL_INCLUDE) + $(ECHO) " CC " $(subst $(OUTPUT),,$@) $(QUIET) $(CC) -c $(CFLAGS) -o $@ $< all: $(OUTPUT)$(TOOL) clean: - -find $(OUTPUT) \( -not -type d \) \ - -and \( -name '*~' -o -name '*.[oas]' \) \ - -type f -print \ - | xargs rm -f - -rm -f $(OUTPUT)$(TOOL) + $(ECHO) " RMOBJ " $(subst $(OUTPUT),,$(objdir)) + $(QUIET) find $(objdir) \( -not -type d \)\ + -and \( -name '*~' -o -name '*.[oas]' \)\ + -type f -print | xargs rm -f + $(ECHO) " RM " $(TOOL) + $(QUIET) rm -f $(OUTPUT)$(TOOL) + $(ECHO) " RMINC " $(subst $(OUTPUT),,$(KERNEL_INCLUDE)) + $(QUIET) rm -rf $(KERNEL_INCLUDE) install-tools: - $(INSTALL) -d $(DESTDIR)${sbindir} - $(INSTALL_PROGRAM) $(OUTPUT)$(TOOL) $(DESTDIR)${sbindir} + $(ECHO) " INST " $(TOOL) + $(QUIET) $(INSTALL) -d $(DESTDIR)$(sbindir) + $(QUIET) $(INSTALL_PROGRAM) $(OUTPUT)$(TOOL) $(DESTDIR)$(sbindir) uninstall-tools: - - rm -f $(DESTDIR)${sbindir}/$(TOOL) + $(ECHO) " UNINST " $(TOOL) + $(QUIET) rm -f $(DESTDIR)$(sbindir)/$(TOOL) install: all install-tools $(EXTRA_INSTALL) uninstall: uninstall-tools $(EXTRA_UNINSTALL) diff --git a/tools/power/acpi/tools/acpidbg/Makefile b/tools/power/acpi/tools/acpidbg/Makefile index 352df4b41ae9..f2d06e773eb4 100644 --- a/tools/power/acpi/tools/acpidbg/Makefile +++ b/tools/power/acpi/tools/acpidbg/Makefile @@ -17,9 +17,7 @@ vpath %.c \ ../../os_specific/service_layers\ . CFLAGS += -DACPI_APPLICATION -DACPI_SINGLE_THREAD -DACPI_DEBUGGER\ - -I.\ - -I../../../../../drivers/acpi/acpica\ - -I../../../../../include + -I. LDFLAGS += -lpthread TOOL_OBJS = \ acpidbg.o diff --git a/tools/power/acpi/tools/acpidbg/acpidbg.c b/tools/power/acpi/tools/acpidbg/acpidbg.c index a88ac45b7756..4308362d7068 100644 --- a/tools/power/acpi/tools/acpidbg/acpidbg.c +++ b/tools/power/acpi/tools/acpidbg/acpidbg.c @@ -12,10 +12,16 @@ #include /* Headers not included by include/acpi/platform/aclinux.h */ +#include +#include +#include +#include +#include #include #include #include -#include +#include +#include "../../../../../include/linux/circ_buf.h" #define ACPI_AML_FILE "/sys/kernel/debug/acpi/acpidbg" #define ACPI_AML_SEC_TICK 1 diff --git a/tools/power/acpi/tools/acpidump/Makefile b/tools/power/acpi/tools/acpidump/Makefile index 04b5db7c7c0b..f7c7af1f9258 100644 --- a/tools/power/acpi/tools/acpidump/Makefile +++ b/tools/power/acpi/tools/acpidump/Makefile @@ -19,9 +19,7 @@ vpath %.c \ ./\ ../../common\ ../../os_specific/service_layers -CFLAGS += -DACPI_DUMP_APP -I.\ - -I../../../../../drivers/acpi/acpica\ - -I../../../../../include +CFLAGS += -DACPI_DUMP_APP -I. TOOL_OBJS = \ apdump.o\ apfiles.o\ @@ -49,7 +47,9 @@ TOOL_OBJS = \ include ../../Makefile.rules -install-man: ../../man/acpidump.8 - $(INSTALL_DATA) -D $< $(DESTDIR)${mandir}/man8/acpidump.8 +install-man: $(srctree)/man/acpidump.8 + $(ECHO) " INST " acpidump.8 + $(QUIET) $(INSTALL_DATA) -D $< $(DESTDIR)$(mandir)/man8/acpidump.8 uninstall-man: - - rm -f $(DESTDIR)${mandir}/man8/acpidump.8 + $(ECHO) " UNINST " acpidump.8 + $(QUIET) rm -f $(DESTDIR)$(mandir)/man8/acpidump.8 From 2a3811068fbc6bf09bb09d166b65394b091c1085 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 16 Nov 2016 23:51:19 +0000 Subject: [PATCH 260/305] ARM: Fix XIP kernels Commit 7619751f8c90 ("ARM: 8595/2: apply more __ro_after_init") caused a regression with XIP kernels by moving the __ro_after_init data into the read-only section. With XIP kernels, the read-only section is located in read-only memory from the very beginning. Work around this by moving the __ro_after_init data back into the .data section, which will be in RAM, and hence will be writable. It should be noted that in doing so, this remains writable after init. Fixes: 7619751f8c90 ("ARM: 8595/2: apply more __ro_after_init") Reported-by: Andrea Merello Tested-by: Andrea Merello [ XIP stm32 ] Tested-by: Alexandre Torgue Signed-off-by: Russell King --- arch/arm/kernel/vmlinux-xip.lds.S | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 7fa487ef7e2f..37b2a11af345 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -3,6 +3,9 @@ * Written by Martin Mares */ +/* No __ro_after_init data in the .rodata section - which will always be ro */ +#define RO_AFTER_INIT_DATA + #include #include #include @@ -223,6 +226,8 @@ SECTIONS . = ALIGN(PAGE_SIZE); __init_end = .; + *(.data..ro_after_init) + NOSAVE_DATA CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) READ_MOSTLY_DATA(L1_CACHE_BYTES) From 5b810a242c28e1d8d64d718cebe75b79d86a0b2d Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 27 Oct 2016 16:36:26 +0300 Subject: [PATCH 261/305] IB/uverbs: Fix leak of XRC target QPs The real QP is destroyed in case of the ref count reaches zero, but for XRC target QPs this call was missed and caused to QP leaks. Let's call to destroy for all flows. Fixes: 0e0ec7e0638e ('RDMA/core: Export ib_open_qp() to share XRC...') Signed-off-by: Tariq Toukan Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_main.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 0012fa58c105..44b1104eb168 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -262,12 +262,9 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, container_of(uobj, struct ib_uqp_object, uevent.uobject); idr_remove_uobj(&ib_uverbs_qp_idr, uobj); - if (qp != qp->real_qp) { - ib_close_qp(qp); - } else { + if (qp == qp->real_qp) ib_uverbs_detach_umcast(qp, uqp); - ib_destroy_qp(qp); - } + ib_destroy_qp(qp); ib_uverbs_release_uevent(file, &uqp->uevent); kfree(uqp); } From 9db0ff53cb9b43ed75bacd42a89c1a0ab048b2b0 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 27 Oct 2016 16:36:27 +0300 Subject: [PATCH 262/305] IB/cm: Mark stale CM id's whenever the mad agent was unregistered When there is a CM id object that has port assigned to it, it means that the cm-id asked for the specific port that it should go by it, but if that port was removed (hot-unplug event) the cm-id was not updated. In order to fix that the port keeps a list of all the cm-id's that are planning to go by it, whenever the port is removed it marks all of them as invalid. This commit fixes a kernel panic which happens when running traffic between guests and we force reboot a guest mid traffic, it triggers a kernel panic: Call Trace: [] ? panic+0xa7/0x16f [] ? oops_end+0xe4/0x100 [] ? no_context+0xfb/0x260 [] ? del_timer_sync+0x22/0x30 [] ? __bad_area_nosemaphore+0x125/0x1e0 [] ? process_timeout+0x0/0x10 [] ? bad_area_nosemaphore+0x13/0x20 [] ? __do_page_fault+0x31f/0x480 [] ? default_wake_function+0x0/0x20 [] ? free_msg+0x55/0x70 [mlx5_core] [] ? cmd_exec+0x124/0x840 [mlx5_core] [] ? find_busiest_group+0x244/0x9f0 [] ? do_page_fault+0x3e/0xa0 [] ? page_fault+0x25/0x30 [] ? cm_alloc_msg+0x35/0xc0 [ib_cm] [] ? ib_send_cm_dreq+0xb1/0x1e0 [ib_cm] [] ? cm_destroy_id+0x176/0x320 [ib_cm] [] ? ib_destroy_cm_id+0x10/0x20 [ib_cm] [] ? ipoib_cm_free_rx_reap_list+0xa7/0x110 [ib_ipoib] [] ? ipoib_cm_rx_reap+0x0/0x20 [ib_ipoib] [] ? ipoib_cm_rx_reap+0x15/0x20 [ib_ipoib] [] ? worker_thread+0x170/0x2a0 [] ? autoremove_wake_function+0x0/0x40 [] ? worker_thread+0x0/0x2a0 [] ? kthread+0x96/0xa0 [] ? child_rip+0xa/0x20 [] ? kthread+0x0/0xa0 [] ? child_rip+0x0/0x20 Fixes: a977049dacde ("[PATCH] IB: Add the kernel CM implementation") Signed-off-by: Mark Bloch Signed-off-by: Erez Shitrit Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 126 ++++++++++++++++++++++++++++++----- 1 file changed, 110 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index c99525512b34..71c7c4c328ef 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -80,6 +80,8 @@ static struct ib_cm { __be32 random_id_operand; struct list_head timewait_list; struct workqueue_struct *wq; + /* Sync on cm change port state */ + spinlock_t state_lock; } cm; /* Counter indexes ordered by attribute ID */ @@ -161,6 +163,8 @@ struct cm_port { struct ib_mad_agent *mad_agent; struct kobject port_obj; u8 port_num; + struct list_head cm_priv_prim_list; + struct list_head cm_priv_altr_list; struct cm_counter_group counter_group[CM_COUNTER_GROUPS]; }; @@ -241,6 +245,12 @@ struct cm_id_private { u8 service_timeout; u8 target_ack_delay; + struct list_head prim_list; + struct list_head altr_list; + /* Indicates that the send port mad is registered and av is set */ + int prim_send_port_not_ready; + int altr_send_port_not_ready; + struct list_head work_list; atomic_t work_count; }; @@ -259,20 +269,47 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, struct ib_mad_agent *mad_agent; struct ib_mad_send_buf *m; struct ib_ah *ah; + struct cm_av *av; + unsigned long flags, flags2; + int ret = 0; + /* don't let the port to be released till the agent is down */ + spin_lock_irqsave(&cm.state_lock, flags2); + spin_lock_irqsave(&cm.lock, flags); + if (!cm_id_priv->prim_send_port_not_ready) + av = &cm_id_priv->av; + else if (!cm_id_priv->altr_send_port_not_ready && + (cm_id_priv->alt_av.port)) + av = &cm_id_priv->alt_av; + else { + pr_info("%s: not valid CM id\n", __func__); + ret = -ENODEV; + spin_unlock_irqrestore(&cm.lock, flags); + goto out; + } + spin_unlock_irqrestore(&cm.lock, flags); + /* Make sure the port haven't released the mad yet */ mad_agent = cm_id_priv->av.port->mad_agent; - ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr); - if (IS_ERR(ah)) - return PTR_ERR(ah); + if (!mad_agent) { + pr_info("%s: not a valid MAD agent\n", __func__); + ret = -ENODEV; + goto out; + } + ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr); + if (IS_ERR(ah)) { + ret = PTR_ERR(ah); + goto out; + } m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, - cm_id_priv->av.pkey_index, + av->pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC, IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { ib_destroy_ah(ah); - return PTR_ERR(m); + ret = PTR_ERR(m); + goto out; } /* Timeout set by caller if response is expected. */ @@ -282,7 +319,10 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, atomic_inc(&cm_id_priv->refcount); m->context[0] = cm_id_priv; *msg = m; - return 0; + +out: + spin_unlock_irqrestore(&cm.state_lock, flags2); + return ret; } static int cm_alloc_response_msg(struct cm_port *port, @@ -352,7 +392,8 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, grh, &av->ah_attr); } -static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) +static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av, + struct cm_id_private *cm_id_priv) { struct cm_device *cm_dev; struct cm_port *port = NULL; @@ -387,7 +428,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) &av->ah_attr); av->timeout = path->packet_life_time + 1; - return 0; + spin_lock_irqsave(&cm.lock, flags); + if (&cm_id_priv->av == av) + list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list); + else if (&cm_id_priv->alt_av == av) + list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list); + else + ret = -EINVAL; + + spin_unlock_irqrestore(&cm.lock, flags); + + return ret; } static int cm_alloc_id(struct cm_id_private *cm_id_priv) @@ -677,6 +728,8 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device, spin_lock_init(&cm_id_priv->lock); init_completion(&cm_id_priv->comp); INIT_LIST_HEAD(&cm_id_priv->work_list); + INIT_LIST_HEAD(&cm_id_priv->prim_list); + INIT_LIST_HEAD(&cm_id_priv->altr_list); atomic_set(&cm_id_priv->work_count, -1); atomic_set(&cm_id_priv->refcount, 1); return &cm_id_priv->id; @@ -892,6 +945,15 @@ retest: break; } + spin_lock_irq(&cm.lock); + if (!list_empty(&cm_id_priv->altr_list) && + (!cm_id_priv->altr_send_port_not_ready)) + list_del(&cm_id_priv->altr_list); + if (!list_empty(&cm_id_priv->prim_list) && + (!cm_id_priv->prim_send_port_not_ready)) + list_del(&cm_id_priv->prim_list); + spin_unlock_irq(&cm.lock); + cm_free_id(cm_id->local_id); cm_deref_id(cm_id_priv); wait_for_completion(&cm_id_priv->comp); @@ -1192,12 +1254,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, goto out; } - ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av); + ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av, + cm_id_priv); if (ret) goto error1; if (param->alternate_path) { ret = cm_init_av_by_path(param->alternate_path, - &cm_id_priv->alt_av); + &cm_id_priv->alt_av, cm_id_priv); if (ret) goto error1; } @@ -1653,7 +1716,8 @@ static int cm_req_handler(struct cm_work *work) dev_put(gid_attr.ndev); } work->path[0].gid_type = gid_attr.gid_type; - ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); + ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av, + cm_id_priv); } if (ret) { int err = ib_get_cached_gid(work->port->cm_dev->ib_device, @@ -1672,7 +1736,8 @@ static int cm_req_handler(struct cm_work *work) goto rejected; } if (req_msg->alt_local_lid) { - ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av); + ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av, + cm_id_priv); if (ret) { ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, &work->path[0].sgid, @@ -2727,7 +2792,8 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id, goto out; } - ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av); + ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av, + cm_id_priv); if (ret) goto out; cm_id_priv->alt_av.timeout = @@ -2839,7 +2905,8 @@ static int cm_lap_handler(struct cm_work *work) cm_init_av_for_response(work->port, work->mad_recv_wc->wc, work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); - cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av); + cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av, + cm_id_priv); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -3031,7 +3098,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); - ret = cm_init_av_by_path(param->path, &cm_id_priv->av); + ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv); if (ret) goto out; @@ -3468,7 +3535,9 @@ out: static int cm_migrate(struct ib_cm_id *cm_id) { struct cm_id_private *cm_id_priv; + struct cm_av tmp_av; unsigned long flags; + int tmp_send_port_not_ready; int ret = 0; cm_id_priv = container_of(cm_id, struct cm_id_private, id); @@ -3477,7 +3546,14 @@ static int cm_migrate(struct ib_cm_id *cm_id) (cm_id->lap_state == IB_CM_LAP_UNINIT || cm_id->lap_state == IB_CM_LAP_IDLE)) { cm_id->lap_state = IB_CM_LAP_IDLE; + /* Swap address vector */ + tmp_av = cm_id_priv->av; cm_id_priv->av = cm_id_priv->alt_av; + cm_id_priv->alt_av = tmp_av; + /* Swap port send ready state */ + tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready; + cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready; + cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready; } else ret = -EINVAL; spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -3888,6 +3964,9 @@ static void cm_add_one(struct ib_device *ib_device) port->cm_dev = cm_dev; port->port_num = i; + INIT_LIST_HEAD(&port->cm_priv_prim_list); + INIT_LIST_HEAD(&port->cm_priv_altr_list); + ret = cm_create_port_fs(port); if (ret) goto error1; @@ -3945,6 +4024,8 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) { struct cm_device *cm_dev = client_data; struct cm_port *port; + struct cm_id_private *cm_id_priv; + struct ib_mad_agent *cur_mad_agent; struct ib_port_modify port_modify = { .clr_port_cap_mask = IB_PORT_CM_SUP }; @@ -3968,15 +4049,27 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); + /* Mark all the cm_id's as not valid */ + spin_lock_irq(&cm.lock); + list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list) + cm_id_priv->altr_send_port_not_ready = 1; + list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list) + cm_id_priv->prim_send_port_not_ready = 1; + spin_unlock_irq(&cm.lock); /* * We flush the queue here after the going_down set, this * verify that no new works will be queued in the recv handler, * after that we can call the unregister_mad_agent */ flush_workqueue(cm.wq); - ib_unregister_mad_agent(port->mad_agent); + spin_lock_irq(&cm.state_lock); + cur_mad_agent = port->mad_agent; + port->mad_agent = NULL; + spin_unlock_irq(&cm.state_lock); + ib_unregister_mad_agent(cur_mad_agent); cm_remove_port_fs(port); } + device_unregister(cm_dev->device); kfree(cm_dev); } @@ -3989,6 +4082,7 @@ static int __init ib_cm_init(void) INIT_LIST_HEAD(&cm.device_list); rwlock_init(&cm.device_lock); spin_lock_init(&cm.lock); + spin_lock_init(&cm.state_lock); cm.listen_service_table = RB_ROOT; cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID); cm.remote_id_table = RB_ROOT; From aeb76df46d1158d5f7f3d30f993a1bb6ee9c67a0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 31 Oct 2016 07:50:56 +0200 Subject: [PATCH 263/305] IB/core: Set routable RoCE gid type for ipv4/ipv6 networks On Thu, Oct 27, 2016 at 04:36:28PM +0300, Leon Romanovsky wrote: > From: Mark Bloch > > If the underlying netowrk type is ipv4 or ipv6 and the device supports > routable RoCE, prefer it so the traffic could cross subnets. > > Signed-off-by: Mark Bloch > Signed-off-by: Maor Gottlieb > Signed-off-by: Leon Romanovsky > --- Hi Doug, Please take the following v1 of this patch where I fixed spelling error from "netowrk" to be "network". Thanks. >From 09f96ba3e9b4442cfb44dca04c6726e55525c9c3 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Sun, 11 Sep 2016 06:25:10 +0000 Subject: [PATCH rdma-rc v1 3/6] IB/core: Set routable RoCE gid type for ipv4/ipv6 networks If the underlying network type is ipv4 or ipv6 and the device supports routable RoCE, prefer it so the traffic could cross subnets. Signed-off-by: Mark Bloch Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 36bf50ebb187..9ca0da0a37c4 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2436,6 +2436,18 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos) return 0; } +static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type, + unsigned long supported_gids, + enum ib_gid_type default_gid) +{ + if ((network_type == RDMA_NETWORK_IPV4 || + network_type == RDMA_NETWORK_IPV6) && + test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) + return IB_GID_TYPE_ROCE_UDP_ENCAP; + + return default_gid; +} + static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) { struct rdma_route *route = &id_priv->id.route; @@ -2461,6 +2473,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) route->num_paths = 1; if (addr->dev_addr.bound_dev_if) { + unsigned long supported_gids; + ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); if (!ndev) { ret = -ENODEV; @@ -2484,7 +2498,12 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) route->path_rec->net = &init_net; route->path_rec->ifindex = ndev->ifindex; - route->path_rec->gid_type = id_priv->gid_type; + supported_gids = roce_gid_type_mask_support(id_priv->id.device, + id_priv->id.port_num); + route->path_rec->gid_type = + cma_route_gid_type(addr->dev_addr.network, + supported_gids, + id_priv->gid_type); } if (!ndev) { ret = -ENODEV; From 61c3702863be9e9f1ef12ed5a5b17bae6cdfac0b Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 27 Oct 2016 16:36:29 +0300 Subject: [PATCH 264/305] IB/core: Add missing check for addr_resolve callback return value When calling rdma_resolve_ip inside rdma_addr_find_l2_eth_by_grh, the return status of the request was ignored in the callback function causing a successful return and an empty dmac. Signed-off-by: Mark Bloch Signed-off-by: Alex Vesker Reviewed-by: Or Gerlitz Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/addr.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index b136d3acc5bd..0f58f46dbad7 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -699,13 +699,16 @@ EXPORT_SYMBOL(rdma_addr_cancel); struct resolve_cb_context { struct rdma_dev_addr *addr; struct completion comp; + int status; }; static void resolve_cb(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context) { - memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct - rdma_dev_addr)); + if (!status) + memcpy(((struct resolve_cb_context *)context)->addr, + addr, sizeof(struct rdma_dev_addr)); + ((struct resolve_cb_context *)context)->status = status; complete(&((struct resolve_cb_context *)context)->comp); } @@ -743,6 +746,10 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, wait_for_completion(&ctx.comp); + ret = ctx.status; + if (ret) + return ret; + memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN); dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if); if (!dev) From 3c7ba5760ab8eedec01159b267bb9bfcffe522ac Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 27 Oct 2016 16:36:31 +0300 Subject: [PATCH 265/305] IB/core: Avoid unsigned int overflow in sg_alloc_table sg_alloc_table gets unsigned int as parameter while the driver returns it as size_t. Check npages isn't greater than maximum unsigned int. Fixes: eeb8461e36c9 ("IB: Refactor umem to use linear SG table") Signed-off-by: Mark Bloch Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/umem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 224ad274ea0b..84b4eff90395 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -175,7 +175,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, cur_base = addr & PAGE_MASK; - if (npages == 0) { + if (npages == 0 || npages > UINT_MAX) { ret = -EINVAL; goto out; } From 90be7c8ab72853ff9fc407f01518a898df1f3045 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 27 Oct 2016 16:36:39 +0300 Subject: [PATCH 266/305] IB/mlx5: Fix memory leak in query device We need to free dev->port when we fail to enable RoCE or initialize node data. Fixes: 0837e86a7a34 ('IB/mlx5: Add per port counters') Signed-off-by: Majd Dibbiny Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 22174774dbb8..bb61487861cc 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3115,7 +3115,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) } err = init_node_data(dev); if (err) - goto err_dealloc; + goto err_free_port; mutex_init(&dev->flow_db.lock); mutex_init(&dev->cap_mask_mutex); @@ -3125,7 +3125,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (ll == IB_LINK_LAYER_ETHERNET) { err = mlx5_enable_roce(dev); if (err) - goto err_dealloc; + goto err_free_port; } err = create_dev_resources(&dev->devr); From efd7f40082a0dfd112eb87ff2124467a5739216f Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 27 Oct 2016 16:36:40 +0300 Subject: [PATCH 267/305] IB/mlx5: Validate requested RQT size Validate that the requested size of RQT is supported by firmware. Fixes: c5f9092936fe ('IB/mlx5: Add Receive Work Queue Indirection table operations') Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 41f4c2afbcdd..2be0d06b27dc 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4815,6 +4815,14 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, udata->inlen)) return ERR_PTR(-EOPNOTSUPP); + if (init_attr->log_ind_tbl_size > + MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) { + mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n", + init_attr->log_ind_tbl_size, + MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)); + return ERR_PTR(-EINVAL); + } + min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); if (udata->outlen && udata->outlen < min_resp_len) return ERR_PTR(-EINVAL); From 16b0e0695a73b68d8ca40288c8f9614ef208917b Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 27 Oct 2016 16:36:41 +0300 Subject: [PATCH 268/305] IB/mlx5: Use cache line size to select CQE stride When creating kernel CQs use 128B CQE stride if the cache line size is 128B, 64B otherwise. This prevents multiple CQEs from residing in a 128B cache line, which can cause retries when there are concurrent read and writes in one cache line. Tested with IPoIB on PPC64, saw ~5% throughput improvement. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Daniel Jurgens Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 79d017baf6f4..fcd04b881ec1 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -932,8 +932,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, if (err) goto err_create; } else { - /* for now choose 64 bytes till we have a proper interface */ - cqe_size = 64; + cqe_size = cache_line_size() == 128 ? 128 : 64; err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb, &index, &inlen); if (err) From 6bc1a656ab9f57f0112823b4a36930c9a29d1f89 Mon Sep 17 00:00:00 2001 From: Moshe Lazer Date: Thu, 27 Oct 2016 16:36:42 +0300 Subject: [PATCH 269/305] IB/mlx5: Resolve soft lock on massive reg MRs When calling reg_mr of large MRs (e.g. 4GB) from multiple processes and MR caches can't supply the required amount of MRs the slow-path of MR allocation may be used. In this case we need to serialize the slow-path between the processes to avoid soft lock. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Moshe Lazer Signed-off-by: Maor Gottlieb Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 ++ drivers/infiniband/hw/mlx5/mr.c | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index dcdcd195fe53..7d689903c87c 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -626,6 +626,8 @@ struct mlx5_ib_dev { struct mlx5_ib_resources devr; struct mlx5_mr_cache cache; struct timer_list delay_timer; + /* Prevents soft lock on massive reg MRs */ + struct mutex slow_path_mutex; int fill_delay; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_odp_caps odp_caps; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index d4ad672b905b..4e9012463c37 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -610,6 +610,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) int err; int i; + mutex_init(&dev->slow_path_mutex); cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); if (!cache->wq) { mlx5_ib_warn(dev, "failed to create work queue\n"); @@ -1182,9 +1183,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto error; } - if (!mr) + if (!mr) { + mutex_lock(&dev->slow_path_mutex); mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, page_shift, access_flags); + mutex_unlock(&dev->slow_path_mutex); + } if (IS_ERR(mr)) { err = PTR_ERR(mr); From dbaaff2a2caa03d472b5cc53a3fbfd415c97dc26 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 27 Oct 2016 16:36:44 +0300 Subject: [PATCH 270/305] IB/mlx5: Fix fatal error dispatching When an internal error condition is detected, make sure to set the device inactive after dispatching the event so ULPs can get a notification of this event. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Eli Cohen Signed-off-by: Maor Gottlieb Reviewed-by: Mohamad Haj Yahia Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index bb61487861cc..a014ad38d889 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2311,14 +2311,14 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, { struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context; struct ib_event ibev; - + bool fatal = false; u8 port = 0; switch (event) { case MLX5_DEV_EVENT_SYS_ERROR: - ibdev->ib_active = false; ibev.event = IB_EVENT_DEVICE_FATAL; mlx5_ib_handle_internal_error(ibdev); + fatal = true; break; case MLX5_DEV_EVENT_PORT_UP: @@ -2370,6 +2370,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, if (ibdev->ib_active) ib_dispatch_event(&ibev); + + if (fatal) + ibdev->ib_active = false; } static void get_ext_port_caps(struct mlx5_ib_dev *dev) From a1ab8402d15d2305d2315d96ec3294bfdf16587e Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 27 Oct 2016 16:36:46 +0300 Subject: [PATCH 271/305] IB/mlx5: Fix NULL pointer dereference on debug print For XRC QP CQs may not exist. Check before attempting dereference. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Eli Cohen Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 2be0d06b27dc..59c4c89460d1 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2052,8 +2052,8 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n", qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn, - to_mcq(init_attr->recv_cq)->mcq.cqn, - to_mcq(init_attr->send_cq)->mcq.cqn); + init_attr->recv_cq ? to_mcq(init_attr->recv_cq)->mcq.cqn : -1, + init_attr->send_cq ? to_mcq(init_attr->send_cq)->mcq.cqn : -1); qp->trans_qp.xrcdn = xrcdn; From 37995116fecfce2b61ee3da6e73b3e394c6818f9 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 10 Nov 2016 11:30:54 +0200 Subject: [PATCH 272/305] IB/mlx4: Check gid_index return value Check the returned GID index value and return an error if it is invalid. Fixes: 5070cd2239bd ('IB/mlx4: Replace mechanism for RoCE GID management') Signed-off-by: Daniel Jurgens Reviewed-by: Mark Bloch Reviewed-by: Yuval Shaia Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/ah.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 5fc623362731..b9bf0759f10a 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -102,7 +102,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr if (vlan_tag < 0x1000) vlan_tag |= (ah_attr->sl & 7) << 13; ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); - ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index); + ret = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index); + if (ret < 0) + return ERR_PTR(ret); + ah->av.eth.gid_index = ret; ah->av.eth.vlan = cpu_to_be16(vlan_tag); ah->av.eth.hop_limit = ah_attr->grh.hop_limit; if (ah_attr->static_rate) { From 593ff73bcfdc79f79a8a0df55504f75ad3e5d1a9 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 10 Nov 2016 11:30:55 +0200 Subject: [PATCH 273/305] IB/mlx4: Fix create CQ error flow Currently, if ib_copy_to_udata fails, the CQ won't be deleted from the radix tree and the HW (HW2SW). Fixes: 225c7b1feef1 ('IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters') Signed-off-by: Matan Barak Signed-off-by: Daniel Jurgens Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/cq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 1ea686b9e0f9..6a0fec357dae 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -253,11 +253,14 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, if (context) if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) { err = -EFAULT; - goto err_dbmap; + goto err_cq_free; } return &cq->ibcq; +err_cq_free: + mlx4_cq_free(dev->dev, &cq->mcq); + err_dbmap: if (context) mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db); From 1454ca3a97e147bb91e98b087446c39cf6692a48 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 16 Nov 2016 10:39:14 +0200 Subject: [PATCH 274/305] IB/rxe: Fix kernel panic in UDP tunnel with GRO and RX checksum Missing initialization of udp_tunnel_sock_cfg causes to following kernel panic, while kernel tries to execute gro_receive(). While being there, we converted udp_port_cfg to use the same initialization scheme as udp_tunnel_sock_cfg. ------------[ cut here ]------------ kernel tried to execute NX-protected page - exploit attempt? (uid: 0) BUG: unable to handle kernel paging request at ffffffffa0588c50 IP: [] __this_module+0x50/0xffffffffffff8400 [ib_rxe] PGD 1c09067 PUD 1c0a063 PMD bb394067 PTE 80000000ad5e8163 Oops: 0011 [#1] SMP Modules linked in: ib_rxe ip6_udp_tunnel udp_tunnel CPU: 5 PID: 0 Comm: swapper/5 Not tainted 4.7.0-rc3+ #2 Hardware name: Red Hat KVM, BIOS Bochs 01/01/2011 task: ffff880235e4e680 ti: ffff880235e68000 task.ti: ffff880235e68000 RIP: 0010:[] [] __this_module+0x50/0xffffffffffff8400 [ib_rxe] RSP: 0018:ffff880237343c80 EFLAGS: 00010282 RAX: 00000000dffe482d RBX: ffff8800ae330900 RCX: 000000002001b712 RDX: ffff8800ae330900 RSI: ffff8800ae102578 RDI: ffff880235589c00 RBP: ffff880237343cb0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ae33e262 R13: ffff880235589c00 R14: 0000000000000014 R15: ffff8800ae102578 FS: 0000000000000000(0000) GS:ffff880237340000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffa0588c50 CR3: 0000000001c06000 CR4: 00000000000006e0 Stack: ffffffff8160860e ffff8800ae330900 ffff8800ae102578 0000000000000014 000000000000004e ffff8800ae102578 ffff880237343ce0 ffffffff816088fb 0000000000000000 ffff8800ae330900 0000000000000000 00000000ffad0000 Call Trace: [] ? udp_gro_receive+0xde/0x130 [] udp4_gro_receive+0x10b/0x2d0 [] inet_gro_receive+0x1d3/0x270 [] dev_gro_receive+0x269/0x3b0 [] napi_gro_receive+0x38/0x120 [] mlx5e_handle_rx_cqe+0x27e/0x340 [mlx5_core] [] mlx5e_poll_rx_cq+0x66/0x6d0 [mlx5_core] [] mlx5e_napi_poll+0x8e/0x400 [mlx5_core] [] net_rx_action+0x160/0x380 [] __do_softirq+0xd7/0x2c5 [] irq_exit+0xf5/0x100 [] do_IRQ+0x56/0xd0 [] common_interrupt+0x8c/0x8c [] ? native_safe_halt+0x6/0x10 [] default_idle+0x1e/0xd0 [] arch_cpu_idle+0xf/0x20 [] default_idle_call+0x3c/0x50 [] cpu_startup_entry+0x323/0x3c0 [] start_secondary+0x15c/0x1a0 RIP [] __this_module+0x50/0xffffffffffff8400 [ib_rxe] RSP CR2: ffffffffa0588c50 ---[ end trace 489ee31fa7614ac5 ]--- Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception in interrupt ------------[ cut here ]------------ Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_net.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index b8258e4f0aea..ffff5a54cb34 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -243,10 +243,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port, { int err; struct socket *sock; - struct udp_port_cfg udp_cfg; - struct udp_tunnel_sock_cfg tnl_cfg; - - memset(&udp_cfg, 0, sizeof(udp_cfg)); + struct udp_port_cfg udp_cfg = {0}; + struct udp_tunnel_sock_cfg tnl_cfg = {0}; if (ipv6) { udp_cfg.family = AF_INET6; @@ -264,10 +262,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port, return ERR_PTR(err); } - tnl_cfg.sk_user_data = NULL; tnl_cfg.encap_type = 1; tnl_cfg.encap_rcv = rxe_udp_encap_recv; - tnl_cfg.encap_destroy = NULL; /* Setup UDP tunnel */ setup_udp_tunnel_sock(net, sock, &tnl_cfg); From 002e062e13db10973adb8302f231e48b477c7ccf Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 16 Nov 2016 10:39:15 +0200 Subject: [PATCH 275/305] IB/rxe: Fix handling of erroneous WR To correctly handle a erroneous WR this fix does the following 1. Make sure the bad WQE causes a user completion event. 2. Call rxe_completer to handle the erred WQE. Before the fix, when rxe_requester found a bad WQE, it changed its status to IB_WC_LOC_PROT_ERR and exit with 0 for non RC QPs. If this was the 1st WQE then there would be no ACK to invoke the completer and this bad WQE would be stuck in the QP's send-q. On top of that the requester exiting with 0 caused rxe_do_task to endlessly invoke rxe_requester, resulting in a soft-lockup attached below. In case the WQE was not the 1st and rxe_completer did get a chance to handle the bad WQE, it did not cause a complete event since the WQE's IB_SEND_SIGNALED flag was not set. Setting WQE status to IB_SEND_SIGNALED is subject to IBA spec version 1.2.1, section 10.7.3.1 Signaled Completions. NMI watchdog: BUG: soft lockup - CPU#7 stuck for 22s! [] ? rxe_pool_get_index+0x35/0xb0 [rdma_rxe] [] lookup_mem+0x3c/0xc0 [rdma_rxe] [] copy_data+0x1c4/0x230 [rdma_rxe] [] rxe_requester+0x9d0/0x1100 [rdma_rxe] [] ? kfree_skbmem+0x5a/0x60 [] rxe_do_task+0x89/0xf0 [rdma_rxe] [] rxe_run_task+0x12/0x30 [rdma_rxe] [] rxe_post_send+0x41a/0x550 [rdma_rxe] [] ? __kmalloc+0x182/0x200 [] ? down_read+0x12/0x40 [] ib_uverbs_post_send+0x532/0x540 [ib_uverbs] [] ? tcp_sendmsg+0x402/0xb80 [] ib_uverbs_write+0x18c/0x3f0 [ib_uverbs] [] ? inet_recvmsg+0x7e/0xb0 [] ? sock_recvmsg+0x3d/0x50 [] __vfs_write+0x37/0x140 [] vfs_write+0xb2/0x1b0 [] SyS_write+0x55/0xc0 [] entry_SYSCALL_64_fastpath+0x1a/0xa Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_req.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 832846b73ea0..22bd9630dcd9 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -696,7 +696,8 @@ next_wqe: qp->req.wqe_index); wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; - goto complete; + __rxe_do_task(&qp->comp.task); + return 0; } payload = mtu; } @@ -745,13 +746,17 @@ err: wqe->status = IB_WC_LOC_PROT_ERR; wqe->state = wqe_state_error; -complete: - if (qp_type(qp) != IB_QPT_RC) { - while (rxe_completer(qp) == 0) - ; - } - - return 0; + /* + * IBA Spec. Section 10.7.3.1 SIGNALED COMPLETIONS + * ---------8<---------8<------------- + * ...Note that if a completion error occurs, a Work Completion + * will always be generated, even if the signaling + * indicator requests an Unsignaled Completion. + * ---------8<---------8<------------- + */ + wqe->wr.send_flags |= IB_SEND_SIGNALED; + __rxe_do_task(&qp->comp.task); + return -EAGAIN; exit: return -EAGAIN; From aa75b07b478a774b1432e2df1be5cd8ae834de0f Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 16 Nov 2016 10:39:17 +0200 Subject: [PATCH 276/305] IB/rxe: Clear queue buffer when modifying QP to reset RXE resets the send-q only once in rxe_qp_init_req() when QP is created, but when the QP is reused after QP reset, the send-q holds previous garbage data. This garbage data wrongly fails CQEs that otherwise should have completed successfully. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_qp.c | 1 + drivers/infiniband/sw/rxe/rxe_queue.c | 9 +++++++++ drivers/infiniband/sw/rxe/rxe_queue.h | 2 ++ 3 files changed, 12 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index b8036cfbce04..95aaaa282a04 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -522,6 +522,7 @@ static void rxe_qp_reset(struct rxe_qp *qp) if (qp->sq.queue) { __rxe_do_task(&qp->comp.task); __rxe_do_task(&qp->req.task); + rxe_queue_reset(qp->sq.queue); } /* cleanup attributes */ diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c index 08274254eb88..d14bf496d62d 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.c +++ b/drivers/infiniband/sw/rxe/rxe_queue.c @@ -84,6 +84,15 @@ err1: return -EINVAL; } +inline void rxe_queue_reset(struct rxe_queue *q) +{ + /* queue is comprised from header and the memory + * of the actual queue. See "struct rxe_queue_buf" in rxe_queue.h + * reset only the queue itself and not the management header + */ + memset(q->buf->data, 0, q->buf_size - sizeof(struct rxe_queue_buf)); +} + struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem, unsigned int elem_size) diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h index 239fd609c31e..8c8641c87817 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.h +++ b/drivers/infiniband/sw/rxe/rxe_queue.h @@ -84,6 +84,8 @@ int do_mmap_info(struct rxe_dev *rxe, size_t buf_size, struct rxe_mmap_info **ip_p); +void rxe_queue_reset(struct rxe_queue *q); + struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem, unsigned int elem_size); From 6d931308f55faaef3f30bd0346c47f99528b229d Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 16 Nov 2016 10:39:18 +0200 Subject: [PATCH 277/305] IB/rxe: Update qp state for user query The method rxe_qp_error() transitions QP to error state and make sure the QP is drained. It did not though update the QP state for user's query. This patch fixes this. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_qp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 95aaaa282a04..c3e60e4bde6e 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -574,6 +574,7 @@ void rxe_qp_error(struct rxe_qp *qp) { qp->req.state = QP_STATE_ERROR; qp->resp.state = QP_STATE_ERROR; + qp->attr.qp_state = IB_QPS_ERR; /* drain work and packet queues */ rxe_run_task(&qp->resp.task, 1); From 4ff522ea47944ffd3d4d27023ace8bc6a722c834 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 18 Oct 2016 14:04:39 -0700 Subject: [PATCH 278/305] iw_cxgb4: set *bad_wr for post_send/post_recv errors There are a few cases in c4iw_post_send() and c4iw_post_receive() where *bad_wr is not set when an error is returned. This can cause a crash if the application tries to use bad_wr. Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/qp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index f57deba6717c..5790e1dbd618 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -797,11 +797,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qhp->lock, flag); if (t4_wq_in_error(&qhp->wq)) { spin_unlock_irqrestore(&qhp->lock, flag); + *bad_wr = wr; return -EINVAL; } num_wrs = t4_sq_avail(&qhp->wq); if (num_wrs == 0) { spin_unlock_irqrestore(&qhp->lock, flag); + *bad_wr = wr; return -ENOMEM; } while (wr) { @@ -934,11 +936,13 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_lock_irqsave(&qhp->lock, flag); if (t4_wq_in_error(&qhp->wq)) { spin_unlock_irqrestore(&qhp->lock, flag); + *bad_wr = wr; return -EINVAL; } num_wrs = t4_rq_avail(&qhp->wq); if (num_wrs == 0) { spin_unlock_irqrestore(&qhp->lock, flag); + *bad_wr = wr; return -ENOMEM; } while (wr) { From 5c6b2aaf9316fd0983c0c999d920306ddc65bd2d Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 3 Nov 2016 12:09:38 -0700 Subject: [PATCH 279/305] iw_cxgb4: invalidate the mr when posting a read_w_inv wr Also, rearrange things a bit to have a common c4iw_invalidate_mr() function used everywhere that we need to invalidate. Fixes: 49b53a93a64a ("iw_cxgb4: add fast-path for small REG_MR operations") Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cq.c | 17 +++-------------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 +- drivers/infiniband/hw/cxgb4/mem.c | 12 ++++++++++++ drivers/infiniband/hw/cxgb4/qp.c | 16 ++++++++-------- 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 867b8cf82be8..19c6477af19f 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -666,18 +666,6 @@ skip_cqe: return ret; } -static void invalidate_mr(struct c4iw_dev *rhp, u32 rkey) -{ - struct c4iw_mr *mhp; - unsigned long flags; - - spin_lock_irqsave(&rhp->lock, flags); - mhp = get_mhp(rhp, rkey >> 8); - if (mhp) - mhp->attr.state = 0; - spin_unlock_irqrestore(&rhp->lock, flags); -} - /* * Get one cq entry from c4iw and map it to openib. * @@ -733,7 +721,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) { wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe); wc->wc_flags |= IB_WC_WITH_INVALIDATE; - invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey); + c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey); } } else { switch (CQE_OPCODE(&cqe)) { @@ -762,7 +750,8 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) /* Invalidate the MR if the fastreg failed */ if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS) - invalidate_mr(qhp->rhp, CQE_WRID_FR_STAG(&cqe)); + c4iw_invalidate_mr(qhp->rhp, + CQE_WRID_FR_STAG(&cqe)); break; default: printk(KERN_ERR MOD "Unexpected opcode %d " diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 7e7f79e55006..4788e1a46fde 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -999,6 +999,6 @@ extern int db_coalescing_threshold; extern int use_dsgl; void c4iw_drain_rq(struct ib_qp *qp); void c4iw_drain_sq(struct ib_qp *qp); - +void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey); #endif diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 80e27749420a..410408f886c1 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -770,3 +770,15 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr) kfree(mhp); return 0; } + +void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey) +{ + struct c4iw_mr *mhp; + unsigned long flags; + + spin_lock_irqsave(&rhp->lock, flags); + mhp = get_mhp(rhp, rkey >> 8); + if (mhp) + mhp->attr.state = 0; + spin_unlock_irqrestore(&rhp->lock, flags); +} diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 5790e1dbd618..b7ac97b27c88 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -706,12 +706,8 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, return 0; } -static int build_inv_stag(struct c4iw_dev *dev, union t4_wr *wqe, - struct ib_send_wr *wr, u8 *len16) +static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) { - struct c4iw_mr *mhp = get_mhp(dev, wr->ex.invalidate_rkey >> 8); - - mhp->attr.state = 0; wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); wqe->inv.r2 = 0; *len16 = DIV_ROUND_UP(sizeof wqe->inv, 16); @@ -842,10 +838,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_READ_WITH_INV: fw_opcode = FW_RI_RDMA_READ_WR; swsqe->opcode = FW_RI_READ_REQ; - if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) + if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) { + c4iw_invalidate_mr(qhp->rhp, + wr->sg_list[0].lkey); fw_flags = FW_RI_RDMA_READ_INVALIDATE; - else + } else { fw_flags = 0; + } err = build_rdma_read(wqe, wr, &len16); if (err) break; @@ -878,7 +877,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_flags |= FW_RI_LOCAL_FENCE_FLAG; fw_opcode = FW_RI_INV_LSTAG_WR; swsqe->opcode = FW_RI_LOCAL_INV; - err = build_inv_stag(qhp->rhp, wqe, wr, &len16); + err = build_inv_stag(wqe, wr, &len16); + c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey); break; default: PDBG("%s post of type=%d TBD!\n", __func__, From 4a59015372840a6fc35d7fd40638a9d5dc3ec958 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 13 Nov 2016 21:23:34 +0100 Subject: [PATCH 280/305] xattr: Fix setting security xattrs on sockfs The IOP_XATTR flag is set on sockfs because sockfs supports getting the "system.sockprotoname" xattr. Since commit 6c6ef9f2, this flag is checked for setxattr support as well. This is wrong on sockfs because security xattr support there is supposed to be provided by security_inode_setsecurity. The smack security module relies on socket labels (xattrs). Fix this by adding a security xattr handler on sockfs that returns -EAGAIN, and by checking for -EAGAIN in setxattr. We cannot simply check for -EOPNOTSUPP in setxattr because there are filesystems that neither have direct security xattr support nor support via security_inode_setsecurity. A more proper fix might be to move the call to security_inode_setsecurity into sockfs, but it's not clear to me if that is safe: we would end up calling security_inode_post_setxattr after that as well. Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro --- fs/xattr.c | 22 ++++++++++++++-------- net/socket.c | 15 +++++++++++++++ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/fs/xattr.c b/fs/xattr.c index 3368659c471e..2d13b4e62fae 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -170,7 +170,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { struct inode *inode = dentry->d_inode; - int error = -EOPNOTSUPP; + int error = -EAGAIN; int issec = !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); @@ -183,15 +183,21 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, security_inode_post_setxattr(dentry, name, value, size, flags); } - } else if (issec) { - const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; - + } else { if (unlikely(is_bad_inode(inode))) return -EIO; - error = security_inode_setsecurity(inode, suffix, value, - size, flags); - if (!error) - fsnotify_xattr(dentry); + } + if (error == -EAGAIN) { + error = -EOPNOTSUPP; + + if (issec) { + const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; + + error = security_inode_setsecurity(inode, suffix, value, + size, flags); + if (!error) + fsnotify_xattr(dentry); + } } return error; diff --git a/net/socket.c b/net/socket.c index 272518b087c8..73dc69f9681e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -341,8 +341,23 @@ static const struct xattr_handler sockfs_xattr_handler = { .get = sockfs_xattr_get, }; +static int sockfs_security_xattr_set(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *suffix, const void *value, + size_t size, int flags) +{ + /* Handled by LSM. */ + return -EAGAIN; +} + +static const struct xattr_handler sockfs_security_xattr_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .set = sockfs_security_xattr_set, +}; + static const struct xattr_handler *sockfs_xattr_handlers[] = { &sockfs_xattr_handler, + &sockfs_security_xattr_handler, NULL }; From 680bb946a1ae04fe0ff369a4965f76b48c07dc54 Mon Sep 17 00:00:00 2001 From: Abhi Das Date: Wed, 16 Nov 2016 21:44:23 -0600 Subject: [PATCH 281/305] fix iov_iter_advance() for ITER_PIPE iov_iter_advance() needs to decrement iter->count by the number of bytes we'd moved beyond. Normal flavours do that, but ITER_PIPE doesn't and ITER_PIPE generic_file_read_iter() for O_DIRECT files ends up with a bogus fallback to page cache read, resulting in incorrect values for file offset and bytes read. Signed-off-by: Abhi Das Signed-off-by: Al Viro --- lib/iov_iter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f0c7f1481bae..f2bd21b93dfc 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -683,10 +683,11 @@ static void pipe_advance(struct iov_iter *i, size_t size) struct pipe_inode_info *pipe = i->pipe; struct pipe_buffer *buf; int idx = i->idx; - size_t off = i->iov_offset; + size_t off = i->iov_offset, orig_sz; if (unlikely(i->count < size)) size = i->count; + orig_sz = size; if (size) { if (off) /* make it relative to the beginning of buffer */ @@ -713,6 +714,7 @@ static void pipe_advance(struct iov_iter *i, size_t size) pipe->nrbufs--; } } + i->count -= orig_sz; } void iov_iter_advance(struct iov_iter *i, size_t size) From d5afc1b68a6ddc27746d31f775025afe75ec8122 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 16 Nov 2016 10:24:15 -0800 Subject: [PATCH 282/305] dmaengine: cppi41: More PM runtime fixes Fix use of u32 instead of int for checking for negative errors values as pointed out by Dan Carpenter . And while testing the PM runtime error path by randomly returning failed values in runtime resume, I noticed two more places that need fixing: - If pm_runtime_get_sync() fails in probe, we still need to do pm_runtime_put_sync() to keep the use count happy. We could call pm_runtime_put_noidle() on the error path, but we're just going to call pm_runtime_disable() after that so pm_runtime_put_sync() will do what we want - We should print an error if pm_runtime_get_sync() fails in cppi41_dma_alloc_chan_resources() so we know where it happens Reported-by: Dan Carpenter Fixes: 740b4be3f742 ("dmaengine: cpp41: Fix handling of error path") Signed-off-by: Tony Lindgren Signed-off-by: Vinod Koul --- drivers/dma/cppi41.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index 4b52126c13cf..d5ba43a87a68 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -317,11 +317,12 @@ static irqreturn_t cppi41_irq(int irq, void *data) while (val) { u32 desc, len; + int error; - status = pm_runtime_get(cdd->ddev.dev); - if (status < 0) + error = pm_runtime_get(cdd->ddev.dev); + if (error < 0) dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n", - __func__, status); + __func__, error); q_num = __fls(val); val &= ~(1 << q_num); @@ -367,6 +368,8 @@ static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan) error = pm_runtime_get_sync(cdd->ddev.dev); if (error < 0) { + dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n", + __func__, error); pm_runtime_put_noidle(cdd->ddev.dev); return error; @@ -1072,8 +1075,8 @@ err_chans: deinit_cppi41(dev, cdd); err_init_cppi: pm_runtime_dont_use_autosuspend(dev); - pm_runtime_put_sync(dev); err_get_sync: + pm_runtime_put_sync(dev); pm_runtime_disable(dev); iounmap(cdd->usbss_mem); iounmap(cdd->ctrl_mem); From a5a40d4624cd2328c69768f6eb41716fc249d7be Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Oct 2016 23:29:10 +0200 Subject: [PATCH 283/305] crypto: caam - fix type mismatch warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building the caam driver on arm64 produces a harmless warning: drivers/crypto/caam/caamalg.c:140:139: warning: comparison of distinct pointer types lacks a cast We can use min_t to tell the compiler which type we want it to use here. Fixes: 5ecf8ef9103c ("crypto: caam - fix sg dump") Signed-off-by: Arnd Bergmann Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index f5a63ba97023..954a64c7757b 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -137,7 +137,7 @@ static void dbg_dump_sg(const char *level, const char *prefix_str, } buf = it_page + it->offset; - len = min(tlen, it->length); + len = min_t(size_t, tlen, it->length); print_hex_dump(level, prefix_str, prefix_type, rowsize, groupsize, buf, len, ascii); tlen -= len; From 5d1904204c99596b50a700f092fe49d78edba400 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Thu, 10 Nov 2016 17:16:33 +0800 Subject: [PATCH 284/305] mremap: fix race between mremap() and page cleanning Prior to 3.15, there was a race between zap_pte_range() and page_mkclean() where writes to a page could be lost. Dave Hansen discovered by inspection that there is a similar race between move_ptes() and page_mkclean(). We've been able to reproduce the issue by enlarging the race window with a msleep(), but have not been able to hit it without modifying the code. So, we think it's a real issue, but is difficult or impossible to hit in practice. The zap_pte_range() issue is fixed by commit 1cf35d47712d("mm: split 'tlb_flush_mmu()' into tlb flushing and memory freeing parts"). And this patch is to fix the race between page_mkclean() and mremap(). Here is one possible way to hit the race: suppose a process mmapped a file with READ | WRITE and SHARED, it has two threads and they are bound to 2 different CPUs, e.g. CPU1 and CPU2. mmap returned X, then thread 1 did a write to addr X so that CPU1 now has a writable TLB for addr X on it. Thread 2 starts mremaping from addr X to Y while thread 1 cleaned the page and then did another write to the old addr X again. The 2nd write from thread 1 could succeed but the value will get lost. thread 1 thread 2 (bound to CPU1) (bound to CPU2) 1: write 1 to addr X to get a writeable TLB on this CPU 2: mremap starts 3: move_ptes emptied PTE for addr X and setup new PTE for addr Y and then dropped PTL for X and Y 4: page laundering for N by doing fadvise FADV_DONTNEED. When done, pageframe N is deemed clean. 5: *write 2 to addr X 6: tlb flush for addr X 7: munmap (Y, pagesize) to make the page unmapped 8: fadvise with FADV_DONTNEED again to kick the page off the pagecache 9: pread the page from file to verify the value. If 1 is there, it means we have lost the written 2. *the write may or may not cause segmentation fault, it depends on if the TLB is still on the CPU. Please note that this is only one specific way of how the race could occur, it didn't mean that the race could only occur in exact the above config, e.g. more than 2 threads could be involved and fadvise() could be done in another thread, etc. For anonymous pages, they could race between mremap() and page reclaim: THP: a huge PMD is moved by mremap to a new huge PMD, then the new huge PMD gets unmapped/splitted/pagedout before the flush tlb happened for the old huge PMD in move_page_tables() and we could still write data to it. The normal anonymous page has similar situation. To fix this, check for any dirty PTE in move_ptes()/move_huge_pmd() and if any, did the flush before dropping the PTL. If we did the flush for every move_ptes()/move_huge_pmd() call then we do not need to do the flush in move_pages_tables() for the whole range. But if we didn't, we still need to do the whole range flush. Alternatively, we can track which part of the range is flushed in move_ptes()/move_huge_pmd() and which didn't to avoid flushing the whole range in move_page_tables(). But that would require multiple tlb flushes for the different sub-ranges and should be less efficient than the single whole range flush. KBuild test on my Sandybridge desktop doesn't show any noticeable change. v4.9-rc4: real 5m14.048s user 32m19.800s sys 4m50.320s With this commit: real 5m13.888s user 32m19.330s sys 4m51.200s Reported-by: Dave Hansen Signed-off-by: Aaron Lu Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 2 +- mm/huge_memory.c | 9 ++++++++- mm/mremap.c | 30 +++++++++++++++++++++--------- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 9b9f65d99873..e35e6de633b9 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -22,7 +22,7 @@ extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned char *vec); extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, - pmd_t *old_pmd, pmd_t *new_pmd); + pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush); extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index cdcd25cb30fe..eff3de359d50 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1426,11 +1426,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, - pmd_t *old_pmd, pmd_t *new_pmd) + pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush) { spinlock_t *old_ptl, *new_ptl; pmd_t pmd; struct mm_struct *mm = vma->vm_mm; + bool force_flush = false; if ((old_addr & ~HPAGE_PMD_MASK) || (new_addr & ~HPAGE_PMD_MASK) || @@ -1455,6 +1456,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, new_ptl = pmd_lockptr(mm, new_pmd); if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); + if (pmd_present(*old_pmd) && pmd_dirty(*old_pmd)) + force_flush = true; pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd); VM_BUG_ON(!pmd_none(*new_pmd)); @@ -1467,6 +1470,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); if (new_ptl != old_ptl) spin_unlock(new_ptl); + if (force_flush) + flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE); + else + *need_flush = true; spin_unlock(old_ptl); return true; } diff --git a/mm/mremap.c b/mm/mremap.c index da22ad2a5678..6ccecc03f56a 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -104,11 +104,13 @@ static pte_t move_soft_dirty_pte(pte_t pte) static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, unsigned long old_addr, unsigned long old_end, struct vm_area_struct *new_vma, pmd_t *new_pmd, - unsigned long new_addr, bool need_rmap_locks) + unsigned long new_addr, bool need_rmap_locks, bool *need_flush) { struct mm_struct *mm = vma->vm_mm; pte_t *old_pte, *new_pte, pte; spinlock_t *old_ptl, *new_ptl; + bool force_flush = false; + unsigned long len = old_end - old_addr; /* * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma @@ -146,6 +148,14 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, new_pte++, new_addr += PAGE_SIZE) { if (pte_none(*old_pte)) continue; + + /* + * We are remapping a dirty PTE, make sure to + * flush TLB before we drop the PTL for the + * old PTE or we may race with page_mkclean(). + */ + if (pte_present(*old_pte) && pte_dirty(*old_pte)) + force_flush = true; pte = ptep_get_and_clear(mm, old_addr, old_pte); pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); pte = move_soft_dirty_pte(pte); @@ -156,6 +166,10 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, if (new_ptl != old_ptl) spin_unlock(new_ptl); pte_unmap(new_pte - 1); + if (force_flush) + flush_tlb_range(vma, old_end - len, old_end); + else + *need_flush = true; pte_unmap_unlock(old_pte - 1, old_ptl); if (need_rmap_locks) drop_rmap_locks(vma); @@ -201,13 +215,12 @@ unsigned long move_page_tables(struct vm_area_struct *vma, if (need_rmap_locks) take_rmap_locks(vma); moved = move_huge_pmd(vma, old_addr, new_addr, - old_end, old_pmd, new_pmd); + old_end, old_pmd, new_pmd, + &need_flush); if (need_rmap_locks) drop_rmap_locks(vma); - if (moved) { - need_flush = true; + if (moved) continue; - } } split_huge_pmd(vma, old_pmd, old_addr); if (pmd_trans_unstable(old_pmd)) @@ -220,11 +233,10 @@ unsigned long move_page_tables(struct vm_area_struct *vma, extent = next - new_addr; if (extent > LATENCY_LIMIT) extent = LATENCY_LIMIT; - move_ptes(vma, old_pmd, old_addr, old_addr + extent, - new_vma, new_pmd, new_addr, need_rmap_locks); - need_flush = true; + move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma, + new_pmd, new_addr, need_rmap_locks, &need_flush); } - if (likely(need_flush)) + if (need_flush) flush_tlb_range(vma, old_end-len, old_addr); mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end); From e9f01049d1ea4679a3258b8423fe54bae424ee0e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 18 Nov 2016 10:26:39 +1000 Subject: [PATCH 285/305] Revert "drm/mediatek: fix a typo of OD_CFG to OD_RELAYMODE" This reverts commit 83ba62bc700bab710b22be3a1bf6cf973f754273. Signed-off-by: Dave Airlie --- drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c index aa5f20fabd10..df33b3ca6ffd 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c @@ -123,7 +123,7 @@ static void mtk_od_config(struct mtk_ddp_comp *comp, unsigned int w, unsigned int bpc) { writel(w << 16 | h, comp->regs + DISP_OD_SIZE); - writel(OD_RELAYMODE, comp->regs + OD_CFG); + writel(OD_RELAYMODE, comp->regs + OD_RELAYMODE); mtk_dither_set(comp, bpc, DISP_OD_CFG); } From 7d40c2cf080950eab63a0747482027f5f1dae0d3 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 18 Nov 2016 10:27:00 +1000 Subject: [PATCH 286/305] Revert "drm/mediatek: set vblank_disable_allowed to true" This reverts commit f752fff611b99f5679224f3990a1f531ea64b1ec. Signed-off-by: Dave Airlie --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 0b2ae47eb52c..cf83f6507ec8 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -217,7 +217,6 @@ static int mtk_drm_kms_init(struct drm_device *drm) if (ret < 0) goto err_component_unbind; - drm->vblank_disable_allowed = true; drm_kms_helper_poll_init(drm); drm_mode_config_reset(drm); From 1c8018f7a7a60a649260fdd7e8645a356299e920 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Wed, 2 Nov 2016 08:57:04 +0100 Subject: [PATCH 287/305] ipmi/bt-bmc: change compatible node to 'aspeed, ast2400-ibt-bmc' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Aspeed SoCs have two BT interfaces : one is IPMI compliant and the other is H8S/2168 compliant. The current ipmi/bt-bmc driver implements the IPMI version and we should reflect its nature in the compatible node name using 'aspeed,ast2400-ibt-bmc' instead of 'aspeed,ast2400-bt-bmc'. The latter should be used for a H8S interface driver if it is implemented one day. Signed-off-by: Cédric Le Goater Signed-off-by: Olof Johansson --- .../{aspeed,ast2400-bt-bmc.txt => aspeed,ast2400-ibt-bmc.txt} | 4 ++-- drivers/char/ipmi/bt-bmc.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) rename Documentation/devicetree/bindings/ipmi/{aspeed,ast2400-bt-bmc.txt => aspeed,ast2400-ibt-bmc.txt} (85%) diff --git a/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-bt-bmc.txt b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt similarity index 85% rename from Documentation/devicetree/bindings/ipmi/aspeed,ast2400-bt-bmc.txt rename to Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt index fbbacd958240..6f28969af9dc 100644 --- a/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-bt-bmc.txt +++ b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt @@ -6,7 +6,7 @@ perform in-band IPMI communication with their host. Required properties: -- compatible : should be "aspeed,ast2400-bt-bmc" +- compatible : should be "aspeed,ast2400-ibt-bmc" - reg: physical address and size of the registers Optional properties: @@ -17,7 +17,7 @@ Optional properties: Example: ibt@1e789140 { - compatible = "aspeed,ast2400-bt-bmc"; + compatible = "aspeed,ast2400-ibt-bmc"; reg = <0x1e789140 0x18>; interrupts = <8>; }; diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c index b49e61320952..fc9e8891eae3 100644 --- a/drivers/char/ipmi/bt-bmc.c +++ b/drivers/char/ipmi/bt-bmc.c @@ -484,7 +484,7 @@ static int bt_bmc_remove(struct platform_device *pdev) } static const struct of_device_id bt_bmc_match[] = { - { .compatible = "aspeed,ast2400-bt-bmc" }, + { .compatible = "aspeed,ast2400-ibt-bmc" }, { }, }; @@ -502,4 +502,4 @@ module_platform_driver(bt_bmc_driver); MODULE_DEVICE_TABLE(of, bt_bmc_match); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alistair Popple "); -MODULE_DESCRIPTION("Linux device interface to the BT interface"); +MODULE_DESCRIPTION("Linux device interface to the IPMI BT interface"); From 68d85d0e03eab60c238ebe673c7cea1cf70275d4 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 29 Oct 2016 16:31:17 +0000 Subject: [PATCH 288/305] i2c: digicolor: use clk_disable_unprepare instead of clk_unprepare since clk_prepare_enable() is used to get i2c->clk, we should use clk_disable_unprepare() to release it for the error path. Signed-off-by: Wei Yongjun Acked-by: Baruch Siach Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-digicolor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-digicolor.c b/drivers/i2c/busses/i2c-digicolor.c index 49f2084f7bb5..50813a24c541 100644 --- a/drivers/i2c/busses/i2c-digicolor.c +++ b/drivers/i2c/busses/i2c-digicolor.c @@ -347,7 +347,7 @@ static int dc_i2c_probe(struct platform_device *pdev) ret = i2c_add_adapter(&i2c->adap); if (ret < 0) { - clk_unprepare(i2c->clk); + clk_disable_unprepare(i2c->clk); return ret; } From 96ed1fe511a8b4948e53f3bad431d8737e8f231f Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 18 Nov 2016 14:08:56 +1100 Subject: [PATCH 289/305] powerpc/mm/radix: Invalidate ERAT on tlbiel for POWER9 DD1 On POWER9 DD1, when we do a local TLB invalidate we also need to explicitly invalidate the ERAT. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 1 + arch/powerpc/mm/tlb-radix.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 0132831b3081..c56ea8c84abb 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -460,5 +460,6 @@ #define PPC_SLBIA(IH) stringify_in_c(.long PPC_INST_SLBIA | \ ((IH & 0x7) << 21)) +#define PPC_INVALIDATE_ERAT PPC_SLBIA(7) #endif /* _ASM_POWERPC_PPC_OPCODE_H */ diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index bda8c43be78a..3493cf4e0452 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -50,6 +50,8 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) { __tlbiel_pid(pid, set, ric); } + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); return; } @@ -83,6 +85,8 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid, asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("ptesync": : :"memory"); + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); } static inline void _tlbie_va(unsigned long va, unsigned long pid, From 9e3f7a29694049edd728e2400ab57ad7553e5aa9 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 16 Nov 2016 09:20:57 +0000 Subject: [PATCH 290/305] arm64: KVM: pmu: Fix AArch32 cycle counter access We're missing the handling code for the cycle counter accessed from a 32bit guest, leading to unexpected results. Cc: stable@vger.kernel.org # 4.6+ Signed-off-by: Wei Huang Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f302fdb3a030..87e7e6608cd8 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -597,8 +597,14 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, idx = ARMV8_PMU_CYCLE_IDX; } else { - BUG(); + return false; } + } else if (r->CRn == 0 && r->CRm == 9) { + /* PMCCNTR */ + if (pmu_access_event_counter_el0_disabled(vcpu)) + return false; + + idx = ARMV8_PMU_CYCLE_IDX; } else if (r->CRn == 14 && (r->CRm & 12) == 8) { /* PMEVCNTRn_EL0 */ if (pmu_access_event_counter_el0_disabled(vcpu)) @@ -606,7 +612,7 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, idx = ((r->CRm & 3) << 3) | (r->Op2 & 7); } else { - BUG(); + return false; } if (!pmu_counter_idx_valid(vcpu, idx)) From b112c84a6ff035271d41d548c10215f18443d6a6 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 16 Nov 2016 11:09:20 -0600 Subject: [PATCH 291/305] KVM: arm64: Fix the issues when guest PMCCFILTR is configured KVM calls kvm_pmu_set_counter_event_type() when PMCCFILTR is configured. But this function can't deals with PMCCFILTR correctly because the evtCount bits of PMCCFILTR, which is reserved 0, conflits with the SW_INCR event type of other PMXEVTYPER registers. To fix it, when eventsel == 0, this function shouldn't return immediately; instead it needs to check further if select_idx is ARMV8_PMU_CYCLE_IDX. Another issue is that KVM shouldn't copy the eventsel bits of PMCCFILTER blindly to attr.config. Instead it ought to convert the request to the "cpu cycle" event type (i.e. 0x11). To support this patch and to prevent duplicated definitions, a limited set of ARMv8 perf event types were relocated from perf_event.c to asm/perf_event.h. Cc: stable@vger.kernel.org # 4.6+ Acked-by: Will Deacon Signed-off-by: Wei Huang Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/perf_event.h | 10 +++++++++- arch/arm64/kernel/perf_event.c | 10 +--------- virt/kvm/arm/pmu.c | 8 +++++--- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 2065f46fa740..38b6a2b49d68 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -46,7 +46,15 @@ #define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */ #define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */ -#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR 0 /* Software increment event */ +/* + * PMUv3 event types: required events + */ +#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04 +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10 +#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11 +#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12 /* * Event filters for PMUv3 diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index a9310a69fffd..57ae9d9ed9bb 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -31,17 +31,9 @@ /* * ARMv8 PMUv3 Performance Events handling code. - * Common event types. + * Common event types (some are defined in asm/perf_event.h). */ -/* Required events. */ -#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04 -#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10 -#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11 -#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12 - /* At least one of the following is required. */ #define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08 #define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 6e9c40eea208..69ccce308458 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -305,7 +305,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) continue; type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i) & ARMV8_PMU_EVTYPE_EVENT; - if ((type == ARMV8_PMU_EVTYPE_EVENT_SW_INCR) + if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR) && (enable & BIT(i))) { reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; reg = lower_32_bits(reg); @@ -379,7 +379,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, eventsel = data & ARMV8_PMU_EVTYPE_EVENT; /* Software increment event does't need to be backed by a perf event */ - if (eventsel == ARMV8_PMU_EVTYPE_EVENT_SW_INCR) + if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR && + select_idx != ARMV8_PMU_CYCLE_IDX) return; memset(&attr, 0, sizeof(struct perf_event_attr)); @@ -391,7 +392,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0; attr.exclude_hv = 1; /* Don't count EL2 events */ attr.exclude_host = 1; /* Don't count host events */ - attr.config = eventsel; + attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ? + ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel; counter = kvm_pmu_get_counter_value(vcpu, select_idx); /* The initial sample period (overflow count) of an event. */ From cac4a185405d4415eca269cae976438b44a37ae0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 17 Nov 2016 15:46:23 +0530 Subject: [PATCH 292/305] powerpc/mm: Fix missing update of HID register on secondary CPUs We need to update on secondaries for the selected MMU mode. Fixes: ad410674f560 ("powerpc/mm: Update the HID bit when switching from radix to hash") Reported-by: Michael Neuling Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 4 ++++ arch/powerpc/mm/pgtable-radix.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 44d3c3a38e3e..5503078090cd 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1029,6 +1029,10 @@ void hash__early_init_mmu_secondary(void) { /* Initialize hash table for that CPU */ if (!firmware_has_feature(FW_FEATURE_LPAR)) { + + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + update_hid_for_hash(); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) mtspr(SPRN_SDR1, _SDR1); else diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index ed7bddc456b7..688b54517655 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -388,6 +388,10 @@ void radix__early_init_mmu_secondary(void) * update partition table control register and UPRT */ if (!firmware_has_feature(FW_FEATURE_LPAR)) { + + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + update_hid_for_radix(); + lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); From a8348bca2944d397a528772f5c0ccb47a8b58af4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 17 Nov 2016 22:07:58 +0800 Subject: [PATCH 293/305] crypto: algif_hash - Fix NULL hash crash with shash Recently algif_hash has been changed to allow null hashes. This triggers a bug when used with an shash algorithm whereby it will cause a crash during the digest operation. This patch fixes it by avoiding the digest operation and instead doing an init followed by a final which avoids the buggy code in shash. This patch also ensures that the result buffer is freed after an error so that it is not returned as a genuine hash result on the next recv call. The shash/ahash wrapper code will be fixed later to handle this case correctly. Fixes: 493b2ed3f760 ("crypto: algif_hash - Handle NULL hashes correctly") Signed-off-by: Herbert Xu Tested-by: Laura Abbott --- crypto/algif_hash.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 2d8466f9e49b..05e21b464433 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -214,23 +214,26 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0); - if (ctx->more) { + if (!result) { + err = af_alg_wait_for_completion( + crypto_ahash_init(&ctx->req), + &ctx->completion); + if (err) + goto unlock; + } + + if (!result || ctx->more) { ctx->more = 0; err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req), &ctx->completion); if (err) goto unlock; - } else if (!result) { - err = af_alg_wait_for_completion( - crypto_ahash_digest(&ctx->req), - &ctx->completion); } err = memcpy_to_msg(msg, ctx->result, len); - hash_free_result(sk, ctx); - unlock: + hash_free_result(sk, ctx); release_sock(sk); return err ?: len; From dbfa048db97c15ee3fff2ee17b19e61f3ab12d53 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Tue, 15 Nov 2016 11:12:05 +0100 Subject: [PATCH 294/305] MAINTAINERS: Add LED subsystem co-maintainer Mark me as a co-maintainer of LED subsystem. Signed-off-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 411e3b87b8c2..2433e471634f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7084,6 +7084,7 @@ F: drivers/scsi/53c700* LED SUBSYSTEM M: Richard Purdie M: Jacek Anaszewski +M: Pavel Machek L: linux-leds@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewski/linux-leds.git S: Maintained From 8b9534406456313beb7bf9051150b50c63049ab7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 16 Nov 2016 18:31:30 +0100 Subject: [PATCH 295/305] KVM: x86: do not go through vcpu in __get_kvmclock_ns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Going through the first VCPU is wrong if you follow a KVM_SET_CLOCK with a KVM_GET_CLOCK immediately after, without letting the VCPU run and call kvm_guest_time_update. To fix this, compute the kvmclock value ourselves, using the master clock (tsc, nsec) pair as the base and the host CPU frequency as the scale. Reported-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3017de0431bd..7d3d9d4d6124 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1724,18 +1724,23 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) static u64 __get_kvmclock_ns(struct kvm *kvm) { - struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, 0); struct kvm_arch *ka = &kvm->arch; - s64 ns; + struct pvclock_vcpu_time_info hv_clock; - if (vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT) { - u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc()); - ns = __pvclock_read_cycles(&vcpu->arch.hv_clock, tsc); - } else { - ns = ktime_get_boot_ns() + ka->kvmclock_offset; + spin_lock(&ka->pvclock_gtod_sync_lock); + if (!ka->use_master_clock) { + spin_unlock(&ka->pvclock_gtod_sync_lock); + return ktime_get_boot_ns() + ka->kvmclock_offset; } - return ns; + hv_clock.tsc_timestamp = ka->master_cycle_now; + hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset; + spin_unlock(&ka->pvclock_gtod_sync_lock); + + kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, + &hv_clock.tsc_shift, + &hv_clock.tsc_to_system_mul); + return __pvclock_read_cycles(&hv_clock, rdtsc()); } u64 get_kvmclock_ns(struct kvm *kvm) From 1650b4ebc99da4c137bfbfc531be4a2405f951dd Mon Sep 17 00:00:00 2001 From: Ignacio Alvarado Date: Fri, 4 Nov 2016 12:15:55 -0700 Subject: [PATCH 296/305] KVM: Disable irq while unregistering user notifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function user_notifier_unregister should be called only once for each registered user notifier. Function kvm_arch_hardware_disable can be executed from an IPI context which could cause a race condition with a VCPU returning to user mode and attempting to unregister the notifier. Signed-off-by: Ignacio Alvarado Cc: stable@vger.kernel.org Fixes: 18863bdd60f8 ("KVM: x86 shared msr infrastructure") Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7d3d9d4d6124..2f27af4f312a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -210,7 +210,18 @@ static void kvm_on_user_return(struct user_return_notifier *urn) struct kvm_shared_msrs *locals = container_of(urn, struct kvm_shared_msrs, urn); struct kvm_shared_msr_values *values; + unsigned long flags; + /* + * Disabling irqs at this point since the following code could be + * interrupted and executed through kvm_arch_hardware_disable() + */ + local_irq_save(flags); + if (locals->registered) { + locals->registered = false; + user_return_notifier_unregister(urn); + } + local_irq_restore(flags); for (slot = 0; slot < shared_msrs_global.nr; ++slot) { values = &locals->values[slot]; if (values->host != values->curr) { @@ -218,8 +229,6 @@ static void kvm_on_user_return(struct user_return_notifier *urn) values->curr = values->host; } } - locals->registered = false; - user_return_notifier_unregister(urn); } static void shared_msr_update(unsigned slot, u32 msr) From e3fd9a93a12a1020067a676e826877623cee8e2b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 9 Nov 2016 17:48:15 +0100 Subject: [PATCH 297/305] kvm: kvmclock: let KVM_GET_CLOCK return whether the master clock is in use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userspace can read the exact value of kvmclock by reading the TSC and fetching the timekeeping parameters out of guest memory. This however is brittle and not necessary anymore with KVM 4.11. Provide a mechanism that lets userspace know if the new KVM_GET_CLOCK semantics are in effect, and---since we are at it---if the clock is stable across all VCPUs. Cc: Radim Krčmář Cc: Marcelo Tosatti Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- Documentation/virtual/kvm/api.txt | 11 +++++++++++ arch/x86/kvm/x86.c | 10 +++++++--- include/uapi/linux/kvm.h | 7 +++++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 739db9ab16b2..6bbceb9a3a19 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -777,6 +777,17 @@ Gets the current timestamp of kvmclock as seen by the current guest. In conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios such as migration. +When KVM_CAP_ADJUST_CLOCK is passed to KVM_CHECK_EXTENSION, it returns the +set of bits that KVM can return in struct kvm_clock_data's flag member. + +The only flag defined now is KVM_CLOCK_TSC_STABLE. If set, the returned +value is the exact kvmclock value seen by all VCPUs at the instant +when KVM_GET_CLOCK was called. If clear, the returned value is simply +CLOCK_MONOTONIC plus a constant offset; the offset can be modified +with KVM_SET_CLOCK. KVM will try to make all VCPUs follow this clock, +but the exact value read by each VCPU could differ, because the host +TSC is not stable. + struct kvm_clock_data { __u64 clock; /* kvmclock current value */ __u32 flags; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2f27af4f312a..3320804bb2ac 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2610,7 +2610,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_PIT_STATE2: case KVM_CAP_SET_IDENTITY_MAP_ADDR: case KVM_CAP_XEN_HVM: - case KVM_CAP_ADJUST_CLOCK: case KVM_CAP_VCPU_EVENTS: case KVM_CAP_HYPERV: case KVM_CAP_HYPERV_VAPIC: @@ -2637,6 +2636,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #endif r = 1; break; + case KVM_CAP_ADJUST_CLOCK: + r = KVM_CLOCK_TSC_STABLE; + break; case KVM_CAP_X86_SMM: /* SMBASE is usually relocated above 1M on modern chipsets, * and SMM handlers might indeed rely on 4G segment limits, @@ -4117,9 +4119,11 @@ long kvm_arch_vm_ioctl(struct file *filp, struct kvm_clock_data user_ns; u64 now_ns; - now_ns = get_kvmclock_ns(kvm); + local_irq_disable(); + now_ns = __get_kvmclock_ns(kvm); user_ns.clock = now_ns; - user_ns.flags = 0; + user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0; + local_irq_enable(); memset(&user_ns.pad, 0, sizeof(user_ns.pad)); r = -EFAULT; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 300ef255d1e0..4ee67cb99143 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -972,12 +972,19 @@ struct kvm_irqfd { __u8 pad[16]; }; +/* For KVM_CAP_ADJUST_CLOCK */ + +/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */ +#define KVM_CLOCK_TSC_STABLE 2 + struct kvm_clock_data { __u64 clock; __u32 flags; __u32 pad[9]; }; +/* For KVM_CAP_SW_TLB */ + #define KVM_MMU_FSL_BOOKE_NOHV 0 #define KVM_MMU_FSL_BOOKE_HV 1 From 22583f0d9c85e60c9860bc8a0ebff59fe08be6d7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 17 Nov 2016 15:55:45 +0100 Subject: [PATCH 298/305] KVM: async_pf: avoid recursive flushing of work items MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was reported by syzkaller: [ INFO: possible recursive locking detected ] 4.9.0-rc4+ #49 Not tainted --------------------------------------------- kworker/2:1/5658 is trying to acquire lock: ([ 1644.769018] (&work->work) [< inline >] list_empty include/linux/compiler.h:243 [] flush_work+0x0/0x660 kernel/workqueue.c:1511 but task is already holding lock: ([ 1644.769018] (&work->work) [] process_one_work+0x94b/0x1900 kernel/workqueue.c:2093 stack backtrace: CPU: 2 PID: 5658 Comm: kworker/2:1 Not tainted 4.9.0-rc4+ #49 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: events async_pf_execute ffff8800676ff630 ffffffff81c2e46b ffffffff8485b930 ffff88006b1fc480 0000000000000000 ffffffff8485b930 ffff8800676ff7e0 ffffffff81339b27 ffff8800676ff7e8 0000000000000046 ffff88006b1fcce8 ffff88006b1fccf0 Call Trace: ... [] flush_work+0x93/0x660 kernel/workqueue.c:2846 [] __cancel_work_timer+0x17a/0x410 kernel/workqueue.c:2916 [] cancel_work_sync+0x17/0x20 kernel/workqueue.c:2951 [] kvm_clear_async_pf_completion_queue+0xd7/0x400 virt/kvm/async_pf.c:126 [< inline >] kvm_free_vcpus arch/x86/kvm/x86.c:7841 [] kvm_arch_destroy_vm+0x23d/0x620 arch/x86/kvm/x86.c:7946 [< inline >] kvm_destroy_vm virt/kvm/kvm_main.c:731 [] kvm_put_kvm+0x40e/0x790 virt/kvm/kvm_main.c:752 [] async_pf_execute+0x23d/0x4f0 virt/kvm/async_pf.c:111 [] process_one_work+0x9fc/0x1900 kernel/workqueue.c:2096 [] worker_thread+0xef/0x1480 kernel/workqueue.c:2230 [] kthread+0x244/0x2d0 kernel/kthread.c:209 [] ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:433 The reason is that kvm_put_kvm is causing the destruction of the VM, but the page fault is still on the ->queue list. The ->queue list is owned by the VCPU, not by the work items, so we cannot just add list_del to the work item. Instead, use work->vcpu to note async page faults that have been resolved and will be processed through the done list. There is no need to flush those. Cc: Dmitry Vyukov Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- virt/kvm/async_pf.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 8035cc1eb955..efeceb0a222d 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -91,6 +91,7 @@ static void async_pf_execute(struct work_struct *work) spin_lock(&vcpu->async_pf.lock); list_add_tail(&apf->link, &vcpu->async_pf.done); + apf->vcpu = NULL; spin_unlock(&vcpu->async_pf.lock); /* @@ -113,6 +114,8 @@ static void async_pf_execute(struct work_struct *work) void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) { + spin_lock(&vcpu->async_pf.lock); + /* cancel outstanding work queue item */ while (!list_empty(&vcpu->async_pf.queue)) { struct kvm_async_pf *work = @@ -120,6 +123,14 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) typeof(*work), queue); list_del(&work->queue); + /* + * We know it's present in vcpu->async_pf.done, do + * nothing here. + */ + if (!work->vcpu) + continue; + + spin_unlock(&vcpu->async_pf.lock); #ifdef CONFIG_KVM_ASYNC_PF_SYNC flush_work(&work->work); #else @@ -129,9 +140,9 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) kmem_cache_free(async_pf_cache, work); } #endif + spin_lock(&vcpu->async_pf.lock); } - spin_lock(&vcpu->async_pf.lock); while (!list_empty(&vcpu->async_pf.done)) { struct kvm_async_pf *work = list_first_entry(&vcpu->async_pf.done, From 7301d6abaea926d685832f7e1f0c37dd206b01f4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 17 Nov 2016 15:55:46 +0100 Subject: [PATCH 299/305] KVM: x86: fix missed SRCU usage in kvm_lapic_set_vapic_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: [ INFO: suspicious RCU usage. ] 4.9.0-rc4+ #47 Not tainted ------------------------------- ./include/linux/kvm_host.h:536 suspicious rcu_dereference_check() usage! stack backtrace: CPU: 1 PID: 6679 Comm: syz-executor Not tainted 4.9.0-rc4+ #47 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffff880039e2f6d0 ffffffff81c2e46b ffff88003e3a5b40 0000000000000000 0000000000000001 ffffffff83215600 ffff880039e2f700 ffffffff81334ea9 ffffc9000730b000 0000000000000004 ffff88003c4f8420 ffff88003d3f8000 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0xb3/0x118 lib/dump_stack.c:51 [] lockdep_rcu_suspicious+0x139/0x180 kernel/locking/lockdep.c:4445 [< inline >] __kvm_memslots include/linux/kvm_host.h:534 [< inline >] kvm_memslots include/linux/kvm_host.h:541 [] kvm_gfn_to_hva_cache_init+0xa1e/0xce0 virt/kvm/kvm_main.c:1941 [] kvm_lapic_set_vapic_addr+0xed/0x140 arch/x86/kvm/lapic.c:2217 Reported-by: Dmitry Vyukov Fixes: fda4e2e85589191b123d31cdc21fd33ee70f50fd Cc: Andrew Honig Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3320804bb2ac..04c5d96b1d67 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3431,6 +3431,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, }; case KVM_SET_VAPIC_ADDR: { struct kvm_vapic_addr va; + int idx; r = -EINVAL; if (!lapic_in_kernel(vcpu)) @@ -3438,7 +3439,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&va, argp, sizeof va)) goto out; + idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } case KVM_X86_SETUP_MCE: { From a2b07739ff5ded8ca7e9c7ff0749ed6f0d36aee2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 17 Nov 2016 15:55:47 +0100 Subject: [PATCH 300/305] kvm: x86: merge kvm_arch_set_irq and kvm_arch_set_irq_inatomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm_arch_set_irq is unused since commit b97e6de9c96. Merge its functionality with kvm_arch_set_irq_inatomic. Reported-by: Jiang Biao Signed-off-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/irq_comm.c | 58 +++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 25810b144b58..4da03030d5a7 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -156,6 +156,16 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, } +static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + if (!level) + return -1; + + return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint); +} + int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) @@ -163,18 +173,26 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, struct kvm_lapic_irq irq; int r; - if (unlikely(e->type != KVM_IRQ_ROUTING_MSI)) - return -EWOULDBLOCK; + switch (e->type) { + case KVM_IRQ_ROUTING_HV_SINT: + return kvm_hv_set_sint(e, kvm, irq_source_id, level, + line_status); - if (kvm_msi_route_invalid(kvm, e)) - return -EINVAL; + case KVM_IRQ_ROUTING_MSI: + if (kvm_msi_route_invalid(kvm, e)) + return -EINVAL; - kvm_set_msi_irq(kvm, e, &irq); + kvm_set_msi_irq(kvm, e, &irq); - if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) - return r; - else - return -EWOULDBLOCK; + if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) + return r; + break; + + default: + break; + } + + return -EWOULDBLOCK; } int kvm_request_irq_source_id(struct kvm *kvm) @@ -254,16 +272,6 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, srcu_read_unlock(&kvm->irq_srcu, idx); } -static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, int level, - bool line_status) -{ - if (!level) - return -1; - - return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint); -} - int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) @@ -423,18 +431,6 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, srcu_read_unlock(&kvm->irq_srcu, idx); } -int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, - int irq_source_id, int level, bool line_status) -{ - switch (irq->type) { - case KVM_IRQ_ROUTING_HV_SINT: - return kvm_hv_set_sint(irq, kvm, irq_source_id, level, - line_status); - default: - return -EWOULDBLOCK; - } -} - void kvm_arch_irq_routing_update(struct kvm *kvm) { kvm_hv_irq_routing_update(kvm); From ad092de60f865c1ad94221bd06d381ecea446cc8 Mon Sep 17 00:00:00 2001 From: Alex Hemme Date: Sat, 19 Nov 2016 10:48:38 +0100 Subject: [PATCH 301/305] i2c: i2c-mux-pca954x: fix deselect enabling for device-tree Deselect functionality can be ignored for device-trees with "i2c-mux-idle-disconnect" entries if no platform_data is available. By enabling the deselect functionality outside the platform_data block the logic works as it did in previous kernels. Fixes: 7fcac9807175 ("i2c: i2c-mux-pca954x: convert to use an explicit i2c mux core") Cc: # v4.7+ Signed-off-by: Alex Hemme Signed-off-by: Ziyang Wu [touched up a few minor issues /peda] Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-mux-pca954x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index 1091346f2480..8bc3d36d2837 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -268,9 +268,9 @@ static int pca954x_probe(struct i2c_client *client, /* discard unconfigured channels */ break; idle_disconnect_pd = pdata->modes[num].deselect_on_exit; - data->deselect |= (idle_disconnect_pd - || idle_disconnect_dt) << num; } + data->deselect |= (idle_disconnect_pd || + idle_disconnect_dt) << num; ret = i2c_mux_add_adapter(muxc, force, num, class); From 3c7018ebf8dbf14e7cd4f5dc648c51fc979f45bb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 13 Nov 2016 20:35:52 -0500 Subject: [PATCH 302/305] fscrypto: don't use on-stack buffer for filename encryption With the new (in 4.9) option to use a virtually-mapped stack (CONFIG_VMAP_STACK), stack buffers cannot be used as input/output for the scatterlist crypto API because they may not be directly mappable to struct page. For short filenames, fname_encrypt() was encrypting a stack buffer holding the padded filename. Fix it by encrypting the filename in-place in the output buffer, thereby making the temporary buffer unnecessary. This bug could most easily be observed in a CONFIG_DEBUG_SG kernel because this allowed the BUG in sg_set_buf() to be triggered. Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/crypto/fname.c | 53 +++++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 9a28133ac3b8..9b774f4b50c8 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -39,65 +39,54 @@ static void fname_crypt_complete(struct crypto_async_request *req, int res) static int fname_encrypt(struct inode *inode, const struct qstr *iname, struct fscrypt_str *oname) { - u32 ciphertext_len; struct skcipher_request *req = NULL; DECLARE_FS_COMPLETION_RESULT(ecr); struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_ctfm; int res = 0; char iv[FS_CRYPTO_BLOCK_SIZE]; - struct scatterlist src_sg, dst_sg; + struct scatterlist sg; int padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK); - char *workbuf, buf[32], *alloc_buf = NULL; - unsigned lim; + unsigned int lim; + unsigned int cryptlen; lim = inode->i_sb->s_cop->max_namelen(inode); if (iname->len <= 0 || iname->len > lim) return -EIO; - ciphertext_len = max(iname->len, (u32)FS_CRYPTO_BLOCK_SIZE); - ciphertext_len = round_up(ciphertext_len, padding); - ciphertext_len = min(ciphertext_len, lim); + /* + * Copy the filename to the output buffer for encrypting in-place and + * pad it with the needed number of NUL bytes. + */ + cryptlen = max_t(unsigned int, iname->len, FS_CRYPTO_BLOCK_SIZE); + cryptlen = round_up(cryptlen, padding); + cryptlen = min(cryptlen, lim); + memcpy(oname->name, iname->name, iname->len); + memset(oname->name + iname->len, 0, cryptlen - iname->len); - if (ciphertext_len <= sizeof(buf)) { - workbuf = buf; - } else { - alloc_buf = kmalloc(ciphertext_len, GFP_NOFS); - if (!alloc_buf) - return -ENOMEM; - workbuf = alloc_buf; - } + /* Initialize the IV */ + memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); - /* Allocate request */ + /* Set up the encryption request */ req = skcipher_request_alloc(tfm, GFP_NOFS); if (!req) { printk_ratelimited(KERN_ERR - "%s: crypto_request_alloc() failed\n", __func__); - kfree(alloc_buf); + "%s: skcipher_request_alloc() failed\n", __func__); return -ENOMEM; } skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, fname_crypt_complete, &ecr); + sg_init_one(&sg, oname->name, cryptlen); + skcipher_request_set_crypt(req, &sg, &sg, cryptlen, iv); - /* Copy the input */ - memcpy(workbuf, iname->name, iname->len); - if (iname->len < ciphertext_len) - memset(workbuf + iname->len, 0, ciphertext_len - iname->len); - - /* Initialize IV */ - memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); - - /* Create encryption request */ - sg_init_one(&src_sg, workbuf, ciphertext_len); - sg_init_one(&dst_sg, oname->name, ciphertext_len); - skcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv); + /* Do the encryption */ res = crypto_skcipher_encrypt(req); if (res == -EINPROGRESS || res == -EBUSY) { + /* Request is being completed asynchronously; wait for it */ wait_for_completion(&ecr.completion); res = ecr.res; } - kfree(alloc_buf); skcipher_request_free(req); if (res < 0) { printk_ratelimited(KERN_ERR @@ -105,7 +94,7 @@ static int fname_encrypt(struct inode *inode, return res; } - oname->len = ciphertext_len; + oname->len = cryptlen; return 0; } From 0f0909e242f73c1154272cf04f07fc9afe13e5b8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 13 Nov 2016 20:41:09 -0500 Subject: [PATCH 303/305] fscrypto: don't use on-stack buffer for key derivation With the new (in 4.9) option to use a virtually-mapped stack (CONFIG_VMAP_STACK), stack buffers cannot be used as input/output for the scatterlist crypto API because they may not be directly mappable to struct page. get_crypt_info() was using a stack buffer to hold the output from the encryption operation used to derive the per-file key. Fix it by using a heap buffer. This bug could most easily be observed in a CONFIG_DEBUG_SG kernel because this allowed the BUG in sg_set_buf() to be triggered. Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/crypto/keyinfo.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 82f0285f5d08..67fb6d8876d0 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -185,7 +185,7 @@ int get_crypt_info(struct inode *inode) struct crypto_skcipher *ctfm; const char *cipher_str; int keysize; - u8 raw_key[FS_MAX_KEY_SIZE]; + u8 *raw_key = NULL; int res; res = fscrypt_initialize(); @@ -238,6 +238,15 @@ retry: if (res) goto out; + /* + * This cannot be a stack buffer because it is passed to the scatterlist + * crypto API as part of key derivation. + */ + res = -ENOMEM; + raw_key = kmalloc(FS_MAX_KEY_SIZE, GFP_NOFS); + if (!raw_key) + goto out; + if (fscrypt_dummy_context_enabled(inode)) { memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE); goto got_key; @@ -276,7 +285,8 @@ got_key: if (res) goto out; - memzero_explicit(raw_key, sizeof(raw_key)); + kzfree(raw_key); + raw_key = NULL; if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) != NULL) { put_crypt_info(crypt_info); goto retry; @@ -287,7 +297,7 @@ out: if (res == -ENOKEY) res = 0; put_crypt_info(crypt_info); - memzero_explicit(raw_key, sizeof(raw_key)); + kzfree(raw_key); return res; } From 8cdf3372fe8368f56315e66bea9f35053c418093 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 18 Nov 2016 13:00:24 -0500 Subject: [PATCH 304/305] ext4: sanity check the block and cluster size at mount time If the block size or cluster size is insane, reject the mount. This is important for security reasons (although we shouldn't be just depending on this check). Ref: http://www.securityfocus.com/archive/1/539661 Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1332506 Reported-by: Borislav Petkov Reported-by: Nikolay Borisov Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/ext4.h | 1 + fs/ext4/super.c | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 282a51b07c57..a8a750f59621 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -235,6 +235,7 @@ struct ext4_io_submit { #define EXT4_MAX_BLOCK_SIZE 65536 #define EXT4_MIN_BLOCK_LOG_SIZE 10 #define EXT4_MAX_BLOCK_LOG_SIZE 16 +#define EXT4_MAX_CLUSTER_LOG_SIZE 30 #ifdef __KERNEL__ # define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) #else diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 20da99da0a34..52b0530c5d65 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3565,7 +3565,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (blocksize < EXT4_MIN_BLOCK_SIZE || blocksize > EXT4_MAX_BLOCK_SIZE) { ext4_msg(sb, KERN_ERR, - "Unsupported filesystem blocksize %d", blocksize); + "Unsupported filesystem blocksize %d (%d log_block_size)", + blocksize, le32_to_cpu(es->s_log_block_size)); + goto failed_mount; + } + if (le32_to_cpu(es->s_log_block_size) > + (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log block size: %u", + le32_to_cpu(es->s_log_block_size)); goto failed_mount; } @@ -3697,6 +3705,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "block size (%d)", clustersize, blocksize); goto failed_mount; } + if (le32_to_cpu(es->s_log_cluster_size) > + (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log cluster size: %u", + le32_to_cpu(es->s_log_cluster_size)); + goto failed_mount; + } sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - le32_to_cpu(es->s_log_block_size); sbi->s_clusters_per_group = From 9c763584b7c8911106bb77af7e648bef09af9d80 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Nov 2016 13:52:19 -0800 Subject: [PATCH 305/305] Linux 4.9-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9bc877d073d7..0ede48ba5aaf 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Psychotic Stoned Sheep # *DOCUMENTATION*